# Ridge and Lasso Regression.R
library(tidyverse)
library(glmnet)
library(gtools)  # provides combinations(), used in the subset search below
library(ISLR)
## Alternative dataset: diamonds (uncomment to use instead of mtcars)
# df <- diamonds
# df <- as.data.frame(df)
# df <- df %>% dplyr::relocate(price, .after = last_col())
# df <- df[, 1:(ncol(df) - 1)]  # drop the target column
# df$cut <- as.numeric(df$cut)
# df$color <- as.numeric(df$color)
# df$clarity <- as.numeric(df$clarity)
df <- mtcars
df <- df %>% dplyr::relocate(mpg, .after = last_col())  # move the target to the last column
names(df)[ncol(df)] <- "last"                           # rename the target column generically
df <- scale(df)                                         # standardize; scale() returns a matrix
# Expand every scaled column into raw polynomial terms up to degree 4.
# poly() returns an n x 4 matrix, which cannot be assigned back into a single
# column of df, so keep one matrix block per column in a list instead.
poly_blocks <- lapply(seq_len(ncol(df)), function(i) {
  block <- poly(df[, i], 4, raw = TRUE)
  colnames(block) <- paste0(colnames(df)[i], "_deg", 1:4)
  block
})
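# Note on poly(): with raw = TRUE it returns plain powers of the input
# (e.g. poly(1:3, 2, raw = TRUE) has the columns 1,2,3 and 1,4,9), whereas
# the default raw = FALSE returns orthogonal polynomials, which are more
# numerically stable but less interpretable. Raw powers are acceptable here
# because ridge regression tolerates the collinearity among the power terms.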
#### Initialize variables we will be using ####
MSEtemp <- 10000                               # best (lowest) MSE seen so far
MSE.df <- data.frame(MSE = 0)                  # history of improving MSE values
MSEtemp.df <- data.frame(MSEtemp = MSEtemp)    # previous bests, recorded before each improvement
y_predicted.df <- data.frame(y_predicted = 0)  # predictions of each improving model
r.squared.df <- data.frame(r.squared = 0)      # training R-squared of each improving model
# (The first row of each data frame is a placeholder from initialization.)
y <- mtcars$mpg  # This is our target; we will use it to measure the success of the analysis
poly_blocks <- poly_blocks[-length(poly_blocks)]  # remove the target's block (we just saved the raw target)
n_pred <- length(poly_blocks)                     # 10 base predictors for mtcars
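# The exhaustive search below fits a cross-validated ridge model for every
# non-empty subset of the 10 base predictors: choose(10,1) + ... + choose(10,10)
# = 2^10 - 1 = 1023 subsets in total, so the loop takes a while to run.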
grid <- 10^seq(10, -2, length = 100)  # lambda grid from near-null model down to near-OLS

for (i in 1:n_pred) {  # try every subset size of the base predictors
  print(sprintf("subset size %d of %d", i, n_pred))
  print(sprintf("best MSE so far = %s", MSEtemp))
  # All combinations (order does not matter) of i base predictors
  combin <- combinations(n = n_pred, r = i, repeats.allowed = FALSE)
  for (j in 1:nrow(combin)) {
    colvals <- combin[j, ]
    # Design matrix: the degree 1-4 polynomial columns of the chosen predictors.
    # The target y must not be bound into the predictor matrix.
    newdf <- do.call(cbind, poly_blocks[colvals])
    model <- glmnet(newdf, y, alpha = 0, lambda = grid)  # ridge: alpha = 0
    cv_model <- cv.glmnet(newdf, y, alpha = 0)           # cross-validate lambda
    best_lambda <- cv_model$lambda.min
    # Refit at the best lambda to get the coefficients of the best model
    best_model <- glmnet(newdf, y, alpha = 0, lambda = best_lambda)
    # Predictions on the training data at the best lambda
    y_predicted <- predict(best_model, s = best_lambda, newx = newdf)
    # Mean squared error: square each residual, then average
    MSE <- mean((y - y_predicted)^2)
    if (MSE < MSEtemp) {  # record every model that improves on the best so far
      MSEtemp.df <- rbind(MSEtemp.df, MSEtemp)  # log the previous best first
      MSEtemp <- MSE
      MSE.df <- rbind(MSE.df, MSE)
      y_predicted.df <- rbind(y_predicted.df,
                              data.frame(y_predicted = as.numeric(y_predicted)))
      saveRDS(best_model, "/tmp/best.ridge.model.rda")  # saveRDS returns NULL; nothing to assign
      # Training R-squared of the improving model
      sst <- sum((y - mean(y))^2)
      sse <- sum((y_predicted - y)^2)
      r.squared <- 1 - sse / sst
      r.squared.df <- rbind(r.squared.df, r.squared)
    }
  }
}
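# Optional: inspect the cross-validation curve of the most recently fitted
# model; plot() on a cv.glmnet object shows CV error against log(lambda).
# plot(cv_model)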
###### ------ output results to the user ------------- ###########
best.ridge.model <- readRDS("/tmp/best.ridge.model.rda")
best.ridge.model
print(tail(MSE.df))        # the lowest MSE values found, best model last
print(tail(MSEtemp.df))    # the best-so-far MSE recorded before each improvement
print(tail(r.squared.df))  # training R-squared of the improving models
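
#### Lasso counterpart ####
# The file is titled "Ridge and Lasso Regression", but everything above fits
# ridge models (alpha = 0). Below is a minimal lasso sketch reusing the
# poly_blocks and y built above; in glmnet the lasso is alpha = 1, and it can
# shrink coefficients exactly to zero, performing its own variable selection
# instead of the exhaustive subset search used for ridge.
x_full <- do.call(cbind, poly_blocks)        # all polynomial columns at once
cv_lasso <- cv.glmnet(x_full, y, alpha = 1)  # cross-validate the lasso lambda
best_lasso <- glmnet(x_full, y, alpha = 1, lambda = cv_lasso$lambda.min)
lasso_pred <- predict(best_lasso, newx = x_full)
print(sprintf("lasso training MSE = %f", mean((y - lasso_pred)^2)))
print(coef(best_lasso))  # coefficients shrunk to zero were dropped by the lasso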