dl1mam.txt

------------------------------------------------------------------
# Reference
# https://inside-machinelearning.com/en/how-to-do-linear-regression-with-keras/
------------------------------------------------------------------
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import pandas as pd
------------------------------------------------------------------
# Load the housing dataset from "housing.csv" in the working directory.
data=pd.read_csv("housing.csv")
# Bare expression: in a notebook this renders the DataFrame as the cell output.
data
------------------------------------------------------------------
# Show the dtype of every column as the cell output.
data.dtypes
------------------------------------------------------------------
# Compute the pairwise correlation matrix of the columns in `data`;
# `corr` is reused by the heatmap cell below.
corr = data.corr()
# Display the matrix dimensions as the cell output.
corr.shape
------------------------------------------------------------------
# Render the feature-correlation matrix as an annotated greyscale heatmap
# on a large (20x20 inch) figure so the per-cell numbers stay legible.
plt.figure(figsize=(20, 20))
sns.heatmap(
    corr,
    annot=True,
    fmt='.1f',
    annot_kws={'size': 15},
    cmap='gray',
    square=True,
    cbar=True,
)
------------------------------------------------------------------
# Target is the 'MEDV' column; every other column is used as a feature.
# (The original author left a commented-out alternative here: data['area'].)
y = data['MEDV']
X = data.drop(columns=['MEDV'])
------------------------------------------------------------------
# Display the feature matrix as the cell output.
X
------------------------------------------------------------------
# Display the names of the feature columns.
X.columns
------------------------------------------------------------------
# Display the target column ('MEDV') as the cell output.
y
------------------------------------------------------------------
# Summary statistics of the target, shown as the cell output.
y.describe()
------------------------------------------------------------------
# Hold out 30% of the rows as a test set; the fixed random_state makes the
# split reproducible between runs.

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=4,
)
------------------------------------------------------------------
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, so the scaling parameters are
# not influenced by the test split.
scaler = MinMaxScaler().fit(X_train)

# Overwrite both splits with their scaled versions (same fitted scaler).
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
------------------------------------------------------------------
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
------------------------------------------------------------------
# Feed-forward regression network: four ReLU hidden layers (64, 64, 40, 54
# units), each followed by dropout, and one linear output unit for the
# predicted target value.
#
# The input width is read from the training matrix instead of being
# hard-coded to 13, so the network still builds if the set of feature
# columns selected for X changes.
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(64, input_dim=n_features, activation='relu'))
model.add(Dropout(0.15))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(40, activation='relu'))
model.add(Dropout(0.15))
model.add(Dense(54, activation='relu'))
model.add(Dropout(0.18))
model.add(Dense(1))  # no activation: unbounded regression output
------------------------------------------------------------------
# Optimise mean squared error with Adam, and additionally track MSE and MAE
# as metrics (they are read back from `history` and `model.evaluate` later).
model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mse', 'mae'])
------------------------------------------------------------------
# Train for 200 epochs, reserving 20% of the training data for validation.
# The returned History object is kept so the learning curves can be plotted.
history = model.fit(X_train, y_train, validation_split=0.2, epochs=200)
------------------------------------------------------------------
import matplotlib.pyplot as plt

# Learning curves: mean absolute error per epoch on the training data ('mae')
# and on the held-out validation data ('val_mae').
for curve in ('mae', 'val_mae'):
    plt.plot(history.history[curve], label=curve)

plt.legend()
------------------------------------------------------------------
# Evaluate the network on the test split. `scores` is indexed as
# [loss, mse, mae], following the loss/metrics passed to model.compile.
scores = model.evaluate(X_test, y_test, verbose=0)
test_mse, test_mae = scores[1], scores[2]

print('Mean Squared Error : ', test_mse)
print('Mean Absolute Error : ', test_mae)
------------------------------------------------------------------
# Network predictions for the (scaled) test features.
Y_pred = model.predict(X_test)
# Display the predictions as the cell output.
Y_pred
------------------------------------------------------------------
from sklearn.metrics import r2_score

# Coefficient of determination of the network's predictions on the test split.
nn_r2 = r2_score(y_test, Y_pred)
print('r2 score: ', nn_r2)
------------------------------------------------------------------
Y_pred = model.predict(X_test)

# Predicted-vs-true scatter on square axes. Both axes share the same fixed
# range, and the diagonal marks where prediction equals truth.
a = plt.axes(aspect='equal')
axis_range = [0, 50]

plt.xlabel('True values')
plt.ylabel('Predicted values')
plt.xlim(axis_range)
plt.ylim(axis_range)
plt.plot(axis_range, axis_range)
plt.scatter(y_test, Y_pred)
plt.plot()
------------------------------------------------------------------
# Plot the true test targets in sample order.
# y_test is a pandas Series that still carries the shuffled row labels from
# the train/test split; plotting the Series directly uses those labels as x
# and draws a zigzag. Plotting the raw values puts it on the same positional
# x-axis (0..n-1) as the plt.plot(Y_pred) figure.
plt.plot(y_test.to_numpy())

------------------------------------------------------------------
# Plot the network's test predictions in sample order.
plt.plot(Y_pred)
# Display the raw prediction array as the cell output.
Y_pred
------------------------------------------------------------------
# Spot-check: first five predictions next to the first five true targets.
print(Y_pred[:5])
print(y_test[:5])
# Same five true values again, rendered as the cell output.
y_test.head()
------------------------------------------------------------------
# Baseline for comparison: a scikit-learn LinearRegression model (`lm`)
------------------------------------------------------------------
# Import library for Linear Regression
from sklearn.linear_model import LinearRegression
------------------------------------------------------------------
# Create the linear regressor used as a baseline.
lm = LinearRegression()

# Fit it on the same scaled training split the neural network used.
# As the last expression in the cell, this also displays the fitted estimator.
lm.fit(X_train, y_train)
------------------------------------------------------------------
# Fitted intercept (bias term) of the linear model, shown as the cell output.
lm.intercept_
------------------------------------------------------------------
# Tabulate the fitted coefficient for each feature column.
# The frame is built from a dict of columns rather than by transposing a
# two-row frame: the transpose forces both columns to object dtype, whereas
# this keeps 'Coefficients' numeric so it can be sorted or plotted directly.
coeffcients = pd.DataFrame({'Attribute': X.columns, 'Coefficients': lm.coef_})
coeffcients
------------------------------------------------------------------
# Linear-model predictions on the training split (used for the in-sample
# metrics and residual plots below).
y_pred = lm.predict(X_train)
------------------------------------------------------------------
# In-sample (training split) fit quality of the linear model.
n_train, n_predictors = len(y_train), X_train.shape[1]
r2_train = metrics.r2_score(y_train, y_pred)
mse_train = metrics.mean_squared_error(y_train, y_pred)

print('R^2:', r2_train)
# Adjusted R^2 corrects R^2 for the number of predictors relative to samples.
print('Adjusted R^2:', 1 - (1 - r2_train) * (n_train - 1) / (n_train - n_predictors - 1))
print('MAE:', metrics.mean_absolute_error(y_train, y_pred))
print('MSE:', mse_train)
print('RMSE:', np.sqrt(mse_train))
------------------------------------------------------------------
# Scatter of actual training targets against the linear model's predictions.
plt.scatter(y_train, y_pred)
plt.title("Prices vs Predicted prices")
plt.ylabel("Predicted prices")
plt.xlabel("Prices")
plt.show()
------------------------------------------------------------------
# Residual plot: training residuals (actual - predicted) against predictions.
train_residuals = y_train - y_pred
plt.scatter(y_pred, train_residuals)
plt.xlabel("Predicted")
plt.ylabel("Residuals")
plt.title("Predicted vs residuals")
plt.show()
------------------------------------------------------------------
# Check whether the training residuals look roughly normally distributed.
# sns.distplot is deprecated in seaborn; histplot with kde=True is the
# supported replacement. It draws counts by default, which also matches the
# "Frequency" y-axis label used here.
sns.histplot(y_train - y_pred, kde=True)
plt.title("Histogram of Residuals")
plt.xlabel("Residuals")
plt.ylabel("Frequency")
plt.show()
------------------------------------------------------------------
# Linear-model predictions on the held-out test split.
y_test_pred = lm.predict(X_test)
------------------------------------------------------------------
# Out-of-sample (test split) fit quality of the linear model.
n_test, n_test_predictors = len(y_test), X_test.shape[1]
acc_linreg = metrics.r2_score(y_test, y_test_pred)
mse_test = metrics.mean_squared_error(y_test, y_test_pred)

print('R^2:', acc_linreg)
# Adjusted R^2 corrects R^2 for the number of predictors relative to samples.
print('Adjusted R^2:', 1 - (1 - acc_linreg) * (n_test - 1) / (n_test - n_test_predictors - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', mse_test)
print('RMSE:', np.sqrt(mse_test))
------------------------------------------------------------------
# Disabled scratch example, kept inside a string literal so it does not run:
# a toy true-vs-predicted scatter with an identity line on 0..10 axes.
"""
x1=[1,2,3,4,5,6]
y1=[1,2,4,5,7,6]
plt.xlabel('True values')
plt.ylabel('Predicted values')
plt.xlim([0,10])
plt.ylim([0,10])
plt.plot([0, 10], [0,10])
plt.scatter(x1,y1)
plt.plot()
"""