-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhousing_common_funcs.py
25 lines (20 loc) · 995 Bytes
/
housing_common_funcs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
"""
Housing Dataset: Information about houses in the suburbs of Boston in 1978, contains 506 samples and 14 features
MEDV: Median value of owner-occupied homes in $1000s
"""
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
header=None, sep='\s+')
df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# print(df.head())
def lin_reg_plot(x, y, model, colors=['blue', 'red']):
plt.scatter(x, y, c=colors[0], alpha=0.5, edgecolors='black')
plt.plot(x, model.predict(x), color=colors[1])
def housing_initializer(test=0.3, r_state=0):
x = df.iloc[:, :-1].values
y = df['MEDV'].values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test, random_state=r_state)
return x_train, x_test, y_train, y_test