Commit: Random Forest
AkashKobal committed Dec 19, 2023
1 parent d041b6c commit a63e604
Showing 29 changed files with 5,559 additions and 0 deletions.
@@ -0,0 +1,79 @@
# Linear Regression Single Variable
# How to predict home price using Machine Learning.
# We will use Linear Regression to predict the price of a home in the Bengaluru, YNK area.
# Price = m * area + b   (m = slope, b = Y intercept)
# area is the independent variable; price is the dependent variable (it depends on area)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# importing the data file using pandas
df = pd.read_csv("Linear_Regression_Single_Variable_(DataSet).csv")
df

%matplotlib inline

# plotting a scatter plot to get an idea of the data:
# .scatter(df.<x-axis column>, df.<y-axis column>, additional features (color, size, marker))
plt.scatter(df.area, df.price, color = "red", marker="+")
plt.xlabel("area(sq ft)") #labeling the x-axis
plt.ylabel("price(INR)") #labeling the y-axis

reg = linear_model.LinearRegression() # creating a linear regression object using the linear_model package from sklearn
# reg is the model name
reg.fit(df[["area"]],df.price) # fit the data (training the model with the available data set)
# passing the arguments, i.e. a 2D DataFrame (area) for the x-axis and price for the y-axis
# Now it is ready to predict the price.

# Doing prediction
reg.predict([[3300]])
# By giving a new area, it predicts the corresponding price
# y = m * x + b
reg.coef_       # to find the coefficient (m)
reg.intercept_  # to find the intercept (b)
# y = m * x + b
y = 135.78767123 * 3300 + 180616.43835616432  # 3300 is the area for which we want to predict the price
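# Optional sanity check (a small sketch, not part of the original script): pull m and b
# directly from the fitted model instead of hard-coding them; the result should match reg.predict.
m = reg.coef_[0]    # slope learned by the model
b = reg.intercept_  # intercept learned by the model
manual_price = m * 3300 + b
print(manual_price)  # should equal reg.predict([[3300]])[0]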
# plotting the regression line using the predicted data (x-axis: df.area, y-axis: reg.predict(df[['area']]))

plt.scatter(df.area, df.price, color = "red", marker="+")
plt.xlabel("area(sq ft)") #labeling the x-axis
plt.ylabel("price(INR)") #labeling the y-axis
plt.plot(df.area, reg.predict(df[["area"]]), color = "blue") #plotting the line
plt.show()
# plotting the line using the formula y = m * x + b
# df without price
d = pd.read_csv("Linear_Regression_Single_Variable_(DataSet with area only).csv")
d.head(3)
# predicting prices for the new data set using the previously trained model
# the previous data set contains area and price; the new data set contains only area,
# so here we predict the price for every row using the model trained on the previous data set
p = reg.predict(d)
d['price'] = p  # creating a price column to store/display the predicted prices
d
# to export the data (write it back into the same csv file)
# d.to_csv("Linear_Regression_Single_Variable_(DataSet with area only).csv", index=False)  # index=False removes the index column (which pandas adds by default while exporting)
# Exercise: predict Canada's per capita income for the year 2020 using canada_per_capita_income.csv

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
data = pd.read_csv("canada_per_capita_income.csv")
data.head(5)

%matplotlib inline
reg = linear_model.LinearRegression()
reg.fit(data[['year']], data.income)  # train the model on year vs income

plt.scatter(data.year, data.income, color="blue", marker="*")
plt.xlabel("year")
plt.ylabel("income")
plt.plot(data.year, reg.predict(data[['year']]), color="red")
plt.show()

reg.predict([[2020]])  # predicted per capita income for the year 2020
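# Optional sketch: reg.score gives the R^2 of the fit on the training data,
# a rough measure of how well a straight line captures the income trend (closer to 1 is better).
print(reg.score(data[['year']], data.income))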
@@ -0,0 +1,56 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'Prediction\\\\stockData.csv'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Users\\Admin\\Desktop\\ADS Github\\Learnings\\Linear_Regression_Single_Variable.ipynb Cell 2\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/Admin/Desktop/ADS%20Github/Learnings/Linear_Regression_Single_Variable.ipynb#W1sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(\u001b[39m\"\u001b[39;49m\u001b[39mPrediction\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mstockData.csv\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/Admin/Desktop/ADS%20Github/Learnings/Linear_Regression_Single_Variable.ipynb#W1sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m df\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\parsers\\readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 900\u001b[0m dialect,\n\u001b[0;32m 901\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 908\u001b[0m dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[0;32m 909\u001b[0m )\n\u001b[0;32m 910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\parsers\\readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 574\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m 576\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 577\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[0;32m 579\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[0;32m 580\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\parsers\\readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1404\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 1406\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m-> 1407\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\parsers\\readers.py:1661\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1659\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m mode:\n\u001b[0;32m 1660\u001b[0m mode \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m-> 1661\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39m=\u001b[39m get_handle(\n\u001b[0;32m 1662\u001b[0m f,\n\u001b[0;32m 1663\u001b[0m mode,\n\u001b[0;32m 1664\u001b[0m encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1665\u001b[0m compression\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mcompression\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1666\u001b[0m memory_map\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mmemory_map\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mFalse\u001b[39;49;00m),\n\u001b[0;32m 1667\u001b[0m is_text\u001b[39m=\u001b[39;49mis_text,\n\u001b[0;32m 1668\u001b[0m errors\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding_errors\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstrict\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 1669\u001b[0m storage_options\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mstorage_options\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1670\u001b[0m )\n\u001b[0;32m 1671\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 1672\u001b[0m f \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles\u001b[39m.\u001b[39mhandle\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\common.py:859\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 854\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(handle, \u001b[39mstr\u001b[39m):\n\u001b[0;32m 855\u001b[0m \u001b[39m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 856\u001b[0m \u001b[39m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 857\u001b[0m \u001b[39mif\u001b[39;00m ioargs\u001b[39m.\u001b[39mencoding \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m ioargs\u001b[39m.\u001b[39mmode:\n\u001b[0;32m 858\u001b[0m \u001b[39m# Encoding\u001b[39;00m\n\u001b[1;32m--> 859\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(\n\u001b[0;32m 860\u001b[0m handle,\n\u001b[0;32m 861\u001b[0m ioargs\u001b[39m.\u001b[39;49mmode,\n\u001b[0;32m 862\u001b[0m encoding\u001b[39m=\u001b[39;49mioargs\u001b[39m.\u001b[39;49mencoding,\n\u001b[0;32m 863\u001b[0m errors\u001b[39m=\u001b[39;49merrors,\n\u001b[0;32m 864\u001b[0m newline\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 865\u001b[0m )\n\u001b[0;32m 866\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 867\u001b[0m \u001b[39m# Binary mode\u001b[39;00m\n\u001b[0;32m 868\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(handle, ioargs\u001b[39m.\u001b[39mmode)\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'Prediction\\\\stockData.csv'"
]
}
],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -0,0 +1,14 @@
area
1000
1500
2300
3540
4120
4560
5490
3860
4750
2300
9000
8600
7100
@@ -0,0 +1,6 @@
area,price
2600,550000
3000,565000
3200,610000
3600,680000
4000,725000
@@ -0,0 +1,95 @@
print("Support Vector Machine")
# z = x^2 + y^2
# z is a transformation: it lifts 2-D points into a third dimension so that classes which are
# not linearly separable in the original space can be separated by a plane (the idea behind SVM kernels)
#importing iris dataset from sklearn.datasets
import pandas as pd
from sklearn.datasets import load_iris
iris = load_iris()
#get the features of the dataset
iris.feature_names
#get the target of the dataset
iris.target_names

#convert dataset into dataframe
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.head()
# create a target column
df['target'] = iris.target
df.head()

df0 = df[df.target==0]
df1 = df[df.target==1]
df2 = df[df.target==2]
df['flower_name'] =df.target.apply(lambda x: iris.target_names[x])
# lambda is a small anonymous function; here it maps each numeric target value to the
# corresponding flower name, and we use apply() to run it over the whole column
# target value 0 is converted into setosa
# target value 1 is converted into versicolor
# target value 2 is converted into virginica
df.head()
df0.head()# 0 for setosa
df1.head()# 1 for versicolor
df2.head()# 2 for virginica
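# Optional check: the iris dataset is balanced, so each flower name should appear 50 times.
print(df.flower_name.value_counts())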
# creating the graphs for better visualization
import matplotlib.pyplot as plt
# **Sepal length vs Sepal Width (Setosa vs Versicolor)**
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.scatter(df0['sepal length (cm)'], df0['sepal width (cm)'],color="green",marker='+')
plt.scatter(df1['sepal length (cm)'], df1['sepal width (cm)'],color="blue",marker='.')


# **Petal length vs Petal Width (Setosa vs Versicolor)**
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.scatter(df0['petal length (cm)'], df0['petal width (cm)'],color="green",marker='+')
plt.scatter(df1['petal length (cm)'], df1['petal width (cm)'],color="blue",marker='.')

# **Train Using Support Vector Machine (SVM)**
from sklearn.model_selection import train_test_split
X = df.drop(['target','flower_name'], axis='columns')
y = df.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
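# Note: train_test_split shuffles randomly, so the scores below will vary from run to run;
# passing a fixed random_state (e.g. random_state=42) makes the split reproducible.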

# check the number of rows in the training set
len(X_train)
len(y_train)
#print out the first 5 rows of X_train and y_train
X_train.head()
y_train.head()

from sklearn.svm import SVC
#create an object to train.
model = SVC()
#train the model fit function
model.fit(X_train, y_train)

#check the score of the trained model
acc = model.score(X_test, y_test)
#print the accuracy of the test data
print("Accuracy of the test data is: ",acc*100,"%")
# prediction for setosa
model.predict([[4.8,3.0,1.5,0.3]])
# model.predict([[new sepal length (cm), new sepal width (cm), new petal length (cm), new petal width (cm)]])
# prediction for versicolor
model.predict([[6.0,2.9,4.5,1.5]])
# model.predict([[new sepal length (cm), new sepal width (cm), new petal length (cm), new petal width (cm)]])
# prediction for virginica
model.predict([[6.0,3.4,4.5,2.8]])
# model.predict([[new sepal length (cm), new sepal width (cm), new petal length (cm), new petal width (cm)]])
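# A small optional sketch: predict several samples at once and map the numeric labels
# back to flower names (the two measurement rows below are just illustrative values).
preds = model.predict([[4.8, 3.0, 1.5, 0.3], [6.0, 2.9, 4.5, 1.5]])
print([iris.target_names[p] for p in preds])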
# **Tune parameters**
# **1. Regularization (C)**
model_C = SVC(C=1)
model_C.fit(X_train, y_train)
model_C.score(X_test, y_test)
model_C = SVC(C=10)
model_C.fit(X_train, y_train)
model_C.score(X_test, y_test)
# **2. Gamma**
model_g = SVC(gamma=10)
model_g.fit(X_train, y_train)
model_g.score(X_test, y_test)
# **3. Kernel**
model_linear_kernel = SVC(kernel='linear')
model_linear_kernel.fit(X_train, y_train)
model_linear_kernel.score(X_test, y_test)
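# A minimal optional sketch: instead of trying C, gamma and kernel by hand as above,
# GridSearchCV can search the combinations with cross-validation (the grid below is just an
# illustrative assumption; adjust it as needed).
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [1, 10, 100],
    'gamma': ['scale', 1, 10],
    'kernel': ['rbf', 'linear'],
}
grid = GridSearchCV(SVC(), param_grid, cv=5)
grid.fit(X_train, y_train)
print(grid.best_params_)                            # best combination found by cross-validation
print(grid.best_estimator_.score(X_test, y_test))   # accuracy of that model on the held-out test set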