Commit: Random Forest
AkashKobal committed Dec 19, 2023
1 parent d041b6c commit a63e604
Showing 29 changed files with 5,559 additions and 0 deletions.
@@ -0,0 +1,79 @@
# Linear Regression Single Variable
# How to predict home price using Machine Learning.
# We will use Linear Regression to predict the price of a home in the Bengaluru, YNK area.
# Price = m * area + b   (m = slope, b = Y intercept)
# area is the independent variable; price is the dependent variable (it depends on area)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# importing the data file using pandas
df = pd.read_csv("Linear_Regression_Single_Variable_(DataSet).csv")
df

%matplotlib inline

# plotting a scatter plot to get an idea of the data:
# .scatter(df.<x-axis column>, df.<y-axis column>, additional features (color, size, marker))
plt.scatter(df.area, df.price, color = "red", marker="+")
plt.xlabel("area(sq ft)") #labeling the x-axis
plt.ylabel("price(INR)") #labeling the y-axis

reg = linear_model.LinearRegression() # creating a linear regression object using the linear_model package from sklearn
# reg is the model name
reg.fit(df[["area"]],df.price) # fit the data (training the model with the available data set)
# passing the arguments, i.e. a 2D DataFrame (area) for the x-axis and price for the y-axis
# Now it is ready to predict the price.

# Doing prediction
reg.predict([[3300]])
# By giving a new area, it predicts the corresponding price
# y = m * x + b
reg.coef_       # to find the coefficient (m)
reg.intercept_  # to find the intercept (b)
# y = m * x + b
y = 135.78767123 * 3300 + 180616.43835616432  # 3300 is the area for which we want to predict the price
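# Optional sanity check (a small sketch, not part of the original script): pull m and b
# directly from the fitted model instead of hard-coding them; the result should match reg.predict.
m = reg.coef_[0]    # slope learned by the model
b = reg.intercept_  # intercept learned by the model
manual_price = m * 3300 + b
print(manual_price)  # should equal reg.predict([[3300]])[0]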
# plotting the regression line using the predicted data (x-axis: df.area, y-axis: reg.predict(df[['area']]))

plt.scatter(df.area, df.price, color = "red", marker="+")
plt.xlabel("area(sq ft)") #labeling the x-axis
plt.ylabel("price(INR)") #labeling the y-axis
plt.plot(df.area, reg.predict(df[["area"]]), color = "blue") #plotting the line
plt.show()
# plotting the line using the formula y = m * x + b
# df without price
d = pd.read_csv("Linear_Regression_Single_Variable_(DataSet with area only).csv")
d.head(3)
# predicting prices for the new data set using the previously trained model
# the previous data set contains area and price; the new data set contains only area,
# so here we predict the price for every row using the model trained on the previous data set
p = reg.predict(d)
d['price'] = p  # creating a price column to store/display the predicted prices
d
# to export the data (write it back into the same csv file)
# d.to_csv("Linear_Regression_Single_Variable_(DataSet with area only).csv", index=False)  # index=False removes the index column (which pandas adds by default while exporting)
# Exercise: predict Canada's per capita income for the year 2020 using canada_per_capita_income.csv

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
data = pd.read_csv("canada_per_capita_income.csv")
data.head(5)

%matplotlib inline
reg = linear_model.LinearRegression()
reg.fit(data[['year']], data.income)  # train the model on year vs income

plt.scatter(data.year, data.income, color="blue", marker="*")
plt.xlabel("year")
plt.ylabel("income")
plt.plot(data.year, reg.predict(data[['year']]), color="red")
plt.show()

reg.predict([[2020]])  # predicted per capita income for the year 2020
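# Optional sketch: reg.score gives the R^2 of the fit on the training data,
# a rough measure of how well a straight line captures the income trend (closer to 1 is better).
print(reg.score(data[['year']], data.income))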
@@ -0,0 +1,56 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'Prediction\\\\stockData.csv'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Users\\Admin\\Desktop\\ADS Github\\Learnings\\Linear_Regression_Single_Variable.ipynb Cell 2\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/Admin/Desktop/ADS%20Github/Learnings/Linear_Regression_Single_Variable.ipynb#W1sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(\u001b[39m\"\u001b[39;49m\u001b[39mPrediction\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mstockData.csv\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/Admin/Desktop/ADS%20Github/Learnings/Linear_Regression_Single_Variable.ipynb#W1sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m df\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\parsers\\readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 900\u001b[0m dialect,\n\u001b[0;32m 901\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 908\u001b[0m dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[0;32m 909\u001b[0m )\n\u001b[0;32m 910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\parsers\\readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 574\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m 576\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 577\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[0;32m 579\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[0;32m 580\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\parsers\\readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1404\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 1406\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m-> 1407\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\parsers\\readers.py:1661\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1659\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m mode:\n\u001b[0;32m 1660\u001b[0m mode \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m-> 1661\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39m=\u001b[39m get_handle(\n\u001b[0;32m 1662\u001b[0m f,\n\u001b[0;32m 1663\u001b[0m mode,\n\u001b[0;32m 1664\u001b[0m encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1665\u001b[0m compression\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mcompression\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1666\u001b[0m memory_map\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mmemory_map\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mFalse\u001b[39;49;00m),\n\u001b[0;32m 1667\u001b[0m is_text\u001b[39m=\u001b[39;49mis_text,\n\u001b[0;32m 1668\u001b[0m errors\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding_errors\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstrict\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 1669\u001b[0m storage_options\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mstorage_options\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1670\u001b[0m )\n\u001b[0;32m 1671\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 1672\u001b[0m f \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles\u001b[39m.\u001b[39mhandle\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\io\\common.py:859\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 854\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(handle, \u001b[39mstr\u001b[39m):\n\u001b[0;32m 855\u001b[0m \u001b[39m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 856\u001b[0m \u001b[39m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 857\u001b[0m \u001b[39mif\u001b[39;00m ioargs\u001b[39m.\u001b[39mencoding \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m ioargs\u001b[39m.\u001b[39mmode:\n\u001b[0;32m 858\u001b[0m \u001b[39m# Encoding\u001b[39;00m\n\u001b[1;32m--> 859\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(\n\u001b[0;32m 860\u001b[0m handle,\n\u001b[0;32m 861\u001b[0m ioargs\u001b[39m.\u001b[39;49mmode,\n\u001b[0;32m 862\u001b[0m encoding\u001b[39m=\u001b[39;49mioargs\u001b[39m.\u001b[39;49mencoding,\n\u001b[0;32m 863\u001b[0m errors\u001b[39m=\u001b[39;49merrors,\n\u001b[0;32m 864\u001b[0m newline\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 865\u001b[0m )\n\u001b[0;32m 866\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 867\u001b[0m \u001b[39m# Binary mode\u001b[39;00m\n\u001b[0;32m 868\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(handle, ioargs\u001b[39m.\u001b[39mmode)\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'Prediction\\\\stockData.csv'"
]
}
],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -0,0 +1,14 @@
area
1000
1500
2300
3540
4120
4560
5490
3860
4750
2300
9000
8600
7100
@@ -0,0 +1,6 @@
area,price
2600,550000
3000,565000
3200,610000
3600,680000
4000,725000
@@ -0,0 +1,95 @@
print("Support Vector Machine")
# z = x^2 + y^2
# z is a transformation: it lifts 2-D points into a third dimension so that classes which are
# not linearly separable in the original space can be separated by a plane (the idea behind SVM kernels)
#importing iris dataset from sklearn.datasets
import pandas as pd
from sklearn.datasets import load_iris
iris = load_iris()
#get the features of the dataset
iris.feature_names
#get the target of the dataset
iris.target_names

#convert dataset into dataframe
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.head()
# create a target column
df['target'] = iris.target
df.head()

df0 = df[df.target==0]
df1 = df[df.target==1]
df2 = df[df.target==2]
df['flower_name'] =df.target.apply(lambda x: iris.target_names[x])
# lambda is a small anonymous function; here it maps each numeric target value to the
# corresponding flower name, and we use apply() to run it over the whole column
# target value 0 is converted into setosa
# target value 1 is converted into versicolor
# target value 2 is converted into virginica
df.head()
df0.head()# 0 for setosa
df1.head()# 1 for versicolor
df2.head()# 2 for virginica
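# Optional check: the iris dataset is balanced, so each flower name should appear 50 times.
print(df.flower_name.value_counts())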
# creating the graphs for better visualization
import matplotlib.pyplot as plt
# **Sepal length vs Sepal Width (Setosa vs Versicolor)**
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.scatter(df0['sepal length (cm)'], df0['sepal width (cm)'],color="green",marker='+')
plt.scatter(df1['sepal length (cm)'], df1['sepal width (cm)'],color="blue",marker='.')


# **Petal length vs Petal Width (Setosa vs Versicolor)**
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.scatter(df0['petal length (cm)'], df0['petal width (cm)'],color="green",marker='+')
plt.scatter(df1['petal length (cm)'], df1['petal width (cm)'],color="blue",marker='.')

# **Train Using Support Vector Machine (SVM)**
from sklearn.model_selection import train_test_split
X = df.drop(['target','flower_name'], axis='columns')
y = df.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
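# Note: train_test_split shuffles randomly, so the scores below will vary from run to run;
# passing a fixed random_state (e.g. random_state=42) makes the split reproducible.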

# check the number of rows in the training set
len(X_train)
len(y_train)
#print out the first 5 rows of X_train and y_train
X_train.head()
y_train.head()

from sklearn.svm import SVC
#create an object to train.
model = SVC()
#train the model fit function
model.fit(X_train, y_train)

#check the score of the trained model
acc = model.score(X_test, y_test)
#print the accuracy of the test data
print("Accuracy of the test data is: ",acc*100,"%")
# prediction for setosa
model.predict([[4.8,3.0,1.5,0.3]])
# model.predict([[new sepal length (cm), new sepal width (cm), new petal length (cm), new petal width (cm)]])
# prediction for versicolor
model.predict([[6.0,2.9,4.5,1.5]])
# model.predict([[new sepal length (cm), new sepal width (cm), new petal length (cm), new petal width (cm)]])
# prediction for virginica
model.predict([[6.0,3.4,4.5,2.8]])
# model.predict([[new sepal length (cm), new sepal width (cm), new petal length (cm), new petal width (cm)]])
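# A small optional sketch: predict several samples at once and map the numeric labels
# back to flower names (the two measurement rows below are just illustrative values).
preds = model.predict([[4.8, 3.0, 1.5, 0.3], [6.0, 2.9, 4.5, 1.5]])
print([iris.target_names[p] for p in preds])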
# **Tune parameters**
# **1. Regularization (C)**
model_C = SVC(C=1)
model_C.fit(X_train, y_train)
model_C.score(X_test, y_test)
model_C = SVC(C=10)
model_C.fit(X_train, y_train)
model_C.score(X_test, y_test)
# **2. Gamma**
model_g = SVC(gamma=10)
model_g.fit(X_train, y_train)
model_g.score(X_test, y_test)
# **3. Kernel**
model_linear_kernel = SVC(kernel='linear')
model_linear_kernel.fit(X_train, y_train)
model_linear_kernel.score(X_test, y_test)
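# A minimal optional sketch: instead of trying C, gamma and kernel by hand as above,
# GridSearchCV can search the combinations with cross-validation (the grid below is just an
# illustrative assumption; adjust it as needed).
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [1, 10, 100],
    'gamma': ['scale', 1, 10],
    'kernel': ['rbf', 'linear'],
}
grid = GridSearchCV(SVC(), param_grid, cv=5)
grid.fit(X_train, y_train)
print(grid.best_params_)                            # best combination found by cross-validation
print(grid.best_estimator_.score(X_test, y_test))   # accuracy of that model on the held-out test set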