-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLinear_Regression_Model_RestarauntLocation_Profit.txt
94 lines (83 loc) · 2.85 KB
/
Linear_Regression_Model_RestarauntLocation_Profit.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas
import pandas as pd
import scipy.stats as stats
import sklearn
from sklearn import model_selection
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# scatter_matrix lives in pandas.plotting on current pandas; the old
# pandas.tools.plotting location is kept as a fallback for old installs.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix
# Work from the directory that holds the exercise data so the relative file
# name used below resolves.
os.chdir('C:\\Users\\hassaanrafique\\desktop\\ex1')

# Read the comma-separated numeric file into a 2-D array, then wrap it in a
# DataFrame (columns get the default integer labels 0, 1, ...).
housedata = np.loadtxt(fname='ex1data1.txt', delimiter=',')
Hos = pd.DataFrame(data=housedata)

# Exploratory plots: one histogram per column plus a pairwise scatter matrix,
# to get a first idea of how the columns depend on each other.
Hos.hist()
scatter_matrix(frame=Hos)
plt.show()
# --- Fit a linear regression on the complete data set ------------------------
# Column 0 is the single feature, column 1 is the target.
# NOTE(review): the axis labels below say "Prices" while the original file name
# mentions restaurant location/profit -- confirm what the two columns mean.
array = Hos.values
X = array[:, 0:1]  # slice (not index) so X stays 2-D: (n_samples, 1)
Y = array[:, 1]

# Initialize and fit the linear model.
lm = LinearRegression()
lm.fit(X, Y)
print("Estimated intercept coefficient",lm.intercept_)
print("Number of Coefficient",len(lm.coef_))

# Predict once and reuse the result for every diagnostic below.
predicted = lm.predict(X)

# Quick look at the first few predictions. The original bare expression
# discarded its value when run as a script, so print it explicitly.
print("First five predictions", predicted[0:5])

# Raw data: feature against target. Each diagnostic gets its own figure so
# the plots are not drawn on top of one another.
plt.figure()
plt.scatter(X, Y)

# Validation: observed target against fitted value.
plt.figure()
plt.scatter(Y, predicted)
plt.xlabel("Original Prices")
plt.ylabel("Predicted Prices")

# Mean squared error over the data the model was fitted on.
mseFull = np.mean((Y - predicted) ** 2)
print (mseFull)

# Residual plot: fitted value against (fitted - observed). The zero line spans
# the actual range of fitted values rather than a hard-coded 0..50.
plt.figure()
plt.scatter(predicted, predicted - Y, c='b', s=40, alpha=0.5)
plt.hlines(y=0, xmin=predicted.min(), xmax=predicted.max())

# Flush the diagnostic figures here so later plots start on a clean figure.
plt.show()
# --- Hold-out evaluation: fit on one slice of rows, score on another ---------
# Slice sizes are drawn at random: r is 60 or 65, r_test is 10, 14 or 18.
r = random.randrange(60,70,5)
r_test = random.randrange(10,20,4)

train_data = Hos[:r]
# Take the test rows from directly AFTER the training rows. The previous
# `Hos[:r_test]` was a subset of the training rows (r_test < r always), so the
# "test" metrics were really measured on data the model had already seen.
test_data = Hos[r:r + r_test]
if test_data.empty:
    raise ValueError(
        "Not enough rows for a hold-out set: %d rows available, "
        "%d used for training" % (len(Hos), r)
    )

array2 = train_data.values
array3 = test_data.values
X_train_data = array2[:, 0:1]  # 2-D feature matrix (n_samples, 1)
Y_train_data = array2[:, 1]
X_test_data = array3[:, 0:1]
Y_test_data = array3[:, 1]

# Refit the shared model on the training slice only.
lm.fit(X_train_data, Y_train_data)

# Predictions for the held-out rows. The variable name is kept from the
# original script even though these are test-set predictions.
train_prediction = lm.predict(X_test_data)

# Report the fitted parameters and the held-out error.
print("Estimated intercept coefficient",lm.intercept_)
print("Number of Coefficient",len(lm.coef_))
print("Mean squared error: %.2f"
      % np.mean((train_prediction - Y_test_data) ** 2))

# R^2 on the held-out rows (1 is a perfect prediction). The original computed
# this value, threw it away, and printed the training-set score instead.
print('Variance score: %.2f' % lm.score(X_test_data, Y_test_data))

# Held-out points with the fitted line, on a figure of its own so earlier
# plots are not drawn underneath it.
plt.figure()
plt.scatter(X_test_data, Y_test_data, color='black')
plt.plot(X_test_data[:, 0], train_prediction, color="blue")
plt.xticks(())
plt.yticks(())
plt.show()