-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvisualize.py
58 lines (45 loc) · 1.25 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Understand data with simple visualization
'''
1. Univariate Plots
1.1 Histogram
1.2 Density plot
1.3 Box plot
2. Multivariate Plots
2.1 Correlation matrix plot
2.2 Scatter plot
'''
from matplotlib import pyplot
from pandas import read_csv
import numpy
# scatter_matrix is a function inside the pandas.plotting module, not a
# submodule, so it has to be imported with "from ... import". The form
# "import pandas.plotting.scatter_matrix" fails at import time and would not
# bind the bare name used at the bottom of this script anyway.
from pandas.plotting import scatter_matrix

# Load the CSV, assigning the column names explicitly via ``names``.
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = read_csv(filename, names=names)

# 1. Univariate Plots
# 1.1 Histogram: one histogram per column.
data.hist()
pyplot.show()

# 1.2 Density plots: one subplot per column on a 3x3 grid, each with its own
# x-axis range (sharex=False).
data.plot(title='Density Plot', kind='density', subplots=True, layout=(3, 3),
          sharex=False)
pyplot.show()

# 1.3 Box plots: one subplot per column on a 3x3 grid, with independent axes.
data.plot(title='Box Plot', kind='box', subplots=True, layout=(3, 3),
          sharex=False, sharey=False)
pyplot.show()

# 2. Multivariate plots
# 2.1 Correlation matrix plot
correlations = data.corr()
fig = pyplot.figure()
ax = fig.add_subplot(111)
# Pin the colour scale to [-1, 1], the full range of a correlation coefficient.
cax = ax.matshow(correlations, vmin=-1, vmax=1)
fig.colorbar(cax)
# Derive tick positions and labels from the correlation matrix itself instead
# of hard-coding nine columns, so the plot stays correct if the column list
# changes.
labels = list(correlations.columns)
ticks = numpy.arange(len(labels))
ax.set_xticks(ticks)
ax.set_yticks(ticks)  # was "ax_set_yticks(ticks)", which raised NameError
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)
pyplot.show()

# 2.2 Scatter plots: pairwise scatter plot matrix of all columns.
scatter_matrix(data)
pyplot.show()