-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRandomized-Selection.py
151 lines (125 loc) · 5.99 KB
/
Randomized-Selection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# -*- coding: utf-8 -*-
"""Untitled0.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1nwShlx703rdHMOn58mgvZ0v_D-6xMhcV
"""
import sys
import argparse
import random
import numpy as np
import time
import math
import matplotlib.pyplot as plt
sys.setrecursionlimit(3000)
# sort the array and pick the k-th smallest element from the sorted-array
def sort_and_select(current_array, k) :
# sort the array
sorted_current_array = np.sort(current_array)
return sorted_current_array[k-1]
def randomized_select_with_multipe_pivots (current_array, k, no_of_pivots):
if (len(current_array) <= 5) :
return sort_and_select(current_array, k)
else:
min_array = []
for i in range(0, no_of_pivots):
# pick a random pivot-element
p = current_array[random.randint(0,len(current_array)-1)]
# split the current_array into three sub-arrays: Less_than_p, Equal_to_p and Greater_than_p
Less_than_p = []
Equal_to_p = []
Greater_than_p = []
for x in current_array :
if (x < p) :
Less_than_p.extend([x])
if (x == p) :
Equal_to_p.extend([x])
if (x > p) :
Greater_than_p.extend([x])
if (k > len(Less_than_p) and k <= len(Less_than_p) + len(Equal_to_p)) :
return p
else:
if(len(min_array) == 0):
if(k <= len(Less_than_p)):
min_array = Less_than_p
target_k = k
else:
min_array = Greater_than_p
target_k = k-len(Less_than_p) - len(Equal_to_p)
else:
if((k <= len(Less_than_p)) and (len(min_array) > len(Less_than_p))):
min_array = Less_than_p
target_k = k
elif((k > len(Less_than_p) + len(Equal_to_p)) and (len(min_array) > len(Greater_than_p))):
min_array = Greater_than_p
target_k = k-len(Less_than_p) - len(Equal_to_p)
return randomized_select_with_multipe_pivots (min_array, target_k, no_of_pivots)
# Maximum #pivots
max_no_of_pivots = 15
# Number of Trials
number_of_trials = 1000
# We are going to see if there is any observable difference in the slope of the Linear Regressor
# for the Mean (resp. Standard-Deviation) of the Running Time
# and the slope of standard-deviation-regressor as the number of pivots are increased
slope_of_mean_regressor_as_a_function_of_no_of_pivots = []
slope_of_std_dev_regressor_as_a_function_of_no_of_pivots = []
# I am going to plot a lot of things
# I found the stuff here -- https://matplotlib.org/gallery/subplots_axes_and_figures/figure_title.html
# to be useful. Instead, I just used what got from here to get the subplots not to get squished down --
# https://stackoverflow.com/questions/41530975/set-size-of-subplot-in-matplotlib
fig = plt.figure(figsize=(8, 20))
# try #pivots = 1,2,3,4 and see if having more pivots is helping with the run-time
for number_of_pivots in range(1, max_no_of_pivots+1) :
# arrays containing mean- and std-dev of running time as a function of
# array size starting from 100 to 4000 in steps of 100
mean_running_time = []
std_dev_running_time = []
# cycle through a bunch of array sizes, where "k" is randomly chosen
for j in range(1, 40) :
array_size = 100*j
# let is pick k to be (close to) the median
k = math.ceil(array_size/2)
# fill the array with random values
my_array = [random.randint(1,100*array_size) for _ in range(array_size)]
# run a bunch of random trials and get the algorithm's running time
running_time = []
for i in range(1, number_of_trials) :
t1 = time.time()
answer1 = randomized_select_with_multipe_pivots(my_array,k,number_of_pivots)
t2 = time.time()
running_time.extend([t2-t1])
# uncomment the lines below to verify the solution of randomized_select_with_pivots
answer2 = sort_and_select(my_array, k)
if (answer1 != answer2) :
print ("Something went wrong")
exit()
mean_running_time.extend([np.mean(running_time)])
std_dev_running_time.extend([np.std(running_time)])
# linear fit (cf. https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.polyfit.html)
t = np.arange(100, 4000, 100)
z1 = np.polyfit(t, mean_running_time, 1)
p1 = np.poly1d(z1)
z2 = np.polyfit(t, std_dev_running_time, 1)
p2 = np.poly1d(z2)
print("#Pivots = ", number_of_pivots, "; Mean-Regressor's slope = ", z1[0], "; Std-Dev-Regressor's slope = ", z2[0])
slope_of_mean_regressor_as_a_function_of_no_of_pivots.extend([z1[0]])
slope_of_std_dev_regressor_as_a_function_of_no_of_pivots.extend([z2[0]])
# plot the mean and standard deviation of the running-time as a function of array-size
axs = fig.add_subplot(5, 3, number_of_pivots)
plt.plot(t, mean_running_time, 'r', t, std_dev_running_time, 'g', t, p1(t), 'r-', t, p2(t), 'g-')
axs.set_title('#Pivots =' + str(number_of_pivots))
plt.savefig("fig2.pdf", bbox_inches='tight')
plt.show()
# plot the slope of the two regressors as a function of #pivots
x = [i for i in range(1, max_no_of_pivots+1)]
plt.plot(x, slope_of_mean_regressor_as_a_function_of_no_of_pivots, 'r', x, slope_of_std_dev_regressor_as_a_function_of_no_of_pivots, 'g')
plt.title('Linear Regressor Slope for Mean- and Std-Dev vs. #Pivots')
plt.xlabel('#Pivots')
plt.ylabel('Seconds/Length')
plt.savefig("fig1.pdf", bbox_inches='tight')
plt.show()
# Checking if increasing the number of pivots is helping with the runtime in any significant manner...
z3 = np.polyfit(x, slope_of_mean_regressor_as_a_function_of_no_of_pivots, 1)
z4 = np.polyfit(x, slope_of_std_dev_regressor_as_a_function_of_no_of_pivots, 1)
print("Sensitivity of the Slope of the Linear Regressor of the Mean to the #Pivots = ", z3[0])
print("Sensitivity of the Slope of the Linear Regressor of the Std-Dev to the #Pivots = ", z4[0])