-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmedian_test.awk
117 lines (103 loc) · 5.5 KB
/
median_test.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# median_test.awk
# test code for median.awk
# This by Peter Miller 25-8-2024 is placed in the public domain
# run with wmawk2 -f median.awk -f qsort.awk -f prand.awk -f median_test.awk
function test1(arr,maxi, pos_median,med_torben,i,mint,maxt,sizet) # note errs is a global variable
{# test arr with maxi elements and a subset in the middle -increments errs whenever an error found
if(maxi>201)
{# also test a segment in the middle
mint=100
maxt=201
sizet=1+maxt-mint
pos_median=mint+int((sizet+1)/2)
med_torben=_median(arr,mint,maxt)
_qsort(arr,mint,maxt)
if(med_torben!=arr[pos_median]) {errs++; printf(" Error: _median(arr,%d,%d) returned %g but arr[%d]=%g\n",mint,maxt,med_torben,pos_median,arr[pos_median]) }
else printf(" _median(arr,%d,%d) returned %g and arr[%d]=%g\n",mint,maxt,med_torben,pos_median,arr[pos_median])
}
# now test using whole array
pos_median=int((maxi+1)/2)
med_torben=median(arr,maxi)
qsort(arr,maxi) # sort array so we can get median as middle element
if(med_torben!=arr[pos_median]) {errs++; printf(" Error: median() returned %g but arr[%d]=%g\n",med_torben,pos_median,arr[pos_median]) }
else printf(" median() returned %g and arr[%d]=%g\n",med_torben,pos_median,arr[pos_median])
# check number of elements unchanged by median/sort
if(length(arr)!=maxi) {errs++;printf("Number of elements in arr changed - should be %d found %d\n",length(arr),maxi)}
# now check result is sorted - nor normaly needed, so commeneted out, but useful if the test above fails to make sure sort worked correctly
#for(i=2;i<=maxi;++i)
# if(arr[i-1]>arr[i]) {errs++;printf("Array not properly sorted arr[%d]=%g arr[%d]=%g\n",i-1,arr[i-1],i,arr[i])}
return med_torben # allow caller to check result
}
# Actual test code
BEGIN {
errs=0
start_time=systime(1) # systime(1) gives UTC time to 1us resolution
maxi=10000001 # number of array elemnts to sort, should be odd. maxi=10000001 takes 74 secs on my PC
pos_median=int((maxi+1)/2)
printf("Testing median() for %d elements so medain at array index %d\n",maxi,pos_median)
print " testing random numbers 0-1 so expect median to be ~0.5"
for(i=1;i<=maxi;++i)
arr[i]=prand() # fill array with random positive numbers 0..1
r=test1(arr,maxi)
if(r<0.49 || r>0.51) {errs++; printf(" Error - expected median to be 0.49..0.51\n")}
print " testing random numbers +/-1 so expect median to be ~0"
for(i=1;i<=maxi;++i)
arr[i]=1-2*prand() # fill array with random positive numbers +/-1
r=test1(arr,maxi)
if(r<-0.01 || r>0.01) {errs++; printf(" Error - expected median to be -0.01..0.01\n")}
# testing integer numbers 0..maxi-1 (so already sorted)
for(i=1;i<=maxi;++i)
arr[i]=i-1 # fill array with integers 0..maxi-1
med=arr[pos_median] # already sorted so know result
print " testing integer numbers 0..maxi-1 so expect median to be",med
r=test1(arr,maxi)
if(r!=med) {errs++; printf(" Error - expected median to be %g\n",med)}
print " testing integer numbers maxi-1 to 0 so expect median to be",med
for(i=1;i<=maxi;++i)
arr[i]=maxi-i # fill array with integers maxi-1..0
r=test1(arr,maxi)
if(r!=med) {errs++; printf(" Error - expected median to be %g\n",med)}
print " testing integer numbers all 5, so expect median to be 5"
for(i=1;i<=maxi;++i)
arr[i]=5 # fill array with integers 5
r=test1(arr,maxi)
if(r!=5) {errs++; printf(" Error - expected median to be 5\n")}
print " testing integer numbers mainly 5, with a few other values - so expect median to be 5"
for(i=1;i<=maxi;++i)
arr[i]=5 # fill array with integers 5
for(i=1;i<10;++i)
arr[int(prand()*maxi)]=i # add in a few other values at random locations
r=test1(arr,maxi)
if(r!=5) {errs++; printf(" Error - expected median to be 5\n")}
# expected final values from random number generator - add values for different maxi as required
if(maxi==10000001)
expected_rand_value=20984299426 # 0x4e2c2ffa2
else if(maxi==20000001)
expected_rand_value=3075012908
else if(maxi==1000001)
expected_rand_value=2091721999
else if(maxi==2000001)
expected_rand_value=32355024622
else
{printf("Random number generators final value was %d - correct value is unknown for maxi=%d\n",rand_value,maxi)
expected_rand_value=0
}
if(expected_rand_value!=0 && rand_value!=expected_rand_value) {errs++; printf("Warning incorrect value from random number generator - got %d expected %d\n Have you changed the test program?\n",rand_value,expected_rand_value)}
if(errs==0) print "median() test finished OK, total execution time was",systime(1)-start_time,"seconds"
else print "median() test finished with",errs,"error(s), total execution time was",systime(1)-start_time,"seconds"
# for(i=1; i<=30;++i) # uncomment to see a few random numbers...
# printf("prand() returns %.14f [%16d]\n",prand(),rand_value)
# for maxi=10,000,001
# full code takes 70 secs
# with calls to qsort commeneted out takes 39 secs
# with calls to median commented out takes 33 secs
# => qsort takes 31 secs, median takes 37 secs and test harness takes 2 secs
#
# for maxi=20,000,001
# full code takes 150 secs
# with calls to qsort commented out takes 87 secs
# with calls to median commented out takes 68 secs
# => qsort takes 63 secs, median takes 82 secs and test harness takes 5 secs
# Means qsort is linear in n (within 1 sec measurement accuracy), median (37->82 =2.2* ) is closer to n*log2(n)
# for nlog2(n) change in execution time for n->2n is 2+2/log2(n) so for n=10000001 expect 2.09*
}