Skip to content

Commit 6f195f5

Browse files
committed
Remove round even
1 parent b48296c commit 6f195f5

File tree

4 files changed

+254
-3741
lines changed

4 files changed

+254
-3741
lines changed

Makefile

+5-43
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,15 @@ CC = gcc
66
# export ASAN_OPTIONS='detect_invalid_pointer_pairs=2'
77
# export LSAN_OPTIONS=''
88

9-
all: tst_convert64 tst_convert32 tst_convert64e tst_convert32e
9+
all: tst_convert64 tst_convert32
1010

1111
tst_convert64: fast_convert.c tst_convert.c
12-
${CC} -DROUND_EVEN=0 ${OPTIONS} fast_convert.c tst_convert.c -o tst_convert64
12+
${CC} ${OPTIONS} fast_convert.c tst_convert.c -o tst_convert64
1313

1414
tst_convert32: fast_convert.c tst_convert.c
15-
${CC} -DROUND_EVEN=0 -m32 ${OPTIONS} fast_convert.c tst_convert.c -o tst_convert32
15+
${CC} -m32 ${OPTIONS} fast_convert.c tst_convert.c -o tst_convert32
1616

17-
tst_convert64e: fast_convert.c tst_convert.c
18-
${CC} -DROUND_EVEN=1 ${OPTIONS} fast_convert.c tst_convert.c -o tst_convert64e
19-
20-
tst_convert32e: fast_convert.c tst_convert.c
21-
${CC} -DROUND_EVEN=1 -m32 ${OPTIONS} fast_convert.c tst_convert.c -o tst_convert32e
22-
23-
test: tst_convert64 tst_convert32 tst_convert64e tst_convert32e
17+
test: tst_convert64 tst_convert32
2418
./tst_convert64 f
2519
./tst_convert64 s
2620
./tst_convert64 d
@@ -53,38 +47,6 @@ test: tst_convert64 tst_convert32 tst_convert64e tst_convert32e
5347
./tst_convert32 tn
5448
./tst_convert32 Gn
5549
./tst_convert32 Tn
56-
./tst_convert64e f
57-
./tst_convert64e s
58-
./tst_convert64e d
59-
./tst_convert64e S
60-
./tst_convert64e fn
61-
./tst_convert64e sn
62-
./tst_convert64e dn
63-
./tst_convert64e Sn
64-
./tst_convert64e g
65-
./tst_convert64e t
66-
./tst_convert64e G
67-
./tst_convert64e T
68-
./tst_convert64e gn
69-
./tst_convert64e tn
70-
./tst_convert64e Gn
71-
./tst_convert64e Tn
72-
./tst_convert32e f
73-
./tst_convert32e s
74-
./tst_convert32e d
75-
./tst_convert32e S
76-
./tst_convert32e fn
77-
./tst_convert32e sn
78-
./tst_convert32e dn
79-
./tst_convert32e Sn
80-
./tst_convert32e g
81-
./tst_convert32e t
82-
./tst_convert32e G
83-
./tst_convert32e T
84-
./tst_convert32e gn
85-
./tst_convert32e tn
86-
./tst_convert32e Gn
87-
./tst_convert32e Tn
8850

8951
clean:
90-
rm -f tst_convert64 tst_convert32 tst_convert64e tst_convert32e
52+
rm -f tst_convert64 tst_convert32

README.md

+20-81
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,7 @@ fast\_convert
33

44
A fast implementation of some print and scan functions. This code can, for example, be used to write a very fast JSON implementation. I tested this on a JSON library and got a speedup of about 7 times.
55

6-
Some speedup figures (ROUND\_EVEN == 0):
7-
8-
* fast\_ftoa is approx. 26 times as fast as sprintf (on x86\_64).
9-
* fast\_dtoa is approx. 29 times as fast as sprintf (on x86\_64).
10-
* fast\_strtof is approx. 4 times as fast as strtof (on x86\_64).
11-
* fast\_strtod is approx. 6 times as fast as strtod (on x86\_64).
12-
13-
Some speedup figures (ROUND\_EVEN == 1):
6+
Some speedup figures:
147

158
* fast\_ftoa is approx. 20 times as fast as sprintf (on x86\_64).
169
* fast\_dtoa is approx. 26 times as fast as sprintf (on x86\_64).
@@ -87,25 +80,7 @@ c count differences float
8780
C count differences double
8881
If the first option letter is followed by 'n', then no check is done.
8982

90-
64 bits (i7-4700MQ + fedora 30) ROUND_EVEN=0
91-
f: 594.11 (2510.37 / 594.11 = 4.23)
92-
s: 2510.37
93-
d: 1464.08 (5799.09 / 1464.08 = 3.96)
94-
S: 5799.09
95-
fn: 71.55 (1870.78 / 71.55 = 26.15)
96-
sn: 1870.78
97-
dn: 142.64 (4194.16 / 142.64 = 29.40)
98-
Sn: 4194.16
99-
g: 188.74 ((597.76 - 71.55) / (188.74 - 71.55) = 4.49)
100-
t: 597.76
101-
G: 361.90 ((1463.43 - 142.64) / (361.90 - 142.64) = 6.02)
102-
T: 1463.43
103-
gn: 183.73 ((588.91 - 71.55) / (183.73 - 71.55) = 4.61)
104-
tn: 588.91
105-
Gn: 359.15 ((1453.78 - 142.64) / (359.15 - 142.64) = 6.06)
106-
Tn: 1453.78
107-
108-
64 bits (i7-4700MQ + fedora 30) ROUND_EVEN=1
83+
64 bits (i7-4700MQ + fedora 30)
10984
f: 625.17 (2510.37 / 625.17 = 4.02)
11085
s: 2510.37
11186
d: 1477.76 (5799.09 / 1477.76 = 3.92)
@@ -114,34 +89,16 @@ fn: 92.23 (1870.78 / 92.23 = 20.28)
11489
sn: 1870.78
11590
dn: 160.43 (4194.16 / 160.43 = 26.14)
11691
Sn: 4194.16
117-
g: 212.92 ((597.76 - 92.23) / (212.92 - 92.23) = 4.18)
92+
g: 217.58 ((597.76 - 92.23) / (217.58 - 92.23) = 4.03)
11893
t: 597.76
119-
G: 386.96 ((1463.43 - 160.43) / (386.96 - 160.43) = 5.75)
94+
G: 391.04 ((1463.43 - 160.43) / (391.04 - 160.43) = 5.65)
12095
T: 1463.43
121-
gn: 207.17 ((588.91 - 92.23) / (207.17 - 92.23) = 4.32)
96+
gn: 212.41 ((588.91 - 92.23) / (212.41 - 92.23) = 4.13)
12297
tn: 588.91
123-
Gn: 385.13 ((1453.78 - 160.43) / (385.13 - 160.43) = 5.76)
98+
Gn: 392.33 ((1453.78 - 160.43) / (392.33 - 160.43) = 5.58)
12499
Tn: 1453.78
125100

126-
32 bits (i7-4700MQ + fedora 30) ROUND_EVEN=0
127-
f: 1001.96 (2836.51 / 1001.96 = 2.83)
128-
s: 2836.51
129-
d: 2714.51 (8004.75 / 2714.51 = 2.95)
130-
S: 8004.75
131-
fn: 111.85 (1909.85 / 111.85 = 17.08)
132-
sn: 1909.85
133-
dn: 334.14 (5383.21 / 334.14 = 16.11)
134-
Sn: 5383.21
135-
g: 305.49 ((1083.00 - 111.85) / (305.49 - 111.85) = 5.02)
136-
t: 1083.00
137-
G: 798.17 ((2830.57 - 334.14) / (798.17 - 334.14) = 5.38)
138-
T: 2830.57
139-
gn: 292.99 ((1069.70 - 111.85) / (292.99 - 111.85) = 5.29)
140-
tn: 1069.70
141-
Gn: 787.92 ((2806.51 - 334.14) / (787.92 - 334.14) = 5.45)
142-
Tn: 2806.51
143-
144-
32 bits (i7-4700MQ + fedora 30) ROUND_EVEN=1
101+
32 bits (i7-4700MQ + fedora 30)
145102
f: 1023.48 (2836.51 / 1023.48 = 2.77)
146103
s: 2836.51
147104
d: 2769.17 (8004.75 / 2769.17 = 2.89)
@@ -150,34 +107,16 @@ fn: 147.78 (1909.85 / 147.78 = 12.92)
150107
sn: 1909.85
151108
dn: 381.08 (5383.21 / 381.08 = 14.12)
152109
Sn: 5383.21
153-
g: 345.59 ((1083.00 - 147.78) / (345.59 - 147.78) = 4.73)
110+
g: 449.30 ((1083.00 - 147.78) / (449.30 - 147.78) = 3.10)
154111
t: 1083.00
155-
G: 891.33 ((2830.57 - 381.08) / (891.33 - 381.08) = 4.80)
112+
G: 941.57 ((2830.57 - 381.08) / (941.57 - 381.08) = 4.37)
156113
T: 2830.57
157-
gn: 334.01 ((1069.70 - 147.78) / (334.01 - 147.78) = 4.95)
114+
gn: 436.55 ((1069.70 - 147.78) / (436.55 - 147.78) = 3.19)
158115
tn: 1069.70
159-
Gn: 877.74 ((2806.51 - 381.08) / (877.74 - 381.08) = 4.88)
116+
Gn: 927.53 ((2806.51 - 381.08) / (927.53 - 381.08) = 4.44)
160117
Tn: 2806.51
161118

162-
raspberry pi (3b + raspbian buster): ROUND_EVEN=0
163-
f: 5892.31 (18654.65 / 5892.31 = 3.17)
164-
s: 18654.65
165-
d: 13151.53 (42775.59 / 13151.53 = 3.25)
166-
S: 42775.59
167-
fn: 639.49 (12308.98 / 639.49 = 19.25)
168-
sn: 12308.98
169-
dn: 1638.83 (30160.90 / 1638.83 = 18.40)
170-
Sn: 30160.90
171-
g: 1888.70 ((6340.77 - 639.49) / (1888.70 - 639.49) = 4.56)
172-
t: 6340.77
173-
G: 3929.60 ((19176.56 - 1638.83) / (3929.60 - 1638.83) = 7.66)
174-
T: 18796.96
175-
gn: 1823.51 ((6226.29 - 639.49) / (1823.51 - 639.49) = 4.72)
176-
tn: 6226.29
177-
Gn: 3785.88 ((18765.85 - 1638.83) / (3785.88 - 1638.83) = 7.98)
178-
Tn: 18765.85
179-
180-
raspberry pi (3b + raspbian buster): ROUND_EVEN=1
119+
raspberry pi (3b + raspbian buster):
181120
f: 6005.82 (18654.65 / 6005.82 = 3.11)
182121
s: 18654.65
183122
d: 13205.23 (42775.59 / 13205.23 = 3.24)
@@ -186,20 +125,20 @@ fn: 819.58 (12308.98 / 819.58 = 15.02)
186125
sn: 12308.98
187126
dn: 1763.91 (30160.90 / 1763.91 = 17.10)
188127
Sn: 30160.90
189-
g: 2121.96 ((6340.77 - 819.58) / (2121.96 - 819.58) = 4.24)
128+
g: 2422.91 ((6340.77 - 819.58) / (2422.91 - 819.58) = 3.44)
190129
t: 6340.77
191-
G: 4046.78 ((19176.56 - 1763.91) / (4046.78 - 1763.91) = 7.63)
130+
G: 4387.61 ((19176.56 - 1763.91) / (4387.61 - 1763.91) = 6.64)
192131
T: 18796.96
193-
gn: 2170.09 ((6226.29 - 819.58) / (2170.09 - 819.58) = 4.00)
132+
gn: 2373.73 ((6226.29 - 819.58) / (2373.73 - 819.58) = 3.48)
194133
tn: 6226.29
195-
Gn: 3991.01 ((18765.85 - 1763.91) / (3991.01 - 1763.91) = 7.63)
134+
Gn: 4347.37 ((18765.85 - 1763.91) / (4347.37 - 1763.91) = 6.58)
196135
Tn: 18765.85
197136

198-
p: 286.03 fast: 0, libc: 0 (ROUND_EVEN=1)
199-
P: 1090.16 fast: 0, libc: 0 (ROUND_EVEN=1)
137+
p: 286.03 fast: 0, libc: 0
138+
P: 1090.16 fast: 0, libc: 0
200139

201-
c: 2072.77 0 0.00% (ROUND_EVEN=1)
202-
C: 4439.85 0 0.00% (ROUND_EVEN=1)
140+
c: 2072.77 0 0.00%
141+
C: 4439.85 0 0.00%
203142

204143
</pre>
205144

0 commit comments

Comments
 (0)