-
Notifications
You must be signed in to change notification settings - Fork 273
/
Copy pathieee_float.h
405 lines (331 loc) · 9.82 KB
/
ieee_float.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
/*******************************************************************\
Module:
Author: Daniel Kroening, kroening@kroening.com
\*******************************************************************/
#ifndef CPROVER_UTIL_IEEE_FLOAT_H
#define CPROVER_UTIL_IEEE_FLOAT_H
#include <iosfwd>
#include "mp_arith.h"
#include "format_spec.h"
class constant_exprt;
class floatbv_typet;
class ieee_float_spect
{
public:
// Number of bits for fraction (excluding hidden bit)
// and exponent, respectively
std::size_t f, e;
// x86 has an extended precision format with an explicit
// integer bit.
bool x86_extended;
mp_integer bias() const;
explicit ieee_float_spect(const floatbv_typet &type)
{
from_type(type);
}
void from_type(const floatbv_typet &type);
ieee_float_spect():f(0), e(0), x86_extended(false)
{
}
ieee_float_spect(std::size_t _f, std::size_t _e):
f(_f), e(_e), x86_extended(false)
{
}
std::size_t width() const
{
// Add one for the sign bit.
// Add one if x86 explicit integer bit is used.
return f+e+1+(x86_extended?1:0);
}
mp_integer max_exponent() const;
mp_integer max_fraction() const;
class floatbv_typet to_type() const;
// this is binary16 in IEEE 754-2008
static ieee_float_spect half_precision()
{
// 16 bits in total
return ieee_float_spect(10, 5);
}
// the well-know standard formats
static ieee_float_spect single_precision()
{
// 32 bits in total
return ieee_float_spect(23, 8);
}
static ieee_float_spect double_precision()
{
// 64 bits in total
return ieee_float_spect(52, 11);
}
static ieee_float_spect quadruple_precision()
{
// IEEE 754 binary128
return ieee_float_spect(112, 15);
}
static ieee_float_spect x86_80()
{
// Intel, not IEEE
ieee_float_spect result(63, 15);
result.x86_extended=true;
return result;
}
static ieee_float_spect x86_96()
{
// Intel, not IEEE
ieee_float_spect result(63, 15);
result.x86_extended=true;
return result;
}
bool operator==(const ieee_float_spect &other) const
{
return f==other.f && e==other.e && x86_extended==other.x86_extended;
}
bool operator!=(const ieee_float_spect &other) const
{
return !(*this==other);
}
};
/// An IEEE 754 floating-point value, including specificiation.
class ieee_float_valuet
{
public:
ieee_float_spect spec;
explicit ieee_float_valuet(const ieee_float_spect &_spec)
: spec(_spec),
sign_flag(false),
exponent(0),
fraction(0),
NaN_flag(false),
infinity_flag(false)
{
}
explicit ieee_float_valuet(const floatbv_typet &type)
: spec(ieee_float_spect(type)),
sign_flag(false),
exponent(0),
fraction(0),
NaN_flag(false),
infinity_flag(false)
{
}
explicit ieee_float_valuet(const constant_exprt &expr)
{
from_expr(expr);
}
ieee_float_valuet()
: sign_flag(false),
exponent(0),
fraction(0),
NaN_flag(false),
infinity_flag(false)
{
}
void negate()
{
sign_flag=!sign_flag;
}
void set_sign(bool _sign)
{ sign_flag = _sign; }
void make_zero()
{
sign_flag=false;
exponent=0;
fraction=0;
NaN_flag=false;
infinity_flag=false;
}
static ieee_float_valuet zero(const floatbv_typet &type)
{
ieee_float_valuet result(type);
result.make_zero();
return result;
}
static ieee_float_valuet zero(const ieee_float_spect &spec)
{
ieee_float_valuet result(spec);
result.make_zero();
return result;
}
static ieee_float_valuet one(const floatbv_typet &);
static ieee_float_valuet one(const ieee_float_spect &);
void make_NaN();
void make_plus_infinity();
void make_minus_infinity();
void make_fltmax(); // maximum representable finite floating-point number
void make_fltmin(); // minimum normalized positive floating-point number
static ieee_float_valuet NaN(const ieee_float_spect &_spec)
{
ieee_float_valuet c(_spec);
c.make_NaN();
return c;
}
static ieee_float_valuet plus_infinity(const ieee_float_spect &_spec)
{
ieee_float_valuet c(_spec);
c.make_plus_infinity();
return c;
}
static ieee_float_valuet minus_infinity(const ieee_float_spect &_spec)
{
ieee_float_valuet c(_spec);
c.make_minus_infinity();
return c;
}
// maximum representable finite floating-point number
static ieee_float_valuet fltmax(const ieee_float_spect &_spec)
{
ieee_float_valuet c(_spec);
c.make_fltmax();
return c;
}
// minimum normalized positive floating-point number
static ieee_float_valuet fltmin(const ieee_float_spect &_spec)
{
ieee_float_valuet c(_spec);
c.make_fltmin();
return c;
}
// set to next representable number towards plus infinity
void increment(bool distinguish_zero=false)
{
if(is_zero() && get_sign() && distinguish_zero)
negate();
else
next_representable(true);
}
// set to previous representable number towards minus infinity
void decrement(bool distinguish_zero=false)
{
if(is_zero() && !get_sign() && distinguish_zero)
negate();
else
next_representable(false);
}
bool is_zero() const
{
return !NaN_flag && !infinity_flag && fraction==0 && exponent==0;
}
bool get_sign() const { return sign_flag; }
bool is_NaN() const { return NaN_flag; }
bool is_infinity() const { return !NaN_flag && infinity_flag; }
bool is_normal() const;
const mp_integer &get_exponent() const { return exponent; }
const mp_integer &get_fraction() const { return fraction; }
// Construct IEEE floating point value
void unpack(const mp_integer &);
void from_double(double);
void from_float(float);
// performs conversions from IEEE float-point format
// to something else
double to_double() const;
float to_float() const;
bool is_double() const;
bool is_float() const;
mp_integer pack() const;
void extract_base2(mp_integer &_exponent, mp_integer &_fraction) const;
void extract_base10(mp_integer &_exponent, mp_integer &_fraction) const;
mp_integer to_integer() const; // this always rounds to zero
// output
void print(std::ostream &out) const;
std::string to_ansi_c_string() const
{
return format(format_spect());
}
std::string to_string_decimal(std::size_t precision) const;
std::string to_string_scientific(std::size_t precision) const;
std::string format(const format_spect &format_spec) const;
// expressions
constant_exprt to_expr() const;
void from_expr(const constant_exprt &expr);
bool operator<(const ieee_float_valuet &) const;
bool operator<=(const ieee_float_valuet &) const;
bool operator>(const ieee_float_valuet &) const;
bool operator>=(const ieee_float_valuet &) const;
// warning: these do packed equality, not IEEE equality
// e.g., NAN==NAN
bool operator==(const ieee_float_valuet &) const;
bool operator!=(const ieee_float_valuet &) const;
bool operator==(int) const;
// these do IEEE equality, i.e., NAN!=NAN
bool ieee_equal(const ieee_float_valuet &) const;
bool ieee_not_equal(const ieee_float_valuet &) const;
protected:
// we store the number unpacked
bool sign_flag;
mp_integer exponent; // this is unbiased
mp_integer fraction; // this _does_ include the hidden bit
bool NaN_flag, infinity_flag;
// number of digits of an integer >=1 in base 10
static mp_integer base10_digits(const mp_integer &src);
void next_representable(bool greater);
};
inline std::ostream &operator<<(std::ostream &out, const ieee_float_valuet &f)
{
return out << f.to_ansi_c_string();
}
/// An IEEE 754 value plus a rounding mode,
/// enabling operations with rounding on values.
class ieee_floatt : public ieee_float_valuet
{
public:
// ROUND_TO_EVEN is also known as "round to nearest, ties to even", and
// is the IEEE default.
// ROUND_TO_AWAY is also known as "round to infinity".
// The numbering below is what x86 uses in the control word and
// what is recommended in C11 5.2.4.2.2.
// The numbering of ROUND_TO_AWAY is not specified in C11 5.2.4.2.2.
enum rounding_modet
{
ROUND_TO_EVEN = 0,
ROUND_TO_MINUS_INF = 1,
ROUND_TO_PLUS_INF = 2,
ROUND_TO_ZERO = 3,
ROUND_TO_AWAY = 4,
UNKNOWN,
NONDETERMINISTIC
};
rounding_modet rounding_mode() const
{
return _rounding_mode;
}
// A helper to turn a rounding mode into a constant bitvector expression
static constant_exprt rounding_mode_expr(rounding_modet);
ieee_floatt(ieee_float_spect __spec, rounding_modet __rounding_mode)
: ieee_float_valuet(__spec), _rounding_mode(__rounding_mode)
{
}
ieee_floatt(const floatbv_typet &type, rounding_modet __rounding_mode)
: ieee_float_valuet(type), _rounding_mode(__rounding_mode)
{
}
ieee_floatt(const constant_exprt &expr, rounding_modet __rounding_mode)
: ieee_float_valuet(expr), _rounding_mode(__rounding_mode)
{
}
ieee_floatt(ieee_float_valuet __value, rounding_modet __rounding_mode)
: ieee_float_valuet(__value), _rounding_mode(__rounding_mode)
{
}
// performs conversion to IEEE floating point format,
// with rounding.
void from_integer(const mp_integer &i);
void from_base10(const mp_integer &exp, const mp_integer &frac);
void build(const mp_integer &exp, const mp_integer &frac);
// performs conversions from IEEE float-point format
// to something else
double to_double() const;
float to_float() const;
mp_integer to_integer() const; // this always rounds to zero
// conversions
void change_spec(const ieee_float_spect &dest_spec);
// the usual operators
ieee_floatt &operator/=(const ieee_floatt &other);
ieee_floatt &operator*=(const ieee_floatt &other);
ieee_floatt &operator+=(const ieee_floatt &other);
ieee_floatt &operator-=(const ieee_floatt &other);
protected:
rounding_modet _rounding_mode;
void divide_and_round(mp_integer ÷nd, const mp_integer &divisor);
void align();
};
#endif // CPROVER_UTIL_IEEE_FLOAT_H