[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[lmi-commits] [lmi] master f7200946 1/5: Avoid gcc 'Wstrict-aliasing' wa
From: |
Greg Chicares |
Subject: |
[lmi-commits] [lmi] master f7200946 1/5: Avoid gcc 'Wstrict-aliasing' warnings |
Date: |
Tue, 24 May 2022 22:18:38 -0400 (EDT) |
branch: master
commit f72009460071a1cd330f9f023e093c8a06eca3f5
Author: Gregory W. Chicares <gchicares@sbcglobal.net>
Commit: Gregory W. Chicares <gchicares@sbcglobal.net>
Avoid gcc 'Wstrict-aliasing' warnings
Imported macros that NetBSD used to remedy an acknowledged defect in
FDLIBM. This would seem to replace a particularly nasty sort of UB with
a different sort of UB: all UB is equal, but some is more equal.
However, see:
https://www.open-std.org/jtc1/sc22/wg14/www/docs/dr_283.htm
Accessing a non-current union member ("type punning")
Added 'inline' cover functions for some macros that were just too ugly
to use; measurements show no speed penalty for them.
Incidentally realigned comments, many of which had become misaligned due
to macro changes.
---
fdlibm.hpp | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++------
fdlibm_expm1.c | 25 ++++-----
fdlibm_log1p.c | 18 +++---
3 files changed, 176 insertions(+), 41 deletions(-)
diff --git a/fdlibm.hpp b/fdlibm.hpp
index 6be690e8..64d9861e 100644
--- a/fdlibm.hpp
+++ b/fdlibm.hpp
@@ -41,6 +41,8 @@
#include "config.hpp"
+#include <stdint.h>
+
// Apparently the clang maintainers believe that floating-point
// endianness is necessarily the same as integer endianness.
#if defined __clang__
@@ -55,26 +57,164 @@
# error Expected endianness macros not defined.
#endif // expected endianness macros not defined
-// https://www.netlib.org/fdlibm/readme
-//
-// NOT FIXED YET
+#if __FLOAT_WORD_ORDER__ == __ORDER_LITTLE_ENDIAN__ // okay
+#elif __FLOAT_WORD_ORDER__ == __ORDER_BIG_ENDIAN__ // also okay
+#else // not okay
+# error Unknown endianness.
+#endif // not okay
+
+// Improved type-punning macros adapted from:
+//
https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/generic/math_private.h
+// That file retains the original Sun copyright and license notices,
+// and does not add any for glibc; the commit message suggests that
+// the original work was done for netbsd.
//
-// 3. Compiler failure on non-standard code
-// Statements like
-// *(1+(int*)&t1) = 0;
-// are not standard C and cause some optimizing compilers (e.g. GCC)
-// to generate bad code under optimization. These cases
-// are to be addressed in the next release.
+// Adapted for lmi by GWC; any defects introduced should not reflect
+// on the reputations of the original authors.
+
+/* The original fdlibm code used statements like:
+ n0 = ((*(int*)&one)>>29)^1; * index of high word *
+ ix0 = *(n0+(int*)&x); * high word of x *
+ ix1 = *((1-n0)+(int*)&x); * low word of x *
+ to dig two 32 bit words out of the 64 bit IEEE floating point
+ value. That is non-ANSI, and, moreover, the gcc instruction
+ scheduler gets it wrong. We instead use the following macros.
+ Unlike the original code, we determine the endianness at compile
+ time, not at run time; I don't see much benefit to selecting
+ endianness at run time. */
+
+/* A union which permits us to convert between a double and two 32 bit
+ ints. */
+
+#if __FLOAT_WORD_ORDER__ == __ORDER_BIG_ENDIAN__
+typedef union
+{
+ double value;
+ struct
+ {
+ uint32_t msw;
+ uint32_t lsw;
+ } parts;
+ uint64_t word;
+} ieee_double_shape_type;
+#endif // __FLOAT_WORD_ORDER__ == __ORDER_BIG_ENDIAN__
#if __FLOAT_WORD_ORDER__ == __ORDER_LITTLE_ENDIAN__
-# define FDLIBM_HI(x) *(1+(int*)&x)
-# define FDLIBM_LO(x) *(int*)&x
-#elif __FLOAT_WORD_ORDER__ == __ORDER_BIG_ENDIAN__
-# define FDLIBM_HI(x) *(int*)&x
-# define FDLIBM_LO(x) *(1+(int*)&x)
-#else // unknown endianness
-# error Unknown endianness.
-#endif // unknown endianness
+typedef union
+{
+ double value;
+ struct
+ {
+ uint32_t lsw;
+ uint32_t msw;
+ } parts;
+ uint64_t word;
+} ieee_double_shape_type;
+#endif // __FLOAT_WORD_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+/* Get two 32 bit ints from a double. */
+
+#define EXTRACT_WORDS(ix0,ix1,d) \
+do { \
+ ieee_double_shape_type ew_u; \
+ ew_u.value = (d); \
+ (ix0) = ew_u.parts.msw; \
+ (ix1) = ew_u.parts.lsw; \
+} while (0)
+
+/* Get the more significant 32 bit int from a double. */
+
+#define GET_HIGH_WORD(i,d) \
+do { \
+ ieee_double_shape_type gh_u; \
+ gh_u.value = (d); \
+ (i) = gh_u.parts.msw; \
+} while (0)
+
+static inline uint32_t hi_uint(double d)
+{
+ uint32_t i;
+ GET_HIGH_WORD(i,d);
+ return i;
+}
+
+#if defined __cplusplus && defined LMI_GCC
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wold-style-cast"
+#endif // defined __cplusplus && defined LMI_GCC
+static inline int32_t hi_int(double d)
+{
+ uint32_t i;
+ GET_HIGH_WORD(i,d);
+ return (int32_t)i;
+}
+#if defined __cplusplus && defined LMI_GCC
+# pragma GCC diagnostic pop
+#endif // defined __cplusplus && defined LMI_GCC
+
+/* Get the less significant 32 bit int from a double. */
+
+#define GET_LOW_WORD(i,d) \
+do { \
+ ieee_double_shape_type gl_u; \
+ gl_u.value = (d); \
+ (i) = gl_u.parts.lsw; \
+} while (0)
+
+static inline uint32_t lo_uint(double d)
+{
+ uint32_t i;
+ GET_LOW_WORD(i,d);
+ return i;
+}
+
+/* Get all in one, efficient on 64-bit machines. */
+
+#define EXTRACT_WORDS64(i,d) \
+do { \
+ ieee_double_shape_type gh_u; \
+ gh_u.value = (d); \
+ (i) = gh_u.word; \
+} while (0)
+
+/* Set a double from two 32 bit ints. */
+
+#define INSERT_WORDS(d,ix0,ix1) \
+do { \
+ ieee_double_shape_type iw_u; \
+ iw_u.parts.msw = (ix0); \
+ iw_u.parts.lsw = (ix1); \
+ (d) = iw_u.value; \
+} while (0)
+
+/* Get all in one, efficient on 64-bit machines. */
+
+#define INSERT_WORDS64(d,i) \
+do { \
+ ieee_double_shape_type iw_u; \
+ iw_u.word = (i); \
+ (d) = iw_u.value; \
+} while (0)
+
+/* Set the more significant 32 bits of a double from an int. */
+
+#define SET_HIGH_WORD(d,v) \
+do { \
+ ieee_double_shape_type sh_u; \
+ sh_u.value = (d); \
+ sh_u.parts.msw = (v); \
+ (d) = sh_u.value; \
+} while (0)
+
+/* Set the less significant 32 bits of a double from an int. */
+
+#define SET_LOW_WORD(d,v) \
+do { \
+ ieee_double_shape_type sl_u; \
+ sl_u.value = (d); \
+ sl_u.parts.lsw = (v); \
+ (d) = sl_u.value; \
+} while (0)
#if defined __cplusplus
extern "C"
diff --git a/fdlibm_expm1.c b/fdlibm_expm1.c
index db3c7a45..05650a53 100644
--- a/fdlibm_expm1.c
+++ b/fdlibm_expm1.c
@@ -27,13 +27,10 @@
#include "fdlibm.hpp"
-#include <stdint.h>
-
#if defined LMI_GCC
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wfloat-conversion"
# pragma GCC diagnostic ignored "-Wsign-conversion"
-# pragma GCC diagnostic ignored "-Wstrict-aliasing"
# pragma GCC diagnostic ignored "-Wunsuffixed-float-constants"
#endif // defined LMI_GCC
@@ -178,7 +175,7 @@ double fdlibm_expm1(double x)
int32_t k,xsb;
uint32_t hx;
- hx = FDLIBM_HI(x); /* high word of x */
+ hx = hi_uint(x); /* high word of x */
xsb = hx&0x80000000; /* sign bit of x */
if(xsb==0) y=x; else y= -x; /* y = |x| */
hx &= 0x7fffffff; /* high word of |x| */
@@ -187,15 +184,15 @@ double fdlibm_expm1(double x)
if(hx >= 0x4043687A) { /* if |x|>=56*ln2 */
if(hx >= 0x40862E42) { /* if |x|>=709.78... */
if(hx>=0x7ff00000) {
- if(((hx&0xfffff)|FDLIBM_LO(x))!=0)
+ if(((hx&0xfffff)|lo_uint(x))!=0)
return x+x; /* NaN */
else return (xsb==0)? x:-1.0;/* exp(+-inf)={inf,-1} */
}
if(x > o_threshold) return huge*huge; /* overflow */
}
- if(xsb!=0) { /* x < -56*ln2, return -1.0 with inexact */
- if(x+tiny<0.0) /* raise inexact */
- return tiny-one; /* return -1 */
+ if(xsb!=0) { /* x < -56*ln2, return -1.0 with inexact */
+ if(x+tiny<0.0) /* raise inexact */
+ return tiny-one; /* return -1 */
}
}
@@ -216,7 +213,7 @@ double fdlibm_expm1(double x)
c = (hi-x)-lo;
}
else if(hx < 0x3c900000) { /* when |x|<2**-54, return x */
- t = huge+x; /* return x with inexact flags when x!=0 */
+ t = huge+x; /* return x with inexact flags when x!=0 */
return x - (t-(huge+x));
}
else k = 0;
@@ -243,19 +240,19 @@ double fdlibm_expm1(double x)
}
if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
y = one-(e-x);
- FDLIBM_HI(y) += (k<<20); /* add k to y's exponent */
+ SET_HIGH_WORD(y, hi_uint(y) + (k<<20)); /* add k to y's
exponent */
return y-one;
}
t = one;
if(k<20) {
- FDLIBM_HI(t) = 0x3ff00000 - (0x200000>>k); /* t=1-2^-k */
+ SET_HIGH_WORD(t, 0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */
y = t-(e-x);
- FDLIBM_HI(y) += (k<<20); /* add k to y's exponent */
+ SET_HIGH_WORD(y, hi_uint(y) + (k<<20)); /* add k to y's
exponent */
} else {
- FDLIBM_HI(t) = ((0x3ff-k)<<20); /* 2^-k */
+ SET_HIGH_WORD(t, (0x3ff-k)<<20); /* 2^-k */
y = x-(e+t);
y += one;
- FDLIBM_HI(y) += (k<<20); /* add k to y's exponent */
+ SET_HIGH_WORD(y, hi_uint(y) + (k<<20)); /* add k to y's
exponent */
}
}
return y;
diff --git a/fdlibm_log1p.c b/fdlibm_log1p.c
index 608ba12b..d516901f 100644
--- a/fdlibm_log1p.c
+++ b/fdlibm_log1p.c
@@ -27,11 +27,9 @@
#include "fdlibm.hpp"
-#include <stdint.h>
-
#if defined LMI_GCC
# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wstrict-aliasing"
+# pragma GCC diagnostic ignored "-Wsign-conversion"
# pragma GCC diagnostic ignored "-Wunsuffixed-float-constants"
#endif // defined LMI_GCC
@@ -147,7 +145,7 @@ double fdlibm_log1p(double x)
double hfsq,f,c,s,z,R,u,z2,z4,z6,R1,R2,R3,R4;
int32_t k,hx,hu,ax;
- hx = FDLIBM_HI(x); /* high word of x */
+ hx = hi_int(x); /* high word of x */
ax = hx&0x7fffffff;
k = 1;
@@ -171,28 +169,28 @@ double fdlibm_log1p(double x)
if(k!=0) {
if(hx<0x43400000) {
u = 1.0+x;
- hu = FDLIBM_HI(u); /* high word of u */
+ hu = hi_int(u); /* high word of u */
k = (hu>>20)-1023;
- c = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
+ c = (k>0)? 1.0-(u-x):x-(u-1.0); /* correction term */
c /= u;
} else {
u = x;
- hu = FDLIBM_HI(u); /* high word of u */
+ hu = hi_int(u); /* high word of u */
k = (hu>>20)-1023;
c = 0;
}
hu &= 0x000fffff;
if(hu<0x6a09e) {
- FDLIBM_HI(u) = hu|0x3ff00000; /* normalize u */
+ SET_HIGH_WORD(u, hu|0x3ff00000); /* normalize u */
} else {
k += 1;
- FDLIBM_HI(u) = hu|0x3fe00000; /* normalize u/2 */
+ SET_HIGH_WORD(u, hu|0x3fe00000); /* normalize u/2 */
hu = (0x00100000-hu)>>2;
}
f = u-1.0;
}
hfsq=0.5*f*f;
- if(hu==0) { /* |f| < 2**-20 */
+ if(hu==0) { /* |f| < 2**-20 */
if(f==zero) {
if(k==0) return zero;
else {c += k*ln2_lo; return k*ln2_hi+c;}