[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Bug-gsl] bsimp/msbdf e5_bigt in ode-initval2/test.c is FMA-sensitive
From: |
Hal Finkel |
Subject: |
[Bug-gsl] bsimp/msbdf e5_bigt in ode-initval2/test.c is FMA-sensitive |
Date: |
Mon, 6 Apr 2015 18:18:42 -0500 |
Hello,
Please CC me directly on any replies because I'm not subscribed to the bugs
list.
When compiling gsl-1.16 using Clang/LLVM trunk for big-endian PPC64/Linux, and
using -ffp-contract=fast, make check reports one test failure, which is in
ode-initval2, with this error:
FAIL: bsimp/msbdf e5_bigt [0] (3.74930479547497807e-49 observed vs
3.75953510319630164e-49 expected) [1126]
FAIL: bsimp/msbdf e5_bigt [1] (6.24102529027298901e-21 observed vs
8.02997493696624951e-21 expected) [1127]
FAIL: bsimp/msbdf e5_bigt [2] (1.55381668382339737e-20 observed vs
1.27123535645485829e-20 expected) [1128]
FAIL: bsimp/msbdf e5_bigt [3] (5.6710668834932872e-65 observed vs
4.65237109886871277e-65 expected) [1129]
When compiling ode-initval2/test.c using -ffp-contract=on or -ffp-contract=off
instead, the test passes. Also, when compiling using -ffp-contract=fast but
placing '#pragma clang optimize off'/'#pragma clang optimize on' around only
the rhs_e5 function, the test also passes. I've examined the assembly produced
for the rhs_e5 (annotated below), and it looks okay. As a result, it seems
likely that this is a sensitivity of the test to the extra FMA precision.
Could the test be modified to make it less sensitive to FMA formation?
Thanks again,
Hal
int
rhs_e5 (double t, const double y[], double f[], void *params)
{
const double a = 7.89e-10;
const double b = 1.1e7;
const double c = 1.13e3;
const double m = 1.0e6;
extern int nfe;
nfe += 1;
f[0] = -a * y[0] - b * y[0] * y[2];
f[1] = a * y[0] - m * c * y[1] * y[2];
f[3] = b * y[0] * y[2] - c * y[3];
f[2] = f[1] - f[3];
return GSL_SUCCESS;
}
.section .rodata.cst4,"aM",@progbits,4
.align 2
.LCPI29_0:
.long 1260902592 # float 1.1E+7
.LCPI29_2:
.long 3464934621 # float -1.13E+9
.LCPI29_4:
.long 3297591296 # float -1130
.section .rodata.cst8,"aM",@progbits,8
.align 3
.LCPI29_1:
.quad -4752674066357206733 # double -7.8899999999999996E-10
.LCPI29_3:
.quad 4470697970497569075 # double 7.8899999999999996E-10
...
.Lfunc_begin29:
# BB#0: # %entry
mflr 0
std 31, -8(1)
std 0, 16(1)
stdu 1, -160(1)
addis 3, 2, address@hidden@ha
addis 7, 2, address@hidden@ha
mr 31, 1
ld 3, address@hidden@l(3)
addi 7, 7, address@hidden@l
lfs 0, 0(7)
** f0 = 1.1E+7 = b
lwz 6, 0(3)
addi 6, 6, 1
stw 6, 0(3)
addis 3, 2, address@hidden@ha
lfd 1, 0(4)
** f1 = y[0]
lfd 2, 16(4)
** f2 = y[2]
lfd 4, address@hidden@l(3)
** f4 = -7.8899999999999996E-10 = -a
addis 3, 2, address@hidden@ha
addi 3, 3, address@hidden@l
fmul 3, 1, 0
** f3 = b*y[0]
fmul 2, 3, 2
** f2 = b*y[0]*y[2]
fmsub 1, 1, 4, 2
** f1 = y[0]*(-a) - b*y[0]*y[2]
lfs 2, 0(3)
** f2 = -1.13E+9 = -c*m
addis 3, 2, address@hidden@ha
stfd 1, 0(5)
** f[0] = y[0]*(-a) - y[2]
lfd 3, 8(4)
** f3 = y[1]
lfd 4, 16(4)
** f4 = y[2]
lfd 5, 0(4)
** f5 = y[0]
fmul 2, 3, 2
** f2 = y[1]*(-c*m)
lfd 3, address@hidden@l(3)
** f3 = 7.8899999999999996E-10 = a
addis 3, 2, address@hidden@ha
addi 3, 3, address@hidden@l
fmul 2, 2, 4
** f2 = y[1]*(-c*m)*y[2]
fmadd 2, 5, 3, 2
** f2 = y[0]*a + y[1]*(-c*m)*y[2]
lfs 3, 0(3)
** f3 = -1130 = -c
addis 3, 2, address@hidden@ha
ld 3, address@hidden@l(3)
stfd 2, 8(5)
** f[1] = y[0]*a + y[1]*(-c*m)*y[2]
lfd 4, 0(4)
** f4 = y[0]
lfd 13, 24(4)
** f13 = y[3]
lfd 6, 16(4)
** f6 = y[2]
fmul 0, 4, 0
** f0 = b*y[0]
fmul 4, 13, 3
** f4 = y[3]*(-c)
fnmsub 5, 13, 3, 2
** f5 = -(y[3]*(-c) - f[1])
fmadd 3, 0, 6, 4
** f3 = b*y[0]*y[2] + y[3]*(-c)
fnmsub 4, 0, 6, 5
** f4 = -(b*y[0]*y[2] + y[3]*(-c) - f[1]) = -(f[3] - f[1]) = f[1] - f[3]
stfd 3, 24(5)
** f[3] = b*y[0]*y[2] + y[3]*(-c)
stfd 4, 16(5)
** f[2] = f[1] - f[3]
...
- [Bug-gsl] bsimp/msbdf e5_bigt in ode-initval2/test.c is FMA-sensitive,
Hal Finkel <=