[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: PSPP-BUG: moments_calculate error
From: |
Ben Pfaff |
Subject: |
Re: PSPP-BUG: moments_calculate error |
Date: |
Wed, 19 Dec 2012 22:16:47 -0800 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Wed, Dec 19, 2012 at 09:49:37PM -0800, Ben Pfaff wrote:
> John Darrington <address@hidden> writes:
>
> > I had a look at this one.
> >
> > The pertinent assertion is:
> >
> > /* After the second pass we can calculate any stat. We
> > don't support "online" computation during the second
> > pass, so As a simple self-check, the total weight for
> > the passes must agree. */
> > assert (m->w1 == m->w2);
> >
> > and when I ran it through gdb, I see:
> >
> > Breakpoint 1, moments_calculate (m=0x81430c0, weight=0xbffff1c0,
> > mean=0xbffff1b8, variance=0xbffff1b0, skewness=0xbffff1a8,
> > kurtosis=0xbffff1a0) at /home/john/pspp-master/src/math/moments.c:233
> > 233 assert (m->w1 == m->w2);
> > (gdb) print m
> > $2 = {max_moment = MOMENT_KURTOSIS, pass = 2, w1 = 2257.1583655975705, sum
> > = 970999467.71251988, mean = 430186.6818527166,
> > w2 = 2257.1583655975778, d1 = 6.8955123424530029e-06, d2 =
> > 1504554313897766, d3 = 2.7440683550398496e+21,
> > d4 = 9.0176522508330139e+27}
> >
> > So it would appear that this is simply a floating point
> > precision problem (which inevitably arises when doing equality
> > comparisons with floating points). So I suggest that this
> > assertion be deleted.
>
> w1 and w2 are each calculated as the sum of the same sequence of
> values in the same order. It's not obvious the results could
> differ, even slightly, without the sequences also differing in
> some way.
In an attempt to prove this hypothesis, I applied the following patch:
----------------------------------------------------------------------
diff --git a/src/math/moments.c b/src/math/moments.c
index 83cbbe4..24b9d80 100644
--- a/src/math/moments.c
+++ b/src/math/moments.c
@@ -20,6 +20,7 @@
#include <assert.h>
#include <math.h>
+#include <stdio.h>
#include <stdlib.h>
#include "data/val-type.h"
@@ -85,6 +86,7 @@ struct moments
double w1; /* Total weight for pass 1, so far. */
double sum; /* Sum of values so far. */
double mean; /* Mean = sum / w1. */
+ int c1;
/* Pass two. */
double w2; /* Total weight for pass 2, so far. */
@@ -92,6 +94,9 @@ struct moments
double d2; /* Sum of squared deviations from the mean. */
double d3; /* Sum of cubed deviations from the mean. */
double d4; /* Sum of (deviations from the mean)**4. */
+ int c2;
+
+ int instance;
};
/* Initializes moments M for calculating moment MAX_MOMENT and
@@ -99,11 +104,14 @@ struct moments
static void
init_moments (struct moments *m, enum moment max_moment)
{
+ static int instance;
+
assert (m != NULL);
assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
|| max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
m->max_moment = max_moment;
moments_clear (m);
+ m->instance = instance++;
}
/* Clears out a set of moments so that it can be reused for a new
@@ -113,6 +121,7 @@ moments_clear (struct moments *m)
{
m->pass = 1;
m->w1 = m->w2 = 0.;
+ m->c1 = m->c2 = 0;
m->sum = 0.;
}
@@ -146,6 +155,8 @@ moments_pass_one (struct moments *m, double value, double
weight)
{
m->sum += value * weight;
m->w1 += weight;
+ printf ("moment\t%d\t%d\t%f\n", m->instance, m->c1, weight);
+ m->c1++;
}
}
@@ -163,7 +174,7 @@ moments_pass_two (struct moments *m, double value, double
weight)
m->d1 = m->d2 = m->d3 = m->d4 = 0.;
}
- if (value != SYSMIS && weight >= 0.)
+ if (value != SYSMIS && weight > 0.)
{
double d = value - m->mean;
double d1_delta = d * weight;
@@ -184,6 +195,8 @@ moments_pass_two (struct moments *m, double value, double
weight)
}
}
m->w2 += weight;
+ printf ("moment\t%d\t%d\t%f\n", m->instance, m->c2, weight);
+ m->c2++;
}
}
@@ -230,6 +243,8 @@ moments_calculate (const struct moments *m,
pass, so As a simple self-check, the total weight for
the passes must agree. */
assert (m->pass == 2);
+ assert (m->c1 == m->c2);
+ printf ("c1=%d\n", m->c1);
assert (m->w1 == m->w2);
if (m->w2 > 0.)
----------------------------------------------------------------------
and ran:
pspp jov2012_report.sps |grep '^moment' | sort -n -k 2 -k 3 | uniq -c
|grep -v '^ *2'
which reported numerous significant differences in weights between the two
passes:
1 moment 242 1 0.877916
1 moment 242 1 1.334082
1 moment 242 2 1.012678
1 moment 242 2 1.334082
1 moment 242 3 0.881010
1 moment 242 3 1.310005
1 moment 242 4 0.593003
1 moment 242 4 1.012678
1 moment 242 5 0.610125
1 moment 242 5 0.881010
1 moment 242 6 0.825404
1 moment 242 6 0.949728
1 moment 242 7 0.571481
1 moment 242 7 0.593003
1 moment 242 8 0.610125
1 moment 242 8 0.970796
1 moment 242 9 0.825404
1 moment 242 9 0.948538
1 moment 242 10 0.571481
1 moment 242 10 1.214519
1 moment 242 11 0.940354
1 moment 242 11 0.970796
1 moment 242 12 0.881010
1 moment 242 12 0.948538
1 moment 242 13 0.555444
1 moment 242 13 1.214519
1 moment 242 14 0.571481
1 moment 242 14 0.940354
1 moment 242 15 0.877916
1 moment 242 15 0.880794
1 moment 242 16 0.881010
1 moment 242 16 1.249585
1 moment 242 17 0.555444
1 moment 242 17 1.249585
1 moment 242 18 0.571481
1 moment 242 18 1.249585
1 moment 242 19 0.925553
...and so on...
There may be a real bug here.