bug-gnu-pspp
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: PSPP-BUG: moments_calculate error


From: Ben Pfaff
Subject: Re: PSPP-BUG: moments_calculate error
Date: Wed, 19 Dec 2012 22:16:47 -0800
User-agent: Mutt/1.5.21 (2010-09-15)

On Wed, Dec 19, 2012 at 09:49:37PM -0800, Ben Pfaff wrote:
> John Darrington <address@hidden> writes:
> 
> > I had a look at this one.
> >
> > The pertinent assertion is:
> >
> >      /* After the second pass we can calculate any stat.  We
> >          don't support "online" computation during the second
> >          pass, so As a simple self-check, the total weight for
> >          the passes must agree. */
> >       assert (m->w1 == m->w2);
> >
> > and when I ran it through gdb, I see:
> >
> > Breakpoint 1, moments_calculate (m=0x81430c0, weight=0xbffff1c0, 
> > mean=0xbffff1b8, variance=0xbffff1b0, skewness=0xbffff1a8, 
> >     kurtosis=0xbffff1a0) at /home/john/pspp-master/src/math/moments.c:233
> > 233           assert (m->w1 == m->w2);
> > (gdb) print m
> > $2 = {max_moment = MOMENT_KURTOSIS, pass = 2, w1 = 2257.1583655975705, sum 
> > = 970999467.71251988, mean = 430186.6818527166, 
> >   w2 = 2257.1583655975778, d1 = 6.8955123424530029e-06, d2 = 
> > 1504554313897766, d3 = 2.7440683550398496e+21, 
> >   d4 = 9.0176522508330139e+27}
> >
> > So it would appear that this is simply a floating point
> > precision problem (which inevitably arises when doing equality
> > comparisons with floating points).  So I suggest that this
> > assertion be deleted.
> 
> w1 and w2 are each calculated as the sum of the same sequence of
> values in the same order.  It's not obvious the results could
> differ, even slightly, without the sequences also differing in
> some way.

In an attempt to prove this hypothesis, I applied the following patch:

----------------------------------------------------------------------
diff --git a/src/math/moments.c b/src/math/moments.c
index 83cbbe4..24b9d80 100644
--- a/src/math/moments.c
+++ b/src/math/moments.c
@@ -20,6 +20,7 @@
 
 #include <assert.h>
 #include <math.h>
+#include <stdio.h>
 #include <stdlib.h>
 
 #include "data/val-type.h"
@@ -85,6 +86,7 @@ struct moments
     double w1;                  /* Total weight for pass 1, so far. */
     double sum;                 /* Sum of values so far. */
     double mean;                /* Mean = sum / w1. */
+    int c1;
 
     /* Pass two. */
     double w2;                  /* Total weight for pass 2, so far. */
@@ -92,6 +94,9 @@ struct moments
     double d2;                  /* Sum of squared deviations from the mean. */
     double d3;                  /* Sum of cubed deviations from the mean. */
     double d4;                  /* Sum of (deviations from the mean)**4. */
+    int c2;
+
+    int instance;
   };
 
 /* Initializes moments M for calculating moment MAX_MOMENT and
@@ -99,11 +104,14 @@ struct moments
 static void
 init_moments (struct moments *m, enum moment max_moment)
 {
+  static int instance;
+  
   assert (m != NULL);
   assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
           || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
   m->max_moment = max_moment;
   moments_clear (m);
+  m->instance = instance++;
 }
 
 /* Clears out a set of moments so that it can be reused for a new
@@ -113,6 +121,7 @@ moments_clear (struct moments *m)
 {
   m->pass = 1;
   m->w1 = m->w2 = 0.;
+  m->c1 = m->c2 = 0;
   m->sum = 0.;
 }
 
@@ -146,6 +155,8 @@ moments_pass_one (struct moments *m, double value, double 
weight)
     {
       m->sum += value * weight;
       m->w1 += weight;
+      printf ("moment\t%d\t%d\t%f\n", m->instance, m->c1, weight);
+      m->c1++;
     }
 }
 
@@ -163,7 +174,7 @@ moments_pass_two (struct moments *m, double value, double 
weight)
       m->d1 = m->d2 = m->d3 = m->d4 = 0.;
     }
 
-  if (value != SYSMIS && weight >= 0.)
+  if (value != SYSMIS && weight > 0.)
     {
       double d = value - m->mean;
       double d1_delta = d * weight;
@@ -184,6 +195,8 @@ moments_pass_two (struct moments *m, double value, double 
weight)
             }
         }
       m->w2 += weight;
+      printf ("moment\t%d\t%d\t%f\n", m->instance, m->c2, weight);
+      m->c2++;
     }
 }
 
@@ -230,6 +243,8 @@ moments_calculate (const struct moments *m,
          pass, so As a simple self-check, the total weight for
          the passes must agree. */
       assert (m->pass == 2);
+      assert (m->c1 == m->c2);
+      printf ("c1=%d\n", m->c1);
       assert (m->w1 == m->w2);
 
       if (m->w2 > 0.)
----------------------------------------------------------------------

and ran:

        pspp jov2012_report.sps  |grep '^moment' | sort -n -k 2 -k 3 | uniq -c 
|grep -v '^ *2'

which reported numerous significant differences in weights between the two
passes:

      1 moment  242     1       0.877916
      1 moment  242     1       1.334082
      1 moment  242     2       1.012678
      1 moment  242     2       1.334082
      1 moment  242     3       0.881010
      1 moment  242     3       1.310005
      1 moment  242     4       0.593003
      1 moment  242     4       1.012678
      1 moment  242     5       0.610125
      1 moment  242     5       0.881010
      1 moment  242     6       0.825404
      1 moment  242     6       0.949728
      1 moment  242     7       0.571481
      1 moment  242     7       0.593003
      1 moment  242     8       0.610125
      1 moment  242     8       0.970796
      1 moment  242     9       0.825404
      1 moment  242     9       0.948538
      1 moment  242     10      0.571481
      1 moment  242     10      1.214519
      1 moment  242     11      0.940354
      1 moment  242     11      0.970796
      1 moment  242     12      0.881010
      1 moment  242     12      0.948538
      1 moment  242     13      0.555444
      1 moment  242     13      1.214519
      1 moment  242     14      0.571481
      1 moment  242     14      0.940354
      1 moment  242     15      0.877916
      1 moment  242     15      0.880794
      1 moment  242     16      0.881010
      1 moment  242     16      1.249585
      1 moment  242     17      0.555444
      1 moment  242     17      1.249585
      1 moment  242     18      0.571481
      1 moment  242     18      1.249585
      1 moment  242     19      0.925553
...and so on...

There may be a real bug here.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]