pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: type 3 patches


From: John Darrington
Subject: Re: type 3 patches
Date: Mon, 17 Jan 2011 13:27:29 +0000
User-agent: Mutt/1.5.18 (2008-05-17)

My initial reaction is to prefer the first patch.  It seems simpler.

However I'm a bit confused about this :

     +      gsl_vector_set (result, dropped, 
     +                gsl_matrix_get (small_cov, 0, 0)
     +                - gsl_matrix_get (small_cov, 0, 0));

Doesn't that mean that the result will always be the zero vector?


To answer the question about in which file to put the function, I suggest
that for now we put it in glm.c  That's where we know it's going to be used.
We can move it later if necessary.

J'

On Wed, Jan 12, 2011 at 09:57:20PM -0500, Jason Stover wrote:
     Two patches for type 3 sums of squares are below. I'm not sure
     which is best.
     
     First, notice computation of type 3 sums of squares requires 
     computation of many least-squares surfaces (one for each explanatory
     variable).
     
     One patch changes glm.c only to allow the computation. The other adds a
     function to linreg.c. The one for glm.c alone avoids teaching glm.c
     about linreg.c, but doesn't provide a function that other procedures
     can call.  The patch to linreg.c allows procedures to just call
     linreg_type3_ss, but requires those procedures to use linreg.c.
     
     linreg.c seems kind of clunky now (I had such high hopes for it), so
     if it seems best to have a function in src/math that can do the job
     without appealing to linreg.c, then we would need to put that function
     in covariance.c or make a new file. But putting it in covariance.c
     would mean teaching covariance.c about sweep.c.
     
     So I don't know. But I wonder what other procedures will need to know
     about type 3 sums of squares. If they don't compute a linear model, I
     don't think their computation of type 3 sums of squares will be
     identical to that for the linear model anyway -- that is, they'll have
     their own computations, meaning their own functions.
     
     (BTW: I have not tested these beyond compilation.)
     
     Patch below:
     
     diff --git a/src/language/stats/glm.c b/src/language/stats/glm.c
     index 862f49c..b074c56 100644
     --- a/src/language/stats/glm.c
     +++ b/src/language/stats/glm.c
     @@ -191,8 +191,50 @@ cmd_glm (struct lexer *lexer, struct dataset *ds)
      }
      
      static  void dump_matrix (const gsl_matrix *m);
     +static void get_ssq (gsl_matrix * cov, gsl_vector *ssq);
      
      static void
     +get_ssq (gsl_matrix * cov, gsl_vector * ssq)
     +{
     +  size_t i;
     +  size_t j;
     +  size_t dropped;
     +  size_t dep_column;
     +  gsl_matrix * small_cov = NULL;
     +
     +  small_cov = gsl_matrix_alloc (cov->size1 - 1, cov->size2 - 1);
     +  for (dropped = 1; dropped < cov->size1; dropped++)
     +    {
     +      for (i = 0; i < dropped; i++)
     +  {
     +    for (j = 0; j < dropped; j++)
     +      {
     +        gsl_matrix_set (small_cov, i, j, gsl_matrix_get (cov, i, j));
     +      }
     +    for (j = dropped + 1; j < small_cov->size2; j++)
     +      {
     +        gsl_matrix_set (small_cov, i, j - 1, gsl_matrix_get (cov, i, j));
     +      }
     +  }
     +      for (i = dropped + 1; i < small_cov->size1; i++)
     +  {
     +    for (j = 0; j < dropped; j++)
     +      {
     +        gsl_matrix_set (small_cov, i - 1, j, gsl_matrix_get (cov, i, j));
     +      }
     +    for (j = dropped + 1; j < small_cov->size2; j++)
     +      {
     +        gsl_matrix_set (small_cov, i - 1, j - 1, gsl_matrix_get (cov, i, 
j));
     +      }
     +  }
     +      reg_sweep (small_cov, 0);
     +      gsl_vector_set (result, dropped, 
     +                gsl_matrix_get (small_cov, 0, 0)
     +                - gsl_matrix_get (small_cov, 0, 0));
     +    }
     +  
     +}
     +static void
      run_glm (const struct glm_spec *cmd, struct casereader *input, const 
struct dataset *ds)
      {
        int v;
     @@ -255,12 +297,18 @@ run_glm (const struct glm_spec *cmd, struct 
casereader *input, const struct data
      
        {
          gsl_matrix *cm = covariance_calculate_unnormalized (cov);
     +    gsl_vector *ssq;
     +
     +    ssq = gsl_vector_alloc (cov->size1 - 1);
     +
      
          dump_matrix (cm);
      
          ws.total_ssq = gsl_matrix_get (cm, 0, 0);
      
          reg_sweep (cm, 0);
     +    
     +    get_ssq (cov, ssq);
      
          dump_matrix (cm);
      
     diff --git a/src/math/linreg.c b/src/math/linreg.c
     index 63d1a0f..83e9fde 100644
     --- a/src/math/linreg.c
     +++ b/src/math/linreg.c
     @@ -386,3 +386,50 @@ linreg_fit (const gsl_matrix *cov, linreg *l)
          }
      }
      
     +/*
     +  Call this function after calling linreg_fit.
     + */
     +void
     +linreg_type3_ss (const gsl_matrix * cov, linreg * l, gsl_vector * result)
     +{
     +  size_t i;
     +  size_t j;
     +  size_t dropped;
     +  size_t dep_column;
     +  gsl_matrix * small_cov = NULL;
     +
     +  small_cov = gsl_matrix_alloc (cov->size1 - 1, cov->size2 - 1);
     +  for (dropped = 0; dropped < cov->size1; dropped++)
     +    {
     +      if (dropped != l->dependent_column)
     +  {
     +    for (i = 0; i < dropped; i++)
     +      {
     +        for (j = 0; j < dropped; j++)
     +          {
     +            gsl_matrix_set (small_cov, i, j, gsl_matrix_get (cov, i, j));
     +          }
     +        for (j = dropped + 1; j < small_cov->size2; j++)
     +          {
     +            gsl_matrix_set (small_cov, i, j - 1, gsl_matrix_get (cov, i, 
j));
     +          }
     +      }
     +    for (i = dropped + 1; i < small_cov->size1; i++)
     +      {
     +        for (j = 0; j < dropped; j++)
     +          {
     +            gsl_matrix_set (small_cov, i - 1, j, gsl_matrix_get (cov, i, 
j));
     +          }
     +        for (j = dropped + 1; j < small_cov->size2; j++)
     +          {
     +            gsl_matrix_set (small_cov, i - 1, j - 1, gsl_matrix_get (cov, 
i, j));
     +          }
     +      }
     +  }
     +      dep_column = (l->dependent_column < dropped) ? l->dependent_column 
: (l->dependent_column - 1);
     +      reg_sweep (small_cov, dep_column);
     +      gsl_vector_set (result, dropped, 
     +                gsl_matrix_get (small_cov, small_cov->size1 - 1, 
small_cov->size2 - 1) 
     +                - linreg_sse (l));
     +    }
     +}
     
     _______________________________________________
     pspp-dev mailing list
     address@hidden
     http://lists.gnu.org/mailman/listinfo/pspp-dev

-- 
PGP Public key ID: 1024D/2DE827B3 
fingerprint = 8797 A26D 0854 2EAB 0285  A290 8A67 719C 2DE8 27B3
See http://pgp.mit.edu or any PGP keyserver for public key.


Attachment: signature.asc
Description: Digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]