pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: casefile random access


From: John Darrington
Subject: Re: casefile random access
Date: Tue, 6 Jun 2006 09:36:38 +0800
User-agent: Mutt/1.5.9i

On Mon, Jun 05, 2006 at 05:38:53PM -0700, Ben Pfaff wrote:
     John Darrington <address@hidden> writes:
     
     > If you're able and willing to write a random access casereader, then
     > that will certainly make the gui code simpler.  I've got a change
     > almost ready to commit, which will remove the GUI's hard limit on the
     > number of variables.  The next major step for the GUI is to remove the
     > limit on the number of cases, so I'm just about ready to use such a
     > casereader.
     >
     >
     > My idea of the interface would be something along the lines of the
     > following, but you may have better ideas.
     
     Here's an implementation with a different interface.  If you like
     it, I'll check it in.  Let me know.

I suppose I can use it.  I'd be immediately following every call to
casereader_seek with a call to casereader_read.  In many, but not all,
instances the GUI will be reading sequentially, so the reader will
already be positioned at the requested case.  Will your implementation
behave optimally in this situation?  Also, when scrolling backwards
there'll be a lot of seeking to the case two positions before the current
position.  Is it worth having some means of hinting to the casereader
that the next read will probably be the case preceding the case
currently being read, instead of the following one?

On the other hand perhaps we should leave such optimisations until we
know they're justified.
     
     Index: src/data/casefile.h
     ===================================================================
     RCS file: /cvsroot/pspp/pspp/src/data/casefile.h,v
     retrieving revision 1.2
     diff -u -p -r1.2 casefile.h
     --- src/data/casefile.h    10 May 2006 04:08:25 -0000      1.2
     +++ src/data/casefile.h    6 Jun 2006 00:36:41 -0000
     @@ -44,12 +44,15 @@ bool casefile_append_xfer (struct casefi
      void casefile_mode_reader (struct casefile *);
      struct casereader *casefile_get_reader (const struct casefile *);
      struct casereader *casefile_get_destructive_reader (struct casefile *);
     +struct casereader *casefile_get_random_reader (const struct casefile *);
      
      const struct casefile *casereader_get_casefile (const struct casereader 
*);
      bool casereader_read (struct casereader *, struct ccase *);
      bool casereader_read_xfer (struct casereader *, struct ccase *);
      void casereader_destroy (struct casereader *);
      
     +void casereader_seek (struct casereader *, unsigned long case_idx);
     +
      unsigned long casereader_cnum(const struct casereader *);
      
      #endif /* casefile.h */
     Index: src/data/casefile.c
     ===================================================================
     RCS file: /cvsroot/pspp/pspp/src/data/casefile.c,v
     retrieving revision 1.9
     diff -u -p -r1.9 casefile.c
     --- src/data/casefile.c    10 May 2006 04:08:25 -0000      1.9
     +++ src/data/casefile.c    6 Jun 2006 00:36:41 -0000
     @@ -150,6 +150,7 @@ struct casereader 
          struct casefile *cf;                /* Our casefile. */
          unsigned long case_idx;             /* Case number of current case. */
          bool destructive;                   /* Is this a destructive reader? 
*/
     +    bool random;                        /* Is this a random reader? */
      
          /* Disk storage. */
          int fd;                             /* File descriptor. */
     @@ -174,10 +175,11 @@ static size_t case_bytes;
      static void register_atexit (void);
      static void exit_handler (void);
      
     -static void reader_open_file (struct casereader *reader);
     -static void write_case_to_disk (struct casefile *cf, const struct ccase 
*c);
     -static void flush_buffer (struct casefile *cf);
     -static bool fill_buffer (struct casereader *reader);
     +static void reader_open_file (struct casereader *);
     +static void write_case_to_disk (struct casefile *, const struct ccase *);
     +static void flush_buffer (struct casefile *);
     +static void seek_and_fill_buffer (struct casereader *);
     +static bool fill_buffer (struct casereader *);
      
      static void io_error (struct casefile *, const char *, ...)
           PRINTF_FORMAT (2, 3);
     @@ -530,6 +532,17 @@ casefile_get_reader (const struct casefi
        return reader;
      }
      
     +/* Creates and returns a random casereader for CF.  A random
     +   casereader can be used to randomly read the cases in a
     +   casefile. */
     +struct casereader *
     +casefile_get_random_reader (const struct casefile *cf) 
     +{
     +  struct casereader *reader = casefile_get_reader (cf);
     +  reader->random = true;
     +  return reader;
     +}
     +
      /* Creates and returns a destructive casereader for CF.  Like a
         normal casereader, a destructive casereader sequentially reads
         the cases in a casefile.  Unlike a normal casereader, a
     @@ -556,8 +569,6 @@ static void
      reader_open_file (struct casereader *reader) 
      {
        struct casefile *cf = reader->cf;
     -  off_t file_ofs;
     -
        if (!cf->ok || reader->case_idx >= cf->case_cnt)
          return;
      
     @@ -585,6 +596,19 @@ reader_open_file (struct casereader *rea
            memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer); 
          }
      
     +  case_create (&reader->c, cf->value_cnt);
     +
     +  seek_and_fill_buffer (reader);
     +}
     +
     +/* Seeks the backing file for READER to the proper position and
     +   refreshes the buffer contents. */
     +static void
     +seek_and_fill_buffer (struct casereader *reader) 
     +{
     +  struct casefile *cf = reader->cf;
     +  off_t file_ofs;
     +
        if (cf->value_cnt != 0) 
          {
            size_t buffer_case_cnt = cf->buffer_size / cf->value_cnt;
     @@ -601,8 +625,6 @@ reader_open_file (struct casereader *rea
      
        if (cf->case_cnt > 0 && cf->value_cnt > 0)
          fill_buffer (reader);
     -
     -  case_create (&reader->c, cf->value_cnt);
      }
      
      /* Fills READER's buffer by reading a block from disk. */
     @@ -696,6 +718,21 @@ casereader_read_xfer (struct casereader 
          }
      }
      
     +/* Sets the next case to be read by READER to CASE_IDX,
     +   which must be less than the number of cases in the casefile.
     +   Allowed only for random readers. */
     +void
     +casereader_seek (struct casereader *reader, unsigned long case_idx) 
     +{
     +  assert (reader != NULL);
     +  assert (reader->random);
     +  assert (case_idx < reader->cf->case_cnt);
     +
     +  reader->case_idx = case_idx;
     +  if (reader->cf->storage == DISK)
     +    seek_and_fill_buffer (reader);
     +}
     +
      /* Destroys READER. */
      void
      casereader_destroy (struct casereader *reader)
     Index: src/language/tests/casefile-test.c
     ===================================================================
     RCS file: /cvsroot/pspp/pspp/src/language/tests/casefile-test.c,v
     retrieving revision 1.2
     diff -u -p -r1.2 casefile-test.c
     --- src/language/tests/casefile-test.c     15 Mar 2006 03:29:11 -0000      
1.2
     +++ src/language/tests/casefile-test.c     6 Jun 2006 00:36:41 -0000
     @@ -21,11 +21,14 @@
      #include <data/casefile.h>
      #include <data/case.h>
      
     +#include <gsl/gsl_randist.h>
      #include <gsl/gsl_rng.h>
      #include <stdarg.h>
      #include <language/command.h>
      #include <language/lexer/lexer.h>
      
     +#include "xalloc.h"
     +
      static void test_casefile (int pattern, size_t value_cnt, size_t 
case_cnt);
      static void get_random_case (struct ccase *, size_t value_cnt,
                                   size_t case_idx);
     @@ -133,6 +136,24 @@ test_casefile (int pattern, size_t value
          casereader_destroy (r1);
        if (pattern != 2)
          casereader_destroy (r2);
     +  if (pattern > 3) 
     +    {
     +      int *order;
     +      r1 = casefile_get_random_reader (cf);
     +      order = xmalloc (sizeof *order * case_cnt);
     +      for (i = 0; i < case_cnt; i++)
     +        order[i] = i;
     +      if (case_cnt > 0)
     +        gsl_ran_shuffle (rng, order, case_cnt, sizeof *order);
     +      for (i = 0; i < case_cnt; i++)
     +        {
     +          int case_idx = order[i];
     +          casereader_seek (r1, case_idx);
     +          read_and_verify_random_case (cf, r1, case_idx);
     +        }
     +      casereader_destroy (r1);
     +      free (order);
     +    }
        if (pattern > 2) 
          {
            r1 = casefile_get_destructive_reader (cf);
     
     -- 
     "J'avais trouvé ma religion :
      rien ne me parut plus important qu'un livre.
      La bibliothèque, j'y voyais un temple."
     --Jean-Paul Sartre

-- 
PGP Public key ID: 1024D/2DE827B3 
fingerprint = 8797 A26D 0854 2EAB 0285  A290 8A67 719C 2DE8 27B3
See http://pgp.mit.edu or any PGP keyserver for public key.


Attachment: signature.asc
Description: Digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]