octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: gzipped data files


From: John W. Eaton
Subject: Re: gzipped data files
Date: Fri, 29 Apr 2005 16:46:13 -0400

On 29-Apr-2005, David Bateman wrote:

| Dmitri A. Sergatskov wrote:
| 
| > Since we now have all this gzip machinery in place,
| > would it be easy to add gzip option to fopen()?
| > That is, I would like to be able to do something like:
| >
| > fid = fopen("data.dat.gz","rz")   
| >
| > a = fscanf(fid,"%f")
| >
| > ...
| >
| > Just asking...
| >
| > Dmitri.
| 
| Something like that is relatively easy, as gzopen just a C file 
| descriptor.

?  Does it really work to pass a gzFile object to any of the stdio
functions that expect a FILE pointer?

Anyway, try the following patch.

Thanks,

jwe

src/ChangeLog:

2005-04-29  John W. Eaton  <address@hidden>

        * c-file-ptr-stream.h (c_file_ptr_stream): New template class,
        converted from i_c_file_ptr_stream.
        (i_c_file_ptr_stream, o_c_file_ptr_stream, io_c_file_ptr_stream):
        Now typedefs.
        (i_c_zfile_ptr_stream, o_c_zfile_ptr_stream, io_c_zfile_ptr_stream):
        New typedefs.
        * c-file-ptr-stream.h, c-file-ptr-stream.cc (c_zfile_ptr_buf):
        New class.

        * oct-stdstrm.h (class octave_tstdiostream): New template class,
        converted from octave_stdiostream.
        (octave_stdiostream): Now a typedef.
        [HAVE_ZLIB] (octave_zstdiostream): New typedef.
        * oct-stdstrm.cc: Delete.
        * Makefile.in (DIST_SRC): Remove it from the list.


Index: src/Makefile.in
===================================================================
RCS file: /cvs/octave/src/Makefile.in,v
retrieving revision 1.375
diff -u -r1.375 Makefile.in
--- src/Makefile.in     29 Apr 2005 13:04:25 -0000      1.375
+++ src/Makefile.in     29 Apr 2005 20:35:46 -0000
@@ -168,7 +168,7 @@
        ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc \
        ls-oct-binary.cc ls-utils.cc main.c mappers.cc matherr.c \
        oct-fstrm.cc oct-hist.cc oct-iostrm.cc oct-map.cc \
-       oct-obj.cc oct-prcstrm.cc oct-procbuf.cc oct-stdstrm.cc \
+       oct-obj.cc oct-prcstrm.cc oct-procbuf.cc \
        oct-stream.cc zfstream.cc oct-strstrm.cc oct-lvalue.cc pager.cc \
        parse.y pr-output.cc procstream.cc sighandlers.cc \
        siglist.c sparse-xdiv.cc sparse-xpow.cc strcasecmp.c \
Index: src/c-file-ptr-stream.cc
===================================================================
RCS file: /cvs/octave/src/c-file-ptr-stream.cc,v
retrieving revision 1.16
diff -u -r1.16 c-file-ptr-stream.cc
--- src/c-file-ptr-stream.cc    26 Apr 2005 19:24:32 -0000      1.16
+++ src/c-file-ptr-stream.cc    29 Apr 2005 20:35:46 -0000
@@ -190,9 +190,158 @@
   return retval;
 }
 
+#ifdef HAVE_ZLIB
+
+c_zfile_ptr_buf::~c_zfile_ptr_buf (void)
+{
+  close ();
+}
+
+// XXX FIXME XXX -- I'm sure there is room for improvement here...
+
+c_zfile_ptr_buf::int_type
+c_zfile_ptr_buf::overflow (int_type c)
+{
+#if defined (CXX_ISO_COMPLIANT_LIBRARY)
+  if (f)
+    return (c != traits_type::eof ()) ? gzputc (f, c) : flush ();
+  else
+    return traits_type::not_eof (c);
+#else
+  if (f)
+    return (c != EOF) ? gzputc (f, c) : flush ();
+  else
+    return EOF;
+#endif
+}
+
+c_zfile_ptr_buf::int_type
+c_zfile_ptr_buf::underflow_common (bool bump)
+{
+  if (f)
+    {
+      int_type c = gzgetc (f);
+
+      if (! bump
+#if defined (CXX_ISO_COMPLIANT_LIBRARY)
+         && c != traits_type::eof ())
+#else
+         && c != EOF)
+#endif
+       gzungetc (c, f);
+
+      return c;
+    }
+  else
+#if defined (CXX_ISO_COMPLIANT_LIBRARY)
+    return traits_type::eof ();
+#else
+    return EOF;
+#endif
+}
+
+c_zfile_ptr_buf::int_type
+c_zfile_ptr_buf::pbackfail (int_type c)
+{
+#if defined (CXX_ISO_COMPLIANT_LIBRARY)
+  return (c != traits_type::eof () && f) ? gzungetc (c, f) : 
+    traits_type::not_eof (c);
+#else
+  return (c != EOF && f) ? gzungetc (c, f) : EOF;
+#endif
+}
+
+std::streamsize
+c_zfile_ptr_buf::xsputn (const char* s, std::streamsize n)
+{
+  if (f)
+    return gzwrite (f, s, n);
+  else
+    return 0;
+}
+
+std::streamsize
+c_zfile_ptr_buf::xsgetn (char *s, std::streamsize n)
+{
+  if (f)
+    return gzread (f, s, n);
+  else
+    return 0;
+}
+
+std::streampos
+c_zfile_ptr_buf::seekoff (std::streamoff offset, std::ios::seekdir dir,
+                        std::ios::openmode)
+{
+  // XXX FIXME XXX
+#if 0
+  if (f)
+    {
+      gzseek (f, offset, seekdir_to_whence (dir));
+
+      return gztell (f);
+    }
+  else
+    return 0;
+#endif
+  return -1;
+}
+
+std::streampos
+c_zfile_ptr_buf::seekpos (std::streampos offset, std::ios::openmode)
+{
+  // XXX FIXME XXX
+#if 0  
+  if (f)
+    {
+      gzseek (f, offset, SEEK_SET);
+
+      return gztell (f);
+    }
+  else
+    return 0;
+#endif
+  return -1;
+}
+
+int
+c_zfile_ptr_buf::sync (void)
+{
+  flush ();
+
+  return 0;
+}
+
+int
+c_zfile_ptr_buf::flush (void)
+{
+  // XXX FIXME XXX -- do we need something more complex here, passing
+  // something other than 0 for the second argument to gzflush and
+  // checking the return value, etc.?
+
+  return f ? gzflush (f, 0) : EOF;
+}
+
+int
+c_zfile_ptr_buf::close (void)
+{
+  int retval = -1;
+
+  flush ();
+
+  if (f)
+    {
+      retval = cf (f);
+      f = 0;
+    }
+
+  return retval;
+}
+
+#endif
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
 ;;; End: ***
 */
-
Index: src/c-file-ptr-stream.h
===================================================================
RCS file: /cvs/octave/src/c-file-ptr-stream.h,v
retrieving revision 1.19
diff -u -r1.19 c-file-ptr-stream.h
--- src/c-file-ptr-stream.h     26 Apr 2005 19:24:32 -0000      1.19
+++ src/c-file-ptr-stream.h     29 Apr 2005 20:35:47 -0000
@@ -94,18 +94,21 @@
   int_type underflow_common (bool);
 };
 
+// XXX FIXME XXX -- the following three classes could probably share
+// some code...
+
+template <typename STREAM_T, typename FILE_T, typename BUF_T>
 class
-i_c_file_ptr_stream : public std::istream
+c_file_ptr_stream : public STREAM_T
 {
 public:
 
-  i_c_file_ptr_stream (FILE* f,
-                      c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
-    : std::istream (0), buf (new c_file_ptr_buf (f, cf)) { init (buf); }
+  c_file_ptr_stream (FILE_T f, typename BUF_T::close_fcn cf = BUF_T::fclose)
+    : STREAM_T (0), buf (new BUF_T (f, cf)) { init (buf); }
 
-  ~i_c_file_ptr_stream (void) { delete buf; buf = 0; }
+  ~c_file_ptr_stream (void) { delete buf; buf = 0; }
 
-  c_file_ptr_buf *rdbuf (void) { return buf; }
+  BUF_T *rdbuf (void) { return buf; }
 
   void close (void) { if (buf) buf->close (); }
 
@@ -114,67 +117,96 @@
 
   long tell (void) { return buf ? buf->tell () : -1; }
 
-  void clear (void) { if (buf) buf->clear (); std::istream::clear (); }
+  void clear (void) { if (buf) buf->clear (); STREAM_T::clear (); }
 
 private:
 
-  c_file_ptr_buf *buf;
+  BUF_T *buf;
 };
 
+typedef c_file_ptr_stream<std::istream, FILE *, c_file_ptr_buf> i_c_file_ptr_stream;
+typedef c_file_ptr_stream<std::ostream, FILE *, c_file_ptr_buf> o_c_file_ptr_stream;
+typedef c_file_ptr_stream<std::iostream, FILE *, c_file_ptr_buf> io_c_file_ptr_stream;
+
+#ifdef HAVE_ZLIB
+
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
 class
-o_c_file_ptr_stream : public std::ostream
+c_zfile_ptr_buf : public std::streambuf
 {
 public:
 
-  o_c_file_ptr_stream (FILE* f,
-                      c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
-    : std::ostream (0), buf (new c_file_ptr_buf (f, cf)) { init (buf); }
+#if !defined (CXX_ISO_COMPLIANT_LIBRARY)
+  typedef int int_type;
+#else
+  typedef std::streambuf::int_type int_type;
+#endif
 
-  ~o_c_file_ptr_stream (void) { delete buf; buf = 0; }
+  typedef int (*close_fcn) (gzFile);
 
-  c_file_ptr_buf *rdbuf (void) { return buf; }
+  gzFile stdiofile (void) { return f; }
 
-  void close (void) { if (buf) buf->close (); }
+  c_zfile_ptr_buf (gzFile f_arg, close_fcn cf_arg = fclose)
+    : std::streambuf (), f (f_arg), cf (cf_arg)
+    { }
 
-  int seek (long offset, int origin)
-    { return buf ? buf->seek (offset, origin) : -1; }
+  ~c_zfile_ptr_buf (void);
 
-  long tell (void) { return buf ? buf->tell () : -1; }
+  int_type overflow (int_type);
 
-  void clear (void) { if (buf) buf->clear (); std::ostream::clear (); }
+  int_type underflow (void) { return underflow_common (false); }
 
-private:
+  int_type uflow (void) { return underflow_common (true); }
 
-  c_file_ptr_buf *buf;
-};
+  int_type pbackfail (int_type);
 
-class
-io_c_file_ptr_stream : public std::iostream
-{
-public:
+  std::streamsize xsputn (const char*, std::streamsize);
 
-  io_c_file_ptr_stream (FILE* f,
-                       c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
-    : std::iostream (0), buf (new c_file_ptr_buf (f, cf)) { init (buf); }
+  std::streamsize xsgetn (char *, std::streamsize);
 
-  ~io_c_file_ptr_stream (void) { delete buf; buf = 0; }
+  std::streampos seekoff (std::streamoff, std::ios::seekdir,
+                         std::ios::openmode = std::ios::in | std::ios::out);
+  
+  std::streampos seekpos (std::streampos,
+                         std::ios::openmode = std::ios::in | std::ios::out);
 
-  c_file_ptr_buf *rdbuf (void) { return buf; }
+  int sync (void);
 
-  void close (void) { if (buf) buf->close (); }
+  int flush (void);
+
+  int close (void);
+
+  int file_number () const { return -1; }
 
   int seek (long offset, int origin)
-    { return buf ? buf->seek (offset, origin) : -1; }
+    { return f ? gzseek (f, offset, origin) : -1; }
 
-  long tell (void) { return buf ? buf->tell () : -1; }
+  long tell (void) { return f ? gztell (f) : -1; }
+
+  void clear (void) { if (f) gzclearerr (f); }
 
-  void clear (void) { if (buf) buf->clear (); std::iostream::clear (); }
+  static int fclose (gzFile f) { return ::gzclose (f); }
+
+protected:
+
+  gzFile f;
+
+  close_fcn cf;
 
 private:
 
-  c_file_ptr_buf *buf;
+  int_type underflow_common (bool);
 };
 
+typedef c_file_ptr_stream<std::istream, gzFile, c_zfile_ptr_buf> i_c_zfile_ptr_stream;
+typedef c_file_ptr_stream<std::ostream, gzFile, c_zfile_ptr_buf> o_c_zfile_ptr_stream;
+typedef c_file_ptr_stream<std::iostream, gzFile, c_zfile_ptr_buf> io_c_zfile_ptr_stream;
+
+#endif
+
 #endif
 
 /*
Index: src/file-io.cc
===================================================================
RCS file: /cvs/octave/src/file-io.cc,v
retrieving revision 1.166
diff -u -r1.166 file-io.cc
--- src/file-io.cc      29 Apr 2005 04:47:55 -0000      1.166
+++ src/file-io.cc      29 Apr 2005 20:35:47 -0000
@@ -53,6 +53,10 @@
 #include <unistd.h>
 #endif
 
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
 #include "error.h"
 #include "file-ops.h"
 #include "lo-ieee.h"
@@ -126,42 +130,58 @@
 }
 
 static std::ios::openmode
-fopen_mode_to_ios_mode (const std::string& mode)
+fopen_mode_to_ios_mode (const std::string& mode_arg)
 {
   std::ios::openmode retval = std::ios::in;
 
-  if (! mode.empty ())
+  if (! mode_arg.empty ())
     {
       // Could probably be faster, but does it really matter?
 
-      if (mode == "rt")
-       retval = std::ios::in;
-      else if (mode == "wt")
-       retval = std::ios::out | std::ios::trunc;
-      else if (mode == "at")
-       retval = std::ios::out | std::ios::app;
-      else if (mode == "r+t")
-       retval = std::ios::in | std::ios::out;
-      else if (mode == "w+t")
-       retval = std::ios::in | std::ios::out | std::ios::trunc;
-      else if (mode == "a+t")
-       retval = std::ios::in | std::ios::out | std::ios::ate;
-      else if (mode == "rb" || mode == "r")
-       retval = std::ios::in | std::ios::binary;
-      else if (mode == "wb" || mode == "w")
-       retval = std::ios::out | std::ios::trunc | std::ios::binary;
-      else if (mode == "ab" || mode == "a")
-       retval = std::ios::out | std::ios::app | std::ios::binary;
-      else if (mode == "r+b" || mode == "r+")
-       retval = std::ios::in | std::ios::out | std::ios::binary;
-      else if (mode == "w+b" || mode == "w+")
-       retval = (std::ios::in | std::ios::out | std::ios::trunc
-                 | std::ios::binary);
-      else if (mode == "a+b" || mode == "a+")
-       retval = (std::ios::in | std::ios::out | std::ios::ate
-                 | std::ios::binary);
-      else
-       ::error ("invalid mode specified");
+      std::string mode = mode_arg;
+
+      size_t pos = mode.find ('z');
+
+      if (pos != NPOS)
+       {
+#if defined (HAVE_ZLIB)
+         mode.erase (pos, 1);
+#else
+         error ("this version of Octave does not support gzipped files");
+#endif
+       }
+
+      if (! error_state)
+       {
+         if (mode == "rt")
+           retval = std::ios::in;
+         else if (mode == "wt")
+           retval = std::ios::out | std::ios::trunc;
+         else if (mode == "at")
+           retval = std::ios::out | std::ios::app;
+         else if (mode == "r+t")
+           retval = std::ios::in | std::ios::out;
+         else if (mode == "w+t")
+           retval = std::ios::in | std::ios::out | std::ios::trunc;
+         else if (mode == "a+t")
+           retval = std::ios::in | std::ios::out | std::ios::ate;
+         else if (mode == "rb" || mode == "r")
+           retval = std::ios::in | std::ios::binary;
+         else if (mode == "wb" || mode == "w")
+           retval = std::ios::out | std::ios::trunc | std::ios::binary;
+         else if (mode == "ab" || mode == "a")
+           retval = std::ios::out | std::ios::app | std::ios::binary;
+         else if (mode == "r+b" || mode == "r+")
+           retval = std::ios::in | std::ios::out | std::ios::binary;
+         else if (mode == "w+b" || mode == "w+")
+           retval = (std::ios::in | std::ios::out | std::ios::trunc
+                     | std::ios::binary);
+         else if (mode == "a+b" || mode == "a+")
+           retval = (std::ios::in | std::ios::out | std::ios::ate
+                     | std::ios::binary);
+         else
+           ::error ("invalid mode specified");
+       }
     }
 
   return retval;
@@ -386,15 +406,39 @@
 
       if (! error_state)
        {
-         FILE *fptr = ::fopen (name.c_str (), mode.c_str ());
+#if defined (HAVE_ZLIB)
+         std::string tmode = mode;
 
-         retval = octave_stdiostream::create (name, fptr, md, flt_fmt);
+         size_t pos = tmode.find ('z');
 
-         if (! fptr)
+         if (pos != NPOS)
            {
-             using namespace std;
-             retval.error (::strerror (errno));
+             tmode.erase (pos, 1);
+
+             gzFile fptr = ::gzopen (name.c_str (), tmode.c_str ());
+
+             if (fptr)
+               retval = octave_zstdiostream::create (name, fptr, md, flt_fmt);
+             else
+               {
+                 using namespace std;
+                 retval.error (::strerror (errno));
+               }
            }
+         else
+#endif
+           {
+             FILE *fptr = ::fopen (name.c_str (), mode.c_str ());
+
+             if (fptr)
+               retval = octave_stdiostream::create (name, fptr, md, flt_fmt);
+             else
+               {
+                 using namespace std;
+                 retval.error (::strerror (errno));
+               }
+           }
+
        }
     }
 
Index: src/oct-stdstrm.h
===================================================================
RCS file: /cvs/octave/src/oct-stdstrm.h,v
retrieving revision 1.25
diff -u -r1.25 oct-stdstrm.h
--- src/oct-stdstrm.h   26 Apr 2005 19:24:33 -0000      1.25
+++ src/oct-stdstrm.h   29 Apr 2005 20:35:47 -0000
@@ -27,39 +27,39 @@
 #include "oct-stream.h"
 #include "c-file-ptr-stream.h"
 
+template <typename BUF_T, typename STREAM_T, typename FILE_T>
 class
-octave_stdiostream : public octave_base_stream
+octave_tstdiostream : public octave_base_stream
 {
 public:
 
-  octave_stdiostream (const std::string& n, FILE *f = 0,
-                     std::ios::openmode m = std::ios::in|std::ios::out,
-                     oct_mach_info::float_format ff
-                       = oct_mach_info::native_float_format (),
-                     c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
-    : octave_base_stream (m, ff), nm (n), md (m), s(0)
-  {
-    if (f)
-      s = new io_c_file_ptr_stream (f, cf);
-  }
+  octave_tstdiostream (const std::string& n, FILE_T f = 0,
+                      std::ios::openmode m = std::ios::in|std::ios::out,
+                      oct_mach_info::float_format ff
+                        = oct_mach_info::native_float_format (),
+                      typename BUF_T::close_fcn cf = BUF_T::fclose)
+    : octave_base_stream (m, ff), nm (n), md (m),
+      s(f ? new STREAM_T (f, cf) : 0)
+  { }
 
   static octave_stream
-  create (const std::string& n, FILE *f = 0,
+  create (const std::string& n, FILE_T f = 0,
          std::ios::openmode m = std::ios::in|std::ios::out,
          oct_mach_info::float_format ff
            = oct_mach_info::native_float_format (),
-         c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
+         typename BUF_T::close_fcn cf = BUF_T::fclose)
   {
-    return octave_stream (new octave_stdiostream (n, f, m, ff, cf));
+    return octave_stream (new octave_tstdiostream (n, f, m, ff, cf));
   }
 
   // Position a stream at OFFSET relative to ORIGIN.
 
-  int seek (long offset, int origin);
+  int seek (long offset, int origin)
+    { return s ? s->seek (offset, origin) : -1; }
 
   // Return current stream position.
 
-  long tell (void);
+  long tell (void) { return s ? s->tell () : -1; }
 
   // Return non-zero if EOF has been reached on this stream.
 
@@ -74,8 +74,8 @@
   std::ostream *output_stream (void) { return (md & std::ios::out) ? s : 0; }
 
   // XXX FIXME XXX -- should not have to cast away const here.
-  c_file_ptr_buf *rdbuf (void) const
-    { return s ? (const_cast<io_c_file_ptr_stream *> (s))->rdbuf () : 0; }
+  BUF_T *rdbuf (void) const
+    { return s ? (const_cast<STREAM_T *> (s))->rdbuf () : 0; }
 
   bool bad (void) const { return s ? s->bad () : true; }
 
@@ -89,19 +89,27 @@
 
   std::ios::openmode md;
 
-  io_c_file_ptr_stream *s;
+  STREAM_T *s;
 
-  ~octave_stdiostream (void) { delete s; }
+  ~octave_tstdiostream (void) { delete s; }
 
 private:
 
   // No copying!
 
-  octave_stdiostream (const octave_stdiostream&);
+  octave_tstdiostream (const octave_tstdiostream&);
 
-  octave_stdiostream& operator = (const octave_stdiostream&);
+  octave_tstdiostream& operator = (const octave_tstdiostream&);
 };
 
+typedef octave_tstdiostream<c_file_ptr_buf, io_c_file_ptr_stream, FILE *> octave_stdiostream;
+
+#ifdef HAVE_ZLIB
+
+typedef octave_tstdiostream<c_zfile_ptr_buf, io_c_zfile_ptr_stream, gzFile> octave_zstdiostream;
+
+#endif
+
 #endif
 
 /*

reply via email to

[Prev in Thread] Current Thread [Next in Thread]