octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Patch for fgetl (i.e., do_gets) not meant for immediate check-in


From: Daniel J Sebald
Subject: Patch for fgetl (i.e., do_gets) not meant for immediate check-in
Date: Thu, 08 Jan 2009 02:50:47 -0600
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.3) Gecko/20041020

John,

In an attempt to speed up some code, I wrote a patch for oct-stream.cc that 
changes ::do_gets so that instead of getting and storing one character at a 
time, it uses the std streams getline() member function.  I think the getline() 
is a naturally better fit because all the flags and functions are there to 
avoid any strange looking putback(), as in the current code.

There isn't much of a speedup; here's the difference of doing simply fgetl() on 
a big ASCII file (6%):

Current HG:

octave:6> ttest
ans =  1.3668

With patch:

octave:3> ttest
ans =  1.2858

That being said, I think something like this:

     // Construct only once.  There is a lot of initialization and
     // memory allocation that goes on to construct a string class
     // and its base class.
     if (! buf)
       {
          if (! (buf = new std::ostringstream))
            {
              err = true;
              error(who, "allocate error");
              std::string retval;
              return retval;
            }
        }
     else
        buf->str("");

is still good practice and worth the change.  Calling that constructor for a 
stack based object time and time again is wasteful.

The speedup as a result of this patch isn't the sort of speedup I was looking for, as it 
really isn't very inefficient in the first place.  (I realized the speedup I'm searching 
for is still in functions like "datenum", etc.  Those are very inefficient.  
Interpretation may be slow too.)

I don't think I'm going to take this patch much further, and since I don't 
supply many C++ patches to the list I guess this patch is for you to sort 
through and see how clean and efficient you might make the code using getline().

Dan


PPS:  I'm going to send a slight variation on this patch.

PS: Here's some code to test that the results are correct:

fidout = fopen('test0.txt', 'w');
longstring = strcat('Long, ', repmat('long, ', 1, 50), 'long line.');
fprintf(fidout, '%s\n', longstring);
fprintf(fidout, 'Here''s a line where the end is:\n');
fprintf(fidout, '0 newline');
fclose(fidout);
fidout = fopen('test1.txt', 'w');
fprintf(fidout, 'Here''s a line where the end is:\n');
fprintf(fidout, '1 newline\n');
fclose(fidout);
fidout = fopen('test2.txt', 'w');
fprintf(fidout, 'Here''s a line where the end is:\n');
fprintf(fidout, '2 newline\n\n');
fclose(fidout);

fidin = fopen('test0.txt', 'r');
%instr = fgetl(fidin)
instr = fgetl(fidin, length(longstring))
if (~strcmp(instr, longstring))
 size(instr)
 size(longstring)
 minstrlen = min(length(instr), length(longstring));
 find(copystring(1:minstrlen) != longstring(1:minstrlen))
 error('Incorrect result.');
end
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
fclose(fidin);

fidin = fopen('test1.txt', 'r');
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
fclose(fidin);

fidin = fopen('test2.txt', 'r');
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
fclose(fidin);
--- octave/src/oct-stream.cc    2009-01-05 01:53:38.000000000 -0600
+++ octave-mod/src/oct-stream.cc        2009-01-08 02:18:01.000000000 -0600
@@ -961,7 +961,6 @@
 octave_base_stream::do_gets (octave_idx_type max_len, bool& err,
                             bool strip_newline, const std::string& who)
 {
-  std::string retval;
 
   err = false;
 
@@ -969,62 +968,78 @@
 
   if (isp)
     {
-      std::istream& is = *isp;
+      static std::ostringstream *buf;
 
-      std::ostringstream buf;
+      // Construct only once.  There is a lot of initialization and
+      // memory allocation that goes on to construct a string class
+      // and its base class.
+      if (! buf)
+        {
+         if (! (buf = new std::ostringstream))
+           {
+             err = true;
+             error(who, "allocate error");
+             std::string retval;
+             return retval;
+           }
+       }
+      else
+       buf->str("");
 
-      int c = 0;
-      int char_count = 0;
+#define TMPBUFLEN 256
 
-      if (max_len != 0)
-       {
-         while (is && (c = is.get ()) != EOF)
-           {
-             char_count++;
+      static char ctmpbuf[TMPBUFLEN];
 
-             if (c == '\n')
-               {
-                 if (! strip_newline)
-                   buf << static_cast<char> (c);
+      octave_idx_type read_len = max_len < 0 ? -max_len : max_len;
 
-                 break;
-               }
+      while (isp->good () && ctmpbuf && read_len > 0)
+       {
+         if (max_len > 0)
+           {
+             if (read_len < TMPBUFLEN)
+               isp->getline (ctmpbuf, read_len + 1);
              else
-               buf << static_cast<char> (c);
-
-             if (max_len > 0 && char_count == max_len)
-               break;
+               isp->getline (ctmpbuf, TMPBUFLEN);
+             read_len -= TMPBUFLEN - 1;
            }
+         else
+           isp->getline (ctmpbuf, TMPBUFLEN);
+                   
+         *buf << ctmpbuf;
+
+         if (isp->eof () || ! isp->fail ())
+           {
+              if (! strip_newline)
+               *buf << static_cast<char> ('\n');
+             break;
+            }
+         else
+           isp->clear (isp->rdstate () & ~std::istream::failbit);
        }
 
-      if (! is.eof () && char_count > 0)
+      if (! isp->good())
        {
-         // GAGME.  Matlab seems to check for EOF even if the last
-         // character in a file is a newline character.  This is NOT
-         // what the corresponding C-library functions do.
-         int disgusting_compatibility_hack = is.get ();
-         if (! is.eof ())
-           is.putback (disgusting_compatibility_hack);
+         if (isp->eof () && ! buf->tellp ())
+           {
+             err = true;
+             error (who, "at end of file");
+           }
+         else if (isp->bad ())
+           {
+             err = true;
+             error (who, "read error");
+           }
        }
 
-      if (is.good () || (is.eof () && char_count > 0))
-       retval = buf.str ();
-      else
-       {
-         err = true;
+#undef TMPBUFLEN
 
-         if (is.eof () && char_count == 0)
-           error (who, "at end of file");
-         else
-           error (who, "read error");
-       }
-    }
-  else
-    {
-      err = true;
-      invalid_operation (who, "reading");
+      return buf->str ();
     }
 
+  err = true;
+  invalid_operation (who, "reading");
+
+  std::string retval;
   return retval;
 }
 
@@ -3881,13 +3896,12 @@
   bool retval = true;
 
   if (! instance)
-    instance = new octave_stream_list ();
-
-  if (! instance)
     {
-      ::error ("unable to create stream list object!");
-
-      retval = false;
+      if (! (instance = new octave_stream_list ()))
+       {
+         ::error ("unable to create stream list object!");
+         retval = false;
+       }
     }
 
   return retval;

reply via email to

[Prev in Thread] Current Thread [Next in Thread]