[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Patch for fgetl (i.e., do_gets) not meant for immediate check-in
From: |
Daniel J Sebald |
Subject: |
Patch for fgetl (i.e., do_gets) not meant for immediate check-in |
Date: |
Thu, 08 Jan 2009 02:50:47 -0600 |
User-agent: |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.3) Gecko/20041020 |
John,
In an attempt to speed up some code, I wrote a patch for oct-stream.cc that
changes ::do_gets so that instead of getting and storing one character at a
time, it uses the std streams getline() member function. I think the getline()
is a naturally better fit because all the flags and functions are there to
avoid any strange looking putback(), as in the current code.
There isn't much of a speedup; here's the difference of doing simply fgetl() on
a big ASCII file (6%):
Current HG:
octave:6> ttest
ans = 1.3668
With patch:
octave:3> ttest
ans = 1.2858
That being said, I think something like this:
// Construct only once. There is a lot of initialization and
// memory allocation that goes on to construct a string class
// and its base class.
if (! buf)
{
if (! (buf = new std::ostringstream))
{
err = true;
error(who, "allocate error");
std::string retval;
return retval;
}
}
else
buf->str("");
is still good practice and worth the change. Calling that constructor for a
stack based object time and time again is wasteful.
The speedup as a result of this patch isn't the sort of speedup I was looking for, as it
really isn't very inefficient in the first place. (I realized the speedup I'm searching
for is still in functions like "datenum", etc. Those are very inefficient.
Interpretation may be slow too.)
I don't think I'm going to take this patch much further, and since I don't
supply many C++ patches to the list I guess this patch is for you to sort
through and see how clean and efficient you might make the code using getline().
Dan
PPS: I'm going to send a slight variation on this patch.
PS: Here's some code to test that the results are correct:
fidout = fopen('test0.txt', 'w');
longstring = strcat('Long, ', repmat('long, ', 1, 50), 'long line.');
fprintf(fidout, '%s\n', longstring);
fprintf(fidout, 'Here''s a line where the end is:\n');
fprintf(fidout, '0 newline');
fclose(fidout);
fidout = fopen('test1.txt', 'w');
fprintf(fidout, 'Here''s a line where the end is:\n');
fprintf(fidout, '1 newline\n');
fclose(fidout);
fidout = fopen('test2.txt', 'w');
fprintf(fidout, 'Here''s a line where the end is:\n');
fprintf(fidout, '2 newline\n\n');
fclose(fidout);
fidin = fopen('test0.txt', 'r');
%instr = fgetl(fidin)
instr = fgetl(fidin, length(longstring))
if (~strcmp(instr, longstring))
size(instr)
size(longstring)
minstrlen = min(length(instr), length(longstring));
find(copystring(1:minstrlen) != longstring(1:minstrlen))
error('Incorrect result.');
end
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
fclose(fidin);
fidin = fopen('test1.txt', 'r');
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
fclose(fidin);
fidin = fopen('test2.txt', 'r');
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
instr = fgetl(fidin)
fclose(fidin);
--- octave/src/oct-stream.cc 2009-01-05 01:53:38.000000000 -0600
+++ octave-mod/src/oct-stream.cc 2009-01-08 02:18:01.000000000 -0600
@@ -961,7 +961,6 @@
octave_base_stream::do_gets (octave_idx_type max_len, bool& err,
bool strip_newline, const std::string& who)
{
- std::string retval;
err = false;
@@ -969,62 +968,78 @@
if (isp)
{
- std::istream& is = *isp;
+ static std::ostringstream *buf;
- std::ostringstream buf;
+ // Construct only once. There is a lot of initialization and
+ // memory allocation that goes on to construct a string class
+ // and its base class.
+ if (! buf)
+ {
+ if (! (buf = new std::ostringstream))
+ {
+ err = true;
+ error(who, "allocate error");
+ std::string retval;
+ return retval;
+ }
+ }
+ else
+ buf->str("");
- int c = 0;
- int char_count = 0;
+#define TMPBUFLEN 256
- if (max_len != 0)
- {
- while (is && (c = is.get ()) != EOF)
- {
- char_count++;
+ static char ctmpbuf[TMPBUFLEN];
- if (c == '\n')
- {
- if (! strip_newline)
- buf << static_cast<char> (c);
+ octave_idx_type read_len = max_len < 0 ? -max_len : max_len;
- break;
- }
+ while (isp->good () && ctmpbuf && read_len > 0)
+ {
+ if (max_len > 0)
+ {
+ if (read_len < TMPBUFLEN)
+ isp->getline (ctmpbuf, read_len + 1);
else
- buf << static_cast<char> (c);
-
- if (max_len > 0 && char_count == max_len)
- break;
+ isp->getline (ctmpbuf, TMPBUFLEN);
+ read_len -= TMPBUFLEN - 1;
}
+ else
+ isp->getline (ctmpbuf, TMPBUFLEN);
+
+ *buf << ctmpbuf;
+
+ if (isp->eof () || ! isp->fail ())
+ {
+ if (! strip_newline)
+ *buf << static_cast<char> ('\n');
+ break;
+ }
+ else
+ isp->clear (isp->rdstate () & ~std::istream::failbit);
}
- if (! is.eof () && char_count > 0)
+ if (! isp->good())
{
- // GAGME. Matlab seems to check for EOF even if the last
- // character in a file is a newline character. This is NOT
- // what the corresponding C-library functions do.
- int disgusting_compatibility_hack = is.get ();
- if (! is.eof ())
- is.putback (disgusting_compatibility_hack);
+ if (isp->eof () && ! buf->tellp ())
+ {
+ err = true;
+ error (who, "at end of file");
+ }
+ else if (isp->bad ())
+ {
+ err = true;
+ error (who, "read error");
+ }
}
- if (is.good () || (is.eof () && char_count > 0))
- retval = buf.str ();
- else
- {
- err = true;
+#undef TMPBUFLEN
- if (is.eof () && char_count == 0)
- error (who, "at end of file");
- else
- error (who, "read error");
- }
- }
- else
- {
- err = true;
- invalid_operation (who, "reading");
+ return buf->str ();
}
+ err = true;
+ invalid_operation (who, "reading");
+
+ std::string retval;
return retval;
}
@@ -3881,13 +3896,12 @@
bool retval = true;
if (! instance)
- instance = new octave_stream_list ();
-
- if (! instance)
{
- ::error ("unable to create stream list object!");
-
- retval = false;
+ if (! (instance = new octave_stream_list ()))
+ {
+ ::error ("unable to create stream list object!");
+ retval = false;
+ }
}
return retval;
- Patch for fgetl (i.e., do_gets) not meant for immediate check-in,
Daniel J Sebald <=