bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

coreutils "tac" int cleanup fixes


From: Paul Eggert
Subject: coreutils "tac" int cleanup fixes
Date: Tue, 03 Aug 2004 15:43:11 -0700
User-agent: Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux)

The rarely-used "tac" command has a number of botches if the input is
larger than 2**31.  I installed the following patch to fix the problems
that I found.  I also fixed tac-pipe.c even though it's not used now.

Because of limitations in the regular-expression API, "tac" still
mishandles some large inputs, but it now diagnoses the problem and
exits rather than silently continuing with the wrong answers.

2004-08-03  Paul Eggert  <address@hidden>

        * src/tac.c (separator_ends_record, tac_seekable, tac_file,
        tac_stdin, tac_stdin_to_mem, main): Use bool for booleans.
        (match_length, G_buffer_size, tac_seekable, main): Use size_t for sizes.
        (tac_seekable): Use ptrdiff_t for pointer subtraction.
        Report an error if the result is out of range.
        (tac_seekable, main): Check for integer overflow in buffer size
        calculations.
        (main): Remove unnecessary casts.
        * src/tac-pipe.c (buf_init_from_stdin, find_bol, tac_mem):
        Use bool for booleans.
        (buf_init_from_stdin, buf_free, find_bol, print_line):
        Use size_t for sizes.

Index: src/tac.c
===================================================================
RCS file: /home/eggert/coreutils/cu/src/tac.c,v
retrieving revision 1.109
diff -p -u -r1.109 tac.c
--- src/tac.c   21 Jan 2004 23:46:07 -0000      1.109
+++ src/tac.c   24 Jul 2004 22:57:10 -0000
@@ -77,9 +77,9 @@ char *program_name;
 /* The string that separates the records of the file. */
 static char *separator;
 
-/* If nonzero, print `separator' along with the record preceding it
+/* If true, print `separator' along with the record preceding it
    in the file; otherwise with the record following it. */
-static int separator_ends_record;
+static bool separator_ends_record;
 
 /* 0 if `separator' is to be matched as a regular expression;
    otherwise, the length of `separator', used as a sentinel to
@@ -89,7 +89,7 @@ static size_t sentinel_length;
 /* The length of a match with `separator'.  If `sentinel_length' is 0,
    `match_length' is computed every time a match succeeds;
    otherwise, it is simply the length of `separator'. */
-static int match_length;
+static size_t match_length;
 
 /* The input buffer. */
 static char *G_buffer;
@@ -100,7 +100,7 @@ static size_t read_size;
 /* The size of `buffer'.  This is read_size * 2 + sentinel_length + 2.
    The extra 2 bytes allow `past_end' to have a value beyond the
    end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
-static unsigned G_buffer_size;
+static size_t G_buffer_size;
 
 /* The compiled regular expression representing `separator'. */
 static struct re_pattern_buffer compiled_separator;
@@ -181,9 +181,9 @@ output (const char *start, const char *p
 }
 
 /* Print in reverse the file open on descriptor FD for reading FILE.
-   Return 0 if ok, 1 if an error occurs. */
+   Return true if successful.  */
 
-static int
+static bool
 tac_seekable (int input_fd, const char *file)
 {
   /* Pointer to the location in `G_buffer' where the search for
@@ -200,18 +200,18 @@ tac_seekable (int input_fd, const char *
   /* Offset in the file of the next read. */
   off_t file_pos;
 
-  /* Nonzero if `output' has not been called yet for any file.
+  /* True if `output' has not been called yet for any file.
      Only used when the separator is attached to the preceding record. */
-  int first_time = 1;
+  bool first_time = true;
   char first_char = *separator;        /* Speed optimization, non-regexp. */
   char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
-  int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
+  size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. 
*/
   struct re_registers regs;
 
   /* Find the size of the input file. */
   file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
   if (file_pos < 1)
-    return 0;                  /* It's an empty file. */
+    return true;                       /* It's an empty file. */
 
   /* Arrange for the first read to lop off enough to leave the rest of the
      file a multiple of `read_size'.  Since `read_size' can change, this may
@@ -231,7 +231,7 @@ tac_seekable (int input_fd, const char *
   if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
     {
       error (0, errno, "%s", file);
-      return 1;
+      return false;
     }
 
   match_start = past_end = G_buffer + saved_record_size;
@@ -249,9 +249,11 @@ tac_seekable (int input_fd, const char *
         Otherwise, make `match_start' < `G_buffer'. */
       if (sentinel_length == 0)
        {
-         int i = match_start - G_buffer;
+         ptrdiff_t i = match_start - G_buffer;
          int ret;
 
+         if (! (INT_MIN < i && i <= INT_MAX))
+           abort ();
          ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, &regs);
          if (ret == -1)
            match_start = G_buffer - 1;
@@ -283,7 +285,7 @@ tac_seekable (int input_fd, const char *
            {
              /* Hit the beginning of the file; print the remaining record. */
              output (G_buffer, past_end);
-             return 0;
+             return true;
            }
 
          saved_record_size = past_end - G_buffer;
@@ -294,15 +296,20 @@ tac_seekable (int input_fd, const char *
                 the data already in `G_buffer', we need to increase
                 `G_buffer_size'. */
              char *newbuffer;
-             int offset = sentinel_length ? sentinel_length : 1;
+             size_t offset = sentinel_length ? sentinel_length : 1;
+             ptrdiff_t match_start_offset = match_start - G_buffer;
+             ptrdiff_t past_end_offset = past_end - G_buffer;
+             size_t old_G_buffer_size = G_buffer_size;
 
              read_size *= 2;
              G_buffer_size = read_size * 2 + sentinel_length + 2;
+             if (G_buffer_size < old_G_buffer_size)
+               xalloc_die ();
              newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
              newbuffer += offset;
              /* Adjust the pointers for the new buffer location.  */
-             match_start += newbuffer - G_buffer;
-             past_end += newbuffer - G_buffer;
+             match_start = newbuffer + match_start_offset;
+             past_end = newbuffer + past_end_offset;
              G_buffer = newbuffer;
            }
 
@@ -330,7 +337,7 @@ tac_seekable (int input_fd, const char *
          if (safe_read (input_fd, G_buffer, read_size) != read_size)
            {
              error (0, errno, "%s", file);
-             return 1;
+             return false;
            }
        }
       else
@@ -342,10 +349,10 @@ tac_seekable (int input_fd, const char *
 
              /* If this match of `separator' isn't at the end of the
                 file, print the record. */
-             if (first_time == 0 || match_end != past_end)
+             if (!first_time || match_end != past_end)
                output (match_end, past_end);
              past_end = match_end;
-             first_time = 0;
+             first_time = false;
            }
          else
            {
@@ -361,28 +368,28 @@ tac_seekable (int input_fd, const char *
 }
 
 /* Print FILE in reverse.
-   Return 0 if ok, 1 if an error occurs. */
+   Return true if successful.  */
 
-static int
+static bool
 tac_file (const char *file)
 {
-  int errors;
+  bool ok;
   FILE *in;
 
   in = fopen (file, "r");
   if (in == NULL)
     {
       error (0, errno, "%s", file);
-      return 1;
+      return false;
     }
   SET_BINARY (fileno (in));
-  errors = tac_seekable (fileno (in), file);
+  ok = tac_seekable (fileno (in), file);
   if (fclose (in) != 0)
     {
       error (0, errno, "%s", file);
-      return 1;
+      return false;
     }
-  return errors;
+  return ok;
 }
 
 #if DONT_UNLINK_WHILE_OPEN
@@ -466,12 +473,11 @@ save_stdin (FILE **g_tmp, char **g_tempf
 
 /* Print the standard input in reverse, saving it to temporary
    file first if it is a pipe.
-   Return 0 if ok, 1 if an error occurs. */
+   Return true if successful.  */
 
-static int
+static bool
 tac_stdin (void)
 {
-  int errors;
   struct stat stats;
 
   /* No tempfile is needed for "tac < file".
@@ -481,22 +487,20 @@ tac_stdin (void)
   if (fstat (STDIN_FILENO, &stats))
     {
       error (0, errno, _("standard input"));
-      return 1;
+      return false;
     }
 
   if (S_ISREG (stats.st_mode))
     {
-      errors = tac_seekable (fileno (stdin), _("standard input"));
+      return tac_seekable (STDIN_FILENO, _("standard input"));
     }
   else
     {
       FILE *tmp_stream;
       char *tmp_file;
       save_stdin (&tmp_stream, &tmp_file);
-      errors = tac_seekable (fileno (tmp_stream), tmp_file);
+      return tac_seekable (fileno (tmp_stream), tmp_file);
     }
-
-  return errors;
 }
 
 #if 0
@@ -548,7 +552,7 @@ tac_mem (const char *buf, size_t n_bytes
 
 /* FIXME: describe */
 
-static int
+static bool
 tac_stdin_to_mem (void)
 {
   char *buf = NULL;
@@ -586,7 +590,7 @@ tac_stdin_to_mem (void)
 
   tac_mem (buf, n_bytes, stdout);
 
-  return 0;
+  return true;
 }
 #endif
 
@@ -594,8 +598,10 @@ int
 main (int argc, char **argv)
 {
   const char *error_message;   /* Return value from re_compile_pattern. */
-  int optc, errors;
-  int have_read_stdin = 0;
+  int optc;
+  bool ok;
+  bool have_read_stdin = false;
+  size_t half_buffer_size;
 
   initialize_main (&argc, &argv);
   program_name = argv[0];
@@ -605,10 +611,9 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
-  errors = 0;
   separator = "\n";
   sentinel_length = 1;
-  separator_ends_record = 1;
+  separator_ends_record = true;
 
   while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
     {
@@ -617,7 +622,7 @@ main (int argc, char **argv)
        case 0:
          break;
        case 'b':
-         separator_ends_record = 0;
+         separator_ends_record = false;
          break;
        case 'r':
          sentinel_length = 0;
@@ -637,8 +642,7 @@ main (int argc, char **argv)
   if (sentinel_length == 0)
     {
       compiled_separator.allocated = 100;
-      compiled_separator.buffer = (unsigned char *)
-       xmalloc (compiled_separator.allocated);
+      compiled_separator.buffer = xmalloc (compiled_separator.allocated);
       compiled_separator.fastmap = xmalloc (256);
       compiled_separator.translate = 0;
       error_message = re_compile_pattern (separator, strlen (separator),
@@ -650,10 +654,16 @@ main (int argc, char **argv)
     match_length = sentinel_length = strlen (separator);
 
   read_size = INITIAL_READSIZE;
-  /* A precaution that will probably never be needed. */
-  while (sentinel_length * 2 >= read_size)
-    read_size *= 2;
-  G_buffer_size = read_size * 2 + sentinel_length + 2;
+  while (sentinel_length >= read_size / 2)
+    {
+      if (SIZE_MAX / 2 < read_size)
+       xalloc_die ();
+      read_size *= 2;
+    }
+  half_buffer_size = read_size + sentinel_length + 1;
+  G_buffer_size = 2 * half_buffer_size;
+  if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
+    xalloc_die ();
   G_buffer = xmalloc (G_buffer_size);
   if (sentinel_length)
     {
@@ -667,21 +677,22 @@ main (int argc, char **argv)
 
   if (optind == argc)
     {
-      have_read_stdin = 1;
+      have_read_stdin = true;
       /* We need binary I/O, since `tac' relies
         on `lseek' and byte counts.  */
       SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
-      errors = tac_stdin ();
+      ok = tac_stdin ();
     }
   else
     {
+      ok = true;
       for (; optind < argc; ++optind)
        {
          if (STREQ (argv[optind], "-"))
            {
-             have_read_stdin = 1;
+             have_read_stdin = true;
              SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
-             errors |= tac_stdin ();
+             ok &= tac_stdin ();
            }
          else
            {
@@ -691,7 +702,7 @@ main (int argc, char **argv)
                 text mode has no visible effect on console output,
                 since two CRs in a row are just like one CR.  */
              SET_BINARY (STDOUT_FILENO);
-             errors |= tac_file (argv[optind]);
+             ok &= tac_file (argv[optind]);
            }
        }
     }
@@ -701,5 +712,5 @@ main (int argc, char **argv)
 
   if (have_read_stdin && close (STDIN_FILENO) < 0)
     error (EXIT_FAILURE, errno, "-");
-  exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }
Index: src/tac-pipe.c
===================================================================
RCS file: /home/eggert/coreutils/cu/src/tac-pipe.c,v
retrieving revision 1.4
diff -p -u -r1.4 tac-pipe.c
--- src/tac-pipe.c      30 Aug 2002 23:04:54 -0000      1.4
+++ src/tac-pipe.c      19 Jul 2004 18:33:25 -0000
@@ -30,11 +30,11 @@ struct Buf
 };
 typedef struct Buf Buf;
 
-static int
-buf_init_from_stdin (Buf *x, int eol_byte)
+static bool
+buf_init_from_stdin (Buf *x, char eol_byte)
 {
-  int last_byte_is_eol_byte = 1;
-  int fail = 0;
+  bool last_byte_is_eol_byte = true;
+  bool ok = true;
 
 #define OBS (&(x->obs))
   obstack_init (OBS);
@@ -42,18 +42,18 @@ buf_init_from_stdin (Buf *x, int eol_byt
   while (1)
     {
       char *buf = (char *) malloc (BUFFER_SIZE);
-      int bytes_read;
+      size_t bytes_read;
 
       if (buf == NULL)
        {
          /* Fall back on the code that relies on a temporary file.
             Write all buffers to that file and free them.  */
          /* FIXME */
-         fail = 1;
+         ok = false;
          break;
        }
       bytes_read = full_read (STDIN_FILENO, buf, BUFFER_SIZE);
-      if (bytes_read < 0)
+      if (bytes_read != buffer_size && errno != 0)
        error (EXIT_FAILURE, errno, _("read error"));
 
       {
@@ -63,14 +63,14 @@ buf_init_from_stdin (Buf *x, int eol_byt
        obstack_grow (OBS, &bp, sizeof (bp));
       }
 
-      if (bytes_read > 0)
+      if (bytes_read != 0)
        last_byte_is_eol_byte = (buf[bytes_read - 1] == eol_byte);
 
       if (bytes_read < BUFFER_SIZE)
        break;
     }
 
-  if (!fail)
+  if (ok)
     {
       /* If the file was non-empty and lacked an EOL_BYTE at its end,
         then add a buffer containing just that one byte.  */
@@ -80,7 +80,7 @@ buf_init_from_stdin (Buf *x, int eol_byt
          if (buf == NULL)
            {
              /* FIXME: just like above */
-             fail = 1;
+             ok = false;
            }
          else
            {
@@ -102,13 +102,13 @@ buf_init_from_stdin (Buf *x, int eol_byt
       && x->p[x->n_bufs - 1].start == x->p[x->n_bufs - 1].one_past_end)
     free (x->p[--(x->n_bufs)].start);
 
-  return fail;
+  return ok;
 }
 
 static void
 buf_free (Buf *x)
 {
-  int i;
+  size_t i;
   for (i = 0; i < x->n_bufs; i++)
     free (x->p[i].start);
   obstack_free (OBS, NULL);
@@ -153,16 +153,16 @@ line_ptr_increment (const Buf *x, const 
   return lp_new;
 }
 
-static int
+static bool
 find_bol (const Buf *x,
          const Line_ptr *last_bol, Line_ptr *new_bol, char eol_byte)
 {
-  int i;
+  size_t i;
   Line_ptr tmp;
   char *last_bol_ptr;
 
   if (last_bol->ptr == x->p[0].start)
-    return 0;
+    return false;
 
   tmp = line_ptr_decrement (x, last_bol);
   last_bol_ptr = tmp.ptr;
@@ -176,10 +176,10 @@ find_bol (const Buf *x,
          nl_pos.i = i;
          nl_pos.ptr = nl;
          *new_bol = line_ptr_increment (x, &nl_pos);
-         return 1;
+         return true;
        }
 
-      if (i <= 0)
+      if (i == 0)
        break;
 
       --i;
@@ -193,17 +193,17 @@ find_bol (const Buf *x,
     {
       new_bol->i = 0;
       new_bol->ptr = x->p[0].start;
-      return 1;
+      return true;
     }
 
-  return 0;
+  return false;
 }
 
 static void
 print_line (FILE *out_stream, const Buf *x,
            const Line_ptr *bol, const Line_ptr *bol_next)
 {
-  int i;
+  size_t i;
   for (i = bol->i; i <= bol_next->i; i++)
     {
       char *a = (i == bol->i ? bol->ptr : x->p[i].start);
@@ -212,24 +212,22 @@ print_line (FILE *out_stream, const Buf 
     }
 }
 
-static int
+static bool
 tac_mem ()
 {
   Buf x;
   Line_ptr bol;
   char eol_byte = '\n';
-  int fail;
 
-  fail = buf_init_from_stdin (&x, eol_byte);
-  if (fail)
+  if (! buf_init_from_stdin (&x, eol_byte))
     {
       buf_free (&x);
-      return 1;
+      return false;
     }
 
   /* Special case the empty file.  */
   if (EMPTY (&x))
-    return 0;
+    return true;
 
   /* Initially, point at one past the last byte of the file.  */
   bol.i = x.n_bufs - 1;
@@ -238,11 +236,10 @@ tac_mem ()
   while (1)
     {
       Line_ptr new_bol;
-      int found = find_bol (&x, &bol, &new_bol, eol_byte);
-      if (!found)
+      if (! find_bol (&x, &bol, &new_bol, eol_byte))
        break;
       print_line (stdout, &x, &new_bol, &bol);
       bol = new_bol;
     }
-  return 0;
+  return true;
 }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]