coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[coreutils] [PATCH] sort: don't dump core when merging from input twice


From: Paul Eggert
Subject: [coreutils] [PATCH] sort: don't dump core when merging from input twice
Date: Thu, 16 Dec 2010 00:06:36 -0800
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.13) Gecko/20101208 Thunderbird/3.1.7

* NEWS: Document this.
* src/sort.c (avoid_trashing_input): The previous fix to this
function didn't fix all the problems with this code.  Replace it
with something simpler: just copy the input file.  This doesn't
change the number of files, so return void instead of the updated
file count.  Caller changed.
* tests/misc/sort-merge-fdlimit: Test for the bug.
---
 NEWS                          |    2 +
 src/sort.c                    |   42 ++++++++++++++++------------------------
 tests/misc/sort-merge-fdlimit |   20 +++++++++++++++++++
 3 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/NEWS b/NEWS
index e98b815..429a1b7 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,8 @@ GNU coreutils NEWS                                    -*- outline -*-
 
   sort --compress no longer mishandles subprocesses' exit statuses.
 
+  sort -m -o f f ... f no longer dumps core when file descriptors are limited.
+
 ** New features
 
   split accepts the --number option to generate a specific number of files.
diff --git a/src/sort.c b/src/sort.c
index 3321ddb..f53e64d 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -3549,12 +3549,12 @@ sortlines (struct line *restrict lines, size_t nthreads,
   pthread_mutex_destroy (&node->lock);
 }
 
-/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
-   the same as OUTFILE.  If found, merge the found instances (and perhaps
-   some other files) into a temporary file so that it can in turn be
-   merged into OUTFILE without destroying OUTFILE before it is completely
-   read.  Return the new value of NFILES, which differs from the old if
-   some merging occurred.
+/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are
+   the same as OUTFILE.  If found, replace each with the same
+   temporary copy that can be merged into OUTFILE without destroying
+   OUTFILE before it is completely read.  This temporary copy does not
+   count as a merge temp, so don't worry about incrementing NTEMPS in
+   the caller; final cleanup will remove it, not zaptemp.
 
    This test ensures that an otherwise-erroneous use like
    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
@@ -3566,13 +3566,15 @@ sortlines (struct line *restrict lines, size_t nthreads,
    Catching these obscure cases would slow down performance in
    common cases.  */
 
-static size_t
+static void
 avoid_trashing_input (struct sortfile *files, size_t ntemps,
                       size_t nfiles, char const *outfile)
 {
   size_t i;
   bool got_outstat = false;
   struct stat outstat;
+  char const *tempcopy = NULL;
+  pid_t pid IF_LINT (= 0);
 
   for (i = ntemps; i < nfiles; i++)
     {
@@ -3603,27 +3605,17 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps,
 
       if (same)
         {
-          FILE *tftp;
-          pid_t pid;
-          char *temp = create_temp (&tftp, &pid);
-          size_t num_merged = 0;
-          do
+          if (! tempcopy)
             {
-              num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
-              files[i].name = temp;
-              files[i].pid = pid;
-
-              memmove (&files[i + 1], &files[i + num_merged],
-                       (nfiles - (i + num_merged)) * sizeof *files);
-              ntemps += 1;
-              nfiles -= num_merged - 1;;
-              i += num_merged;
+              FILE *tftp;
+              tempcopy = create_temp (&tftp, &pid);
+              mergefiles (&files[i], 0, 1, tftp, tempcopy);
             }
-          while (i < nfiles);
+
+          files[i].name = tempcopy;
+          files[i].pid = pid;
         }
     }
-
-  return nfiles;
 }
 
 /* Merge the input FILES.  NTEMPS is the number of files at the
@@ -3693,7 +3685,7 @@ merge (struct sortfile *files, size_t ntemps, size_t nfiles,
       nfiles -= in - out;
     }
 
-  nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
+  avoid_trashing_input (files, ntemps, nfiles, output_file);
 
   /* We aren't guaranteed that this final mergefiles will work, therefore we
      try to merge into the output, and then merge as much as we can into a
diff --git a/tests/misc/sort-merge-fdlimit b/tests/misc/sort-merge-fdlimit
index 1ed5095..b0ab71c 100755
--- a/tests/misc/sort-merge-fdlimit
+++ b/tests/misc/sort-merge-fdlimit
@@ -50,4 +50,24 @@ for randsource in '' --random-source=some-data; do
      || ! grep "open failed" err/merge-random-err) || fail=1
 done
 
+# 'sort -m' should work in a limited file descriptor
+# environment when the output is repeatedly one of its inputs.
+# In coreutils 8.7 and earlier, 'sort' would dump core on this test.
+#
+# This test uses 'exec' to redirect file descriptors rather than
+# ordinary redirection on the 'sort' command.  This is intended to
+# work around bugs in OpenBSD /bin/sh, and some other sh variants,
+# that squirrel away file descriptors before closing them; see
+# <http://lists.gnu.org/archive/html/bug-tar/2010-10/msg00075.html>.
+# This test finds the bug only with shells that do not close FDs on
+# exec, and will miss the bug (if present) on other shells, but it's
+# not easy to fix this without running afoul of the OpenBSD-like sh bugs.
+(seq 6 && echo 6) >exp || fail=1
+echo 6 >out || fail=1
+(exec 3<&- 4<&- 5<&- 6</dev/null 7<&6 8<&6 9<&6 &&
+ ulimit -n 10 &&
+ sort -n -m --batch-size=7 -o out out in/1 in/2 in/3 in/4 in/5 out
+) &&
+compare out exp || fail=1
+
 Exit $fail
-- 
1.7.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]