[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[coreutils] [PATCH] sort: don't dump core when merging from input twice
From: |
Paul Eggert |
Subject: |
[coreutils] [PATCH] sort: don't dump core when merging from input twice |
Date: |
Thu, 16 Dec 2010 00:06:36 -0800 |
User-agent: |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.13) Gecko/20101208 Thunderbird/3.1.7 |
* NEWS: Document this.
* src/sort.c (avoid_trashing_input): The previous fix to this
function didn't fix all the problems with this code. Replace it
with something simpler: just copy the input file. This doesn't
change the number of files, so return void instead of the updated
file count. Caller changed.
* tests/misc/sort-merge-fdlimit: Test for the bug.
---
NEWS | 2 +
src/sort.c | 42 ++++++++++++++++------------------------
tests/misc/sort-merge-fdlimit | 20 +++++++++++++++++++
3 files changed, 39 insertions(+), 25 deletions(-)
diff --git a/NEWS b/NEWS
index e98b815..429a1b7 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,8 @@ GNU coreutils NEWS -*- outline -*-
sort --compress no longer mishandles subprocesses' exit statuses.
+ sort -m -o f f ... f no longer dumps core when file descriptors are limited.
+
** New features
split accepts the --number option to generate a specific number of files.
diff --git a/src/sort.c b/src/sort.c
index 3321ddb..f53e64d 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -3549,12 +3549,12 @@ sortlines (struct line *restrict lines, size_t nthreads,
pthread_mutex_destroy (&node->lock);
}
-/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
- the same as OUTFILE. If found, merge the found instances (and perhaps
- some other files) into a temporary file so that it can in turn be
- merged into OUTFILE without destroying OUTFILE before it is completely
- read. Return the new value of NFILES, which differs from the old if
- some merging occurred.
+/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are
+ the same as OUTFILE. If found, replace each with the same
+ temporary copy that can be merged into OUTFILE without destroying
+ OUTFILE before it is completely read. This temporary copy does not
+ count as a merge temp, so don't worry about incrementing NTEMPS in
+ the caller; final cleanup will remove it, not zaptemp.
This test ensures that an otherwise-erroneous use like
"sort -m -o FILE ... FILE ..." copies FILE before writing to it.
@@ -3566,13 +3566,15 @@ sortlines (struct line *restrict lines, size_t nthreads,
Catching these obscure cases would slow down performance in
common cases. */
-static size_t
+static void
avoid_trashing_input (struct sortfile *files, size_t ntemps,
size_t nfiles, char const *outfile)
{
size_t i;
bool got_outstat = false;
struct stat outstat;
+ char const *tempcopy = NULL;
+ pid_t pid IF_LINT (= 0);
for (i = ntemps; i < nfiles; i++)
{
@@ -3603,27 +3605,17 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps,
if (same)
{
- FILE *tftp;
- pid_t pid;
- char *temp = create_temp (&tftp, &pid);
- size_t num_merged = 0;
- do
+ if (! tempcopy)
{
- num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
- files[i].name = temp;
- files[i].pid = pid;
-
- memmove (&files[i + 1], &files[i + num_merged],
- (nfiles - (i + num_merged)) * sizeof *files);
- ntemps += 1;
- nfiles -= num_merged - 1;;
- i += num_merged;
+ FILE *tftp;
+ tempcopy = create_temp (&tftp, &pid);
+ mergefiles (&files[i], 0, 1, tftp, tempcopy);
}
- while (i < nfiles);
+
+ files[i].name = tempcopy;
+ files[i].pid = pid;
}
}
-
- return nfiles;
}
/* Merge the input FILES. NTEMPS is the number of files at the
@@ -3693,7 +3685,7 @@ merge (struct sortfile *files, size_t ntemps, size_t nfiles,
nfiles -= in - out;
}
- nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
+ avoid_trashing_input (files, ntemps, nfiles, output_file);
/* We aren't guaranteed that this final mergefiles will work, therefore we
try to merge into the output, and then merge as much as we can into a
diff --git a/tests/misc/sort-merge-fdlimit b/tests/misc/sort-merge-fdlimit
index 1ed5095..b0ab71c 100755
--- a/tests/misc/sort-merge-fdlimit
+++ b/tests/misc/sort-merge-fdlimit
@@ -50,4 +50,24 @@ for randsource in '' --random-source=some-data; do
|| ! grep "open failed" err/merge-random-err) || fail=1
done
+# 'sort -m' should work in a limited file descriptor
+# environment when the output is repeatedly one of its inputs.
+# In coreutils 8.7 and earlier, 'sort' would dump core on this test.
+#
+# This test uses 'exec' to redirect file descriptors rather than
+# ordinary redirection on the 'sort' command. This is intended to
+# work around bugs in OpenBSD /bin/sh, and some other sh variants,
+# that squirrel away file descriptors before closing them; see
+# <http://lists.gnu.org/archive/html/bug-tar/2010-10/msg00075.html>.
+# This test finds the bug only with shells that do not close FDs on
+# exec, and will miss the bug (if present) on other shells, but it's
+# not easy to fix this without running afoul of the OpenBSD-like sh bugs.
+(seq 6 && echo 6) >exp || fail=1
+echo 6 >out || fail=1
+(exec 3<&- 4<&- 5<&- 6</dev/null 7<&6 8<&6 9<&6 &&
+ ulimit -n 10 &&
+ sort -n -m --batch-size=7 -o out out in/1 in/2 in/3 in/4 in/5 out
+) &&
+compare out exp || fail=1
+
Exit $fail
--
1.7.2
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [coreutils] [PATCH] sort: don't dump core when merging from input twice,
Paul Eggert <=