[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] Use a heap for the internal merge.
From: |
Paul Eggert |
Subject: |
[PATCH] Use a heap for the internal merge. |
Date: |
Fri, 13 Mar 2009 12:24:22 -0700 |
With random data and a 64-way merge this sped up 'sort' by 5% on our
tests. Sorry, I don't know how to put multiple authors in the patch.
However, the authors are:
Alexander Nguyen <address@hidden>
Paul Eggert <address@hidden>
We've both signed papers.
-----
src/sort.c (mergefps): When merging a line with a K-way merge, update a
heap rather than doing a linear insertion. This improves the
overall performance of 'sort' from O(N*K) to O(N log K).
(less_than, swap): New functions.
(sortlines_temp): Rename local variable so that it doesn't shadow the
new 'swap' function.
---
src/sort.c | 140 +++++++++++++++++++++++++++++-------------------------------
1 files changed, 67 insertions(+), 73 deletions(-)
diff --git a/src/sort.c b/src/sort.c
index 7b0b064..0e22aff 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2060,6 +2060,25 @@ compare (const struct line *a, const struct line *b)
return reverse ? -diff : diff;
}
+/* If CUR is a vector of lines, and ORD a vector of indexes into CUR,
+ then return true if the line ORD[A] less less than the line ORD[B]
+ in CUR, using a stable comparison. */
+static bool
+less_than (struct line const *const *cur, size_t const *ord,
+ size_t a, size_t b)
+{
+ return compare (cur[ord[a]], cur[ord[b]]) < (ord[a] < ord[b]);
+}
+
+/* Swap *A and *B. */
+static inline void
+swap (size_t *a, size_t *b)
+{
+ size_t t = *a;
+ *a = *b;
+ *b = t;
+}
+
/* Check that the lines read from FILE_NAME come in order. Return
true if they are in order. If CHECKONLY == 'c', also print a
diagnostic (FILE_NAME, line number, contents of line) to stderr if
@@ -2173,60 +2192,56 @@ mergefps (struct sortfile *files, size_t ntemps, size_t
nfiles,
struct line const **base = xnmalloc (nmerge, sizeof *base);
/* Base of each line table. */
size_t *ord = xnmalloc (nmerge, sizeof *ord);
- /* Table representing a permutation of fps,
- such that cur[ord[0]] is the smallest line
- and will be next output. */
- size_t i;
- size_t j;
- size_t t;
+ /* Zero-based binary heap of run numbers for
+ selecting the minimum line to output next,
+ referenced by cur[ord[0]]. */
+ size_t nopen = 0;
struct keyfield const *key = keylist;
saved.text = NULL;
/* Read initial lines from each input file. */
- for (i = 0; i < nfiles; )
+ for (size_t i = 0; i < nfiles; i++)
{
fps[i] = (files[i].pid
? open_temp (files[i].name, files[i].pid)
: xfopen (files[i].name, "r"));
initbuf (&buffer[i], sizeof (struct line),
- MAX (merge_buffer_size, sort_size / nfiles));
+ MAX (merge_buffer_size, sort_size / (nfiles - (i - nopen))));
if (fillbuf (&buffer[i], fps[i], files[i].name))
{
struct line const *linelim = buffer_linelim (&buffer[i]);
cur[i] = linelim - 1;
base[i] = linelim - buffer[i].nlines;
- i++;
+ ord[nopen++] = i;
}
else
{
/* fps[i] is empty; eliminate it from future consideration. */
xfclose (fps[i], files[i].name);
if (i < ntemps)
- {
- ntemps--;
- zaptemp (files[i].name);
- }
+ zaptemp (files[i].name);
free (buffer[i].buf);
- --nfiles;
- for (j = i; j < nfiles; ++j)
- files[j] = files[j + 1];
}
}
if (! ofp)
ofp = xfopen (output_file, "w");
- /* Set up the ord table according to comparisons among input lines.
- Since this only reorders two items if one is strictly greater than
- the other, it is stable. */
- for (i = 0; i < nfiles; ++i)
- ord[i] = i;
- for (i = 1; i < nfiles; ++i)
- if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
- t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
+ /* Set up the heap. Since this never reorders two lines when the
+ parent equals the current line, the resulting root must be the
+ smallest line, even if stability is taken into account. */
+ for (size_t j = 1; j < nopen; j++)
+ for (size_t current = j; 0 < current; )
+ {
+ size_t parent = (current - 1) / 2;
+ if (compare (cur[ord[parent]], cur[ord[current]]) <= 0)
+ break;
+ swap (&ord[current], &ord[parent]);
+ current = parent;
+ }
/* Repeatedly output the smallest line until no input remains. */
- while (nfiles)
+ while (nopen != 0)
{
struct line const *smallest = cur[ord[0]];
@@ -2282,57 +2297,36 @@ mergefps (struct sortfile *files, size_t ntemps, size_t
nfiles,
else
{
/* We reached EOF on fps[ord[0]]. */
- for (i = 1; i < nfiles; ++i)
- if (ord[i] > ord[0])
- --ord[i];
- --nfiles;
xfclose (fps[ord[0]], files[ord[0]].name);
if (ord[0] < ntemps)
- {
- ntemps--;
- zaptemp (files[ord[0]].name);
- }
+ zaptemp (files[ord[0]].name);
free (buffer[ord[0]].buf);
- for (i = ord[0]; i < nfiles; ++i)
- {
- fps[i] = fps[i + 1];
- files[i] = files[i + 1];
- buffer[i] = buffer[i + 1];
- cur[i] = cur[i + 1];
- base[i] = base[i + 1];
- }
- for (i = 0; i < nfiles; ++i)
- ord[i] = ord[i + 1];
- continue;
+
+ /* Shrink the ord array by replacing the root with the
+ last line on the last level. This line will be
+ sifted down in the loop below. */
+ ord[0] = ord[--nopen];
}
}
/* The new line just read in may be larger than other lines
- already in main memory; push it back in the queue until we
- encounter a line larger than it. Optimize for the common
- case where the new line is smallest. */
- {
- size_t lo = 1;
- size_t hi = nfiles;
- size_t probe = lo;
- size_t ord0 = ord[0];
- size_t count_of_smaller_lines;
-
- while (lo < hi)
- {
- int cmp = compare (cur[ord0], cur[ord[probe]]);
- if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
- hi = probe;
- else
- lo = probe + 1;
- probe = (lo + hi) / 2;
- }
-
- count_of_smaller_lines = lo - 1;
- for (j = 0; j < count_of_smaller_lines; j++)
- ord[j] = ord[j + 1];
- ord[count_of_smaller_lines] = ord0;
- }
+ already in main memory. Sift down the heap. */
+ for (size_t current = 0; ; )
+ {
+ size_t left = 2 * current + 1;
+ if (nopen <= left)
+ break;
+ size_t right = left + 1;
+ size_t winner = current;
+ if (less_than (cur, ord, left, winner))
+ winner = left;
+ if (right < nopen && less_than (cur, ord, right, winner))
+ winner = right;
+ if (winner == current)
+ break;
+ swap (&ord[current], &ord[winner]);
+ current = winner;
+ }
/* Free up some resources every once in a while. */
if (MAX_PROCS_BEFORE_REAP < nprocs)
@@ -2439,12 +2433,12 @@ sortlines_temp (struct line *lines, size_t nlines,
struct line *temp)
{
if (nlines == 2)
{
- /* Declare `swap' as int, not bool, to work around a bug
+ /* Declare `swap_offset' as int, not bool, to work around a bug
<http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
- int swap = (0 < compare (&lines[-1], &lines[-2]));
- temp[-1] = lines[-1 - swap];
- temp[-2] = lines[-2 + swap];
+ int swap_offset = (0 < compare (&lines[-1], &lines[-2]));
+ temp[-1] = lines[-1 - swap_offset];
+ temp[-2] = lines[-2 + swap_offset];
}
else
{
--
1.5.3.2
- [PATCH] Use a heap for the internal merge.,
Paul Eggert <=