[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Support bytesize comparison in sort
From: |
Mart Somermaa |
Subject: |
Re: Support bytesize comparison in sort |
Date: |
Sat, 08 Apr 2006 14:43:22 +0300 |
User-agent: |
Mail/News 1.5 (X11/20060309) |
Andreas Schwab wrote:
> Mart Somermaa <address@hidden> writes:
>
>
>> @@ -295,6 +299,7 @@ Ordering options:\n\
>> "), stdout);
>> fputs (_("\
>> -b, --ignore-leading-blanks ignore leading blanks\n\
>> + -B, --size-in-bytes compare bytesizes (numbers suffixed with K,
>> M, G)\n\
>>
>
> You forgot to update the help string.
>
> Andreas.
Fixed. Added documentation to coreutils.texi.
diff -burp coreutils-5.94.orig/doc/coreutils.texi coreutils-5.94/doc/coreutils.texi
--- coreutils-5.94.orig/doc/coreutils.texi 2006-02-07 10:31:28.000000000 +0200
+++ coreutils-5.94/doc/coreutils.texi 2006-04-08 13:22:40.000000000 +0300
@@ -3257,6 +3257,23 @@ Use this option only if there is no alte
@option{--numeric-sort} (@option{-n}) and it can lose information when
converting to floating point.
+@item -h
+@itemx --human-readable-bytesize
+@opindex -h
+@opindex --human-readable-bytesize
+@cindex sort human readable bytesizes
+Behaves otherwise like @option{--general-numeric-sort}, but also takes
+into account human readable bytesize suffixes (K for kilo-, M for
+mega-, G for giga-, T for tera-, P for peta-, E for exa-, Z for zetta-
+and Y for yottabytes).
+
+Note that input numbers are assumed to be properly scaled (i.e. 1M
+should always be used instead of 1024K or 1000K) -- numbers in K will
+always compare less than numbers in M, similarly with M and G, G and T
+etc. This approach makes the routine immune to the 1KB = 1000B versus
+1KB = 1024B problem, sorting correctly the output of other GNU utilities
+that accept either @option{--human-readable} or @option{--si} options.
+
@item -i
@itemx --ignore-nonprinting
@opindex -i
diff -burp coreutils-5.94.orig/src/sort.c coreutils-5.94/src/sort.c
--- coreutils-5.94.orig/src/sort.c 2005-10-07 21:48:28.000000000 +0300
+++ coreutils-5.94/src/sort.c 2006-04-08 13:00:10.000000000 +0300
@@ -26,6 +26,7 @@
#include <getopt.h>
#include <sys/types.h>
#include <signal.h>
+#include <ctype.h>
#include "system.h"
#include "error.h"
#include "hard-locale.h"
@@ -149,6 +150,9 @@ struct keyfield
point, but no exponential notation. */
bool general_numeric; /* Flag for general, numeric comparison.
Handle numbers in exponential notation. */
+ bool size_in_bytes; /* Flag for human-readable bytesize comparison.
+ Handle numbers suffixed with K, M, G, T, P,
+ E, Z or Y (kilo- through yottabytes). */
bool month; /* Flag for comparison by month name. */
bool reverse; /* Reverse the sense of comparison. */
struct keyfield *next; /* Next keyfield to try. */
@@ -300,6 +304,7 @@ Ordering options:\n\
"), stdout);
fputs (_("\
-g, --general-numeric-sort compare according to general numerical value\n\
+ -h, --human-readable-bytesize compare bytesizes (suffixed with K, M, G etc)\n\
-i, --ignore-nonprinting consider only printable characters\n\
-M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
-n, --numeric-sort compare according to string numerical value\n\
@@ -353,7 +358,7 @@ native byte values.\n\
exit (status);
}
-static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z";
+static char const short_options[] = "-bcdfghik:mMno:rsS:t:T:uy:z";
static struct option const long_options[] =
{
@@ -362,6 +367,7 @@ static struct option const long_options[
{"dictionary-order", no_argument, NULL, 'd'},
{"ignore-case", no_argument, NULL, 'f'},
{"general-numeric-sort", no_argument, NULL, 'g'},
+ {"human-readable-bytesize", no_argument, NULL, 'h'},
{"ignore-nonprinting", no_argument, NULL, 'i'},
{"key", required_argument, NULL, 'k'},
{"merge", no_argument, NULL, 'm'},
@@ -1077,8 +1083,14 @@ numcompare (const char *a, const char *b
return strnumcmp (a, b, decimal_point, thousands_sep);
}
-static int
-general_numcompare (const char *sa, const char *sb)
+/* If size_in_bytes is true, compare strings A and B as human-readable
+ * positive byte counts (as returned e.g. by df -h) suffixed with
+ * either 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' for kilobytes,
+ * megabytes, gigabytes, terabytes, petabytes, exabytes, zettabytes,
+ * yottabytes.
+ */
+static int general_numcompare (const char *sa, const char *sb,
+ bool size_in_bytes)
{
/* FIXME: add option to warn about failed conversions. */
/* FIXME: maybe add option to try expensive FP conversion
@@ -1095,6 +1107,49 @@ general_numcompare (const char *sa, cons
if (sb == eb)
return 1;
+ if (size_in_bytes && ea && eb)
+ {
+ char ca, cb;
+
+ while (isblank(*ea))
+ ea++;
+ while (isblank(*eb))
+ eb++;
+
+ ca = (char) tolower(*ea);
+ cb = (char) tolower(*eb);
+
+ /* 1) We don't require both operands to have a known suffix.
+ * 2) If both suffixes are unknown or equal, compare as usual */
+ if (! ( (ca == 'k' || ca == 'm' || ca == 'g' || ca == 't'
+ || ca == 'p' || ca == 'e' || ca == 'z' || ca == 'y')
+ ||
+ (cb == 'k' || cb == 'm' || cb == 'g' || cb == 't'
+ || cb == 'p' || cb == 'e' || cb == 'z' || cb == 'y') )
+ || ca == cb)
+ goto compare_as_usual;
+
+ /* As ca != cb, if ca in YB => ca bigger, cb in YB => cb bigger,
+ * if neither of these, if ca in ZB => ca bigger etc */
+ return (ca == 'y' ? 1
+ : cb == 'y' ? -1
+ : ca == 'z' ? 1
+ : cb == 'z' ? -1
+ : ca == 'e' ? 1
+ : cb == 'e' ? -1
+ : ca == 'p' ? 1
+ : cb == 'p' ? -1
+ : ca == 't' ? 1
+ : cb == 't' ? -1
+ : ca == 'g' ? 1
+ : cb == 'g' ? -1
+ : ca == 'm' ? 1
+ : cb == 'm' ? -1
+ : ca == 'k' ? 1 /* ca in KB and cb without a known suffix */
+ : -1); /* cb in KB and ca without a known suffix */
+ }
+
+compare_as_usual:
/* Sort numbers in the usual way, where -0 == +0. Put NaNs after
conversion errors but before numbers; sort them by internal
bit-pattern, for lack of a more portable alternative. */
@@ -1179,13 +1234,14 @@ keycompare (const struct line *a, const
size_t lenb = limb <= textb ? 0 : limb - textb;
/* Actually compare the fields. */
- if (key->numeric | key->general_numeric)
+ if (key->numeric | key->general_numeric | key->size_in_bytes)
{
char savea = *lima, saveb = *limb;
*lima = *limb = '\0';
- diff = ((key->numeric ? numcompare : general_numcompare)
- (texta, textb));
+ diff = (key->numeric ?
+ numcompare(texta, textb) :
+ general_numcompare(texta, textb, key->size_in_bytes));
*lima = savea, *limb = saveb;
}
else if (key->month)
@@ -2069,6 +2125,9 @@ set_ordering (const char *s, struct keyf
case 'g':
key->general_numeric = true;
break;
+ case 'h':
+ key->size_in_bytes = true;
+ break;
case 'i':
/* Option order should not matter, so don't let -i override
-d. -d implies -i, but -i does not imply -d. */
@@ -2187,7 +2246,8 @@ main (int argc, char **argv)
gkey.sword = gkey.eword = SIZE_MAX;
gkey.ignore = NULL;
gkey.translate = NULL;
- gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
+ gkey.numeric = gkey.general_numeric = gkey.size_in_bytes = false;
+ gkey.month = gkey.reverse = false;
gkey.skipsblanks = gkey.skipeblanks = false;
files = xnmalloc (argc, sizeof *files);
@@ -2259,6 +2319,7 @@ main (int argc, char **argv)
case 'd':
case 'f':
case 'g':
+ case 'h':
case 'i':
case 'M':
case 'n':
@@ -2418,7 +2479,7 @@ main (int argc, char **argv)
if (! (key->ignore || key->translate
|| (key->skipsblanks | key->reverse
| key->skipeblanks | key->month | key->numeric
- | key->general_numeric)))
+ | key->general_numeric | key->size_in_bytes)))
{
key->ignore = gkey.ignore;
key->translate = gkey.translate;
@@ -2427,12 +2488,14 @@ main (int argc, char **argv)
key->month = gkey.month;
key->numeric = gkey.numeric;
key->general_numeric = gkey.general_numeric;
+ key->size_in_bytes = gkey.size_in_bytes;
key->reverse = gkey.reverse;
}
if (!keylist && (gkey.ignore || gkey.translate
|| (gkey.skipsblanks | gkey.skipeblanks | gkey.month
- | gkey.numeric | gkey.general_numeric)))
+ | gkey.numeric | gkey.general_numeric
+ | gkey.size_in_bytes )))
insertkey (&gkey);
reverse = gkey.reverse;