bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Human readable sort


From: Pádraig Brady
Subject: Re: Human readable sort
Date: Sun, 26 Apr 2009 00:32:38 +0100
User-agent: Thunderbird 2.0.0.6 (X11/20071008)

Michael Speer wrote:
> That's much more readable.  I tacked in a size.

Good catch. The size is required; otherwise
one could get undefined results for some chars.

> The standards do not
> reference the lowercase letters you commented out, so I just deleted
> them outright.

Fair enough.

>> Something else to consider is to flag when
>> a mixture of SI and IEC units are used, as
>> this not being supported might not be obvious
>> to users and could cause difficult to debug issues for users.
>> I.E. flag an error if the following input is presented.
>>  999MB
>>  998MiB
>> I added a very quick hack for that to the patch for illustration.
>>
> 
> While du only outputs the first letter, this makes the change better
> for more general use.  I added a bounds check, but do not see anything
> else beyond your illustration would be needed.

Oops, yes the bounds check is also needed.

I've further modified your latest patch in the attachment.
I refactored the suffix finding a bit and also added
support for --sort=human-numeric.
I'm wondering whether "numeric" is superfluous.
I.e., are --sort=human and --human-sort sufficient?

cheers,
Pádraig.
diff --git a/src/sort.c b/src/sort.c
index f48d727..9d7d659 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -176,6 +176,8 @@ struct keyfield
   bool random;                 /* Sort by random hash of key.  */
   bool general_numeric;                /* Flag for general, numeric comparison.
                                   Handle numbers in exponential notation. */
+  bool human_numeric;           /* Flag for sorting by human readable
+                                   units with either SI xor IEC prefixes. */
   bool month;                  /* Flag for comparison by month name. */
   bool reverse;                        /* Reverse the sense of comparison. */
   bool version;                        /* sort by version number */
@@ -336,6 +338,9 @@ Ordering options:\n\
   -i, --ignore-nonprinting    consider only printable characters\n\
   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 "), stdout);
+      fputs(_("\
+  -h, --human-numeric-sort    compare human readable numbers (e.g., 2K 1G)\n\
+"), stdout);
       fputs (_("\
   -n, --numeric-sort          compare according to string numerical value\n\
   -R, --random-sort           sort by random hash of keys\n\
@@ -344,8 +349,8 @@ Ordering options:\n\
 "), stdout);
       fputs (_("\
       --sort=WORD             sort according to WORD:\n\
-                                general-numeric -g, month -M, numeric -n,\n\
-                                random -R, version -V\n\
+                                general-numeric -g, human-numeric -h, month -M,\n\
+                                numeric -n, random -R, version -V\n\
   -V, --version-sort          natural sort of (version) numbers within text\n\
 \n\
 "), stdout);
@@ -426,7 +431,7 @@ enum
   SORT_OPTION
 };
 
-static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uVy:z";
+static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
 
 static struct option const long_options[] =
 {
@@ -442,6 +447,7 @@ static struct option const long_options[] =
   {"merge", no_argument, NULL, 'm'},
   {"month-sort", no_argument, NULL, 'M'},
   {"numeric-sort", no_argument, NULL, 'n'},
+  {"human-numeric-sort", no_argument, NULL, 'h'},
   {"version-sort", no_argument, NULL, 'V'},
   {"random-sort", no_argument, NULL, 'R'},
   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
@@ -480,6 +486,7 @@ static char const check_types[] =
 
 #define SORT_TABLE \
   _st_("general-numeric", 'g') \
+  _st_("human-numeric",   'h') \
   _st_("month",           'M') \
   _st_("numeric",         'n') \
   _st_("random",          'R') \
@@ -1673,6 +1680,60 @@ numcompare (const char *a, const char *b)
   return strnumcmp (a, b, decimal_point, thousands_sep);
 }
 
+/* Exit with an error if a mixture of SI and IEC units is detected.  */
+
+static void
+check_mixed_SI_IEC (char prefix)
+{
+  static int seen_si = -1;      /* -1 until the first call records a kind.  */
+  bool si_present = prefix == 'i';  /* True for IEC ('i'), despite the name.  */
+  if (seen_si != -1 && seen_si != si_present)
+    error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
+  seen_si = si_present;         /* Remember the kind for later calls.  */
+}
+
+/* Return the address of NUMBER's suffix, or of its terminating NUL if none.  */
+
+static const char*
+find_suffix (const char* number)
+{
+  const char *p = number;
+
+  while (ISDIGIT (*p) || *p == decimal_point || *p == thousands_sep)
+    p++;                        /* Note: a leading sign is not skipped.  */
+
+  if (*p)                       /* Side effect: may exit on mixed units.  */
+    check_mixed_SI_IEC (*(p+1));
+
+  return p;
+}
+
+/* Compare numbers A and B ending in units with SI xor IEC prefixes:
+          <none/unknown> < K < M < G < T < P < E < Z < Y
+   Assume that numbers are properly abbreviated.
+   I.e., input will never have 5000K instead of 5M.  */
+
+static int
+human_numcompare (const char *a, const char *b)
+{
+  static const char weights [UCHAR_LIM] = {
+    ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
+    ['k']=1,                    /* Lowercase k ranks the same as K.  */
+  };
+
+  while (blanks[to_uchar (*a)]) /* Skip leading blanks in both keys.  */
+    a++;
+  while (blanks[to_uchar (*b)])
+    b++;
+
+  int aw = weights[to_uchar (*find_suffix (a))];  /* 0 if none/unknown.  */
+  int bw = weights[to_uchar (*find_suffix (b))];
+
+  return (aw > bw ? 1           /* A larger unit decides the result.  */
+          : aw < bw ? -1
+          : strnumcmp (a , b , decimal_point , thousands_sep));  /* Same unit.  */
+}
+
 static int
 general_numcompare (const char *sa, const char *sb)
 {
@@ -1917,13 +1978,14 @@ keycompare (const struct line *a, const struct line *b)
 
       if (key->random)
        diff = compare_random (texta, lena, textb, lenb);
-      else if (key->numeric | key->general_numeric)
+      else if (key->numeric | key->general_numeric | key->human_numeric)
        {
          char savea = *lima, saveb = *limb;
 
          *lima = *limb = '\0';
-         diff = ((key->numeric ? numcompare : general_numcompare)
-                 (texta, textb));
+         diff = ((key->numeric ? numcompare
+                  : key->general_numeric ? general_numcompare
+                  : human_numcompare) (texta, textb));
          *lima = savea, *limb = saveb;
        }
       else if (key->version)
@@ -2889,7 +2951,7 @@ check_ordering_compatibility (void)
 
   for (key = keylist; key; key = key->next)
     if ((1 < (key->random + key->numeric + key->general_numeric + key->month
-             + key->version + !!key->ignore))
+             + key->version + (!!key->ignore) + key->human_numeric))
        || (key->random && key->translate))
       {
        /* The following is too big, but guaranteed to be "big enough". */
@@ -2901,6 +2963,8 @@ check_ordering_compatibility (void)
          *p++ = 'f';
        if (key->general_numeric)
          *p++ = 'g';
+        if (key->human_numeric)
+          *p++ = 'h';
        if (key->ignore == nonprinting)
          *p++ = 'i';
        if (key->month)
@@ -2992,6 +3056,9 @@ set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
        case 'g':
          key->general_numeric = true;
          break;
+        case 'h':
+          key->human_numeric = true;
+          break;
        case 'i':
          /* Option order should not matter, so don't let -i override
             -d.  -d implies -i, but -i does not imply -d.  */
@@ -3140,7 +3207,8 @@ main (int argc, char **argv)
   gkey.sword = gkey.eword = SIZE_MAX;
   gkey.ignore = NULL;
   gkey.translate = NULL;
-  gkey.numeric = gkey.general_numeric = gkey.random = gkey.version = false;
+  gkey.numeric = gkey.general_numeric = gkey.human_numeric = false;
+  gkey.random = gkey.version = false;
   gkey.month = gkey.reverse = false;
   gkey.skipsblanks = gkey.skipeblanks = false;
 
@@ -3219,6 +3287,7 @@ main (int argc, char **argv)
        case 'd':
        case 'f':
        case 'g':
+        case 'h':
        case 'i':
        case 'M':
        case 'n':
@@ -3471,6 +3540,7 @@ main (int argc, char **argv)
                 | key->numeric
                 | key->version
                 | key->general_numeric
+                 | key->human_numeric
                 | key->random)))
         {
           key->ignore = gkey.ignore;
@@ -3480,6 +3550,7 @@ main (int argc, char **argv)
           key->month = gkey.month;
           key->numeric = gkey.numeric;
           key->general_numeric = gkey.general_numeric;
+          key->human_numeric = gkey.human_numeric;
           key->random = gkey.random;
           key->reverse = gkey.reverse;
           key->version = gkey.version;
@@ -3495,6 +3566,7 @@ main (int argc, char **argv)
                       | gkey.month
                       | gkey.numeric
                       | gkey.general_numeric
+                       | gkey.human_numeric
                       | gkey.random
                       | gkey.version)))
     {

reply via email to

[Prev in Thread] Current Thread [Next in Thread]