emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master 1f7feec: Use Gnulib filevercmp for version comparison


From: Paul Eggert
Subject: [Emacs-diffs] master 1f7feec: Use Gnulib filevercmp for version comparison
Date: Sun, 21 Feb 2016 21:27:43 +0000

branch: master
commit 1f7feecaee0ed3fb79758fe60020aefb30d9ff01
Author: Paul Eggert <address@hidden>
Commit: Paul Eggert <address@hidden>

    Use Gnulib filevercmp for version comparison
    
    * admin/merge-gnulib (GNULIB_MODULES): Add filevercmp.
    * doc/lispref/strings.texi (Text Comparison):
    * etc/NEWS, src/fns.c:
    * test/src/fns-tests.el (fns-tests-string-version-lessp):
    Rename newly-introduced function to string-version-lessp, by
    analogy with strverscmp.
    * lib/filevercmp.c, lib/filevercmp.h: New files, copied from gnulib.
    * lib/gnulib.mk, m4/gnulib-comp.m4: Regenerate.
    * src/fns.c: Include <filevercmp.h>.
    (gather_number_from_string): Remove.
    (Fstring_version_lessp): Reimplement via filevercmp.
---
 admin/merge-gnulib       |    2 +-
 doc/lispref/strings.texi |   20 ++----
 etc/NEWS                 |    2 +-
 lib/filevercmp.c         |  181 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/filevercmp.h         |   42 +++++++++++
 lib/gnulib.mk            |   10 +++-
 m4/gnulib-comp.m4        |    3 +
 src/fns.c                |  129 ++++++++-------------------------
 test/src/fns-tests.el    |   28 ++++----
 9 files changed, 287 insertions(+), 130 deletions(-)

diff --git a/admin/merge-gnulib b/admin/merge-gnulib
index 5463d1b..5d65127 100755
--- a/admin/merge-gnulib
+++ b/admin/merge-gnulib
@@ -30,7 +30,7 @@ GNULIB_MODULES='
   careadlinkat close-stream count-one-bits count-trailing-zeros
   crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512
   dtoastr dtotimespec dup2 environ execinfo faccessat
-  fcntl fcntl-h fdatasync fdopendir filemode fstatat fsync
+  fcntl fcntl-h fdatasync fdopendir filemode filevercmp fstatat fsync
   getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog
   ignore-value intprops largefile lstat
   manywarnings memrchr mkostemp mktime
diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi
index 19301de..ce629aa 100644
--- a/doc/lispref/strings.texi
+++ b/doc/lispref/strings.texi
@@ -633,20 +633,12 @@ If your system does not support a locale environment, this function
 behaves like @code{string-lessp}.
 @end defun
 
-@defun string-numerical-lessp strin1 string2
-This function behaves like @code{string-lessp} for stretches of
-consecutive non-numerical characters, but compares sequences of
-numerical characters as if they comprised a base-ten number, and then
-compares the numbers.  So @samp{foo2.png} is ``smaller'' than
-@samp{foo12.png} according to this predicate, even if @samp{12} is
-lexicographically ``smaller'' than @samp{2}.
-
-If one string has a number in a position in the string, and the other
-doesn't, then lexicograpic comparison is done at that point, so
-@samp{foo.png} is ``smaller'' than @samp{foo2.png}.  If any of the
-numbers in the strings are larger than can be represented as an
-integer number, the entire string is compared using
-@code{string-lessp}.
+@defun string-version-lessp string1 string2
+This function compares strings lexicographically, except it treats
+sequences of numerical characters as if they comprised a base-ten
+number, and then compares the numbers.  So @samp{foo2.png} is
+``smaller'' than @samp{foo12.png} according to this predicate, even if
+@samp{12} is lexicographically ``smaller'' than @samp{2}.
 @end defun
 
 @defun string-prefix-p string1 string2 &optional ignore-case
diff --git a/etc/NEWS b/etc/NEWS
index bad9519..9a3799a 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1726,7 +1726,7 @@ systems and for MS-Windows, for other systems they fall back to their
 counterparts `string-lessp' and `string-equal'.
 
 +++
-** The new function `string-numeric-lessp' compares strings by
+** The new function `string-version-lessp' compares strings by
 interpreting consecutive runs of numerical characters as numbers, and
 compares their numerical values.  According to this predicate,
 "foo2.png" is smaller than "foo12.png".
diff --git a/lib/filevercmp.c b/lib/filevercmp.c
new file mode 100644
index 0000000..a75c946
--- /dev/null
+++ b/lib/filevercmp.c
@@ -0,0 +1,181 @@
+/*
+   Copyright (C) 1995 Ian Jackson <address@hidden>
+   Copyright (C) 2001 Anthony Towns <address@hidden>
+   Copyright (C) 2008-2016 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include "filevercmp.h"
+
+#include <sys/types.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <c-ctype.h>
+#include <limits.h>
+
+/* Match a file suffix defined by this regular expression:
+   /(\.[A-Za-z~][A-Za-z0-9~]*)*$/
+   Scan the string *STR and return a pointer to the matching suffix, or
+   NULL if not found.  Upon return, *STR points to terminating NUL.  */
+static const char *
+match_suffix (const char **str)
+{
+  const char *match = NULL;
+  bool read_alpha = false;
+  while (**str)
+    {
+      if (read_alpha)
+        {
+          read_alpha = false;
+          if (!c_isalpha (**str) && '~' != **str)
+            match = NULL;
+        }
+      else if ('.' == **str)
+        {
+          read_alpha = true;
+          if (!match)
+            match = *str;
+        }
+      else if (!c_isalnum (**str) && '~' != **str)
+        match = NULL;
+      (*str)++;
+    }
+  return match;
+}
+
+/* verrevcmp helper function */
+static int
+order (unsigned char c)
+{
+  if (c_isdigit (c))
+    return 0;
+  else if (c_isalpha (c))
+    return c;
+  else if (c == '~')
+    return -1;
+  else
+    return (int) c + UCHAR_MAX + 1;
+}
+
+/* slightly modified verrevcmp function from dpkg
+   S1, S2 - compared string
+   S1_LEN, S2_LEN - length of strings to be scanned
+
+   This implements the algorithm for comparison of version strings
+   specified by Debian and now widely adopted.  The detailed
+   specification can be found in the Debian Policy Manual in the
+   section on the 'Version' control field.  This version of the code
+   implements that from s5.6.12 of Debian Policy v3.8.0.1
+   http://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version */
+static int _GL_ATTRIBUTE_PURE
+verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len)
+{
+  size_t s1_pos = 0;
+  size_t s2_pos = 0;
+  while (s1_pos < s1_len || s2_pos < s2_len)
+    {
+      int first_diff = 0;
+      while ((s1_pos < s1_len && !c_isdigit (s1[s1_pos]))
+             || (s2_pos < s2_len && !c_isdigit (s2[s2_pos])))
+        {
+          int s1_c = (s1_pos == s1_len) ? 0 : order (s1[s1_pos]);
+          int s2_c = (s2_pos == s2_len) ? 0 : order (s2[s2_pos]);
+          if (s1_c != s2_c)
+            return s1_c - s2_c;
+          s1_pos++;
+          s2_pos++;
+        }
+      while (s1[s1_pos] == '0')
+        s1_pos++;
+      while (s2[s2_pos] == '0')
+        s2_pos++;
+      while (c_isdigit (s1[s1_pos]) && c_isdigit (s2[s2_pos]))
+        {
+          if (!first_diff)
+            first_diff = s1[s1_pos] - s2[s2_pos];
+          s1_pos++;
+          s2_pos++;
+        }
+      if (c_isdigit (s1[s1_pos]))
+        return 1;
+      if (c_isdigit (s2[s2_pos]))
+        return -1;
+      if (first_diff)
+        return first_diff;
+    }
+  return 0;
+}
+
+/* Compare version strings S1 and S2.
+   See filevercmp.h for function description.  */
+int
+filevercmp (const char *s1, const char *s2)
+{
+  const char *s1_pos;
+  const char *s2_pos;
+  const char *s1_suffix, *s2_suffix;
+  size_t s1_len, s2_len;
+  int result;
+
+  /* easy comparison to see if strings are identical */
+  int simple_cmp = strcmp (s1, s2);
+  if (simple_cmp == 0)
+    return 0;
+
+  /* special handle for "", "." and ".." */
+  if (!*s1)
+    return -1;
+  if (!*s2)
+    return 1;
+  if (0 == strcmp (".", s1))
+    return -1;
+  if (0 == strcmp (".", s2))
+    return 1;
+  if (0 == strcmp ("..", s1))
+    return -1;
+  if (0 == strcmp ("..", s2))
+    return 1;
+
+  /* special handle for other hidden files */
+  if (*s1 == '.' && *s2 != '.')
+    return -1;
+  if (*s1 != '.' && *s2 == '.')
+    return 1;
+  if (*s1 == '.' && *s2 == '.')
+    {
+      s1++;
+      s2++;
+    }
+
+  /* "cut" file suffixes */
+  s1_pos = s1;
+  s2_pos = s2;
+  s1_suffix = match_suffix (&s1_pos);
+  s2_suffix = match_suffix (&s2_pos);
+  s1_len = (s1_suffix ? s1_suffix : s1_pos) - s1;
+  s2_len = (s2_suffix ? s2_suffix : s2_pos) - s2;
+
+  /* restore file suffixes if strings are identical after "cut" */
+  if ((s1_suffix || s2_suffix) && (s1_len == s2_len)
+      && 0 == strncmp (s1, s2, s1_len))
+    {
+      s1_len = s1_pos - s1;
+      s2_len = s2_pos - s2;
+    }
+
+  result = verrevcmp (s1, s1_len, s2, s2_len);
+  return result == 0 ? simple_cmp : result;
+}
diff --git a/lib/filevercmp.h b/lib/filevercmp.h
new file mode 100644
index 0000000..220b71b
--- /dev/null
+++ b/lib/filevercmp.h
@@ -0,0 +1,42 @@
+/*
+   Copyright (C) 1995 Ian Jackson <address@hidden>
+   Copyright (C) 2001 Anthony Towns <address@hidden>
+   Copyright (C) 2008-2016 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef FILEVERCMP_H
+#define FILEVERCMP_H
+
+/* Compare version strings:
+
+   This function compares strings S1 and S2:
+   1) By PREFIX in the same way as strcmp.
+   2) Then by VERSION (most similarly to version compare of Debian's dpkg).
+      Leading zeros in version numbers are ignored.
+   3) If both (PREFIX and  VERSION) are equal, strcmp function is used for
+      comparison. So this function can return 0 if (and only if) strings S1
+      and S2 are identical.
+
+   It returns number >0 for S1 > S2, 0 for S1 == S2 and number <0 for S1 < S2.
+
+   This function compares strings, in a way that if VER1 and VER2 are version
+   numbers and PREFIX and SUFFIX (SUFFIX defined as (\.[A-Za-z~][A-Za-z0-9~]*)*)
+   are strings then VER1 < VER2 implies filevercmp (PREFIX VER1 SUFFIX,
+   PREFIX VER2 SUFFIX) < 0.
+
+   This function is intended to be a replacement for strverscmp. */
+int filevercmp (const char *s1, const char *s2) _GL_ATTRIBUTE_PURE;
+
+#endif /* FILEVERCMP_H */
diff --git a/lib/gnulib.mk b/lib/gnulib.mk
index b1edd86..cc84296 100644
--- a/lib/gnulib.mk
+++ b/lib/gnulib.mk
@@ -21,7 +21,7 @@
 # the same distribution terms as the rest of that program.
 #
 # Generated by gnulib-tool.
-# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=flexmember --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=unsetenv --makefile-name=gnulib.mk - [...]
+# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=flexmember --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=unsetenv --makefile-name=gnulib.mk - [...]
 
 
 MOSTLYCLEANFILES += core *.stackdump
@@ -441,6 +441,14 @@ EXTRA_DIST += filemode.h
 
 ## end   gnulib module filemode
 
+## begin gnulib module filevercmp
+
+libgnu_a_SOURCES += filevercmp.c
+
+EXTRA_DIST += filevercmp.h
+
+## end   gnulib module filevercmp
+
 ## begin gnulib module fpending
 
 
diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4
index 831bb4c..5a3fc98 100644
--- a/m4/gnulib-comp.m4
+++ b/m4/gnulib-comp.m4
@@ -78,6 +78,7 @@ AC_DEFUN([gl_EARLY],
   # Code from module fdatasync:
   # Code from module fdopendir:
   # Code from module filemode:
+  # Code from module filevercmp:
   # Code from module fpending:
   # Code from module fstatat:
   # Code from module fsync:
@@ -889,6 +890,8 @@ AC_DEFUN([gl_FILE_LIST], [
   lib/fdopendir.c
   lib/filemode.c
   lib/filemode.h
+  lib/filevercmp.c
+  lib/filevercmp.h
   lib/fpending.c
   lib/fpending.h
   lib/fstatat.c
diff --git a/src/fns.c b/src/fns.c
index 77ad450..d314fcd 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -21,6 +21,7 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include <config.h>
 
 #include <unistd.h>
+#include <filevercmp.h>
 #include <intprops.h>
 #include <vla.h>
 #include <errno.h>
@@ -332,50 +333,21 @@ Symbols are also allowed; their print names are used instead.  */)
   return i1 < SCHARS (string2) ? Qt : Qnil;
 }
 
-/* Return the numerical value of a consecutive run of numerical
-   characters from STRING.  The ISP and ISP_BYTE address pointer
-   pointers are increased and left at the next character after the
-   numerical characters. */
-static size_t
-gather_number_from_string (Lisp_Object string,
-                          ptrdiff_t *isp, ptrdiff_t *isp_byte)
-{
-  size_t number = 0;
-  char *s = SSDATA (string);
-  char *end;
-
-  errno = 0;
-  number = strtoumax (s + *isp_byte, &end, 10);
-  if (errno == ERANGE)
-    /* If we have an integer overflow, then we fall back on lexical
-       comparison. */
-    return -1;
-  else
-    {
-      size_t diff = end - (s + *isp_byte);
-      (*isp) += diff;
-      (*isp_byte) += diff;
-      return number;
-    }
-}
+DEFUN ("string-version-lessp", Fstring_version_lessp,
+       Sstring_version_lessp, 2, 2, 0,
+       doc: /* Return non-nil if S1 is less than S2, as version strings.
+
+This function compares version strings S1 and S2:
+   1) By prefix lexicographically.
+   2) Then by version (similarly to version comparison of Debian's dpkg).
+      Leading zeros in version numbers are ignored.
+   3) If both prefix and version are equal, compare as ordinary strings.
 
-DEFUN ("string-numeric-lessp", Fstring_numeric_lessp,
-       Sstring_numeric_lessp, 2, 2, 0,
-       doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' order.
-Sequences of non-numerical characters are compared lexicographically,
-while sequences of numerical characters are converted into numbers,
-and then the numbers are compared.  This means that \"foo2.png\" is
-less than \"foo12.png\" according to this predicate.
+For example, \"foo2.png\" compares less than \"foo12.png\".
 Case is significant.
 Symbols are also allowed; their print names are used instead.  */)
-  (register Lisp_Object string1, Lisp_Object string2)
+  (Lisp_Object string1, Lisp_Object string2)
 {
-  ptrdiff_t end;
-  ptrdiff_t i1, i1_byte, i2, i2_byte;
-  size_t num1, num2;
-  unsigned char *chp;
-  int chlen1, chlen2;
-
   if (SYMBOLP (string1))
     string1 = SYMBOL_NAME (string1);
   if (SYMBOLP (string2))
@@ -383,67 +355,26 @@ Symbols are also allowed; their print names are used instead.  */)
   CHECK_STRING (string1);
   CHECK_STRING (string2);
 
-  i1 = i1_byte = i2 = i2_byte = 0;
+  char *p1 = SSDATA (string1);
+  char *p2 = SSDATA (string2);
+  char *lim1 = p1 + SBYTES (string1);
+  char *lim2 = p2 + SBYTES (string2);
+  int cmp;
 
-  end = SCHARS (string1);
-  if (end > SCHARS (string2))
-    end = SCHARS (string2);
-
-  while (i1 < end)
+  while ((cmp = filevercmp (p1, p2)) == 0)
     {
-      /* When we find a mismatch, we must compare the
-        characters, not just the bytes.  */
-      int c1, c2;
-
-      if (STRING_MULTIBYTE (string1))
-       {
-         chp = &SDATA (string1)[i1_byte];
-         c1 = STRING_CHAR_AND_LENGTH (chp, chlen1);
-       }
-      else
-       {
-         c1 = SREF (string1, i1_byte);
-         chlen1 = 1;
-       }
-
-      if (STRING_MULTIBYTE (string2))
-       {
-         chp = &SDATA (string1)[i2_byte];
-         c2 = STRING_CHAR_AND_LENGTH (chp, chlen2);
-       }
-      else
-       {
-         c2 = SREF (string2, i2_byte);
-         chlen2 = 1;
-       }
-
-      if (c1 >= '0' && c1 <= '9' &&
-         c2 >= '0' && c2 <= '9')
-       /* Both strings are numbers, so compare them. */
-       {
-         num1 = gather_number_from_string (string1, &i1, &i1_byte);
-         num2 = gather_number_from_string (string2, &i2, &i2_byte);
-         /* If we have an integer overflow, then resort to sorting
-            the entire string lexicographically. */
-         if (num1 == -1 || num2 == -1)
-           return Fstring_lessp (string1, string2);
-         else if (num1 < num2)
-           return Qt;
-         else if (num1 > num2)
-           return Qnil;
-       }
-      else
-       {
-         if (c1 != c2)
-           return c1 < c2 ? Qt : Qnil;
-
-         i1++;
-         i2++;
-         i1_byte += chlen1;
-         i2_byte += chlen2;
-       }
+      /* If the strings are identical through their first null bytes,
+        skip past identical prefixes and try again.  */
+      ptrdiff_t size = strlen (p1) + 1;
+      p1 += size;
+      p2 += size;
+      if (lim1 < p1)
+       return lim2 < p2 ? Qnil : Qt;
+      if (lim2 < p2)
+       return Qnil;
     }
-  return i1 < SCHARS (string2) ? Qt : Qnil;
+
+  return cmp < 0 ? Qt : Qnil;
 }
 
 DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0,
@@ -5164,7 +5095,7 @@ this variable.  */);
   defsubr (&Sstring_equal);
   defsubr (&Scompare_strings);
   defsubr (&Sstring_lessp);
-  defsubr (&Sstring_numeric_lessp);
+  defsubr (&Sstring_version_lessp);
   defsubr (&Sstring_collate_lessp);
   defsubr (&Sstring_collate_equalp);
   defsubr (&Sappend);
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index 0c6edb8..8617369 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -192,19 +192,19 @@
               a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8")))))
     '("Adrian" "Ævar" "Agustín" "Eli"))))
 
-(ert-deftest fns-tests-string-numeric-lessp ()
-  (should (string-numeric-lessp "foo2.png" "foo12.png"))
-  (should (not (string-numeric-lessp "foo12.png" "foo2.png")))
-  (should (string-numeric-lessp "foo12.png" "foo20000.png"))
-  (should (not (string-numeric-lessp "foo20000.png" "foo12.png")))
-  (should (string-numeric-lessp "foo.png" "foo2.png"))
-  (should (not (string-numeric-lessp "foo2.png" "foo.png")))
+(ert-deftest fns-tests-string-version-lessp ()
+  (should (string-version-lessp "foo2.png" "foo12.png"))
+  (should (not (string-version-lessp "foo12.png" "foo2.png")))
+  (should (string-version-lessp "foo12.png" "foo20000.png"))
+  (should (not (string-version-lessp "foo20000.png" "foo12.png")))
+  (should (string-version-lessp "foo.png" "foo2.png"))
+  (should (not (string-version-lessp "foo2.png" "foo.png")))
   (should (equal (sort '("foo12.png" "foo2.png" "foo1.png")
-                       'string-numeric-lessp)
+                       'string-version-lessp)
                  '("foo1.png" "foo2.png" "foo12.png")))
-  (should (string-numeric-lessp "foo2" "foo1234"))
-  (should (not (string-numeric-lessp "foo1234" "foo2")))
-  (should (string-numeric-lessp "foo.png" "foo2"))
-  (should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5"))
-  (should (string-numeric-lessp "2" "1245"))
-  (should (not (string-numeric-lessp "1245" "2"))))
+  (should (string-version-lessp "foo2" "foo1234"))
+  (should (not (string-version-lessp "foo1234" "foo2")))
+  (should (string-version-lessp "foo.png" "foo2"))
+  (should (string-version-lessp "foo1.25.5.png" "foo1.125.5"))
+  (should (string-version-lessp "2" "1245"))
+  (should (not (string-version-lessp "1245" "2"))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]