emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master d682f0d: Add command to replace buffer contents


From: Philipp Stephani
Subject: [Emacs-diffs] master d682f0d: Add command to replace buffer contents
Date: Sat, 17 Jun 2017 09:46:58 -0400 (EDT)

branch: master
commit d682f0daa3c0bfdd5ee8ce0e9226353d505e85a9
Author: Philipp Stephani <address@hidden>
Commit: Philipp Stephani <address@hidden>

    Add command to replace buffer contents
    
    Add a new command 'replace-buffer-contents' that uses the Myers diff
    algorithm to non-destructively replace the accessible portion of the
    current buffer.  The Myers algorithm is implemented in Gnulib.
    
    * src/editfns.c (Freplace_buffer_contents): New command.
    (set_bit, bit_is_set, buffer_chars_equal): New helper functions.
    (syms_of_editfns): Define new command.
    
    * test/src/editfns-tests.el (replace-buffer-contents-1)
    (replace-buffer-contents-2): New unit tests.
    
    * src/buffer.h (BUF_FETCH_CHAR_AS_MULTIBYTE): New helper macro.
    
    * admin/merge-gnulib (GNULIB_MODULES): Add diffseq.h and minmax.h.
---
 admin/merge-gnulib        |   4 +-
 etc/NEWS                  |   5 +
 lib/diffseq.h             | 525 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/gnulib.mk.in          |  18 +-
 lib/minmax.h              |  60 ++++++
 m4/gnulib-comp.m4         |   6 +
 m4/minmax.m4              |  44 ++++
 src/buffer.h              |   9 +
 src/editfns.c             | 201 ++++++++++++++++++
 test/src/editfns-tests.el |  31 +++
 10 files changed, 900 insertions(+), 3 deletions(-)

diff --git a/admin/merge-gnulib b/admin/merge-gnulib
index e5fb0f5..d4bbf17 100755
--- a/admin/merge-gnulib
+++ b/admin/merge-gnulib
@@ -30,12 +30,12 @@ GNULIB_MODULES='
   careadlinkat close-stream
   count-leading-zeros count-one-bits count-trailing-zeros
   crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512
-  dtoastr dtotimespec dup2 environ execinfo faccessat
+  diffseq dtoastr dtotimespec dup2 environ execinfo faccessat
   fcntl fcntl-h fdatasync fdopendir
   filemode filevercmp flexmember fstatat fsync
   getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog
   ignore-value intprops largefile lstat
-  manywarnings memrchr mkostemp mktime
+  manywarnings memrchr minmax mkostemp mktime
   pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat
   sig2str socklen stat-time std-gnu11 stdalign stddef stdio
   stpcpy strftime strtoimax symlink sys_stat
diff --git a/etc/NEWS b/etc/NEWS
index 2fb8daa..ab600eb 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -462,6 +462,11 @@ Negative prefix arg flips the direction of selection.  
Also,
 defun are selected unless they are separated from the defun by a blank
 line.
 
+** New command 'replace-buffer-contents'.  This command replaces the
+contents of theaccessible portion of the current buffer with the
+contents of the accessible portion of a different buffer while keeping
+point, mark, markers, and text properties as intact as possible.
+
 
 * Changes in Specialized Modes and Packages in Emacs 26.1
 
diff --git a/lib/diffseq.h b/lib/diffseq.h
new file mode 100644
index 0000000..d7a3743
--- /dev/null
+++ b/lib/diffseq.h
@@ -0,0 +1,525 @@
+/* Analyze differences between two vectors.
+
+   Copyright (C) 1988-1989, 1992-1995, 2001-2004, 2006-2017 Free Software
+   Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+
+/* The basic idea is to consider two vectors as similar if, when
+   transforming the first vector into the second vector through a
+   sequence of edits (inserts and deletes of one element each),
+   this sequence is short - or equivalently, if the ordered list
+   of elements that are untouched by these edits is long.  For a
+   good introduction to the subject, read about the "Levenshtein
+   distance" in Wikipedia.
+
+   The basic algorithm is described in:
+   "An O(ND) Difference Algorithm and its Variations", Eugene W. Myers,
+   Algorithmica Vol. 1, 1986, pp. 251-266,
+   <http://dx.doi.org/10.1007/BF01840446>.
+   See especially section 4.2, which describes the variation used below.
+
+   The basic algorithm was independently discovered as described in:
+   "Algorithms for Approximate String Matching", Esko Ukkonen,
+   Information and Control Vol. 64, 1985, pp. 100-118,
+   <http://dx.doi.org/10.1016/S0019-9958(85)80046-2>.
+
+   Unless the 'find_minimal' flag is set, this code uses the TOO_EXPENSIVE
+   heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
+   at the price of producing suboptimal output for large inputs with
+   many differences.  */
+
+/* Before including this file, you need to define:
+     ELEMENT                 The element type of the vectors being compared.
+     EQUAL                   A two-argument macro that tests two elements for
+                             equality.
+     OFFSET                  A signed integer type sufficient to hold the
+                             difference between two indices.  Usually
+                             something like ptrdiff_t.
+     EXTRA_CONTEXT_FIELDS    Declarations of fields for 'struct context'.
+     NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff].
+     NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff].
+     EARLY_ABORT(ctxt)       (Optional) A boolean expression that triggers an
+                             early abort of the computation.
+     USE_HEURISTIC           (Optional) Define if you want to support the
+                             heuristic for large vectors.
+   It is also possible to use this file with abstract arrays.  In this case,
+   xvec and yvec are not represented in memory.  They only exist conceptually.
+   In this case, the list of defines above is amended as follows:
+     ELEMENT                 Undefined.
+     EQUAL                   Undefined.
+     XVECREF_YVECREF_EQUAL(ctxt, xoff, yoff)
+                             A three-argument macro: References xvec[xoff] and
+                             yvec[yoff] and tests these elements for equality.
+   Before including this file, you also need to include:
+     #include <limits.h>
+     #include <stdbool.h>
+     #include "minmax.h"
+ */
+
+/* Maximum value of type OFFSET.  */
+#define OFFSET_MAX \
+  ((((OFFSET)1 << (sizeof (OFFSET) * CHAR_BIT - 2)) - 1) * 2 + 1)
+
+/* Default to no early abort.  */
+#ifndef EARLY_ABORT
+# define EARLY_ABORT(ctxt) false
+#endif
+
+/* Use this to suppress gcc's "...may be used before initialized" warnings.
+   Beware: The Code argument must not contain commas.  */
+#ifndef IF_LINT
+# if defined GCC_LINT || defined lint
+#  define IF_LINT(Code) Code
+# else
+#  define IF_LINT(Code) /* empty */
+# endif
+#endif
+
+/* As above, but when Code must contain one comma. */
+#ifndef IF_LINT2
+# if defined GCC_LINT || defined lint
+#  define IF_LINT2(Code1, Code2) Code1, Code2
+# else
+#  define IF_LINT2(Code1, Code2) /* empty */
+# endif
+#endif
+
+/*
+ * Context of comparison operation.
+ */
+struct context
+{
+  #ifdef ELEMENT
+  /* Vectors being compared.  */
+  ELEMENT const *xvec;
+  ELEMENT const *yvec;
+  #endif
+
+  /* Extra fields.  */
+  EXTRA_CONTEXT_FIELDS
+
+  /* Vector, indexed by diagonal, containing 1 + the X coordinate of the point
+     furthest along the given diagonal in the forward search of the edit
+     matrix.  */
+  OFFSET *fdiag;
+
+  /* Vector, indexed by diagonal, containing the X coordinate of the point
+     furthest along the given diagonal in the backward search of the edit
+     matrix.  */
+  OFFSET *bdiag;
+
+  #ifdef USE_HEURISTIC
+  /* This corresponds to the diff --speed-large-files flag.  With this
+     heuristic, for vectors with a constant small density of changes,
+     the algorithm is linear in the vector size.  */
+  bool heuristic;
+  #endif
+
+  /* Edit scripts longer than this are too expensive to compute.  */
+  OFFSET too_expensive;
+
+  /* Snakes bigger than this are considered "big".  */
+  #define SNAKE_LIMIT 20
+};
+
+struct partition
+{
+  /* Midpoints of this partition.  */
+  OFFSET xmid;
+  OFFSET ymid;
+
+  /* True if low half will be analyzed minimally.  */
+  bool lo_minimal;
+
+  /* Likewise for high half.  */
+  bool hi_minimal;
+};
+
+
+/* Find the midpoint of the shortest edit script for a specified portion
+   of the two vectors.
+
+   Scan from the beginnings of the vectors, and simultaneously from the ends,
+   doing a breadth-first search through the space of edit-sequence.
+   When the two searches meet, we have found the midpoint of the shortest
+   edit sequence.
+
+   If FIND_MINIMAL is true, find the minimal edit script regardless of
+   expense.  Otherwise, if the search is too expensive, use heuristics to
+   stop the search and report a suboptimal answer.
+
+   Set PART->(xmid,ymid) to the midpoint (XMID,YMID).  The diagonal number
+   XMID - YMID equals the number of inserted elements minus the number
+   of deleted elements (counting only elements before the midpoint).
+
+   Set PART->lo_minimal to true iff the minimal edit script for the
+   left half of the partition is known; similarly for PART->hi_minimal.
+
+   This function assumes that the first elements of the specified portions
+   of the two vectors do not match, and likewise that the last elements do not
+   match.  The caller must trim matching elements from the beginning and end
+   of the portions it is going to specify.
+
+   If we return the "wrong" partitions, the worst this can do is cause
+   suboptimal diff output.  It cannot cause incorrect diff output.  */
+
+static void
+diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal,
+      struct partition *part, struct context *ctxt)
+{
+  OFFSET *const fd = ctxt->fdiag;       /* Give the compiler a chance. */
+  OFFSET *const bd = ctxt->bdiag;       /* Additional help for the compiler. */
+#ifdef ELEMENT
+  ELEMENT const *const xv = ctxt->xvec; /* Still more help for the compiler. */
+  ELEMENT const *const yv = ctxt->yvec; /* And more and more . . . */
+  #define XREF_YREF_EQUAL(x,y)  EQUAL (xv[x], yv[y])
+#else
+  #define XREF_YREF_EQUAL(x,y)  XVECREF_YVECREF_EQUAL (ctxt, x, y)
+#endif
+  const OFFSET dmin = xoff - ylim;      /* Minimum valid diagonal. */
+  const OFFSET dmax = xlim - yoff;      /* Maximum valid diagonal. */
+  const OFFSET fmid = xoff - yoff;      /* Center diagonal of top-down search. 
*/
+  const OFFSET bmid = xlim - ylim;      /* Center diagonal of bottom-up 
search. */
+  OFFSET fmin = fmid;
+  OFFSET fmax = fmid;           /* Limits of top-down search. */
+  OFFSET bmin = bmid;
+  OFFSET bmax = bmid;           /* Limits of bottom-up search. */
+  OFFSET c;                     /* Cost. */
+  bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd
+                                   diagonal with respect to the northwest. */
+
+  fd[fmid] = xoff;
+  bd[bmid] = xlim;
+
+  for (c = 1;; ++c)
+    {
+      OFFSET d;                 /* Active diagonal. */
+      bool big_snake = false;
+
+      /* Extend the top-down search by an edit step in each diagonal. */
+      if (fmin > dmin)
+        fd[--fmin - 1] = -1;
+      else
+        ++fmin;
+      if (fmax < dmax)
+        fd[++fmax + 1] = -1;
+      else
+        --fmax;
+      for (d = fmax; d >= fmin; d -= 2)
+        {
+          OFFSET x;
+          OFFSET y;
+          OFFSET tlo = fd[d - 1];
+          OFFSET thi = fd[d + 1];
+          OFFSET x0 = tlo < thi ? thi : tlo + 1;
+
+          for (x = x0, y = x0 - d;
+               x < xlim && y < ylim && XREF_YREF_EQUAL (x, y);
+               x++, y++)
+            continue;
+          if (x - x0 > SNAKE_LIMIT)
+            big_snake = true;
+          fd[d] = x;
+          if (odd && bmin <= d && d <= bmax && bd[d] <= x)
+            {
+              part->xmid = x;
+              part->ymid = y;
+              part->lo_minimal = part->hi_minimal = true;
+              return;
+            }
+        }
+
+      /* Similarly extend the bottom-up search.  */
+      if (bmin > dmin)
+        bd[--bmin - 1] = OFFSET_MAX;
+      else
+        ++bmin;
+      if (bmax < dmax)
+        bd[++bmax + 1] = OFFSET_MAX;
+      else
+        --bmax;
+      for (d = bmax; d >= bmin; d -= 2)
+        {
+          OFFSET x;
+          OFFSET y;
+          OFFSET tlo = bd[d - 1];
+          OFFSET thi = bd[d + 1];
+          OFFSET x0 = tlo < thi ? tlo : thi - 1;
+
+          for (x = x0, y = x0 - d;
+               xoff < x && yoff < y && XREF_YREF_EQUAL (x - 1, y - 1);
+               x--, y--)
+            continue;
+          if (x0 - x > SNAKE_LIMIT)
+            big_snake = true;
+          bd[d] = x;
+          if (!odd && fmin <= d && d <= fmax && x <= fd[d])
+            {
+              part->xmid = x;
+              part->ymid = y;
+              part->lo_minimal = part->hi_minimal = true;
+              return;
+            }
+        }
+
+      if (find_minimal)
+        continue;
+
+#ifdef USE_HEURISTIC
+      /* Heuristic: check occasionally for a diagonal that has made lots
+         of progress compared with the edit distance.  If we have any
+         such, find the one that has made the most progress and return it
+         as if it had succeeded.
+
+         With this heuristic, for vectors with a constant small density
+         of changes, the algorithm is linear in the vector size.  */
+
+      if (200 < c && big_snake && ctxt->heuristic)
+        {
+          {
+            OFFSET best = 0;
+
+            for (d = fmax; d >= fmin; d -= 2)
+              {
+                OFFSET dd = d - fmid;
+                OFFSET x = fd[d];
+                OFFSET y = x - d;
+                OFFSET v = (x - xoff) * 2 - dd;
+
+                if (v > 12 * (c + (dd < 0 ? -dd : dd)))
+                  {
+                    if (v > best
+                        && xoff + SNAKE_LIMIT <= x && x < xlim
+                        && yoff + SNAKE_LIMIT <= y && y < ylim)
+                      {
+                        /* We have a good enough best diagonal; now insist
+                           that it end with a significant snake.  */
+                        int k;
+
+                        for (k = 1; XREF_YREF_EQUAL (x - k, y - k); k++)
+                          if (k == SNAKE_LIMIT)
+                            {
+                              best = v;
+                              part->xmid = x;
+                              part->ymid = y;
+                              break;
+                            }
+                      }
+                  }
+              }
+            if (best > 0)
+              {
+                part->lo_minimal = true;
+                part->hi_minimal = false;
+                return;
+              }
+          }
+
+          {
+            OFFSET best = 0;
+
+            for (d = bmax; d >= bmin; d -= 2)
+              {
+                OFFSET dd = d - bmid;
+                OFFSET x = bd[d];
+                OFFSET y = x - d;
+                OFFSET v = (xlim - x) * 2 + dd;
+
+                if (v > 12 * (c + (dd < 0 ? -dd : dd)))
+                  {
+                    if (v > best
+                        && xoff < x && x <= xlim - SNAKE_LIMIT
+                        && yoff < y && y <= ylim - SNAKE_LIMIT)
+                      {
+                        /* We have a good enough best diagonal; now insist
+                           that it end with a significant snake.  */
+                        int k;
+
+                        for (k = 0; XREF_YREF_EQUAL (x + k, y + k); k++)
+                          if (k == SNAKE_LIMIT - 1)
+                            {
+                              best = v;
+                              part->xmid = x;
+                              part->ymid = y;
+                              break;
+                            }
+                      }
+                  }
+              }
+            if (best > 0)
+              {
+                part->lo_minimal = false;
+                part->hi_minimal = true;
+                return;
+              }
+          }
+        }
+#endif /* USE_HEURISTIC */
+
+      /* Heuristic: if we've gone well beyond the call of duty, give up
+         and report halfway between our best results so far.  */
+      if (c >= ctxt->too_expensive)
+        {
+          OFFSET fxybest;
+          OFFSET fxbest IF_LINT (= 0);
+          OFFSET bxybest;
+          OFFSET bxbest IF_LINT (= 0);
+
+          /* Find forward diagonal that maximizes X + Y.  */
+          fxybest = -1;
+          for (d = fmax; d >= fmin; d -= 2)
+            {
+              OFFSET x = MIN (fd[d], xlim);
+              OFFSET y = x - d;
+              if (ylim < y)
+                {
+                  x = ylim + d;
+                  y = ylim;
+                }
+              if (fxybest < x + y)
+                {
+                  fxybest = x + y;
+                  fxbest = x;
+                }
+            }
+
+          /* Find backward diagonal that minimizes X + Y.  */
+          bxybest = OFFSET_MAX;
+          for (d = bmax; d >= bmin; d -= 2)
+            {
+              OFFSET x = MAX (xoff, bd[d]);
+              OFFSET y = x - d;
+              if (y < yoff)
+                {
+                  x = yoff + d;
+                  y = yoff;
+                }
+              if (x + y < bxybest)
+                {
+                  bxybest = x + y;
+                  bxbest = x;
+                }
+            }
+
+          /* Use the better of the two diagonals.  */
+          if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
+            {
+              part->xmid = fxbest;
+              part->ymid = fxybest - fxbest;
+              part->lo_minimal = true;
+              part->hi_minimal = false;
+            }
+          else
+            {
+              part->xmid = bxbest;
+              part->ymid = bxybest - bxbest;
+              part->lo_minimal = false;
+              part->hi_minimal = true;
+            }
+          return;
+        }
+    }
+  #undef XREF_YREF_EQUAL
+}
+
+
+/* Compare in detail contiguous subsequences of the two vectors
+   which are known, as a whole, to match each other.
+
+   The subsequence of vector 0 is [XOFF, XLIM) and likewise for vector 1.
+
+   Note that XLIM, YLIM are exclusive bounds.  All indices into the vectors
+   are origin-0.
+
+   If FIND_MINIMAL, find a minimal difference no matter how
+   expensive it is.
+
+   The results are recorded by invoking NOTE_DELETE and NOTE_INSERT.
+
+   Return false if terminated normally, or true if terminated through early
+   abort.  */
+
+static bool
+compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
+            bool find_minimal, struct context *ctxt)
+{
+#ifdef ELEMENT
+  ELEMENT const *xv = ctxt->xvec; /* Help the compiler.  */
+  ELEMENT const *yv = ctxt->yvec;
+  #define XREF_YREF_EQUAL(x,y)  EQUAL (xv[x], yv[y])
+#else
+  #define XREF_YREF_EQUAL(x,y)  XVECREF_YVECREF_EQUAL (ctxt, x, y)
+#endif
+
+  /* Slide down the bottom initial diagonal.  */
+  while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xoff, yoff))
+    {
+      xoff++;
+      yoff++;
+    }
+
+  /* Slide up the top initial diagonal. */
+  while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xlim - 1, ylim - 1))
+    {
+      xlim--;
+      ylim--;
+    }
+
+  /* Handle simple cases. */
+  if (xoff == xlim)
+    while (yoff < ylim)
+      {
+        NOTE_INSERT (ctxt, yoff);
+        if (EARLY_ABORT (ctxt))
+          return true;
+        yoff++;
+      }
+  else if (yoff == ylim)
+    while (xoff < xlim)
+      {
+        NOTE_DELETE (ctxt, xoff);
+        if (EARLY_ABORT (ctxt))
+          return true;
+        xoff++;
+      }
+  else
+    {
+      struct partition part IF_LINT2 (= { .xmid = 0, .ymid = 0 });
+
+      /* Find a point of correspondence in the middle of the vectors.  */
+      diag (xoff, xlim, yoff, ylim, find_minimal, &part, ctxt);
+
+      /* Use the partitions to split this problem into subproblems.  */
+      if (compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt))
+        return true;
+      if (compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt))
+        return true;
+    }
+
+  return false;
+  #undef XREF_YREF_EQUAL
+}
+
+#undef ELEMENT
+#undef EQUAL
+#undef OFFSET
+#undef EXTRA_CONTEXT_FIELDS
+#undef NOTE_DELETE
+#undef NOTE_INSERT
+#undef EARLY_ABORT
+#undef USE_HEURISTIC
+#undef XVECREF_YVECREF_EQUAL
+#undef OFFSET_MAX
diff --git a/lib/gnulib.mk.in b/lib/gnulib.mk.in
index 73d3043..509089e 100644
--- a/lib/gnulib.mk.in
+++ b/lib/gnulib.mk.in
@@ -21,7 +21,7 @@
 # the same distribution terms as the rest of that program.
 #
 # Generated by gnulib-tool.
-# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib 
--m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux 
--avoid=close --avoid=dup --avoid=fchdir --avoid=fstat --avoid=malloc-posix 
--avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die 
--avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv 
--avoid=sigprocmask --avoid=stat --avoid=stdarg --avoid=stdbool 
--avoid=threadlib --avoid=tzset --avoid=unsetenv --avoid=utime --avo [...]
+# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib 
--m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux 
--avoid=close --avoid=dup --avoid=fchdir --avoid=fstat --avoid=malloc-posix 
--avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die 
--avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv 
--avoid=sigprocmask --avoid=stat --avoid=stdarg --avoid=stdbool 
--avoid=threadlib --avoid=tzset --avoid=unsetenv --avoid=utime --avo [...]
 
 
 MOSTLYCLEANFILES += core *.stackdump
@@ -1167,6 +1167,14 @@ EXTRA_DIST += gl_openssl.h sha512.h
 endif
 ## end   gnulib module crypto/sha512
 
+## begin gnulib module diffseq
+ifeq (,$(OMIT_GNULIB_MODULE_diffseq))
+
+libgnu_a_SOURCES += diffseq.h
+
+endif
+## end   gnulib module diffseq
+
 ## begin gnulib module dirent
 ifeq (,$(OMIT_GNULIB_MODULE_dirent))
 
@@ -1747,6 +1755,14 @@ EXTRA_libgnu_a_SOURCES += memrchr.c
 endif
 ## end   gnulib module memrchr
 
+## begin gnulib module minmax
+ifeq (,$(OMIT_GNULIB_MODULE_minmax))
+
+libgnu_a_SOURCES += minmax.h
+
+endif
+## end   gnulib module minmax
+
 ## begin gnulib module mkostemp
 ifeq (,$(OMIT_GNULIB_MODULE_mkostemp))
 
diff --git a/lib/minmax.h b/lib/minmax.h
new file mode 100644
index 0000000..6b602a9
--- /dev/null
+++ b/lib/minmax.h
@@ -0,0 +1,60 @@
+/* MIN, MAX macros.
+   Copyright (C) 1995, 1998, 2001, 2003, 2005, 2009-2017 Free Software
+   Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _MINMAX_H
+#define _MINMAX_H
+
+/* Note: MIN, MAX are also defined in <sys/param.h> on some systems
+   (glibc, IRIX, HP-UX, OSF/1).  Therefore you might get warnings about
+   MIN, MAX macro redefinitions on some systems; the workaround is to
+   #include this file as the last one among the #include list.  */
+
+/* Before we define the following symbols we get the <limits.h> file
+   since otherwise we get redefinitions on some systems if <limits.h> is
+   included after this file.  Likewise for <sys/param.h>.
+   If more than one of these system headers define MIN and MAX, pick just
+   one of the headers (because the definitions most likely are the same).  */
+#if HAVE_MINMAX_IN_LIMITS_H
+# include <limits.h>
+#elif HAVE_MINMAX_IN_SYS_PARAM_H
+# include <sys/param.h>
+#endif
+
+/* Note: MIN and MAX should be used with two arguments of the
+   same type.  They might not return the minimum and maximum of their two
+   arguments, if the arguments have different types or have unusual
+   floating-point values.  For example, on a typical host with 32-bit 'int',
+   64-bit 'long long', and 64-bit IEEE 754 'double' types:
+
+     MAX (-1, 2147483648) returns 4294967295.
+     MAX (9007199254740992.0, 9007199254740993) returns 9007199254740992.0.
+     MAX (NaN, 0.0) returns 0.0.
+     MAX (+0.0, -0.0) returns -0.0.
+
+   and in each case the answer is in some sense bogus.  */
+
+/* MAX(a,b) returns the maximum of A and B.  */
+#ifndef MAX
+# define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+/* MIN(a,b) returns the minimum of A and B.  */
+#ifndef MIN
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+#endif /* _MINMAX_H */
diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4
index 8f53a99..1ac58e8 100644
--- a/m4/gnulib-comp.m4
+++ b/m4/gnulib-comp.m4
@@ -61,6 +61,7 @@ AC_DEFUN([gl_EARLY],
   # Code from module crypto/sha1:
   # Code from module crypto/sha256:
   # Code from module crypto/sha512:
+  # Code from module diffseq:
   # Code from module dirent:
   # Code from module dirfd:
   # Code from module dosname:
@@ -105,6 +106,7 @@ AC_DEFUN([gl_EARLY],
   # Code from module lstat:
   # Code from module manywarnings:
   # Code from module memrchr:
+  # Code from module minmax:
   # Code from module mkostemp:
   # Code from module mktime:
   # Code from module mktime-internal:
@@ -289,6 +291,7 @@ AC_DEFUN([gl_INIT],
     gl_PREREQ_MEMRCHR
   fi
   gl_STRING_MODULE_INDICATOR([memrchr])
+  gl_MINMAX
   gl_FUNC_MKOSTEMP
   if test $HAVE_MKOSTEMP = 0; then
     AC_LIBOBJ([mkostemp])
@@ -821,6 +824,7 @@ AC_DEFUN([gl_FILE_LIST], [
   lib/count-one-bits.h
   lib/count-trailing-zeros.c
   lib/count-trailing-zeros.h
+  lib/diffseq.h
   lib/dirent.in.h
   lib/dirfd.c
   lib/dosname.h
@@ -875,6 +879,7 @@ AC_DEFUN([gl_FILE_LIST], [
   lib/md5.c
   lib/md5.h
   lib/memrchr.c
+  lib/minmax.h
   lib/mkostemp.c
   lib/mktime-internal.h
   lib/mktime.c
@@ -991,6 +996,7 @@ AC_DEFUN([gl_FILE_LIST], [
   m4/manywarnings.m4
   m4/md5.m4
   m4/memrchr.m4
+  m4/minmax.m4
   m4/mkostemp.m4
   m4/mktime.m4
   m4/multiarch.m4
diff --git a/m4/minmax.m4 b/m4/minmax.m4
new file mode 100644
index 0000000..6845fce
--- /dev/null
+++ b/m4/minmax.m4
@@ -0,0 +1,44 @@
+# minmax.m4 serial 4
+dnl Copyright (C) 2005, 2009-2017 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_PREREQ([2.53])
+
+AC_DEFUN([gl_MINMAX],
+[
+  AC_REQUIRE([gl_PREREQ_MINMAX])
+])
+
+# Prerequisites of lib/minmax.h.
+AC_DEFUN([gl_PREREQ_MINMAX],
+[
+  gl_MINMAX_IN_HEADER([limits.h])
+  gl_MINMAX_IN_HEADER([sys/param.h])
+])
+
+dnl gl_MINMAX_IN_HEADER(HEADER)
+dnl The parameter has to be a literal header name; it cannot be macro,
+dnl nor a shell variable. (Because autoheader collects only AC_DEFINE
+dnl invocations with a literal macro name.)
+AC_DEFUN([gl_MINMAX_IN_HEADER],
+[
+  m4_pushdef([header], AS_TR_SH([$1]))
+  m4_pushdef([HEADER], AS_TR_CPP([$1]))
+  AC_CACHE_CHECK([whether <$1> defines MIN and MAX],
+    [gl_cv_minmax_in_]header,
+    [AC_COMPILE_IFELSE(
+       [AC_LANG_PROGRAM(
+          [[#include <$1>
+            int x = MIN (42, 17);]],
+          [[]])],
+       [gl_cv_minmax_in_]header[=yes],
+       [gl_cv_minmax_in_]header[=no])])
+  if test $gl_cv_minmax_in_[]header = yes; then
+    AC_DEFINE([HAVE_MINMAX_IN_]HEADER, 1,
+      [Define to 1 if <$1> defines the MIN and MAX macros.])
+  fi
+  m4_popdef([HEADER])
+  m4_popdef([header])
+])
diff --git a/src/buffer.h b/src/buffer.h
index a2bdc4e..be270fe 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -412,6 +412,15 @@ extern void enlarge_buffer_text (struct buffer *, 
ptrdiff_t);
    ? BUF_FETCH_MULTIBYTE_CHAR ((buf), (pos))    \
    : BUF_FETCH_BYTE ((buf), (pos)))
 
+/* Return character at byte position POS in buffer BUF.  If BUF is
+   unibyte and the character is not ASCII, make the returning
+   character multibyte.  */
+
+#define BUF_FETCH_CHAR_AS_MULTIBYTE(buf, pos)           \
+  (! NILP (BVAR ((buf), enable_multibyte_characters))   \
+   ? BUF_FETCH_MULTIBYTE_CHAR ((buf), (pos))            \
+   : UNIBYTE_TO_CHAR (BUF_FETCH_BYTE ((buf), (pos))))
+
 /* Return the byte at byte position N in buffer BUF.   */
 
 #define BUF_FETCH_BYTE(buf, n) \
diff --git a/src/editfns.c b/src/editfns.c
index 43b17f9..76b4aaf 100644
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -3105,6 +3105,206 @@ determines whether case is significant or ignored.  */)
   /* Same length too => they are equal.  */
   return make_number (0);
 }
+
+
+/* Set up necessary definitions for diffseq.h; see comments in
+   diffseq.h for explanation.  */
+
+#undef ELEMENT
+#undef EQUAL
+
+#define XVECREF_YVECREF_EQUAL(ctx, xoff, yoff)  \
+  buffer_chars_equal ((ctx), (xoff), (yoff))
+
+#define OFFSET ptrdiff_t
+
+#define EXTRA_CONTEXT_FIELDS                    \
+  /* Buffers to compare.  */                    \
+  struct buffer *buffer_a;                      \
+  struct buffer *buffer_b;                      \
+  /* Bit vectors recording for each character whether it was deleted
+     or inserted.  */                           \
+  unsigned char *deletions;                     \
+  unsigned char *insertions;
+
+#define NOTE_DELETE(ctx, xoff) set_bit ((ctx)->deletions, (xoff))
+#define NOTE_INSERT(ctx, yoff) set_bit ((ctx)->insertions, (yoff))
+
+struct context;
+static void set_bit (unsigned char *, OFFSET);
+static bool bit_is_set (const unsigned char *, OFFSET);
+static bool buffer_chars_equal (struct context *, OFFSET, OFFSET);
+
+#include "minmax.h"
+#include "diffseq.h"
+
+DEFUN ("replace-buffer-contents", Freplace_buffer_contents,
+       Sreplace_buffer_contents, 1, 1, "bSource buffer: ",
+       doc: /* Replace accessible portion of the current buffer with 
accessible portion of SOURCE.
+As far as possible the replacement is non-destructive, i.e. existing
+buffer contents, markers, properties, and overlays in the current
+buffer stay intact.  */)
+  (Lisp_Object source)
+{
+  struct buffer *a = current_buffer;
+  Lisp_Object source_buffer = Fget_buffer (source);
+  if (NILP (source_buffer))
+    nsberror (source);
+  struct buffer *b = XBUFFER (source_buffer);
+  if (! BUFFER_LIVE_P (b))
+    error ("Selecting deleted buffer");
+  if (a == b)
+    error ("Cannot replace a buffer with itself");
+
+  ptrdiff_t min_a = BEGV;
+  ptrdiff_t min_b = BUF_BEGV (b);
+  ptrdiff_t size_a = ZV - min_a;
+  ptrdiff_t size_b = BUF_ZV (b) - min_b;
+  eassume (size_a >= 0);
+  eassume (size_b >= 0);
+  bool a_empty = size_a == 0;
+  bool b_empty = size_b == 0;
+
+  /* Handle trivial cases where at least one accessible portion is
+     empty.  */
+
+  if (a_empty && b_empty)
+    return Qnil;
+
+  if (a_empty)
+    return Finsert_buffer_substring (source, Qnil, Qnil);
+
+  if (b_empty)
+    {
+      del_range_both (BEGV, BEGV_BYTE, ZV, ZV_BYTE, true);
+      return Qnil;
+    }
+
+  /* FIXME: It is not documented how to initialize the contents of the
+     context structure.  This code cargo-cults from the existing
+     caller in src/analyze.c of GNU Diffutils, which appears to
+     work.  */
+
+  ptrdiff_t diags = size_a + size_b + 3;
+  ptrdiff_t *buffer;
+  USE_SAFE_ALLOCA;
+  SAFE_NALLOCA (buffer, 2, diags);
+  /* Micro-optimization: Casting to size_t generates much better
+     code.  */
+  ptrdiff_t del_bytes = (size_t) size_a / CHAR_BIT + 1;
+  ptrdiff_t ins_bytes = (size_t) size_b / CHAR_BIT + 1;
+  struct context ctx = {
+    .buffer_a = a,
+    .buffer_b = b,
+    .deletions = SAFE_ALLOCA (del_bytes),
+    .insertions = SAFE_ALLOCA (ins_bytes),
+    .fdiag = buffer + size_b + 1,
+    .bdiag = buffer + diags + size_b + 1,
+    /* FIXME: Find a good number for .too_expensive.  */
+    .too_expensive = 1000000,
+  };
+  memclear (ctx.deletions, del_bytes);
+  memclear (ctx.insertions, ins_bytes);
+  /* compareseq requires indices to be zero-based.  We add BEGV back
+     later.  */
+  bool early_abort = compareseq (0, size_a, 0, size_b, false, &ctx);
+  /* Since we didn’t define EARLY_ABORT, we should never abort
+     early.  */
+  eassert (! early_abort);
+  SAFE_FREE ();
+
+  Fundo_boundary ();
+  ptrdiff_t count = SPECPDL_INDEX ();
+  record_unwind_protect (save_excursion_restore, save_excursion_save ());
+
+  SET_PT_BOTH (BEGV, BEGV_BYTE);
+  ptrdiff_t i = size_a;
+  ptrdiff_t j = size_b;
+  /* Walk backwards through the lists of changes.  This was also
+     cargo-culted from src/analyze.c in GNU Diffutils.  Because we
+     walk backwards, we don’t have to keep the positions in sync.  */
+  while (i >= 0 || j >= 0)
+    {
+      /* Check whether there is a change (insertion or deletion)
+         before the current position.  */
+      if ((i > 0 && bit_is_set (ctx.deletions, i - 1)) ||
+          (j > 0 && bit_is_set (ctx.insertions, j - 1)))
+       {
+          ptrdiff_t end_a = min_a + i;
+          ptrdiff_t end_b = min_b + j;
+          /* Find the beginning of the current change run.  */
+         while (i > 0 && bit_is_set (ctx.deletions, i - 1))
+            --i;
+         while (j > 0 && bit_is_set (ctx.insertions, j - 1))
+            --j;
+          ptrdiff_t beg_a = min_a + i;
+          ptrdiff_t beg_b = min_b + j;
+          eassert (beg_a >= BEGV);
+          eassert (beg_b >= BUF_BEGV (b));
+          eassert (beg_a <= end_a);
+          eassert (beg_b <= end_b);
+          eassert (end_a <= ZV);
+          eassert (end_b <= BUF_ZV (b));
+          eassert (beg_a < end_a || beg_b < end_b);
+          if (beg_a < end_a)
+            del_range (beg_a, end_a);
+          if (beg_b < end_b)
+            {
+              SET_PT (beg_a);
+              Finsert_buffer_substring (source, make_natnum (beg_b),
+                                        make_natnum (end_b));
+            }
+       }
+      --i;
+      --j;
+    }
+
+  return unbind_to (count, Qnil);
+}
+
+static void
+set_bit (unsigned char *a, ptrdiff_t i)
+{
+  eassert (i >= 0);
+  /* Micro-optimization: Casting to size_t generates much better
+     code.  */
+  size_t j = i;
+  a[j / CHAR_BIT] |= (1 << (j % CHAR_BIT));
+}
+
+static bool
+bit_is_set (const unsigned char *a, ptrdiff_t i)
+{
+  eassert (i >= 0);
+  /* Micro-optimization: Casting to size_t generates much better
+     code.  */
+  size_t j = i;
+  return a[j / CHAR_BIT] & (1 << (j % CHAR_BIT));
+}
+
+/* Return true if the characters at position POS_A of buffer
+   CTX->buffer_a and at position POS_B of buffer CTX->buffer_b are
+   equal.  POS_A and POS_B are zero-based.  Text properties are
+   ignored.  */
+
+static bool
+buffer_chars_equal (struct context *ctx,
+                    ptrdiff_t pos_a, ptrdiff_t pos_b)
+{
+  eassert (pos_a >= 0);
+  pos_a += BUF_BEGV (ctx->buffer_a);
+  eassert (pos_a >= BUF_BEGV (ctx->buffer_a));
+  eassert (pos_a < BUF_ZV (ctx->buffer_a));
+
+  eassert (pos_b >= 0);
+  pos_b += BUF_BEGV (ctx->buffer_b);
+  eassert (pos_b >= BUF_BEGV (ctx->buffer_b));
+  eassert (pos_b < BUF_ZV (ctx->buffer_b));
+
+  return BUF_FETCH_CHAR_AS_MULTIBYTE (ctx->buffer_a, pos_a)
+    == BUF_FETCH_CHAR_AS_MULTIBYTE (ctx->buffer_b, pos_b);
+}
+
 
 static void
 subst_char_in_region_unwind (Lisp_Object arg)
@@ -5315,6 +5515,7 @@ functions if all the text being accessed has this 
property.  */);
 
   defsubr (&Sinsert_buffer_substring);
   defsubr (&Scompare_buffer_substrings);
+  defsubr (&Sreplace_buffer_contents);
   defsubr (&Ssubst_char_in_region);
   defsubr (&Stranslate_region_internal);
   defsubr (&Sdelete_region);
diff --git a/test/src/editfns-tests.el b/test/src/editfns-tests.el
index 3073e37..a3ea8ab 100644
--- a/test/src/editfns-tests.el
+++ b/test/src/editfns-tests.el
@@ -208,4 +208,35 @@
                  '(error "Invalid format operation %$")))
   (should (equal (format "%1$c %1$s" ?±) "± 177")))
 
+(ert-deftest replace-buffer-contents-1 ()
+  (with-temp-buffer
+    (insert #("source" 2 4 (prop 7)))
+    (let ((source (current-buffer)))
+      (with-temp-buffer
+        (insert "before dest after")
+        (let ((marker (set-marker (make-marker) 14)))
+          (save-restriction
+            (narrow-to-region 8 12)
+            (replace-buffer-contents source))
+          (should (equal (marker-buffer marker) (current-buffer)))
+          (should (equal (marker-position marker) 16)))
+        (should (equal-including-properties
+                 (buffer-string)
+                 #("before source after" 9 11 (prop 7))))
+        (should (equal (point) 9))))
+    (should (equal-including-properties
+             (buffer-string)
+             #("source" 2 4 (prop 7))))))
+
+(ert-deftest replace-buffer-contents-2 ()
+  (with-temp-buffer
+    (insert "foo bar baz qux")
+    (let ((source (current-buffer)))
+      (with-temp-buffer
+        (insert "foo BAR baz qux")
+        (replace-buffer-contents source)
+        (should (equal-including-properties
+                 (buffer-string)
+                 "foo bar baz qux"))))))
+
 ;;; editfns-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]