emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: State of the overlay tree branch?


From: Stefan Monnier
Subject: Re: State of the overlay tree branch?
Date: Fri, 23 Mar 2018 15:39:35 -0400
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/27.0.50 (gnu/linux)

OK, I think I'm tired of these experiments.
Here's my current test, along with the patches I use with it.

You can test it with something like

    src/emacs -Q --batch -l ~/tmp/foo.el --eval '(setq 
internal--bytechar-distance-increment 50 internal--randomize-markers t)' --eval 
'(bytechar-test 3000 nil)' 

Shuffling the markers can make a noticeable difference in some cases but
we're only talking about a factor of 2 or 3.  It doesn't have much
negative impact, so it's not a bad option, but the algorithmic
problem remains anyway.


        Stefan


(defun bytechar-test (buffer-kb &optional forward)
  (random "seed")
  (with-temp-buffer
    (dotimes (i (* buffer-kb 33))
      (insert "lksajflahalskjdféefawrgfrüegf\n"))
    (message "buffer-size = %SkB" (/ (buffer-size) 1024))
    (let ((txtbuf (current-buffer))
          (goto-iterations (/ 10000000 buffer-kb))
          (line-iterations (/ 20000 buffer-kb))
          (markers ()))
      (dotimes (s 4)
        (with-temp-buffer
          (insert-buffer-substring txtbuf)
          (let ((stepsize (lsh 10 (* 4 s))))
            (if forward
                (cl-loop for n from (point-min) upto (point-max) by stepsize do
                         (push (copy-marker n) markers))
              (cl-loop for n from (point-max) downto (point-min) by stepsize do
                       (push (copy-marker n) markers))))
          ;; The GC's internal--randomize-markers just brings-to-front every
          ;; 8th marker, so when starting with an ordered list of markers (like
          ;; in our case), we need to run the GC at least 8 times before the
          ;; whole list starts to look somewhat shuffled.
          (dotimes (i 20) (garbage-collect))
          (let ((timing
                 (benchmark-run goto-iterations
                   (goto-char (+ (point-min) (random (buffer-size)))))))
            (message "ols=%S goto-random time=%.4f (+ %S)"
                     (/ (buffer-size) (lsh 10 (* 4 s)))
                     (car timing) (cdr timing)))
          (garbage-collect)             ;throw away the transient markers
          (let ((timing
                 (benchmark-run line-iterations
                   (dotimes (i 5)
                     (line-number-at-pos
                      (+ (point-min) (* i (/ (buffer-size) 4))))))))
            (message "nbmks=%S pos=*/4 time=%.4f (+ %S)"
                     (/ (buffer-size) (lsh 10 (* 4 s)))
                     (car timing) (cdr timing)))
          (dotimes (i 5)
            (let ((timing
                   (benchmark-run line-iterations
                     (line-number-at-pos
                      (+ (point-min) (* i (/ (buffer-size) 4)))))))
              (message "nbmks=%S pos=%S/4 time=%.4f (+ %S)"
                       (/ (buffer-size) (lsh 10 (* 4 s))) i
                       (car timing) (cdr timing))))
          )))))

diff --git a/lisp/emacs-lisp/benchmark.el b/lisp/emacs-lisp/benchmark.el
index b86b56b81e..2f4e38fe35 100644
--- a/lisp/emacs-lisp/benchmark.el
+++ b/lisp/emacs-lisp/benchmark.el
@@ -50,7 +50,7 @@ benchmark-run
 garbage collections that ran, and the time taken by garbage collection.
 See also `benchmark-run-compiled'."
   (declare (indent 1) (debug t))
-  (unless (natnump repetitions)
+  (unless (or (natnump repetitions) (symbolp repetitions))
     (setq forms (cons repetitions forms)
          repetitions 1))
   (let ((i (make-symbol "i"))
@@ -58,7 +58,7 @@ benchmark-run
        (gc (make-symbol "gc")))
     `(let ((,gc gc-elapsed)
           (,gcs gcs-done))
-       (list ,(if (> repetitions 1)
+       (list ,(if (or (symbolp repetitions) (> repetitions 1))
                  ;; Take account of the loop overhead.
                  `(- (benchmark-elapse (dotimes (,i ,repetitions)
                                          ,@forms))
@@ -101,7 +101,7 @@ benchmark
 For non-interactive use see also `benchmark-run' and
 `benchmark-run-compiled'."
   (interactive "p\nxForm: ")
-  (let ((result (eval `(benchmark-run ,repetitions ,form))))
+  (let ((result (eval `(benchmark-run ,repetitions ,form) t)))
     (if (zerop (nth 1 result))
        (message "Elapsed time: %fs" (car result))
       (message "Elapsed time: %fs (%fs in %d GCs)" (car result)
diff --git a/src/alloc.c b/src/alloc.c
index 7ba872aaee..16d11e34cd 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -7270,10 +7270,22 @@ static void
 unchain_dead_markers (struct buffer *buffer)
 {
   struct Lisp_Marker *this, **prev = &BUF_MARKERS (buffer);
+  /* In order to try and avoid worst case behaviors in buf_charpos_to_bytepos
+     we try and randomize the order of markers here.  */
+  unsigned i = 4;
 
   while ((this = *prev))
     if (this->gcmarkbit)
-      prev = &this->next;
+      {
+        if (!randomize_markers || i++ % 8)
+          prev = &this->next;
+        else
+          { /* Move this one to front, just to randomize things a bit.  */
+            *prev = this->next;
+            this->next = BUF_MARKERS (buffer);
+            BUF_MARKERS (buffer) = this;
+          }
+      }
     else
       {
         this->buffer = NULL;
@@ -7752,6 +7764,9 @@ The time is in seconds as a floating point value.  */);
   DEFVAR_INT ("gcs-done", gcs_done,
               doc: /* Accumulated number of garbage collections done.  */);
 
+  DEFVAR_BOOL ("internal--randomize-markers", randomize_markers, doc: /*  */);
+  randomize_markers = true;
+  
   defsubr (&Scons);
   defsubr (&Slist);
   defsubr (&Svector);
diff --git a/src/marker.c b/src/marker.c
index 3d808fd6fa..7c1d164927 100644
--- a/src/marker.c
+++ b/src/marker.c
@@ -133,6 +133,28 @@ CHECK_MARKER (Lisp_Object x)
   CHECK_TYPE (MARKERP (x), Qmarkerp, x);
 }
 
+/* When converting bytes from/to chars, we look through the list of
+   markers to try and find a good starting point (since markers keep
+   track of both bytepos and charpos at the same time).
+   But if there are many markers, it can take too much time to find a "good"
+   marker from which to start.  Worse yet: if it takes a long time and we end
+   up finding a nearby markers, we won't add a new marker to cache this
+   result, so next time around we'll have to go through this same long list
+   to (re)find this best marker.  So the further down the list of
+   markers we go, the less demanding we are w.r.t what is a good marker.
+
+   The previous code used INITIAL=50 and INCREMENT=0 and this lead to
+   really poor performance when there are many markers.
+   I haven't tried to tweak INITIAL, but my experiments on my trusty Thinkpad
+   T61 using various artificial test cases seem to suggest that INCREMENT=50
+   might be "the best compromise": it significantly improved the
+   worst case and it was rarely slower and never by much.
+
+   The asymptotic behavior is still poor, tho, so in largish buffers with many
+   overlays (e.g. 300KB and 30K overlays), it can still be a bottlneck.  */
+#define BYTECHAR_DISTANCE_INITIAL 50
+#define BYTECHAR_DISTANCE_INCREMENT bytechar_distance_increment
+
 /* Return the byte position corresponding to CHARPOS in B.  */
 
 ptrdiff_t
@@ -141,7 +163,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
   struct Lisp_Marker *tail;
   ptrdiff_t best_above, best_above_byte;
   ptrdiff_t best_below, best_below_byte;
-  ptrdiff_t distance = 50;
+  ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
 
   eassert (BUF_BEG (b) <= charpos && charpos <= BUF_Z (b));
 
@@ -184,7 +206,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
       if (best_above - best_below < distance)
        break;
       else
-        distance++;
+        distance += BYTECHAR_DISTANCE_INCREMENT;
     }
 
   /* We get here if we did not exactly hit one of the known places.
@@ -296,7 +318,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
   struct Lisp_Marker *tail;
   ptrdiff_t best_above, best_above_byte;
   ptrdiff_t best_below, best_below_byte;
-  ptrdiff_t distance = 50;
+  ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
 
   eassert (BUF_BEG_BYTE (b) <= bytepos && bytepos <= BUF_Z_BYTE (b));
 
@@ -330,7 +352,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
       if (best_above - best_below < distance)
        break;
       else
-        distance++;
+        distance += BYTECHAR_DISTANCE_INCREMENT;
     }
 
   /* We get here if we did not exactly hit one of the known places.
@@ -756,4 +778,9 @@ syms_of_marker (void)
   defsubr (&Scopy_marker);
   defsubr (&Smarker_insertion_type);
   defsubr (&Sset_marker_insertion_type);
+
+  DEFVAR_INT ("internal--bytechar-distance-increment",
+              bytechar_distance_increment,
+              doc: /* Haha */);
+  bytechar_distance_increment = 50;
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]