>From 73c753f07c21ad2fe32fac124b7287bd8b6ab01b Mon Sep 17 00:00:00 2001 From: Noam Postavsky Date: Sun, 13 Aug 2017 13:15:10 -0400 Subject: [PATCH 1/3] Produce dynamic docstrings for bytecode (Bug#27748) Instead of relying on decompilation to create source forms that we can easily extract docstrings from, record the docstrings as we compile and then print out all the docstrings with their corresponding symbol names in a single comment at the top. Old format: #@ nnn Docstring of var1^_ (defvar var1 (init-expression) (#$ . nn)) #@ nnn Docstring of var2^_ (defvar var2 (init-expression) (#$ . nn)) New format: #@ nnnnnn ^_Vvar1^_Docstring of var1^_ ^_Vvar2^_Docstring of var2^_ (defvar var1 (init-expression) (#$ . nn)) (defvar var2 (init-expression) (#$ . nn)) (Where "^_" represents the character \037, aka "Unit Separator".) The new format can still be loaded by older Emacs versions since the bytecode loader only requires that dynamic docstrings be at the right file offset and preceded with "^_" or "#@ ". It cannot be used by older make-doc versions. * lisp/emacs-lisp/bytecomp.el (byte-compile-docstring-handler): New function, push a special kind of constant onto `byte-compile-constants' for the given docstring. (byte-compile-file-form-defvar-function): Use it on the docstring. (byte-compile--docstring-constants): New variable. (byte-compile-constants-vector): Use it to rememember the indices of the special docstring constants. (byte-compile-top-level, byte-compile-flush-pending): Let-bind byte-compile--docstring-constants to nil. (byte-compile--docstring-marker): New variable. (byte-compile-from-buffer): Let-bind it to nil. (byte-compile-insert-header): Set it to a pair markers pointing to the end of the header. (byte-compile-output-as-comment): Write the docstrings collected into byte-compile--docstring-constants to the second marker in byte-compile--docstring-marker. When writing docstrings (as opposed to lazy loaded bytecode), also print V or F prior to the docstring. (byte-compile-output-file-form): When writing out the constants vector, use the (#$ . %d) format instead of the string itself. (byte-compile-escape-docstring): New function, extracted from `byte-compile-output-as-comment'. (byte-compile-fix-header-docstring-comment): New function, comment out the docstrings at the top of the file with a #@N kind of comment. Delete semicolons from the header as needed to preserve offsets. (byte-compile-fix-header-multibyte): Rename from byte-compile-fix-header. (byte-compile-fix-header): Call both `byte-compile-fix-header-multibyte' and `byte-compile-fix-header-docstring-comment'. * lib-src/make-docfile.c (scan_lisp_file): Update for new format, collect all of symbol type, name, and docstring directly from #@N comments. --- lib-src/make-docfile.c | 61 ++++++++++------- lisp/emacs-lisp/bytecomp.el | 158 ++++++++++++++++++++++++++++++-------------- 2 files changed, 147 insertions(+), 72 deletions(-) diff --git a/lib-src/make-docfile.c b/lib-src/make-docfile.c index ecd6447ab7..8daca9aba2 100644 --- a/lib-src/make-docfile.c +++ b/lib-src/make-docfile.c @@ -1258,7 +1258,8 @@ read_lisp_symbol (FILE *infile, char *buffer) c = getc (infile); if (c == '\\') *(++fillp) = getc (infile); - else if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '(' || c == ')') + else if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || + c == '(' || c == ')' || c == '\037') { ungetc (c, infile); *fillp = 0; @@ -1367,14 +1368,13 @@ scan_lisp_file (const char *filename, const char *mode) /* Skip the line break. */ while (c == '\n' || c == '\r') c = getc (infile); - /* Detect a dynamic doc string and save it for the next expression. */ + /* Detect the dynamic string block. */ if (c == '#') { c = getc (infile); if (c == '@') { ptrdiff_t length = 0; - ptrdiff_t i; /* Read the length. */ while ((c = getc (infile), @@ -1387,31 +1387,46 @@ scan_lisp_file (const char *filename, const char *mode) } if (length <= 1) - fatal ("invalid dynamic doc string length"); + fatal ("%s: invalid dynamic doc string length", filename); + + /* We expect one newline character following the + comment. */ + ptrdiff_t end_offset = ftell (infile) + length + 1; if (c != ' ') fatal ("space not found after dynamic doc string length"); - /* The next character is a space that is counted in the length - but not part of the doc string. - We already read it, so just ignore it. */ - length--; - /* Read in the contents. */ - free (saved_string); - saved_string = xmalloc (length); - for (i = 0; i < length; i++) - saved_string[i] = getc (infile); - /* The last character is a ^_. - That is needed in the .elc file - but it is redundant in DOC. So get rid of it here. */ - saved_string[length - 1] = 0; - /* Skip the line break. */ - while (c == '\n' || c == '\r') - c = getc (infile); - /* Skip the following line. */ - while (c != '\n' && c != '\r') - c = getc (infile); + for (;;) + { + c = getc (infile); + if (c != '\037') break; + type = getc (infile); + if (type != 'V' && type != 'F') + fatal ("%s: 'V' or 'F' not found before symbol name (%c)\n", filename, c); + read_lisp_symbol (infile, buffer); + c = getc (infile); + if (c != '\037') + fatal ("\\037 not found after symbol name"); + + printf ("\037%c%s\n", type, buffer); + for (;;) + { + c = getc (infile); + if (c == '\037') + { + if ('\n' != getc (infile)) + fatal ("newline not found after dynamic doc string\n"); + break; + } + putc (c, stdout); + } + } + /* All dynamic strings should be in that block. */ + if (ftell (infile) != end_offset) + fatal ("%s: wrong dynamic doc string length (%ld != %ld)", + filename, ftell (infile), end_offset); + break; } continue; } diff --git a/lisp/emacs-lisp/bytecomp.el b/lisp/emacs-lisp/bytecomp.el index cf06c0c8ef..d2768a159b 100644 --- a/lisp/emacs-lisp/bytecomp.el +++ b/lisp/emacs-lisp/bytecomp.el @@ -1976,6 +1976,8 @@ byte-compile-from-buffer ;; Simulate entry to byte-compile-top-level (byte-compile-jump-tables nil) (byte-compile-constants nil) + (byte-compile--docstring-constants nil) + (byte-compile--docstring-marker nil) (byte-compile-variables nil) (byte-compile-tag-number 0) (byte-compile-depth 0) @@ -2050,6 +2052,22 @@ byte-compile-from-buffer byte-compile--outbuffer))) (defun byte-compile-fix-header (_filename) + (byte-compile-fix-header-multibyte) + (byte-compile-fix-header-docstring-comment)) + +(defun byte-compile-fix-header-docstring-comment () + (pcase byte-compile--docstring-marker + (`(,beg . ,end) + (let* ((bytes (- (position-bytes end) (position-bytes beg))) + (comment-beg (format "#@%d " bytes))) + (when (> bytes 0) + (goto-char (point-min)) + (search-forward ";;;;;;;;;;" beg) + (beginning-of-line) + (delete-char (length comment-beg)) + (princ comment-beg beg)))))) + +(defun byte-compile-fix-header-multibyte () "If the current buffer has any multibyte characters, insert a version test." (when (< (point-max) (position-bytes (point-max))) (goto-char (point-min)) @@ -2127,7 +2145,9 @@ byte-compile-insert-header ;; can delete them so as to keep the buffer positions ;; constant for the actual compiled code. ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n" - ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n")))) + ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n") + (setq byte-compile--docstring-marker + (cons (point-marker) (point-marker)))))) (defun byte-compile-output-file-form (form) ;; Write the given form to the output buffer, being careful of docstrings @@ -2151,7 +2171,29 @@ byte-compile-output-file-form '(defvaralias autoload custom-declare-variable))) (princ "\n" byte-compile--outbuffer) - (prin1 form byte-compile--outbuffer) + (pcase form + ((and (guard byte-compile--docstring-constants) + (guard byte-compile--docstring-marker) + `(byte-code ,bytestr ,constants ,maxdepth)) + (princ "(byte-code " byte-compile--outbuffer) + (prin1 bytestr byte-compile--outbuffer) + (princ " [" byte-compile--outbuffer) + (cl-callf cl-sort byte-compile--docstring-constants #'< :key #'car) + (let ((docs-head byte-compile--docstring-constants)) + (dotimes (i (length constants)) + (if (or (null docs-head) (/= i (caar docs-head))) + (prin1 (aref constants i) byte-compile--outbuffer) + (pcase-let* ((`(,_i ,symtype ,sym) (car docs-head))) + (princ (format "(#$ . %d)" + (byte-compile-output-as-comment + (aref constants i) nil symtype sym)) + byte-compile--outbuffer)) + (pop docs-head)) + (princ " " byte-compile--outbuffer))) + (princ "] " byte-compile--outbuffer) + (prin1 maxdepth byte-compile--outbuffer) + (princ ")" byte-compile--outbuffer)) + (_ (prin1 form byte-compile--outbuffer))) nil))) (defvar byte-compile--for-effect) @@ -2172,17 +2214,15 @@ byte-compile-output-docform (let ((dynamic-docstrings byte-compile-dynamic-docstrings)) (with-current-buffer byte-compile--outbuffer (let (position) - ;; Insert the doc string, and make it a comment with #@LENGTH. (and (>= (nth 1 info) 0) dynamic-docstrings - (progn - ;; Make the doc string start at beginning of line - ;; for make-docfile's sake. - (insert "\n") - (setq position - (byte-compile-output-as-comment - (nth (nth 1 info) form) nil)) + (pcase-let* (((or `',sym sym) (or name (nth 1 form))) + (symtype (if (if preface (string-match-p "defalias" preface) + (memq (car form) '(autoload defalias))) + ?F ?V))) + (setq position (byte-compile-output-as-comment + (nth (nth 1 info) form) nil symtype sym)) ;; If the doc string starts with * (a user variable), ;; negate POSITION. (if (and (stringp (nth (nth 1 info) form)) @@ -2227,10 +2267,9 @@ byte-compile-output-docform (not non-nil))) ;; Output the byte code and constants specially ;; for lazy dynamic loading. - (let ((position - (byte-compile-output-as-comment - (cons (car form) (nth 1 form)) - t))) + (let* ((position (byte-compile-output-as-comment + (cons (car form) (nth 1 form)) + t nil nil))) (princ (format "(#$ . %d) nil" position) byte-compile--outbuffer) (setq form (cdr form)) @@ -2275,6 +2314,7 @@ byte-compile-flush-pending (form (byte-compile-output-file-form form))) (setq byte-compile-constants nil + byte-compile--docstring-constants nil byte-compile-variables nil byte-compile-depth 0 byte-compile-maxdepth 0 @@ -2389,8 +2429,11 @@ byte-compile-file-form-defvar (put 'defvaralias 'byte-hunk-handler 'byte-compile-file-form-defvar-function) (defun byte-compile-file-form-defvar-function (form) - (pcase-let (((or `',name (let name nil)) (nth 1 form))) - (if name (byte-compile--declare-var name))) + (pcase-let (((or `',name (let name nil)) (nth 1 form)) + (docstr (nth 3 form))) + (if name (byte-compile--declare-var name)) + (when (stringp docstr) + (setf (nth 3 form) `(byte-compile-docstring ,docstr ?V ,name)))) (byte-compile-keep-pending form)) (put 'custom-declare-variable 'byte-hunk-handler @@ -2578,42 +2621,43 @@ byte-compile-file-form-defmumble (princ ")" byte-compile--outbuffer) t))))) -(defun byte-compile-output-as-comment (exp quoted) - "Print Lisp object EXP in the output file, inside a comment, +(defun byte-compile-escape-docstring (beg &optional end) + "Quote characters in the range BEG to END for `get_doc_string'." + (save-excursion + (goto-char beg) + (while (search-forward "\^A" end t) + (replace-match "\^A\^A" t t)) + (goto-char beg) + (while (search-forward "\000" end t) + (replace-match "\^A0" t t)) + (goto-char beg) + (while (search-forward "\037" end t) + (replace-match "\^A_" t t)))) + +(defun byte-compile-output-as-comment (exp quoted symtype sym) + "Print Lisp object EXP to the output file's header comment, and return the file (byte) position it will have. -If QUOTED is non-nil, print with quoting; otherwise, print without quoting." +The header lies between the markers in +`byte-compile--docstring-marker'. +If QUOTED is non-nil, print with quoting; otherwise, print without quoting. +If SYMTYPE is a character, print it and SYM before EXP." (with-current-buffer byte-compile--outbuffer - (let ((position (point))) - - ;; Insert EXP, and make it a comment with #@LENGTH. - (insert " ") + (let* ((doc-marker (cdr byte-compile--docstring-marker)) + (position (progn (when (characterp symtype) + (write-char ?\037 doc-marker) + (write-char symtype doc-marker) + (princ sym doc-marker)) + (write-char ?\037 doc-marker) + (marker-position doc-marker)))) (if quoted - (prin1 exp byte-compile--outbuffer) - (princ exp byte-compile--outbuffer)) - (goto-char position) - ;; Quote certain special characters as needed. - ;; get_doc_string in doc.c does the unquoting. - (while (search-forward "\^A" nil t) - (replace-match "\^A\^A" t t)) - (goto-char position) - (while (search-forward "\000" nil t) - (replace-match "\^A0" t t)) - (goto-char position) - (while (search-forward "\037" nil t) - (replace-match "\^A_" t t)) - (goto-char (point-max)) - (insert "\037") - (goto-char position) - (insert "#@" (format "%d" (- (position-bytes (point-max)) - (position-bytes position)))) - + (prin1 exp doc-marker) + (princ exp doc-marker)) + (byte-compile-escape-docstring position doc-marker) + (princ "\037\n" doc-marker) ;; Save the file position of the object. - ;; Note we add 1 to skip the space that we inserted before the actual doc - ;; string, and subtract point-min to convert from an 1-origin Emacs - ;; position to a file position. - (prog1 - (- (position-bytes (point)) (point-min) -1) - (goto-char (point-max)))))) + ;; Note we subtract point-min to convert from an 1-origin Emacs + ;; position to a 0-origin file offset. + (- (position-bytes position) (point-min))))) (defun byte-compile--reify-function (fun) "Return an expression which will evaluate to a function value FUN. @@ -2838,6 +2882,8 @@ byte-compile-lambda (list (nth 1 int)))))))) (defvar byte-compile-reserved-constants 0) +(defvar byte-compile--docstring-constants nil) +(defvar byte-compile--docstring-marker nil) (defun byte-compile-constants-vector () ;; Builds the constants-vector from the current variables and constants. @@ -2866,7 +2912,11 @@ byte-compile-constants-vector ((setq tmp (assq (car (car rest)) ret)) (setcdr (car rest) (cdr tmp))) (t - (setcdr (car rest) (setq i (1+ i))) + (setq i (1+ i)) + (pcase (car rest) + (`(,_docstr docstring ,symtype ,sym) + (push (list i symtype sym) byte-compile--docstring-constants))) + (setcdr (car rest) i) (setq ret (cons (car rest) ret)))) (setq rest (cdr rest))) (setq limits (cdr limits) ;Step @@ -2885,6 +2935,7 @@ byte-compile-top-level ;; 'file -> used at file-level. (let ((byte-compile--for-effect for-effect) (byte-compile-constants nil) + (byte-compile--docstring-constants nil) (byte-compile-variables nil) (byte-compile-tag-number 0) (byte-compile-depth 0) @@ -4578,6 +4629,15 @@ byte-compile-make-obsolete-variable (push (nth 1 (nth 1 form)) byte-compile-global-not-obsolete-vars)) (byte-compile-normal-call form)) + +(byte-defop-compiler + (byte-compile-docstring nil) byte-compile-docstring-handler) +(defun byte-compile-docstring-handler (form) + ;; FORM = (byte-compile-docstring DOCSTR ?V NAME) + (byte-compile-out 'byte-constant + (car (push (cl-list* (cadr form) 'docstring (cddr form)) + byte-compile-constants)))) + (defconst byte-compile-tmp-var (make-symbol "def-tmp-var")) (defun byte-compile-defvar (form) -- 2.14.1