>From 0fbe96c0add052338d68453de5fb3486201e61d0 Mon Sep 17 00:00:00 2001 From: Noam Postavsky Date: Sun, 13 Aug 2017 13:15:10 -0400 Subject: [PATCH] [WIP] Produce dynamic docstrings for bytecode (Bug#27748) Instead of relying on decompilation to create source forms that we can easily extract docstrings from, record the docstrings as we compile and then print out all the docstrings with their corresponding symbol names in a single comment at the top. NOTE: Currently only updates recording docstrings for defcustom leaving other forms to produce the old format, but make-docfile is updated for the new format, so this prevents successful building of emacs. * lisp/emacs-lisp/bytecomp.el (byte-compile-docstring-handler): New function, push a special kind of constant onto `byte-compile-constants' for the given docstring. (byte-compile-file-form-defvar-function): Use it on the docstring. (byte-compile--docstring-constants): New variable. (byte-compile-constants-vector): Use it to rememember the indices of the special docstring constants. (byte-compile-top-level, byte-compile-flush-pending): Let-bind byte-compile--docstring-constants to nil. (byte-compile--docstring-marker): New variable. (byte-compile-from-buffer): Let-bind it to nil. (byte-compile-insert-header): Set it to a pair markers pointing to the end of the header. (byte-compile-output-file-form): Write the docstrings collected into byte-compile--docstring-constants to the second marker in byte-compile--docstring-marker. When writing out the constants vector, use the (#$ . %d) format instead of the string itself. (byte-compile-escape-docstring): New function, extracted from `byte-compile-output-as-comment'. (byte-compile-fix-header-docstring-comment): New function, comment out the docstrings at the top of the file with a #@N kind of comment. Delete semicolons from the header as needed to preserve offsets. (byte-compile-fix-header-multibyte): Rename from byte-compile-fix-header. (byte-compile-fix-header): Call both `byte-compile-fix-header-multibyte' and `byte-compile-fix-header-docstring-comment'. * lib-src/make-docfile.c (scan_lisp_file): Update for new format, collect all of symbol type, name, and docstring directly from #@N comments. --- lib-src/make-docfile.c | 43 +++++++++++------- lisp/emacs-lisp/bytecomp.el | 104 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 115 insertions(+), 32 deletions(-) diff --git a/lib-src/make-docfile.c b/lib-src/make-docfile.c index ecd6447ab7..06377c5fda 100644 --- a/lib-src/make-docfile.c +++ b/lib-src/make-docfile.c @@ -1258,7 +1258,8 @@ read_lisp_symbol (FILE *infile, char *buffer) c = getc (infile); if (c == '\\') *(++fillp) = getc (infile); - else if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '(' || c == ')') + else if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || + c == '(' || c == ')' || c == '\037') { ungetc (c, infile); *fillp = 0; @@ -1374,7 +1375,6 @@ scan_lisp_file (const char *filename, const char *mode) if (c == '@') { ptrdiff_t length = 0; - ptrdiff_t i; /* Read the length. */ while ((c = getc (infile), @@ -1398,20 +1398,31 @@ scan_lisp_file (const char *filename, const char *mode) length--; /* Read in the contents. */ - free (saved_string); - saved_string = xmalloc (length); - for (i = 0; i < length; i++) - saved_string[i] = getc (infile); - /* The last character is a ^_. - That is needed in the .elc file - but it is redundant in DOC. So get rid of it here. */ - saved_string[length - 1] = 0; - /* Skip the line break. */ - while (c == '\n' || c == '\r') - c = getc (infile); - /* Skip the following line. */ - while (c != '\n' && c != '\r') - c = getc (infile); + for (;;) + { + c = getc (infile); + if (c != '\037') break; + type = getc (infile); + if (type != 'V' && type != 'F') + fatal ("'V' or 'F' not found before symbol name (%c)\n", c); + read_lisp_symbol (infile, buffer); + c = getc (infile); + if (c != '\037') + fatal ("\\037 not found after symbol name"); + + printf ("\037%c%s\n", type, buffer); + for (;;) + { + c = getc (infile); + if (c == '\037') + { + if ('\n' != getc (infile)) + fatal ("newline not found after dynamic doc string\n"); + break; + } + putc (c, stdout); + } + } } continue; } diff --git a/lisp/emacs-lisp/bytecomp.el b/lisp/emacs-lisp/bytecomp.el index d82b0385b1..28fd2b9cdd 100644 --- a/lisp/emacs-lisp/bytecomp.el +++ b/lisp/emacs-lisp/bytecomp.el @@ -1976,6 +1976,8 @@ byte-compile-from-buffer ;; Simulate entry to byte-compile-top-level (byte-compile-jump-tables nil) (byte-compile-constants nil) + (byte-compile--docstring-constants nil) + (byte-compile--docstring-marker nil) (byte-compile-variables nil) (byte-compile-tag-number 0) (byte-compile-depth 0) @@ -2050,6 +2052,21 @@ byte-compile-from-buffer byte-compile--outbuffer))) (defun byte-compile-fix-header (_filename) + (byte-compile-fix-header-multibyte) + (byte-compile-fix-header-docstring-comment)) + +(defun byte-compile-fix-header-docstring-comment () + (pcase byte-compile--docstring-marker + (`(,beg . ,end) + (let ((comment-beg (format "#@%d " (- (position-bytes end) + (position-bytes beg))))) + (goto-char (point-min)) + (search-forward ";;;;;;;;;;" beg) + (beginning-of-line) + (delete-char (length comment-beg)) + (princ comment-beg beg))))) + +(defun byte-compile-fix-header-multibyte () "If the current buffer has any multibyte characters, insert a version test." (when (< (point-max) (position-bytes (point-max))) (goto-char (point-min)) @@ -2127,7 +2144,9 @@ byte-compile-insert-header ;; can delete them so as to keep the buffer positions ;; constant for the actual compiled code. ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n" - ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n")))) + ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n") + (setq byte-compile--docstring-marker + (cons (point-marker) (point-marker)))))) (defun byte-compile-output-file-form (form) ;; Write the given form to the output buffer, being careful of docstrings @@ -2151,7 +2170,39 @@ byte-compile-output-file-form '(defvaralias autoload custom-declare-variable))) (princ "\n" byte-compile--outbuffer) - (prin1 form byte-compile--outbuffer) + (pcase form + ((and (guard byte-compile--docstring-constants) + (guard byte-compile--docstring-marker) + `(byte-code ,bytestr ,constants ,maxdepth)) + (princ "(byte-code " byte-compile--outbuffer) + (prin1 bytestr byte-compile--outbuffer) + (princ " [" byte-compile--outbuffer) + (cl-callf cl-sort byte-compile--docstring-constants #'< :key #'car) + (let ((docs-head byte-compile--docstring-constants)) + (dotimes (i (length constants)) + (if (/= i (caar docs-head)) + (prin1 (aref constants i) byte-compile--outbuffer) + (pcase-let* ((doc-marker (cdr byte-compile--docstring-marker)) + (start (marker-position doc-marker)) + (`(,_i ,symtype ,sym) (car docs-head))) + (princ (format "(#$ . %d)" + (with-current-buffer byte-compile--outbuffer + (- (position-bytes start) (point-min)))) + byte-compile--outbuffer) + (write-char ?\037 doc-marker) + (write-char symtype doc-marker) + (princ sym doc-marker) + (write-char ?\037 doc-marker) + (princ (aref constants i) doc-marker) + (princ "\037\n" doc-marker) + (byte-compile-escape-docstring + start doc-marker)) + (pop docs-head)) + (princ " " byte-compile--outbuffer))) + (princ "] " byte-compile--outbuffer) + (prin1 maxdepth byte-compile--outbuffer) + (princ ")" byte-compile--outbuffer)) + (_ (prin1 form byte-compile--outbuffer))) nil))) (defvar byte-compile--for-effect) @@ -2275,6 +2326,7 @@ byte-compile-flush-pending (form (byte-compile-output-file-form form))) (setq byte-compile-constants nil + byte-compile--docstring-constants nil byte-compile-variables nil byte-compile-depth 0 byte-compile-maxdepth 0 @@ -2389,10 +2441,18 @@ byte-compile-file-form-defvar (put 'defvaralias 'byte-hunk-handler 'byte-compile-file-form-defvar-function) (defun byte-compile-file-form-defvar-function (form) - (pcase-let (((or `',name (let name nil)) (nth 1 form))) - (if name (byte-compile--declare-var name))) + (pcase-let (((or `',name (let name nil)) (nth 1 form)) + (docstr (nth 3 form))) + (if name (byte-compile--declare-var name)) + (when (stringp docstr) + (setf (nth 3 form) `(byte-compile-docstring ,docstr ?V ,name)))) (byte-compile-keep-pending form)) +(defun byte-compile-docstring-handler (form) + (byte-compile-out 'byte-constant + (car (push (cl-list* (cadr form) 'docstring (cddr form)) + byte-compile-constants)))) + (put 'custom-declare-variable 'byte-hunk-handler 'byte-compile-file-form-custom-declare-variable) (defun byte-compile-file-form-custom-declare-variable (form) @@ -2578,6 +2638,18 @@ byte-compile-file-form-defmumble (princ ")" byte-compile--outbuffer) t))))) +(defun byte-compile-escape-docstring (beg &optional end) + "Quote characters in the range BEG to END for `get_doc_string'." + (goto-char beg) + (while (search-forward "\^A" end t) + (replace-match "\^A\^A" t t)) + (goto-char beg) + (while (search-forward "\000" end t) + (replace-match "\^A0" t t)) + (goto-char beg) + (while (search-forward "\037" end t) + (replace-match "\^A_" t t))) + (defun byte-compile-output-as-comment (exp quoted) "Print Lisp object EXP in the output file, inside a comment, and return the file (byte) position it will have. @@ -2590,17 +2662,7 @@ byte-compile-output-as-comment (if quoted (prin1 exp byte-compile--outbuffer) (princ exp byte-compile--outbuffer)) - (goto-char position) - ;; Quote certain special characters as needed. - ;; get_doc_string in doc.c does the unquoting. - (while (search-forward "\^A" nil t) - (replace-match "\^A\^A" t t)) - (goto-char position) - (while (search-forward "\000" nil t) - (replace-match "\^A0" t t)) - (goto-char position) - (while (search-forward "\037" nil t) - (replace-match "\^A_" t t)) + (byte-compile-escape-docstring position) (goto-char (point-max)) (insert "\037") (goto-char position) @@ -2838,6 +2900,8 @@ byte-compile-lambda (list (nth 1 int)))))))) (defvar byte-compile-reserved-constants 0) +(defvar byte-compile--docstring-constants nil) +(defvar byte-compile--docstring-marker nil) (defun byte-compile-constants-vector () ;; Builds the constants-vector from the current variables and constants. @@ -2866,7 +2930,11 @@ byte-compile-constants-vector ((setq tmp (assq (car (car rest)) ret)) (setcdr (car rest) (cdr tmp))) (t - (setcdr (car rest) (setq i (1+ i))) + (setq i (1+ i)) + (pcase (car rest) + (`(,_docstr docstring ,symtype ,sym) + (push (list i symtype sym) byte-compile--docstring-constants))) + (setcdr (car rest) i) (setq ret (cons (car rest) ret)))) (setq rest (cdr rest))) (setq limits (cdr limits) ;Step @@ -2885,6 +2953,7 @@ byte-compile-top-level ;; 'file -> used at file-level. (let ((byte-compile--for-effect for-effect) (byte-compile-constants nil) + (byte-compile--docstring-constants nil) (byte-compile-variables nil) (byte-compile-tag-number 0) (byte-compile-depth 0) @@ -3493,6 +3562,9 @@ byte-defop-compiler-1 ;;####(byte-defop-compiler move-to-column 1) (byte-defop-compiler-1 interactive byte-compile-noop) +(byte-defop-compiler (byte-compile-docstring nil) + byte-compile-docstring-handler) + (defun byte-compile-subr-wrong-args (form n) (byte-compile-set-symbol-position (car form)) -- 2.14.1