bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#15212: 24.3.50; c++-mode doesn't support raw string literals


From: Alan Mackenzie
Subject: bug#15212: 24.3.50; c++-mode doesn't support raw string literals
Date: Thu, 2 Jun 2016 16:07:41 +0000
User-agent: Mutt/1.5.24 (2015-08-30)

Hello, Ivan.

On Tue, May 31, 2016 at 11:21:18PM -0600, Ivan Andrus wrote:
> On May 31, 2016, at 4:21 PM, Alan Mackenzie <acm@muc.de> wrote:

> > Hello, yet again, Ivan!

> > On Tue, May 31, 2016 at 08:22:07AM -0600, Ivan Andrus wrote:
> >> On May 29, 2016, at 3:36 PM, Alan Mackenzie <acm@muc.de> wrote:

> > [ .... ]

> >> Moreover, I was somehow able to get it into a bad state where changing the
> >> delimiters wouldn’t update fontification.  I’ll see if I can come up with a
> >> recipe for how to reproduce it reliably.

> > The following gets it into a bad state:
> > (i) Set up two separate valid raw strings with the same delimiter in
> >  both.
> > (ii) "Damage" the closing delimiter of the first string.  There is now
> >  just one raw string which extends to what used to be the end of the
> >  second raw string.
> > (iii) Restore the closing delimiter of the first string.  The
> >  syntax-table text properties and fontifications are now broken, and, I
> >  think, need the mode reinitialising to recover.

> Good sleuthing.  That would fit with my experience.

The following patch (a full patch which works with the savannah master
branch) should fix that problem.

It is not yet a workable patch, since it fails to take proper account of
macros and comments.  Indeed, the fontification fails when the raw string
is inside a macro.  CC Mode has become somewhat unwieldy in this area,
and I'll be working on it in the next few days.

Until then .....



diff -r d83a74c6ec31 cc-engine.el
--- a/cc-engine.el      Sun May 29 11:59:26 2016 +0000
+++ b/cc-engine.el      Thu Jun 02 16:01:26 2016 +0000
@@ -2295,7 +2295,8 @@
   ;;     (STATE TYPE (BEG . END))     if TO is in a literal; or
   ;;     (STATE)                      otherwise,
   ;; where STATE is the parsing state at TO, TYPE is the type of the literal
-  ;; (one of 'c, 'c++, 'string) and (BEG . END) is the boundaries of the literal.
+  ;; (one of 'c, 'c++, 'string) and (BEG . END) is the boundaries of the literal,
+  ;; including the delimiters.
   ;;
   ;; Unless NOT-IN-DELIMITER is non-nil, when TO is inside a two-character
   ;; comment opener, this is recognized as being in a comment literal.
@@ -5793,6 +5794,132 @@
                                       'c-decl-arg-start)))))))
       (or (c-forward-<>-arglist nil)
          (forward-char)))))
+
+
+;; Routines to handle C++ raw strings.
+(defun c-raw-string-pos ()
+  ;; Get POINT's relationship to any containing raw string.
+  ;; If point isn't in a raw string, return nil.
+  ;; Otherwise, return the following list:
+  ;;
+  ;;   (POS B\" B\( E\) E\")
+  ;;
+  ;; , where POS is the symbol `open-delim' if point is in the opening
+  ;; delimiter, the symbol `close-delim' if it's in the closing delimiter, and
+  ;; nil if it's in the string body.  B\", B\(, E\), E\" are the positions of
+  ;; the opening and closing quotes and parentheses of a correctly terminated
+  ;; raw string.  (N.B.: E\) and E\" are NOT on the "outside" of these
+  ;; characters.)  If the raw string is not terminated, E\) and E\" are set to
+  ;; nil.
+  ;;
+  ;; Note: this routine is dependent upon the correct syntax-table text
+  ;; properties being set.
+  (let* ((safe (c-state-semi-safe-place (point)))
+        (state (c-state-pp-to-literal safe (point)))
+        open-quote-pos open-paren-pos close-paren-pos close-quote-pos id)
+    (save-excursion
+      (when
+         (and
+          (cond
+           ((null (cadr state))
+            (or (eq (char-after) ?\")
+                (search-backward "\"" (max (- (point) 17) (point-min)) t)))
+           ((and (eq (cadr state) 'string)
+                 (goto-char (car (nth 2 state)))
+                 (or (eq (char-after) ?\")
+                     (search-backward "\"" (max (- (point) 17) (point-min)) t))
+                 (not (bobp)))))
+          (eq (char-before) ?R)
+          (looking-at "\"\\([^ ()\\\n\r\t]\\{,16\\}\\)("))
+       (setq open-quote-pos (point)
+             open-paren-pos (match-end 1)
+             id (match-string-no-properties 1))
+       (goto-char (1+ open-paren-pos))
+       (when (and (not (c-get-char-property open-paren-pos 'syntax-table))
+                  (search-forward (concat ")" id "\"") nil t))
+         (setq close-paren-pos (match-beginning 0)
+               close-quote-pos (1- (point))))))
+    (and open-quote-pos
+        (list
+         (cond
+          ((<= (point) open-paren-pos)
+           'open-delim)
+          ((and close-paren-pos
+                (> (point) close-paren-pos))
+           'close-delim)
+          (t nil))
+         open-quote-pos open-paren-pos close-paren-pos close-quote-pos))))
+
+(defun c-before-change-check-c++-raw-strings (beg end)
+  ;; This function clears syntax-table text properties from C++ raw strings in
+  ;; the region (c-new-BEG c-new-END).
+  (c-save-buffer-state
+      ((beg-rs (progn (goto-char c-new-BEG) (c-raw-string-pos)))
+       (end-rs (progn (goto-char c-new-END) (c-raw-string-pos))) ; FIXME!!!
+                                       ; Optimize this so that we don't call
+                                       ; `c-raw-string-pos' twice when once
+                                       ; will do.  (2016-06-02).
+       )
+    (when beg-rs
+      (setq c-new-BEG (min c-new-BEG (cadr beg-rs)))
+      (if (nth 3 beg-rs)
+         ;; We've got a terminated raw string.
+         (when (< (nth 2 beg-rs) beg)
+           (c-clear-char-property-with-value
+            (1+ (nth 2 beg-rs)) beg 'syntax-table '(1)))
+       ;; We've got an unmatched raw string opening delimiter.
+       (c-clear-char-property (cadr beg-rs) 'syntax-table)
+       (c-clear-char-property (nth 2 beg-rs) 'syntax-table)))
+    (when end-rs
+      (setq c-new-END (max c-new-END
+                          (1+ (or (nth 4 end-rs)
+                                  (nth 2 end-rs)))))
+      (if (nth 3 end-rs)
+         ;; We've got a terminated raw string.
+         (when (< end (nth 3 end-rs))
+           (c-clear-char-property-with-value
+            end (nth 3 end-rs) 'syntax-table '(1)))
+       ;; We've got an unmatched raw string opening delimiter.
+       (c-clear-char-property (cadr end-rs) 'syntax-table)
+       (c-clear-char-property (nth 2 end-rs) 'syntax-table)))))
+
+(defun c-temp-before-change (beg end)
+  (setq c-new-BEG beg
+       c-new-END end)
+  (c-before-change-check-c++-raw-strings beg end))
+
+(defun c-after-change-mark-raw-strings (beg end old-len)
+  ;; Put any needed text properties on raw strings.  This function is called
+  ;; as an after-change function.
+  (save-excursion
+    (c-save-buffer-state ()
+      (goto-char c-new-BEG)
+      (while (and (< (point) c-new-END)
+                 (c-syntactic-re-search-forward
+                  "R\"\\([^ ()\\\n\r\t]\\{,16\\}\\)(" c-new-END t))
+       (let ((id (match-string-no-properties 1))
+             (open-quote (1+ (match-beginning 0)))
+             (open-paren (match-end 1))
+             )
+         (if (search-forward (concat ")" id "\"") nil t)
+             (let ((end-string (match-beginning 0))
+                   (after-quote (match-end 0))
+                   )
+               (goto-char open-paren)
+               (while (progn (skip-syntax-forward "^\"" end-string)
+                             (< (point) end-string))
+                 (c-put-char-property (point) 'syntax-table '(1)) ; punctuation
+                 (forward-char))
+               (goto-char after-quote))
+           (c-put-char-property open-quote 'syntax-table '(1)) ; punctuation
+           (c-put-char-property open-paren 'syntax-table '(15))))) ; generic string
+
+      )))
+
+(defun c-temp-after-change (beg end old-len)
+  (setq c-new-BEG beg
+       c-new-END end)
+  (c-after-change-mark-raw-strings beg end old-len))
 
 ;; Handling of small scale constructs like types and names.
 
diff -r d83a74c6ec31 cc-fonts.el
--- a/cc-fonts.el       Sun May 29 11:59:26 2016 +0000
+++ b/cc-fonts.el       Thu Jun 02 16:01:26 2016 +0000
@@ -717,6 +717,10 @@
        (concat ".\\(" c-string-limit-regexp "\\)")
        '((c-font-lock-invalid-string)))
 
+      ;; Fontify C++ raw strings.
+      ,@(when (c-major-mode-is 'c++-mode)
+         '(c-font-lock-c++-raw-strings))
+
       ;; Fontify keyword constants.
       ,@(when (c-lang-const c-constant-kwds)
          (let ((re (c-make-keywords-re nil (c-lang-const c-constant-kwds))))
@@ -1572,6 +1576,35 @@
            (c-forward-syntactic-ws)
            (c-font-lock-declarators limit t in-typedef)))))))
 
+(defun c-font-lock-c++-raw-strings (limit)
+  ;; Fontify C++ raw strings.
+  ;;
+  ;; This function will be called from font-lock for a region bounded by POINT
+  ;; and LIMIT, as though it were to identify a keyword for
+  ;; font-lock-keyword-face.  It always returns NIL to inhibit this and
+  ;; prevent a repeat invocation.  See elisp/lispref page "Search-based
+  ;; Fontification".
+  (while (search-forward-regexp
+         "R\\(\"\\)\\([^ ()\\\n\r\t]\\{,16\\}\\)(" limit t)
+    (when ;; (eq (c-get-char-property (1- (point)) 'face)
+       ;;     'font-lock-string-face)
+       (or (and (eobp)
+                (eq (c-get-char-property (1- (point)) 'face)
+                    'font-lock-warning-face))
+           (eq (c-get-char-property (point) 'face) 'font-lock-string-face))
+      (if (c-get-char-property (1- (point)) 'syntax-table)
+         (c-put-font-lock-face (match-beginning 0) (match-end 0)
+                               'font-lock-warning-face)
+       (c-put-font-lock-face (match-beginning 1) (match-end 2)
+                             'default)
+       (when (search-forward-regexp
+              (concat ")\\(" (regexp-quote (match-string-no-properties 2))
+                      "\\)\"")
+              limit t)
+         (c-put-font-lock-face (match-beginning 1) (point)
+                               'default)))))
+  nil)
+
 (c-lang-defconst c-simple-decl-matchers
   "Simple font lock matchers for types and declarations.  These are used
 on level 2 only and so aren't combined with `c-complex-decl-matchers'."
diff -r d83a74c6ec31 cc-langs.el
--- a/cc-langs.el       Sun May 29 11:59:26 2016 +0000
+++ b/cc-langs.el       Thu Jun 02 16:01:26 2016 +0000
@@ -457,9 +457,12 @@
   ;; The value here may be a list of functions or a single function.
   t nil
   c++ '(c-extend-region-for-CPP
+       c-depropertize-region
+       c-before-change-check-c++-raw-strings
        c-before-change-check-<>-operators
        c-invalidate-macro-cache)
   (c objc) '(c-extend-region-for-CPP
+            c-depropertize-region
             c-invalidate-macro-cache)
   ;; java 'c-before-change-check-<>-operators
   awk 'c-awk-record-region-clear-NL)
@@ -492,7 +495,8 @@
   (c objc) '(c-extend-font-lock-region-for-macros
             c-neutralize-syntax-in-and-mark-CPP
             c-change-expand-fl-region)
-  c++ '(c-extend-font-lock-region-for-macros
+  c++ '(c-after-change-mark-raw-strings
+       c-extend-font-lock-region-for-macros
        c-neutralize-syntax-in-and-mark-CPP
        c-restore-<>-properties
        c-change-expand-fl-region)
diff -r d83a74c6ec31 cc-mode.el
--- a/cc-mode.el        Sun May 29 11:59:26 2016 +0000
+++ b/cc-mode.el        Thu Jun 02 16:01:26 2016 +0000
@@ -859,6 +859,16 @@
   (memq (cadr (backtrace-frame 3))
        '(put-text-property remove-list-of-text-properties)))
 
+(defun c-depropertize-region (beg end)
+  ;; Remove the punctuation syntax-table text property from the region
+  ;; (c-new-BEG c-new-END).
+  ;;
+  ;; This function is in the C/C++/ObjC values of
+  ;; `c-get-state-before-change-functions' and is called exclusively as a
+  ;; before change function.
+  (c-clear-char-property-with-value
+   c-new-BEG c-new-END 'syntax-table '(1)))
+
 (defun c-extend-region-for-CPP (beg end)
   ;; Adjust `c-new-BEG', `c-new-END' respectively to the beginning and end of
   ;; any preprocessor construct they may be in. 
@@ -951,7 +961,7 @@
   ;; This function might make hidden buffer changes.
   (c-save-buffer-state (limits )
     ;; Clear 'syntax-table properties "punctuation":
-    (c-clear-char-property-with-value c-new-BEG c-new-END 'syntax-table '(1))
+    ;; (c-clear-char-property-with-value c-new-BEG c-new-END 'syntax-table '(1))
 
     ;; CPP "comment" markers:
     (if (memq 'category-properties c-emacs-features) ; GNU Emacs.



> -Ivan

-- 
Alan Mackenzie (Nuremberg, Germany).





reply via email to

[Prev in Thread] Current Thread [Next in Thread]