Line data Source code
1 : ;;; paragraphs.el --- paragraph and sentence parsing
2 :
3 : ;; Copyright (C) 1985-1987, 1991, 1994-1997, 1999-2017 Free Software
4 : ;; Foundation, Inc.
5 :
6 : ;; Maintainer: emacs-devel@gnu.org
7 : ;; Keywords: wp
8 : ;; Package: emacs
9 :
10 : ;; This file is part of GNU Emacs.
11 :
12 : ;; GNU Emacs is free software: you can redistribute it and/or modify
13 : ;; it under the terms of the GNU General Public License as published by
14 : ;; the Free Software Foundation, either version 3 of the License, or
15 : ;; (at your option) any later version.
16 :
17 : ;; GNU Emacs is distributed in the hope that it will be useful,
18 : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 : ;; GNU General Public License for more details.
21 :
22 : ;; You should have received a copy of the GNU General Public License
23 : ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24 :
25 : ;;; Commentary:
26 :
27 : ;; This package provides the paragraph-oriented commands documented in the
28 : ;; Emacs manual.
29 :
30 : ;;; Code:
31 :
;; Customization group that collects the paragraph and sentence options
;; defined in this file; it is filed under the `editing' group.
(defgroup paragraphs nil
  "Paragraph and sentence parsing."
  :group 'editing)
35 :
;; The mode variable keeps its buffer-local value across major mode changes.
(put 'use-hard-newlines 'permanent-local t)
(define-minor-mode use-hard-newlines
  "Toggle distinguishing between hard and soft newlines.
With a prefix argument ARG, enable the feature if ARG is
positive, and disable it otherwise.  If called from Lisp, enable
it if ARG is omitted or nil.

When enabled, the functions `newline' and `open-line' add the
text-property `hard' to newlines that they insert, and a line is
only considered as a candidate to match `paragraph-start' or
`paragraph-separate' if it follows a hard newline.

When enabling, if there are newlines in the buffer but no hard
newlines, ask the user whether to mark as hard any newlines
preceding a `paragraph-start' line.  From a program, second arg
INSERT specifies whether to do this; it can be `never' to change
nothing, t or `always' to force marking, `guess' to try to do the
right thing with no questions, nil or anything else to ask the
user.

Newlines not marked hard are called \"soft\", and are always internal
to paragraphs.  The fill functions insert and delete only soft newlines."
  :group 'paragraphs
  :extra-args (insert)
  ;; Only act when the mode has just been turned on, and only when INSERT
  ;; forces marking or the buffer has newlines, none of them hard, and
  ;; either INSERT is `guess' or the user agrees.
  (when (and use-hard-newlines
             (or (memq insert '(t always))
                 (and (not (eq insert 'never))
                      (not (text-property-any (point-min) (point-max) 'hard t))
                      (save-excursion
                        (goto-char (point-min))
                        (search-forward "\n" nil t))
                      (or (eq insert 'guess)
                          (y-or-n-p "Make newlines between paragraphs hard? ")))))
    ;; Intuit hard newlines: walk every newline in the buffer and mark as
    ;; hard those that precede a `paragraph-start' line.
    (save-excursion
      (goto-char (point-min))
      (while (search-forward "\n" nil t)
        (let* ((nl-end (point))
               (nl-beg (1- nl-end)))
          (move-to-left-margin)
          (when (looking-at paragraph-start)
            (set-hard-newline-properties nl-beg nl-end))
          ;; A separator line makes the newline that ends it hard as well.
          (when (looking-at paragraph-separate)
            (set-hard-newline-properties nl-beg nl-end)
            (end-of-line)
            (unless (eobp)
              (set-hard-newline-properties (point) (1+ (point))))))))))
85 :
(defcustom paragraph-start "\f\\|[ \t]*$"
  "Regexp for beginning of a line that starts OR separates paragraphs.
This regexp should match lines that separate paragraphs
and should also match lines that start a paragraph
\(and are part of that paragraph).

This is matched against the text at the left margin, which is not necessarily
the beginning of the line, so it should never use \"^\" as an anchor.  This
ensures that the paragraph functions will work equally well within a region
of text indented by a margin setting.

The variable `paragraph-separate' specifies how to distinguish
lines that start paragraphs from lines that separate them.

If the variable `use-hard-newlines' is non-nil, then only lines following a
hard newline are considered to match."
  :type 'regexp
  :group 'paragraphs)
;; Any string value is accepted as a file-local setting.
(put 'paragraph-start 'safe-local-variable #'stringp)
105 :
106 : ;; paragraph-start requires a hard newline, but paragraph-separate does not:
107 : ;; It is assumed that paragraph-separate is distinctive enough to be believed
108 : ;; whenever it occurs, while it is reasonable to set paragraph-start to
109 : ;; something very minimal, even including "." (which makes every hard newline
110 : ;; start a new paragraph).
111 :
(defcustom paragraph-separate "[ \t\f]*$"
  "Regexp for beginning of a line that separates paragraphs.
If you change this, you may have to change `paragraph-start' also.

This is matched against the text at the left margin, which is not necessarily
the beginning of the line, so it should not use \"^\" as an anchor.  This
ensures that the paragraph functions will work equally within a region of
text indented by a margin setting."
  :type 'regexp
  :group 'paragraphs)
;; Any string value is accepted as a file-local setting.
(put 'paragraph-separate 'safe-local-variable #'stringp)
123 :
(defcustom sentence-end-double-space t
  "Non-nil means a single space does not end a sentence.
This is relevant for filling.  See also `sentence-end-without-period'
and `colon-double-space'.

This value is used by the function `sentence-end' to construct the
regexp describing the end of a sentence, when the value of the variable
`sentence-end' is nil.  See Info node `(elisp)Standard Regexps'."
  :group 'fill
  :type 'boolean)
;; Only t or nil is accepted as a file-local setting.
(put 'sentence-end-double-space 'safe-local-variable #'booleanp)
135 :
(defcustom sentence-end-without-period nil
  "Non-nil means a sentence will end without a period.
For example, a sentence in Thai text ends with double space but
without a period.

This value is used by the function `sentence-end' to construct the
regexp describing the end of a sentence, when the value of the variable
`sentence-end' is nil.  See Info node `(elisp)Standard Regexps'."
  :group 'fill
  :type 'boolean)
;; Only t or nil is accepted as a file-local setting.
(put 'sentence-end-without-period 'safe-local-variable #'booleanp)
147 :
(defcustom sentence-end-without-space
  ;; U+3002 IDEOGRAPHIC FULL STOP, U+FF0E FULLWIDTH FULL STOP,
  ;; U+FF1F FULLWIDTH QUESTION MARK, U+FF01 FULLWIDTH EXCLAMATION MARK.
  ;; These must be the fullwidth forms, not ASCII `.?!': the function
  ;; `sentence-end' splices this string into a \"[...]+\" alternative, so
  ;; ASCII punctuation here would end a sentence at every period with no
  ;; following space and defeat `sentence-end-double-space'.  They are
  ;; written as escapes so that character normalization of this file
  ;; cannot silently fold them to ASCII.
  "\u3002\uff0e\uff1f\uff01"
  "String of characters that end sentence without following spaces.

This value is used by the function `sentence-end' to construct the
regexp describing the end of a sentence, when the value of the variable
`sentence-end' is nil.  See Info node `(elisp)Standard Regexps'."
  :group 'paragraphs
  :type 'string)
;; Any string value is accepted as a file-local setting.
(put 'sentence-end-without-space 'safe-local-variable 'stringp)
158 :
(defcustom sentence-end nil
  "Regexp describing the end of a sentence.
The value includes the whitespace following the sentence.
All paragraph boundaries also end sentences, regardless.

The value nil means to use the default value defined by the
function `sentence-end'.  You should always use this function
to obtain the value of this variable."
  :type '(choice regexp (const :tag "Use default value" nil))
  :group 'paragraphs)
;; A string or nil is accepted as a file-local setting.
(put 'sentence-end 'safe-local-variable #'string-or-null-p)
170 :
(defcustom sentence-end-base "[.?!…‽][]\"'”’)}]*"
  "Regexp matching the basic end of a sentence, not including following space."
  :version "25.1"
  :type 'string
  :group 'paragraphs)
;; Any string value is accepted as a file-local setting.
(put 'sentence-end-base 'safe-local-variable #'stringp)
177 :
(defun sentence-end ()
  "Return the regexp describing the end of a sentence.

This function returns either the value of the variable `sentence-end'
if it is non-nil, or the default value constructed from the
variables `sentence-end-base', `sentence-end-double-space',
`sentence-end-without-period' and `sentence-end-without-space'.

The default value specifies that in order to be recognized as the
end of a sentence, the ending period, question mark, or exclamation point
must be followed by two spaces, with perhaps some closing delimiters
in between.  See Info node `(elisp)Standard Regexps'."
  (or sentence-end
      ;; Non-break space (U+00A0) is accepted wherever a space is.
      (let ((period-less-alt
             ;; Optional leading alternative: a word character followed by
             ;; two spaces ends a sentence even without punctuation.
             (when sentence-end-without-period
               "\\w[ \u00a0][ \u00a0]\\|"))
            (required-gap
             ;; What must follow `sentence-end-base': end of line, a tab or
             ;; two spaces when double spacing is required, otherwise end of
             ;; line or any single tab/space.
             (if sentence-end-double-space
                 "\\($\\|[ \u00a0]$\\|\t\\|[ \u00a0][ \u00a0]\\)"
               "\\($\\|[\t \u00a0]\\)")))
        (concat period-less-alt
                "\\("
                sentence-end-base
                required-gap
                "\\|[" sentence-end-without-space "]+\\)"
                ;; Trailing whitespace is part of the match.
                "[ \u00a0\t\n]*"))))
200 :
(defcustom page-delimiter "^\014"
  "Regexp describing line-beginnings that separate pages."
  :type 'regexp
  :group 'paragraphs)
;; Any string value is accepted as a file-local setting.
(put 'page-delimiter 'safe-local-variable #'stringp)
206 :
(defcustom paragraph-ignore-fill-prefix nil
  "Non-nil means the paragraph commands are not affected by `fill-prefix'.
This is desirable in modes where blank lines are the paragraph delimiters."
  :type 'boolean
  :group 'paragraphs)
;; Only t or nil is accepted as a file-local setting.
(put 'paragraph-ignore-fill-prefix 'safe-local-variable #'booleanp)
213 :
(defun forward-paragraph (&optional arg)
  "Move forward to end of paragraph.
With argument ARG, do it ARG times;
a negative argument ARG = -N means move backward N paragraphs.

A line which `paragraph-start' matches either separates paragraphs
\(if `paragraph-separate' matches it also) or is the first line of a paragraph.
A paragraph end is the beginning of a line which is not part of the paragraph
to which the end of the previous line belongs, or the end of the buffer.
Returns the count of paragraphs left to move."
  (interactive "^p")
  (unless arg (setq arg 1))
  (let* ((origin (point))
         ;; Quoted `fill-prefix', or nil when it is unset, empty, or
         ;; `paragraph-ignore-fill-prefix' says to disregard it.
         (fill-prefix-regexp
          (and fill-prefix
               (not (equal fill-prefix ""))
               (not paragraph-ignore-fill-prefix)
               (regexp-quote fill-prefix)))
         ;; Drop a leading ^ from `paragraph-start' and `paragraph-separate'.
         ;; These regexps are matched at the left margin rather than at the
         ;; beginning of the line, so that the paragraph commands work with
         ;; indented text.  Only a ^ in the very first position is handled;
         ;; something like "whatever\\|^something" is not detected.
         (parstart (if (or (equal "" paragraph-start)
                           (not (equal ?^ (aref paragraph-start 0))))
                       paragraph-start
                     (substring paragraph-start 1)))
         ;; With a fill prefix in effect, a line holding only the prefix
         ;; and whitespace is a separator too.
         (parsep (let ((sep (if (or (equal "" paragraph-separate)
                                    (not (equal ?^ (aref paragraph-separate 0))))
                                paragraph-separate
                              (substring paragraph-separate 1))))
                   (if fill-prefix-regexp
                       (concat sep "\\|" fill-prefix-regexp "[ \t]*$")
                     sep)))
         ;; Regexp used when searching for a candidate boundary line.
         (boundary-re (concat "^[ \t]*\\(?:" parstart "\\|" parsep "\\)"))
         ;; START and FOUND-START deliberately live across loop iterations.
         start found-start)

    ;; Backward motion: one iteration per paragraph while ARG is negative.
    (while (and (< arg 0) (not (bobp)))
      (cond
       ((and (not (looking-at parsep))
             (re-search-backward "^\n" (max (1- (point)) (point-min)) t)
             (looking-at parsep))
        (setq arg (1+ arg)))
       (t
        (setq start (point))
        ;; Move back over paragraph-separating lines.
        (backward-char 1)
        (beginning-of-line)
        (while (and (not (bobp))
                    (progn (move-to-left-margin)
                           (looking-at parsep)))
          (forward-line -1))
        (unless (bobp)
          (setq arg (1+ arg))
          ;; Go to end of the previous (non-separating) line.
          (end-of-line)
          ;; Search back for a line that starts or separates paragraphs.
          (if (cond
               (fill-prefix-regexp
                ;; There is a fill prefix; it overrides parstart.  Walk up
                ;; while lines are non-separators carrying the prefix.
                ;; (Earlier code that refused to step over a line lacking
                ;; the prefix was removed because it kept a long
                ;; hanging-indent line from being filled together with the
                ;; lines that follow it.)
                (while (and (progn (beginning-of-line) (not (bobp)))
                            (progn (move-to-left-margin)
                                   (not (looking-at parsep)))
                            (looking-at fill-prefix-regexp))
                  (forward-line -1))
                (move-to-left-margin)
                (not (bobp)))
               (t
                (while (and (re-search-backward boundary-re nil 1)
                            (setq found-start t)
                            ;; Found a candidate, but need to check whether
                            ;; it is a REAL parstart.
                            (progn (setq start (point))
                                   (move-to-left-margin)
                                   (not (looking-at parsep)))
                            ;; Reject it unless it matches parstart and, when
                            ;; hard newlines are in use, it is at the start
                            ;; of the buffer or follows a hard newline.
                            (or (not (looking-at parstart))
                                (and use-hard-newlines
                                     (not (bobp))
                                     (not (get-text-property
                                           (1- start) 'hard)))))
                  (setq found-start nil)
                  (goto-char start))
                found-start))
              ;; Found one.
              (progn
                ;; Move forward over paragraph separators.  This cannot
                ;; reach the place we started, because we moved back over
                ;; a non-separator line.
                (while (and (not (eobp))
                            (progn (move-to-left-margin)
                                   (looking-at parsep)))
                  (forward-line 1))
                ;; If line before paragraph is just margin, back up to there.
                (end-of-line 0)
                (if (> (current-column) (current-left-margin))
                    (forward-char 1)
                  (skip-chars-backward " \t")
                  (unless (bolp)
                    (forward-line 1))))
            ;; No starter or separator line => use buffer beg.
            (goto-char (point-min)))))))

    ;; Forward motion: one iteration per paragraph while ARG is positive.
    (while (and (> arg 0) (not (eobp)))
      ;; Move forward over separator lines...
      (while (and (not (eobp))
                  (progn (move-to-left-margin) (not (eobp)))
                  (looking-at parsep))
        (forward-line 1))
      (unless (eobp) (setq arg (1- arg)))
      ;; ... and one more line.
      (forward-line 1)
      (cond
       (fill-prefix-regexp
        ;; There is a fill prefix; it overrides parstart.
        (while (and (not (eobp))
                    (progn (move-to-left-margin) (not (eobp)))
                    (not (looking-at parsep))
                    (looking-at fill-prefix-regexp))
          (forward-line 1)))
       (t
        ;; Keep searching while the candidate is neither a separator nor an
        ;; acceptable paragraph start.
        (while (and (re-search-forward boundary-re nil 1)
                    (progn (setq start (match-beginning 0))
                           (goto-char start)
                           (not (eobp)))
                    (progn (move-to-left-margin)
                           (not (looking-at parsep)))
                    (or (not (looking-at parstart))
                        (and use-hard-newlines
                             (not (get-text-property (1- start) 'hard)))))
          (forward-char 1))
        (when (< (point) (point-max))
          (goto-char start)))))
    (constrain-to-field nil origin t)
    ;; Return the number of steps that could not be done.
    arg))
355 :
(defun backward-paragraph (&optional arg)
  "Move backward to start of paragraph.
With argument ARG, do it ARG times;
a negative argument ARG = -N means move forward N paragraphs.

A paragraph start is the beginning of a line which is a
`paragraph-start' or which is ordinary text and follows a
`paragraph-separate'ing line; except: if the first real line of a
paragraph is preceded by a blank line, the paragraph starts at that
blank line.

See `forward-paragraph' for more information."
  (interactive "^p")
  ;; A missing ARG counts as 1; the motion itself is `forward-paragraph'
  ;; with the sign flipped, and its return value is passed through.
  (forward-paragraph (- (or arg 1))))
371 :
(defun mark-paragraph (&optional arg allow-extend)
  "Put point at beginning of this paragraph, mark at end.
The paragraph marked is the one that contains point or follows point.

With argument ARG, puts mark at end of a following paragraph, so that
the number of paragraphs marked equals ARG.

If ARG is negative, point is put at end of this paragraph, mark is put
at beginning of this or a previous paragraph.

Interactively (or if ALLOW-EXTEND is non-nil), if this command is
repeated or (in Transient Mark mode) if the mark is active,
it marks the next ARG paragraphs after the ones already marked."
  (interactive "p\np")
  (setq arg (or arg 1))
  (if (zerop arg)
      (error "Cannot mark zero paragraphs"))
  (if (and allow-extend
           (or (and (eq last-command this-command) (mark t))
               (and transient-mark-mode mark-active)))
      ;; Extending: leave point alone and push the mark ARG paragraphs
      ;; further on from where it currently is.
      (set-mark (save-excursion
                  (goto-char (mark))
                  (forward-paragraph arg)
                  (point)))
    ;; Fresh selection: set an active mark at the far end, then bring
    ;; point back to the near end.
    (forward-paragraph arg)
    (push-mark nil t t)
    (backward-paragraph arg)))
401 :
(defun kill-paragraph (arg)
  "Kill forward to end of paragraph.
With arg N, kill forward to Nth end of paragraph;
negative arg -N means kill backward to Nth start of paragraph."
  (interactive "p")
  ;; Record where we started, move by ARG paragraphs, kill what lies between.
  (let ((from (point)))
    (forward-paragraph arg)
    (kill-region from (point))))
408 :
(defun backward-kill-paragraph (arg)
  "Kill back to start of paragraph.
With arg N, kill back to Nth start of paragraph;
negative arg -N means kill forward to Nth end of paragraph."
  (interactive "p")
  ;; Record where we started, move back by ARG paragraphs, kill what lies
  ;; between.
  (let ((from (point)))
    (backward-paragraph arg)
    (kill-region from (point))))
415 :
(defun transpose-paragraphs (arg)
  "Interchange the current paragraph with the next one.
With prefix argument ARG a non-zero integer, moves the current
paragraph past ARG paragraphs, leaving point after the current paragraph.
If ARG is positive, moves the current paragraph forwards, if
ARG is negative moves it backwards.  If ARG is zero, exchanges
the current paragraph with the one containing the mark."
  (interactive "*p")
  ;; `transpose-subr' does the work, using `forward-paragraph' as the mover.
  (transpose-subr #'forward-paragraph arg))
425 :
(defun start-of-paragraph-text ()
  "Move point back to the first non-blank text of a paragraph.
Go back one paragraph with `forward-paragraph', then skip forward over
spaces, tabs and newlines.  If that skip reaches or passes the original
position although the paragraph boundary lay before it, return to the
boundary and, unless it is the beginning of the buffer, try again from
there."
  (let* ((origin (point))
         (boundary (progn (forward-paragraph -1) (point))))
    (skip-chars-forward " \t\n")
    ;; If the range of blank lines found spans the original start point,
    ;; try again from the beginning of it.  The `point-min' check avoids
    ;; an infinite loop when following a single return at start of buffer.
    (when (and (>= (point) origin) (< boundary origin))
      (goto-char boundary)
      (when (> boundary (point-min))
        (start-of-paragraph-text)))))
440 :
(defun end-of-paragraph-text ()
  "Move point forward to the end of the text of a paragraph.
Go forward one paragraph with `forward-paragraph' and step back over a
newline immediately before point.  If that leaves point at or before the
original position, move forward one character and, unless that reaches
the end of the buffer, try again from there."
  (let ((origin (point)))
    (forward-paragraph 1)
    (when (eq (preceding-char) ?\n)
      (forward-char -1))
    ;; No progress was made: nudge forward one character and retry.
    (when (<= (point) origin)
      (forward-char 1)
      (when (< (point) (point-max))
        (end-of-paragraph-text)))))
450 :
(defun forward-sentence (&optional arg)
  "Move forward to next end of sentence.  With argument, repeat.
With negative argument, move backward repeatedly to start of sentence.

The variable `sentence-end' is a regular expression that matches ends of
sentences.  Also, every paragraph boundary terminates sentences as well."
  (interactive "^p")
  (unless arg (setq arg 1))
  (let ((origin (point))
        ;; Bind the variable `sentence-end' itself to the effective regexp
        ;; for the duration of the command.
        (sentence-end (sentence-end)))
    ;; Backward motion, one sentence per iteration.
    (while (< arg 0)
      (let* ((here (point))
             ;; Start of real text in the paragraph.
             ;; We move back to here if we don't see a sentence-end.
             (text-beg (save-excursion
                         (start-of-paragraph-text)
                         (point)))
             ;; Start of the first line of the paragraph.  It is the search
             ;; limit, so that sentence-end can match if it is anchored at
             ;; BOL and the paragraph starts indented.
             (limit (save-excursion
                      (goto-char text-beg)
                      (beginning-of-line)
                      (point))))
        ;; Accept the nearest sentence end that finishes before HERE; if
        ;; the first one found does not, look for one further back.
        (goto-char (if (and (re-search-backward sentence-end limit t)
                            (or (< (match-end 0) here)
                                (re-search-backward sentence-end limit t)))
                       (match-end 0)
                     text-beg)))
      (setq arg (1+ arg)))
    ;; Forward motion, one sentence per iteration.
    (while (> arg 0)
      (let ((text-end (save-excursion (end-of-paragraph-text) (point))))
        (if (re-search-forward sentence-end text-end t)
            ;; Stop right after the punctuation, before the whitespace.
            (skip-chars-backward " \t\n")
          (goto-char text-end)))
      (setq arg (1- arg)))
    (constrain-to-field nil origin t)))
488 :
(defun repunctuate-sentences ()
  "Put two spaces at the end of sentences from point to the end of buffer.
It works using `query-replace-regexp'."
  (interactive)
  ;; The three groups are an optional closing delimiter, the sentence-ending
  ;; punctuation, and another optional closing delimiter; the trailing run
  ;; of spaces is what gets normalized.
  (query-replace-regexp
   "\\([]\"')]?\\)\\([.?!]\\)\\([]\"')]?\\) +"
   ;; The replacement must end in exactly TWO spaces, as the docstring
   ;; promises.  They are built with `make-string' so the count is explicit
   ;; and cannot be lost to whitespace collapsing in the source text.
   (concat "\\1\\2\\3" (make-string 2 ?\s))))
495 :
496 :
(defun backward-sentence (&optional arg)
  "Move backward to start of sentence.  With arg, do it arg times.
See `forward-sentence' for more information."
  (interactive "^p")
  ;; A missing ARG counts as 1; the motion itself is `forward-sentence'
  ;; with the sign flipped.
  (forward-sentence (- (or arg 1))))
503 :
(defun kill-sentence (&optional arg)
  "Kill from point to end of sentence.
With arg, repeat; negative arg -N means kill back to Nth start of sentence."
  (interactive "p")
  ;; Record where we started, move by ARG sentences, kill what lies between.
  (let ((from (point)))
    (forward-sentence arg)
    (kill-region from (point))))
509 :
(defun backward-kill-sentence (&optional arg)
  "Kill back from point to start of sentence.
With arg, repeat, or kill forward to Nth end of sentence if negative arg -N."
  (interactive "p")
  ;; Record where we started, move back by ARG sentences, kill what lies
  ;; between.
  (let ((from (point)))
    (backward-sentence arg)
    (kill-region from (point))))
515 :
(defun mark-end-of-sentence (arg)
  "Put mark at end of sentence.  Arg works as in `forward-sentence'.
If this command is repeated, it marks the next ARG sentences after the
ones already marked."
  (interactive "p")
  (let ((target
         (save-excursion
           ;; When repeating the command, continue from the existing mark
           ;; rather than from point.
           (when (and (eq last-command this-command) (mark t))
             (goto-char (mark)))
           (forward-sentence arg)
           (point))))
    ;; Set the mark quietly and activate it; point itself does not move.
    (push-mark target nil t)))
528 :
(defun transpose-sentences (arg)
  "Interchange the current sentence with the next one.
With prefix argument ARG a non-zero integer, moves the current
sentence past ARG sentences, leaving point after the current sentence.
If ARG is positive, moves the current sentence forwards, if
ARG is negative moves it backwards.  If ARG is zero, exchanges
the current sentence with the one containing the mark."
  (interactive "*p")
  ;; `transpose-subr' does the work, using `forward-sentence' as the mover.
  (transpose-subr #'forward-sentence arg))
538 :
539 : ;;; paragraphs.el ends here
|