[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master af359de: Allow `string-limit' to work on encoded strings
From: |
Lars Ingebrigtsen |
Subject: |
master af359de: Allow `string-limit' to work on encoded strings |
Date: |
Thu, 24 Dec 2020 23:58:22 -0500 (EST) |
branch: master
commit af359de91772478587f768300ca61d64a693fedb
Author: Lars Ingebrigtsen <larsi@gnus.org>
Commit: Lars Ingebrigtsen <larsi@gnus.org>
Allow `string-limit' to work on encoded strings
* doc/lispref/strings.texi (Creating Strings): Document it.
* lisp/emacs-lisp/subr-x.el (string-limit): Allow limiting on
encoded strings.
---
doc/lispref/strings.texi | 9 ++++++++-
lisp/emacs-lisp/shortdoc.el | 3 ++-
lisp/emacs-lisp/subr-x.el | 34 +++++++++++++++++++++++++++++-----
test/lisp/emacs-lisp/subr-x-tests.el | 20 ++++++++++++++++++++
4 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi
index 19b9147..1e5f52d 100644
--- a/doc/lispref/strings.texi
+++ b/doc/lispref/strings.texi
@@ -394,12 +394,19 @@ there are individual words that are longer than
@var{length}, these
will not be shortened.
@end defun
-@defun string-limit string length &optional end
+@defun string-limit string length &optional end coding-system
If @var{string} is shorter than @var{length}, @var{string} is returned
as is. Otherwise, return a substring of @var{string} consisting of
the first @var{length} characters. If the optional @var{end}
parameter is given, return a string of the @var{length} last
characters instead.
+
+If @var{coding-system} is non-@code{nil}, @var{string} will be encoded
+before limiting, and the result will be a unibyte string that's
+shorter than @code{length}. If @var{string} contains characters that
+are encoded into several bytes (for instance, when using
+@code{utf-8}), the resulting unibyte string is never truncated in the
+middle of a character representation.
@end defun
@defun string-lines string &optional omit-nulls
diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el
index 6184655..9d183e0 100644
--- a/lisp/emacs-lisp/shortdoc.el
+++ b/lisp/emacs-lisp/shortdoc.el
@@ -146,7 +146,8 @@ There can be any number of :example/:result elements."
(string-limit
:eval (string-limit "foobar" 3)
:eval (string-limit "foobar" 3 t)
- :eval (string-limit "foobar" 10))
+ :eval (string-limit "foobar" 10)
+ :eval (string-limit "fo好" 3 nil 'utf-8))
(truncate-string-to-width
:eval (truncate-string-to-width "foobar" 3)
:eval (truncate-string-to-width "你好bar" 5))
diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el
index dc5840a..9fbb035 100644
--- a/lisp/emacs-lisp/subr-x.el
+++ b/lisp/emacs-lisp/subr-x.el
@@ -286,7 +286,7 @@ result will have lines that are longer than LENGTH."
(fill-region (point-min) (point-max)))
(buffer-string)))
-(defun string-limit (string length &optional end)
+(defun string-limit (string length &optional end coding-system)
"Return (up to) a LENGTH substring of STRING.
If STRING is shorter than or equal to LENGTH, the entire string
is returned unchanged.
@@ -295,15 +295,39 @@ If STRING is longer than LENGTH, return a substring
consisting of
the first LENGTH characters of STRING. If END is non-nil, return
the last LENGTH characters instead.
+If CODING-SYSTEM is non-nil, STRING will be encoded before
+limiting, and LENGTH is interpreted as the number of bytes to
+limit the string to. The result will be a unibyte string that is
+shorter than LENGTH, but will not contain \"partial\" characters,
+even if CODING-SYSTEM encodes characters with several bytes per
+character.
+
When shortening strings for display purposes,
`truncate-string-to-width' is almost always a better alternative
than this function."
(unless (natnump length)
(signal 'wrong-type-argument (list 'natnump length)))
- (cond
- ((<= (length string) length) string)
- (end (substring string (- (length string) length)))
- (t (substring string 0 length))))
+ (if coding-system
+ (let ((result nil)
+ (result-length 0)
+ (index (if end (1- (length string)) 0)))
+ (while (let ((encoded (encode-coding-char
+ (aref string index) coding-system)))
+ (and (<= (+ (length encoded) result-length) length)
+ (progn
+ (push encoded result)
+ (cl-incf result-length (length encoded))
+ (setq index (if end (1- index)
+ (1+ index))))
+ (if end (> index -1)
+ (< index (length string)))))
+ ;; No body.
+ )
+ (apply #'concat (if end result (nreverse result))))
+ (cond
+ ((<= (length string) length) string)
+ (end (substring string (- (length string) length)))
+ (t (substring string 0 length)))))
(defun string-lines (string &optional omit-nulls)
"Split STRING into a list of lines.
diff --git a/test/lisp/emacs-lisp/subr-x-tests.el
b/test/lisp/emacs-lisp/subr-x-tests.el
index 2ae492e..b17185a 100644
--- a/test/lisp/emacs-lisp/subr-x-tests.el
+++ b/test/lisp/emacs-lisp/subr-x-tests.el
@@ -600,6 +600,26 @@
(should (equal (string-limit "foo" 0) ""))
(should-error (string-limit "foo" -1)))
+(ert-deftest subr-string-limit-coding ()
+ (should (not (multibyte-string-p (string-limit "foó" 10 nil 'utf-8))))
+ (should (equal (string-limit "foó" 10 nil 'utf-8) "fo\303\263"))
+ (should (equal (string-limit "foó" 3 nil 'utf-8) "fo"))
+ (should (equal (string-limit "foó" 4 nil 'utf-8) "fo\303\263"))
+ (should (equal (string-limit "foóa" 4 nil 'utf-8) "fo\303\263"))
+ (should (equal (string-limit "foóá" 4 nil 'utf-8) "fo\303\263"))
+ (should (equal (string-limit "foóa" 4 nil 'iso-8859-1) "fo\363a"))
+ (should (equal (string-limit "foóá" 4 nil 'iso-8859-1) "fo\363\341"))
+ (should (equal (string-limit "foóá" 4 nil 'utf-16) "\376\377\000f"))
+
+ (should (equal (string-limit "foó" 10 t 'utf-8) "fo\303\263"))
+ (should (equal (string-limit "foó" 3 t 'utf-8) "o\303\263"))
+ (should (equal (string-limit "foó" 4 t 'utf-8) "fo\303\263"))
+ (should (equal (string-limit "foóa" 4 t 'utf-8) "o\303\263a"))
+ (should (equal (string-limit "foóá" 4 t 'utf-8) "\303\263\303\241"))
+ (should (equal (string-limit "foóa" 4 t 'iso-8859-1) "fo\363a"))
+ (should (equal (string-limit "foóá" 4 t 'iso-8859-1) "fo\363\341"))
+ (should (equal (string-limit "foóá" 4 t 'utf-16) "\376\377\000\341")))
+
(ert-deftest subr-string-lines ()
(should (equal (string-lines "foo") '("foo")))
(should (equal (string-lines "foo \nbar") '("foo " "bar"))))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master af359de: Allow `string-limit' to work on encoded strings,
Lars Ingebrigtsen <=