texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Gavin D. Smith
Date: Mon, 23 Oct 2023 14:51:33 -0400 (EDT)

branch: release/7.1
commit c76bcd0feed005aaf9db28a76f4883f3ae98295b
Author: Gavin Smith <gavinsmith0123@gmail.com>
AuthorDate: Mon Oct 23 19:51:00 2023 +0100

    * tp/Texinfo/XS/xspara.c (get_utf8_codepoint):
    Wrapper for mbrtowc/btowc.
    [_WIN32]: Do not call btowc, as it was tested to be very slow
    on MinGW.  Report from Eli Zaretskii.
---
 ChangeLog              |  7 +++++++
 tp/Texinfo/XS/xspara.c | 48 ++++++++++++++++++++++++++----------------------
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index e619109f5b..c4379ec56b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2023-10-23  Gavin Smith <gavinsmith0123@gmail.com>
+
+       * tp/Texinfo/XS/xspara.c (get_utf8_codepoint):
+       Wrapper for mbrtowc/btowc.
+       [_WIN32]: Do not call btowc, as it was tested to be very slow
+       on MinGW.  Report from Eli Zaretskii.
+
 2023-10-18  Gavin Smith <gavinsmith0123@gmail.com>
 
        Texinfo 7.1
diff --git a/tp/Texinfo/XS/xspara.c b/tp/Texinfo/XS/xspara.c
index 7c6895a7ff..e1cddcdc2a 100644
--- a/tp/Texinfo/XS/xspara.c
+++ b/tp/Texinfo/XS/xspara.c
@@ -684,6 +684,30 @@ xspara_end (void)
 /* characters triggering an end of sentence */
 #define end_sentence_characters ".?!"
 
+/* Wrapper for mbrtowc.  Set *PWC and return length of codepoint in bytes. */
+size_t
+get_utf8_codepoint (wchar_t *pwc, const char *mbs, size_t n)
+{
+#ifdef _WIN32
+  /* Use the above implementation of mbrtowc.  Do not use btowc, as it
+     does not exist as standard on MS-Windows and was tested to be
+     very slow on MinGW. */
+  return mbrtowc (pwc, mbs, n, NULL);
+#else
+  if (!PRINTABLE_ASCII(*mbs))
+    {
+      return mbrtowc (pwc, mbs, n, NULL);
+    }
+  else
+    {
+      /* Functionally the same as mbrtowc but (tested) slightly quicker. */
+      *pwc = btowc (*mbs);
+      return 1;
+    }
+#endif
+}
+
+
 /* Add WORD to paragraph in RESULT, not refilling WORD.  If we go past the end 
    of the line start a new one.  TRANSPARENT means that the letters in WORD
    are ignored for the purpose of deciding whether a full stop ends a sentence
@@ -730,18 +754,7 @@ xspara__add_next (TEXT *result, char *word, int word_len, int transparent)
               if (!strchr (end_sentence_characters
                            after_punctuation_characters, *p))
                 {
-                  if (!PRINTABLE_ASCII(*p))
-                    {
-                      wchar_t wc = L'\0';
-                      mbrtowc (&wc, p, len, NULL);
-                      state.last_letter = wc;
-                      break;
-                    }
-                  else
-                    {
-                      state.last_letter = btowc (*p);
-                      break;
-                    }
+                  get_utf8_codepoint (&state.last_letter, p, len);
                 }
             }
         }
@@ -1013,16 +1026,7 @@ xspara_add_text (char *text, int len)
         }
 
       /************** Not a white space character. *****************/
-      if (!PRINTABLE_ASCII(*p))
-        {
-          char_len = mbrtowc (&wc, p, len, NULL);
-        }
-      else
-        {
-          /* Functonally the same as mbrtowc but (tested) slightly quicker. */
-          char_len = 1;
-          wc = btowc (*p);
-        }
+      char_len = get_utf8_codepoint (&wc, p, len);
 
       if ((long) char_len == 0)
         break; /* Null character. Shouldn't happen. */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]