texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: Wrapper for u8_strconv_from_encoding


From: Gavin D. Smith
Subject: branch master updated: Wrapper for u8_strconv_from_encoding
Date: Mon, 19 Feb 2024 16:32:58 -0500

This is an automated email from the git hooks/post-receive script.

gavin pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 9e3da1f8fb Wrapper for u8_strconv_from_encoding
9e3da1f8fb is described below

commit 9e3da1f8fb0518f6dea8a72ee097f0c2fd5d0b1f
Author: Gavin Smith <gavinsmith0123@gmail.com>
AuthorDate: Mon Feb 19 21:32:28 2024 +0000

    Wrapper for u8_strconv_from_encoding
    
    * tp/Texinfo/XS/main/unicode.c (utf8_from_string):
    Create wrapper for u8_strconv_from_encoding.  This simplifies
    the code that calls it as it is always called in the same way,
    with a "UTF-8" encoding specified, and allows potentially swapping
    out this implementation with e.g. a simple cast.  All callers of
    u8_strconv_from_encoding updated.
---
 ChangeLog                                          | 11 +++++++++
 tp/Texinfo/XS/convert/convert_html.c               | 13 ++++------
 tp/Texinfo/XS/convert/converter.c                  |  6 ++---
 tp/Texinfo/XS/main/manipulate_indices.c            | 10 +++-----
 tp/Texinfo/XS/main/manipulate_tree.c               |  4 ++--
 tp/Texinfo/XS/main/node_name_normalization.c       |  3 +--
 tp/Texinfo/XS/main/unicode.c                       | 28 +++++++++++-----------
 tp/Texinfo/XS/main/unicode.h                       |  3 +++
 tp/Texinfo/XS/main/utils.c                         | 16 ++++---------
 tp/Texinfo/XS/parsetexi/def.c                      |  7 +++---
 .../XS/structuring_transfo/transformations.c       |  7 +++---
 11 files changed, 51 insertions(+), 57 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 45f932d352..4f9ace00ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2024-02-19  Gavin Smith <gavinsmith0123@gmail.com>
+
+       Wrapper for u8_strconv_from_encoding
+
+       * tp/Texinfo/XS/main/unicode.c (utf8_from_string):
+       Create wrapper for u8_strconv_from_encoding.  This simplifies
+       the code that calls it as it is always called in the same way,
+       with a "UTF-8" encoding specified, and allows potentially swapping
+       out this implementation with e.g. a simple cast.  All callers of
+       u8_strconv_from_encoding updated.
+
 2024-02-19  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Structuring.pm (sectioning_structure),
diff --git a/tp/Texinfo/XS/convert/convert_html.c b/tp/Texinfo/XS/convert/convert_html.c
index dab4658b83..181e7d7b7b 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -9455,8 +9455,7 @@ css_string_accent (CONVERTER *self, const char *text,
           if (!p)
             {
               /* check if a character matches */
-              encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
-                                                  iconveh_question_mark);
+              encoded_u8 = utf8_from_string (text);
               next = u8_next (&first_char, encoded_u8);
               if (next && (uc_is_general_category (first_char, UC_CATEGORY_L)
                           /* ASCII digits */
@@ -9481,8 +9480,7 @@ css_string_accent (CONVERTER *self, const char *text,
                   const uint8_t *remaining;
                   if (!next)
                     {
-                      encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
-                                                  iconveh_question_mark);
+                      encoded_u8 = utf8_from_string (text);
                       next = encoded_u8;
                     }
                   remaining = u8_next (&second_char, next);
@@ -9562,9 +9560,7 @@ css_string_accent (CONVERTER *self, const char *text,
           /* determine the hexadecimal unicode point of the normalized
              character to output in the format expected in CSS strings */
           char *next_text;
-          uint8_t *encoded_u8 = u8_strconv_from_encoding (
-                                 normalized_accent_text, "UTF-8",
-                                               iconveh_question_mark);
+          uint8_t *encoded_u8 = utf8_from_string (normalized_accent_text);
           ucs4_t first_char;
           const uint8_t *next = u8_next (&first_char, encoded_u8);
           text_printf (&accented_text, "\\%04lX ", first_char);
@@ -12554,8 +12550,7 @@ convert_printindex_command (CONVERTER *self, const enum command_id cmd,
   for (i = 0; i < index_sorted->letter_number; i++)
     {
       char *letter = index_sorted->letter_entries[i].letter;
-      uint8_t *encoded_u8 = u8_strconv_from_encoding (letter, "UTF-8",
-                                                  iconveh_question_mark);
+      uint8_t *encoded_u8 = utf8_from_string (letter);
       ucs4_t next_char;
       u8_next (&next_char, encoded_u8);
       letter_is_symbol[i]
diff --git a/tp/Texinfo/XS/convert/converter.c b/tp/Texinfo/XS/convert/converter.c
index 546cda2330..8012d58a37 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -1022,8 +1022,7 @@ next_for_tieaccent (const char *text, const char **next)
     }
   else
     {
-      uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
-                                                  iconveh_question_mark);
+      uint8_t *encoded_u8 = utf8_from_string (text);
       ucs4_t first_char;
       u8_next (&first_char, encoded_u8);
       free (encoded_u8);
@@ -1099,8 +1098,7 @@ xml_numeric_entity_accent (enum command_id cmd, const char *text)
               xasprintf (&accented_char, "%s%s", text,
                          unicode_diacritics[cmd].text);
               normalized_char = normalize_NFC (accented_char);
-              encoded_u8 = u8_strconv_from_encoding (normalized_char, "UTF-8",
-                                                     iconveh_question_mark);
+              encoded_u8 = utf8_from_string (normalized_char);
               next = u8_next (&first_char, encoded_u8);
               if (next)
                 {
diff --git a/tp/Texinfo/XS/main/manipulate_indices.c b/tp/Texinfo/XS/main/manipulate_indices.c
index 5799b98ef5..387833d868 100644
--- a/tp/Texinfo/XS/main/manipulate_indices.c
+++ b/tp/Texinfo/XS/main/manipulate_indices.c
@@ -536,9 +536,7 @@ setup_index_entries_sort_strings (ERROR_MESSAGE_LIST *error_messages,
                         = &entry_sort_string.sort_string_subentries[k];
              /* TODO quite inefficient, only need the first character */
                       encoded_u8
-                       = u8_strconv_from_encoding (
-                                         subentry_sort_string->sort_string,
-                                         "UTF-8", iconveh_question_mark);
+                       = utf8_from_string (subentry_sort_string->sort_string);
                       new_len = u8_strmbtouc (&next_char, encoded_u8);
                       if (new_len > 0
                          && uc_is_property (next_char, UC_PROPERTY_ALPHABETIC))
@@ -1018,8 +1016,7 @@ sort_indices_by_letter (DOCUMENT *document, ERROR_MESSAGE_LIST *error_messages,
             = &sortable_index_entries->sortable_entries[j];
           char *sort_string
             = sortable_entry->sortable_subentries[0].sort_string;
-          uint8_t *encoded_u8 = u8_strconv_from_encoding (sort_string, "UTF-8",
-                                                  iconveh_question_mark);
+          uint8_t *encoded_u8 = utf8_from_string (sort_string);
           uint8_t *current_u8 = encoded_u8;
           char *letter_string;
           char *upper_letter_string;
@@ -1055,8 +1052,7 @@ sort_indices_by_letter (DOCUMENT *document, ERROR_MESSAGE_LIST *error_messages,
           free (letter_string);
           norm_letter_string = normalize_NFKD (upper_letter_string);
           free (upper_letter_string);
-          encoded_u8 = u8_strconv_from_encoding (norm_letter_string, "UTF-8",
-                                                  iconveh_question_mark);
+          encoded_u8 = utf8_from_string (norm_letter_string);
           free (norm_letter_string);
           current_u8 = encoded_u8;
 
diff --git a/tp/Texinfo/XS/main/manipulate_tree.c b/tp/Texinfo/XS/main/manipulate_tree.c
index 26f7e7eb5c..3003a6be09 100644
--- a/tp/Texinfo/XS/main/manipulate_tree.c
+++ b/tp/Texinfo/XS/main/manipulate_tree.c
@@ -32,6 +32,7 @@
 #include "targets.h"
 #include "utils.h"
 #include "manipulate_tree.h"
+#include "unicode.h"
 
 /* copy_tree from Texinfo::Common */
 
@@ -878,8 +879,7 @@ protect_text (ELEMENT *current, char *to_protect)
 
       if (current->source_mark_list.number)
         {
-          u8_text = u8_strconv_from_encoding (p, "UTF-8",
-                                            iconveh_question_mark);
+          u8_text = utf8_from_string (p);
           u8_p = u8_text;
 
           current_position = 0;
diff --git a/tp/Texinfo/XS/main/node_name_normalization.c b/tp/Texinfo/XS/main/node_name_normalization.c
index cb4cd57421..3f20b916ec 100644
--- a/tp/Texinfo/XS/main/node_name_normalization.c
+++ b/tp/Texinfo/XS/main/node_name_normalization.c
@@ -191,8 +191,7 @@ protect_unicode_char (const char *text, TEXT *result)
   char *str;
 
   /* determine unicode codepoint */
-  encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
-                                         iconveh_question_mark);
+  encoded_u8 = utf8_from_string (text);
   next = u8_next (&next_char, encoded_u8);
   if (next && *next)
     bug ("Something left on next_str/encoded_u8\n");
diff --git a/tp/Texinfo/XS/main/unicode.c b/tp/Texinfo/XS/main/unicode.c
index 704f14041c..38a29d57e8 100644
--- a/tp/Texinfo/XS/main/unicode.c
+++ b/tp/Texinfo/XS/main/unicode.c
@@ -37,21 +37,26 @@
 
 #include "accent_tables_8bit_codepoints.c"
 
+uint8_t *
+utf8_from_string (const char *text)
+{
+  /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
+  return u8_strconv_from_encoding (text, "UTF-8", iconveh_question_mark);
+}
+
 char *
 normalize_NFC (const char *text)
 {
   size_t lengthp;
 
   char *result = 0;
-  /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
-  uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
-                                                 iconveh_question_mark);
+  uint8_t *encoded_u8 = utf8_from_string (text);
   /* +1 to have the terminating NUL included in the string */
   uint8_t *normalized_u8 = u8_normalize (UNINORM_NFC, encoded_u8,
                                          u8_strlen (encoded_u8)+1,
                                          NULL, &lengthp);
   free (encoded_u8);
-  result = u8_strconv_to_encoding (normalized_u8, "UTF-8", iconveh_question_mark);
+  result = utf8_from_string (normalized_u8);
   free (normalized_u8);
   return result;
 }
@@ -62,15 +67,13 @@ normalize_NFKD (const char *text)
   size_t lengthp;
 
   char *result = 0;
-  /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
-  uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
-                                                 iconveh_question_mark);
+  uint8_t *encoded_u8 = utf8_from_string (text);
   /* +1 to have the terminating NUL included in the string */
   uint8_t *normalized_u8 = u8_normalize (UNINORM_NFKD, encoded_u8,
                                          u8_strlen (encoded_u8)+1,
                                          NULL, &lengthp);
   free (encoded_u8);
-  result = u8_strconv_to_encoding (normalized_u8, "UTF-8", iconveh_question_mark);
+  result = utf8_from_string (normalized_u8);
   free (normalized_u8);
   return result;
 }
@@ -110,8 +113,7 @@ unicode_accent (const char *text, const ELEMENT *e)
         {
           /* tieaccent diacritic is naturally and correctly composed
              between two characters */
-          uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
-                                                  iconveh_question_mark);
+          uint8_t *encoded_u8 = utf8_from_string (text);
           const uint8_t *next;
           ucs4_t first_char;
           next = u8_next (&first_char, encoded_u8);
@@ -133,15 +135,13 @@ unicode_accent (const char *text, const ELEMENT *e)
                   if (first_char_len < 0)
                     fatal ("u8_uctomb returns negative value");
                   first_char_u8[first_char_len] = 0;
-                  first_char_text = u8_strconv_to_encoding (first_char_u8, "UTF-8",
-                                                            iconveh_question_mark);
+                  first_char_text = utf8_from_string (first_char_u8);
                   free (first_char_u8);
                   text_init (&accented_text);
                   text_append (&accented_text, first_char_text);
                   free (first_char_text);
                   text_append (&accented_text, unicode_diacritics[e->cmd].text);
-                  next_text = u8_strconv_to_encoding (next, "UTF-8",
-                                                      iconveh_question_mark);
+                  next_text = utf8_from_string (next);
                   text_append (&accented_text, next_text);
                   free (next_text);
                   result = normalize_NFC (accented_text.text);
diff --git a/tp/Texinfo/XS/main/unicode.h b/tp/Texinfo/XS/main/unicode.h
index 2bd700b664..7911ea2d33 100644
--- a/tp/Texinfo/XS/main/unicode.h
+++ b/tp/Texinfo/XS/main/unicode.h
@@ -3,6 +3,7 @@
 #define UNICODE_H
 
 #include <stddef.h>
+#include <unitypes.h>
 
 #include "tree_types.h"
 
@@ -78,6 +79,8 @@ typedef struct DIACRITIC_UNICODE {
 extern DIACRITIC_UNICODE unicode_diacritics[];
 extern COMMAND_UNICODE unicode_character_brace_no_arg_commands[];
 
+uint8_t *utf8_from_string (const char *text);
+
 int unicode_point_decoded_in_encoding (const char *encoding, char *codepoint);
 
 char *normalize_NFC (const char *text);
diff --git a/tp/Texinfo/XS/main/utils.c b/tp/Texinfo/XS/main/utils.c
index 49639f38c0..e6a376eef1 100644
--- a/tp/Texinfo/XS/main/utils.c
+++ b/tp/Texinfo/XS/main/utils.c
@@ -45,6 +45,7 @@
 #include "builtin_commands.h"
 #include "api_to_perl.h"
 #include "utils.h"
+#include "unicode.h"
 
 #define min_level command_structuring_level[CM_chapter]
 #define max_level command_structuring_level[CM_subsubsection]
@@ -193,9 +194,7 @@ isascii_upper (int c)
 size_t
 count_multibyte (const char *text)
 {
-  /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
-  uint8_t *u8_text = u8_strconv_from_encoding (text, "UTF-8",
-                                                 iconveh_question_mark);
+  uint8_t *u8_text = utf8_from_string (text);
   size_t result = u8_mbsnlen (u8_text, u8_strlen (u8_text));
 
   free (u8_text);
@@ -209,9 +208,7 @@ to_upper_or_lower_multibyte (const char *text, int lower_or_upper)
   char *result;
   size_t lengthp;
   uint8_t *u8_result;
-  /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
-  uint8_t *u8_text = u8_strconv_from_encoding (text, "UTF-8",
-                                               iconveh_question_mark);
+  uint8_t *u8_text = utf8_from_string (text);
   if (lower_or_upper > 0)
     /* the + 1 is there to hold the terminating NULL */
     u8_result = u8_toupper (u8_text, u8_strlen (u8_text) + 1,
@@ -231,9 +228,7 @@ int
 width_multibyte (const char *text)
 {
   int result;
-  /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
-  uint8_t *u8_text = u8_strconv_from_encoding (text, "UTF-8",
-                                                 iconveh_question_mark);
+  uint8_t *u8_text = utf8_from_string (text);
   /* NOTE the libunistring documentation described encoding as
      The encoding argument identifies the encoding (e.g. "ISO-8859-2"
      for Polish).  Looking at the code, it seems that it is only
@@ -250,8 +245,7 @@ width_multibyte (const char *text)
 int
 word_bytes_len_multibyte (const char *text)
 {
-  uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
-                                                  iconveh_question_mark);
+  uint8_t *encoded_u8 = utf8_from_string (text);
   uint8_t *current_u8 = encoded_u8;
   int len = 0;
   while (1)
diff --git a/tp/Texinfo/XS/parsetexi/def.c b/tp/Texinfo/XS/parsetexi/def.c
index f46c684d24..21bbee40f4 100644
--- a/tp/Texinfo/XS/parsetexi/def.c
+++ b/tp/Texinfo/XS/parsetexi/def.c
@@ -34,6 +34,7 @@
 #include "manipulate_tree.h"
 #include "source_marks.h"
 #include "commands.h"
+#include "unicode.h"
 
 void
 gather_def_item (ELEMENT *current, enum command_id next_command)
@@ -198,8 +199,7 @@ split_delimiters (ELEMENT *current, int starting_idx)
 
       if (e->source_mark_list.number)
         {
-          u8_text = u8_strconv_from_encoding (p, "UTF-8",
-                                            iconveh_question_mark);
+          u8_text = utf8_from_string (p);
           u8_p = u8_text;
 
           current_position = 0;
@@ -284,8 +284,7 @@ split_def_args (ELEMENT *current, int starting_idx)
 
       if (e->source_mark_list.number)
         {
-          u8_text = u8_strconv_from_encoding (p, "UTF-8",
-                                              iconveh_question_mark);
+          u8_text = utf8_from_string (p);
           u8_p = u8_text;
 
           current_position = 0;
diff --git a/tp/Texinfo/XS/structuring_transfo/transformations.c b/tp/Texinfo/XS/structuring_transfo/transformations.c
index 9dac12ee4f..821c6cba4f 100644
--- a/tp/Texinfo/XS/structuring_transfo/transformations.c
+++ b/tp/Texinfo/XS/structuring_transfo/transformations.c
@@ -45,6 +45,7 @@
 #include "targets.h"
 #include "node_name_normalization.h"
 #include "transformations.h"
+#include "unicode.h"
 
 
 /* in Common.pm */
@@ -105,8 +106,7 @@ protect_first_parenthesis (ELEMENT *element)
 
           if (content->source_mark_list.number)
             {
-              u8_text = u8_strconv_from_encoding (p, "UTF-8",
-                                               iconveh_question_mark);
+              u8_text = utf8_from_string (p);
               u8_p = u8_text;
 
               current_position = 0;
@@ -1368,8 +1368,7 @@ protect_hashchar_at_line_beginning_internal (const char *type,
                               memset (&(current->source_mark_list), 0,
                                   sizeof (SOURCE_MARK_LIST));
 
-                              u8_text = u8_strconv_from_encoding (p, "UTF-8",
-                                              iconveh_question_mark);
+                              u8_text = utf8_from_string (p);
                               u8_p = u8_text;
 
                               current_position = 0;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]