bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

striconveh: new function mem_iconveh


From: Bruno Haible
Subject: striconveh: new function mem_iconveh
Date: Sun, 21 Jan 2007 23:04:47 +0100 (MET)
User-agent: KMail/1.5.4

To the striconveh module (charset conversion with error handling) I'm adding
a variant that operates on non-NUL-terminated strings and that takes the two
encodings as arguments.

2007-01-21  Bruno Haible  <address@hidden>

        * lib/striconveh.h (mem_iconveh): New declaration.
        * lib/striconveh.c (mem_iconveh): New function.
        * tests/test-striconveh.c (main): Add tests for mem_iconveh.

*** lib/striconveh.h    21 Jan 2007 21:34:27 -0000      1.2
--- lib/striconveh.h    21 Jan 2007 21:59:06 -0000
***************
*** 80,85 ****
--- 80,102 ----
  #endif
  
  /* Convert an entire string from one encoding to another, using iconv.
+    The original string is at [SRC,...,SRC+SRCLEN-1].
+    Both the "from" and the "to" encoding must use a single NUL byte at the
+    end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
+    *RESULTP and *LENGTHP should initially be a scratch buffer and its size,
+    or *RESULTP can initially be NULL.
+    May erase the contents of the memory at *RESULTP.
+    Return value: 0 if successful, otherwise -1 and errno set.
+    If successful: The resulting string is stored in *RESULTP and its length
+    in *LENGTHP.  *RESULTP is set to a freshly allocated memory block, or is
+    unchanged if no dynamic memory allocation was necessary.  */
+ extern int
+        mem_iconveh (const char *src, size_t srclen,
+                   const char *from_codeset, const char *to_codeset,
+                   enum iconv_ilseq_handler handler,
+                   char **resultp, size_t *lengthp);
+ 
+ /* Convert an entire string from one encoding to another, using iconv.
     The original string is the NUL-terminated string starting at SRC.
     Both the "from" and the "to" encoding must use a single NUL byte at the
     end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
*** lib/striconveh.c    21 Jan 2007 21:34:27 -0000      1.2
--- lib/striconveh.c    21 Jan 2007 21:59:07 -0000
***************
*** 766,771 ****
--- 766,919 ----
  
  #endif
  
+ int
+ mem_iconveh (const char *src, size_t srclen,
+            const char *from_codeset, const char *to_codeset,
+            enum iconv_ilseq_handler handler,
+            char **resultp, size_t *lengthp)
+ {
+   if (c_strcasecmp (from_codeset, to_codeset) == 0)
+     {
+       char *result;
+ 
+       if (*resultp != NULL && *lengthp >= srclen)
+       result = *resultp;
+       else
+       {
+         result = (char *) malloc (srclen);
+         if (result == NULL)
+           {
+             errno = ENOMEM;
+             return -1;
+           }
+       }
+       memcpy (result, src, srclen);
+       *resultp = result;
+       *lengthp = srclen;
+       return 0;
+     }
+   else
+     {
+ #if HAVE_ICONV
+       iconv_t cd;
+       iconv_t cd1;
+       iconv_t cd2;
+       char *result;
+       size_t length;
+       int retval;
+ 
+       /* Avoid glibc-2.1 bug with EUC-KR.  */
+ # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
+       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
+         || c_strcasecmp (to_codeset, "EUC-KR") == 0)
+       {
+         errno = EINVAL;
+         return -1;
+       }
+ # endif
+ 
+       cd = iconv_open (to_codeset, from_codeset);
+       if (cd == (iconv_t)(-1))
+       return -1;
+ 
+       if (c_strcasecmp (from_codeset, "UTF-8") == 0)
+       cd1 = (iconv_t)(-1);
+       else
+       {
+         cd1 = iconv_open ("UTF-8", from_codeset);
+         if (cd1 == (iconv_t)(-1))
+           {
+             int saved_errno = errno;
+             iconv_close (cd);
+             errno = saved_errno;
+             return -1;
+           }
+       }
+ 
+       if (c_strcasecmp (to_codeset, "UTF-8") == 0)
+       cd2 = (iconv_t)(-1);
+       else
+       {
+         cd2 = iconv_open (to_codeset, "UTF-8");
+         if (cd2 == (iconv_t)(-1))
+           {
+             int saved_errno = errno;
+             if (cd1 != (iconv_t)(-1))
+               iconv_close (cd1);
+             iconv_close (cd);
+             errno = saved_errno;
+             return -1;
+           }
+       }
+ 
+       result = *resultp;
+       length = *lengthp;
+       retval =
+       mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, &result, &length);
+ 
+       if (retval < 0)
+       {
+         /* Close cd, cd1, cd2, but keep the errno from mem_cd_iconveh.  */
+         int saved_errno = errno;
+         if (cd2 != (iconv_t)(-1))
+           iconv_close (cd2);
+         if (cd1 != (iconv_t)(-1))
+           iconv_close (cd1);
+         iconv_close (cd);
+         errno = saved_errno;
+       }
+       else
+       {
+         if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
+           {
+             /* Return -1, but free the allocated memory, and while doing
+                that, preserve the errno from iconv_close.  */
+             int saved_errno = errno;
+             if (cd1 != (iconv_t)(-1))
+               iconv_close (cd1);
+             iconv_close (cd);
+             if (result != *resultp && result != NULL)
+               free (result);
+             errno = saved_errno;
+             return -1;
+           }
+         if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
+           {
+             /* Return -1, but free the allocated memory, and while doing
+                that, preserve the errno from iconv_close.  */
+             int saved_errno = errno;
+             iconv_close (cd);
+             if (result != *resultp && result != NULL)
+               free (result);
+             errno = saved_errno;
+             return -1;
+           }
+         if (iconv_close (cd) < 0)
+           {
+             /* Return -1, but free the allocated memory, and while doing
+                that, preserve the errno from iconv_close.  */
+             int saved_errno = errno;
+             if (result != *resultp && result != NULL)
+               free (result);
+             errno = saved_errno;
+             return -1;
+           }
+         *resultp = result;
+         *lengthp = length;
+       }
+       return retval;
+ #else
+       /* This is a different error code than if iconv_open existed but didn't
+        support from_codeset and to_codeset, so that the caller can emit
+        an error message such as
+          "iconv() is not supported. Installing GNU libiconv and
+           then reinstalling this package would fix this."  */
+       errno = ENOSYS;
+       return -1;
+ #endif
+     }
+ }
+ 
  char *
  str_iconveh (const char *src,
             const char *from_codeset, const char *to_codeset,
*** tests/test-striconveh.c     21 Jan 2007 21:34:27 -0000      1.2
--- tests/test-striconveh.c     21 Jan 2007 21:59:07 -0000
***************
*** 347,352 ****
--- 347,501 ----
    iconv_close (cd_88592_to_utf8);
    iconv_close (cd_utf8_to_88592);
  
+   /* ------------------------- Test mem_iconveh() ------------------------- */
+ 
+   /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.  */
+   for (h = 0; h < SIZEOF (handlers); h++)
+     {
+       enum iconv_ilseq_handler handler = handlers[h];
+       static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+       static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+       char *result = NULL;
+       size_t length = 0;
+       int retval = mem_iconveh (input, strlen (input),
+                               "ISO-8859-2", "ISO-8859-1",
+                               handler,
+                               &result, &length);
+       ASSERT (retval == 0);
+       ASSERT (length == strlen (expected));
+       ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+       free (result);
+     }
+ 
+   /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.  */
+   for (h = 0; h < SIZEOF (handlers); h++)
+     {
+       enum iconv_ilseq_handler handler = handlers[h];
+       static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
+       char *result = NULL;
+       size_t length = 0;
+       int retval = mem_iconveh (input, strlen (input),
+                               "ISO-8859-2", "ISO-8859-1",
+                               handler,
+                               &result, &length);
+       switch (handler)
+       {
+       case iconveh_error:
+         ASSERT (retval == -1 && errno == EILSEQ);
+         ASSERT (result == NULL);
+         break;
+       case iconveh_question_mark:
+         {
+           static const char expected[] = "Rafa? Maszkowski";
+           ASSERT (retval == 0);
+           ASSERT (length == strlen (expected));
+           ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+           free (result);
+         }
+         break;
+       case iconveh_escape_sequence:
+         {
+           static const char expected[] = "Rafa\\u0142 Maszkowski";
+           ASSERT (retval == 0);
+           ASSERT (length == strlen (expected));
+           ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+           free (result);
+         }
+         break;
+       }
+     }
+ 
+   /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
+   for (h = 0; h < SIZEOF (handlers); h++)
+     {
+       enum iconv_ilseq_handler handler = handlers[h];
+       static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+       static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
+       char *result = NULL;
+       size_t length = 0;
+       int retval = mem_iconveh (input, strlen (input),
+                               "ISO-8859-1", "UTF-8",
+                               handler,
+                               &result, &length);
+       ASSERT (retval == 0);
+       ASSERT (length == strlen (expected));
+       ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+       free (result);
+     }
+ 
+   /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
+   for (h = 0; h < SIZEOF (handlers); h++)
+     {
+       enum iconv_ilseq_handler handler = handlers[h];
+       static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
+       static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+       char *result = NULL;
+       size_t length = 0;
+       int retval = mem_iconveh (input, strlen (input),
+                               "UTF-8", "ISO-8859-1",
+                               handler,
+                               &result, &length);
+       ASSERT (retval == 0);
+       ASSERT (length == strlen (expected));
+       ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+       free (result);
+     }
+ 
+   /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
+   for (h = 0; h < SIZEOF (handlers); h++)
+     {
+       enum iconv_ilseq_handler handler = handlers[h];
+       static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
+       char *result = NULL;
+       size_t length = 0;
+       int retval = mem_iconveh (input, strlen (input),
+                               "UTF-8", "ISO-8859-1",
+                               handler,
+                               &result, &length);
+       switch (handler)
+       {
+       case iconveh_error:
+         ASSERT (retval == -1 && errno == EILSEQ);
+         ASSERT (result == NULL);
+         break;
+       case iconveh_question_mark:
+         {
+           static const char expected[] = "Rafa? Maszkowski";
+           ASSERT (retval == 0);
+           ASSERT (length == strlen (expected));
+           ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+           free (result);
+         }
+         break;
+       case iconveh_escape_sequence:
+         {
+           static const char expected[] = "Rafa\\u0142 Maszkowski";
+           ASSERT (retval == 0);
+           ASSERT (length == strlen (expected));
+           ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+           free (result);
+         }
+         break;
+       }
+     }
+ 
+   /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.  */
+   for (h = 0; h < SIZEOF (handlers); h++)
+     {
+       enum iconv_ilseq_handler handler = handlers[h];
+       static const char input[] = "\342";
+       char *result = NULL;
+       size_t length = 0;
+       int retval = mem_iconveh (input, strlen (input),
+                               "UTF-8", "ISO-8859-1",
+                               handler,
+                               &result, &length);
+       ASSERT (retval == 0);
+       ASSERT (length == 0);
+       if (result != NULL)
+       free (result);
+     }
+ 
    /* ------------------------- Test str_iconveh() ------------------------- */
  
    /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.  */





reply via email to

[Prev in Thread] Current Thread [Next in Thread]