[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[pre-lexer 03/21] i18n: New function recode_substring_pool().
From: Ben Pfaff
Subject: [pre-lexer 03/21] i18n: New function recode_substring_pool().
Date: Thu, 23 Sep 2010 21:20:39 -0700
Occasionally it is necessary to recode a string that might contain a null
byte. This function is useful in such a case.
---
src/libpspp/i18n.c | 63 ++++++++++++++++++++++++++++++++++++++--------------
src/libpspp/i18n.h | 33 +++++----------------------
2 files changed, 52 insertions(+), 44 deletions(-)
diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c
index c0deb11..4934617 100644
--- a/src/libpspp/i18n.c
+++ b/src/libpspp/i18n.c
@@ -32,6 +32,7 @@
#include "libpspp/hmapx.h"
#include "libpspp/hash-functions.h"
#include "libpspp/pool.h"
+#include "libpspp/str.h"
#include "libpspp/version.h"
#include "gl/localcharset.h"
@@ -99,14 +100,16 @@ recode_string (const char *to, const char *from,
/* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
at OP, and appends a null terminator to the output.
- Returns true if successful, false if the output buffer is too small. */
-static bool
+ Returns the output length if successful, -1 if the output buffer is too
+ small. */
+static ssize_t
try_recode (iconv_t conv,
const char *ip, size_t inbytes,
- char *op, size_t outbytes)
+ char *op_, size_t outbytes)
{
/* FIXME: Need to ensure that this char is valid in the target encoding */
const char fallbackchar = '?';
+ char *op = op_;
/* Put the converter into the initial shift state, in case there was any
state information left over from its last usage. */
@@ -118,14 +121,14 @@ try_recode (iconv_t conv,
{
case EINVAL:
if (outbytes < 2)
- return false;
+ return -1;
*op++ = fallbackchar;
- *op++ = '\0';
- return true;
+ *op = '\0';
+ return op - op_;
case EILSEQ:
if (outbytes == 0)
- return false;
+ return -1;
*op++ = fallbackchar;
outbytes--;
ip++;
@@ -133,7 +136,7 @@ try_recode (iconv_t conv,
break;
case E2BIG:
- return false;
+ return -1;
default:
/* should never happen */
@@ -143,10 +146,10 @@ try_recode (iconv_t conv,
}
if (outbytes == 0)
- return false;
+ return -1;
*op = '\0';
- return true;
+ return op - op_;
}
/* Converts the string TEXT, which should be encoded in FROM-encoding, to a
@@ -165,14 +168,34 @@ char *
recode_string_pool (const char *to, const char *from,
const char *text, int length, struct pool *pool)
{
- size_t outbufferlength;
- iconv_t conv ;
+ struct substring out;
if ( text == NULL )
return NULL;
if ( length == -1 )
- length = strlen(text);
+ length = strlen (text);
+
+ out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
+ return out.string;
+}
+
+/* Converts the string TEXT, which should be encoded in FROM-encoding, to a
+ dynamically allocated string in TO-encoding. Any characters which cannot be
+ converted will be represented by '?'.
+
+ The returned string will be null-terminated and allocated on POOL.
+
+ This function's behaviour differs from that of g_convert_with_fallback
+ provided by GLib. The GLib function will fail (returns NULL) if any part of
+ the input string is not valid in the declared input encoding. This function
+ however perseveres even in the presence of badly encoded input. */
+struct substring
+recode_substring_pool (const char *to, const char *from,
+ struct substring text, struct pool *pool)
+{
+ size_t outbufferlength;
+ iconv_t conv ;
if (to == NULL)
to = default_encoding;
@@ -183,14 +206,20 @@ recode_string_pool (const char *to, const char *from,
conv = create_iconv (to, from);
if ( (iconv_t) -1 == conv )
- return xstrdup (text);
+ {
+ struct substring out;
+ ss_alloc_substring (&out, text);
+ return out;
+ }
for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 )
- if ( outbufferlength > length)
+ if ( outbufferlength > text.length)
{
char *output = pool_malloc (pool, outbufferlength);
- if (try_recode (conv, text, length, output, outbufferlength))
- return output;
+ ssize_t output_len = try_recode (conv, text.string, text.length,
+ output, outbufferlength);
+ if (output_len >= 0)
+ return ss_buffer (output, output_len);
pool_free (pool, output);
}
diff --git a/src/libpspp/i18n.h b/src/libpspp/i18n.h
index e2663a0..37bd944 100644
--- a/src/libpspp/i18n.h
+++ b/src/libpspp/i18n.h
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,36 +26,15 @@ void i18n_init (void);
struct pool;
-
-/*
-Converts the string TEXT, which should be encoded in FROM-encoding, to a
-dynamically allocated string in TO-encoding. Any characters which cannot
-be converted will be represented by '?'.
-
-LENGTH should be the length of the string or -1, if null terminated.
-
-The returned string will be allocated on POOL.
-
-This function's behaviour differs from that of g_convert_with_fallback provided
-by GLib. The GLib function will fail (returns NULL) if any part of the input
-string is not valid in the declared input encoding. This function however perseveres
-even in the presence of badly encoded input.
-*/
-char *recode_string_pool (const char *to, const char *from,
- const char *text, int length, struct pool *pool);
-
-
-
-/* Similar to recode_string_pool, but allocates the returned value on the heap instead of
- in a pool. It is the caller's responsibility to free the returned value. */
char *recode_string (const char *to, const char *from,
- const char *text, int len);
-
+ const char *text, int len);
+char *recode_string_pool (const char *to, const char *from,
+ const char *text, int length, struct pool *);
+struct substring recode_substring_pool (const char *to, const char *from,
+ struct substring text, struct pool *);
bool valid_encoding (const char *enc);
-/* Return the decimal separator according to the
- system locale */
char get_system_decimal (void);
const char * get_default_encoding (void);
--
1.7.1
- [pre-lexer 00/21] preparation for work on lexer, Ben Pfaff, 2010/09/24
- [pre-lexer 01/21] str: Make ss_alloc_substring() allocate null-terminated strings., Ben Pfaff, 2010/09/24
- [pre-lexer 13/21] command: Remove superfluous trailing spaces from command names., Ben Pfaff, 2010/09/24
- [pre-lexer 19/21] data-in: Rewrite logic for recoding input, and get rid of src_enc member., Ben Pfaff, 2010/09/24
- [pre-lexer 03/21] i18n: New function recode_substring_pool()., Ben Pfaff <=
- [pre-lexer 11/21] lexer: Use lex_is_string() more consistently., Ben Pfaff, 2010/09/24
- [pre-lexer 04/21] syntax-string-source: Fix format string problems., Ben Pfaff, 2010/09/24
- [pre-lexer 08/21] Make translation easier., Ben Pfaff, 2010/09/24
- [pre-lexer 14/21] command: Add specific DATASET unimplemented commands., Ben Pfaff, 2010/09/24
- [pre-lexer 12/21] command: Remove INSERT from list of unimplemented commands., Ben Pfaff, 2010/09/24
- [pre-lexer 20/21] data-in: Make data_in() parameters more uniform., Ben Pfaff, 2010/09/24
- [pre-lexer 02/21] i18n: Use UTF8 macro instead of "UTF8" literal string., Ben Pfaff, 2010/09/24
- [pre-lexer 09/21] lexer: Improve translatability of lex_error()., Ben Pfaff, 2010/09/24
- [pre-lexer 15/21] message: Consistently initialize locator; use 0 for "no line number"., Ben Pfaff, 2010/09/24
- [pre-lexer 06/21] AGGREGATE: Simplify code., Ben Pfaff, 2010/09/24