bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/3] quotearg: fall back to Unicode single quotes in UTF-8 and GB-18030 locales


From: bonzini
Subject: [PATCH 2/3] quotearg: fall back to Unicode single quotes in UTF-8 and GB-18030 locales
Date: Tue, 20 Dec 2011 09:57:56 +0100

From: Paolo Bonzini <address@hidden>

Most programs do not have translation catalogs for English, much less
separate catalogs for British and American English.  Drop the suggestion
to translators about these two, and provide it automatically for Unicode
locales.  Like most programs, even those using American English, we use
single quotation marks.  This conflicts with the American typographic
convention, but works better when you cite the entire error message
within double quotes.  It also tries not to clash with established
practice and with what non-gnulib programs will usually do.

* lib/quotearg.c (gettext_quote): Hard-code U+2018 and U+2019 when using
a UTF-8 or GB-18030 locale.  The list of other locales with quotes was
provided by Bruno Haible.
(quotearg_buffer_restyled): Adjust instructions to translators.
* lib/quotearg.h (locale_quoting_style): Do not put an example in the
text, since this would be wrong when using Unicode.
* modules/quotearg: Depend on c-strcaseeq.
---
 lib/quotearg.c   |   64 ++++++++++++++++++++++++++++++++++++++++++-----------
 lib/quotearg.h   |    6 +++-
 modules/quotearg |    2 +
 3 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/lib/quotearg.c b/lib/quotearg.c
index fdcb8da..6d80d22 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -29,6 +29,8 @@
 #include "quotearg.h"
 
 #include "xalloc.h"
+#include "c-strcaseeq.h"
+#include "localcharset.h"
 
 #include <ctype.h>
 #include <errno.h>
@@ -183,13 +185,45 @@ quoting_options_from_style (enum quoting_style style)
 }
 
 /* MSGID approximates a quotation mark.  Return its translation if it
-   has one; otherwise, return either it or "\"", depending on S.  */
+   has one; otherwise, return either it or "\"", depending on S.
+
+   S is either clocale_quoting_style or locale_quoting_style.  */
 static char const *
 gettext_quote (char const *msgid, enum quoting_style s)
 {
   char const *translation = _(msgid);
-  if (translation == msgid && s == clocale_quoting_style)
-    translation = "\"";
+  char const *locale_code;
+
+  if (translation != msgid)
+    return translation;
+
+  /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019.
+     Here is a list of other locales that include U+2018 and U+2019:
+
+        ISO-8859-7   0xA1                 KOI8-T       0x91
+        CP869        0x8B                 CP874        0x91
+        CP932        0x81 0x65            CP936        0xA1 0xAE
+        CP949        0xA1 0xAE            CP950        0xA1 0xA5
+        CP1250       0x91                 CP1251       0x91
+        CP1252       0x91                 CP1253       0x91
+        CP1254       0x91                 CP1255       0x91
+        CP1256       0x91                 CP1257       0x91
+        EUC-JP       0xA1 0xC6            EUC-KR       0xA1 0xAE
+        EUC-TW       0xA1 0xE4            BIG5         0xA1 0xA5
+        BIG5-HKSCS   0xA1 0xA5            EUC-CN       0xA1 0xAE
+        GBK          0xA1 0xAE            Georgian-PS  0x91
+        PT154        0x91
+
+     None of these is still in wide use; using iconv is overkill.  */
+  locale_code = locale_charset ();
+  if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0))
+    return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99";
+  if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0))
+    return msgid[0] == '`' ? "\xa1\xae": "\xa1\xaf";
+
+  if (s == clocale_quoting_style)
+    return "\"";
+
   return translation;
 }
 
@@ -258,19 +292,21 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
           {
             /* TRANSLATORS:
                Get translations for open and closing quotation marks.
-
                The message catalog should translate "`" to a left
                quotation mark suitable for the locale, and similarly for
-               "'".  If the catalog has no translation,
-               locale_quoting_style quotes `like this', and
-               clocale_quoting_style quotes "like this".
-
-               For example, an American English Unicode locale should
-               translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
-               should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
-               MARK).  A British English Unicode locale should instead
-               translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
-               and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
+               "'".  For example, a French Unicode locale should translate
+               these to U+00AB (LEFT-POINTING DOUBLE ANGLE
+               QUOTATION MARK), and U+00BB (RIGHT-POINTING DOUBLE ANGLE
+               QUOTATION MARK), respectively.
+
+               If the catalog has no translation, we will try to
+               use Unicode U+2018 (LEFT SINGLE QUOTATION MARK) and
+               Unicode U+2019 (RIGHT SINGLE QUOTATION MARK).  If the
+               current locale is not Unicode, locale_quoting_style
+               will quote `like this', and clocale_quoting_style will
+               quote "like this".  You should always include translations
+               for "`" and "'" even if U+2018 and U+2019 are appropriate
+               for your locale.
 
                If you don't know what to put here, please see
                
<http://en.wikipedia.org/wiki/Quotation_marks_in_other_languages>
diff --git a/lib/quotearg.h b/lib/quotearg.h
index 2756d76..fff085a 100644
--- a/lib/quotearg.h
+++ b/lib/quotearg.h
@@ -112,8 +112,10 @@ enum quoting_style
     */
     escape_quoting_style,
 
-    /* Like clocale_quoting_style, but quote `like this' instead of
-       "like this" in the default C locale (ls --quoting-style=locale).
+    /* Like clocale_quoting_style, but use single quotes in the
+       default C locale or if the program does not use gettext
+       (ls --quoting-style=locale).  For UTF-8 and GB-18030 locales,
+       quote characters will use Unicode.
 
        LC_MESSAGES=C
        quotearg_buffer:
diff --git a/modules/quotearg b/modules/quotearg
index b3f1ad1..176ff70 100644
--- a/modules/quotearg
+++ b/modules/quotearg
@@ -9,12 +9,14 @@ m4/mbrtowc.m4
 m4/quotearg.m4
 
 Depends-on:
+c-strcaseeq
 extensions
 gettext-h
 mbrtowc
 mbsinit
 memcmp
 quotearg-simple
+localcharset
 stdbool
 wchar
 wctype-h
-- 
1.7.7.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]