[7532] parsetexi more on input encoding

texinfo-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[7532] parsetexi more on input encoding

From:	gavinsmith0123
Subject:	[7532] parsetexi more on input encoding
Date:	Tue, 22 Nov 2016 11:55:43 +0000 (UTC)
Revision: 7532
          http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=7532
Author:   gavin
Date:     2016-11-22 11:55:42 +0000 (Tue, 22 Nov 2016)
Log Message:
-----------
parsetexi more on input encoding

Modified Paths:
--------------
    trunk/tp/parsetexi/api.c
    trunk/tp/parsetexi/end_line.c
    trunk/tp/parsetexi/input.c

Modified: trunk/tp/parsetexi/api.c
===================================================================
--- trunk/tp/parsetexi/api.c    2016-11-22 07:41:01 UTC (rev 7531)
+++ trunk/tp/parsetexi/api.c    2016-11-22 11:55:42 UTC (rev 7532)
@@ -68,7 +68,7 @@
 void
 parse_file (char *filename)
 {
-  debug_output = 1;
+  debug_output = 0;
   parse_texi_file (filename);
 }
 

Modified: trunk/tp/parsetexi/end_line.c
===================================================================
--- trunk/tp/parsetexi/end_line.c       2016-11-22 07:41:01 UTC (rev 7531)
+++ trunk/tp/parsetexi/end_line.c       2016-11-22 11:55:42 UTC (rev 7532)
@@ -1572,15 +1572,34 @@
               /* Get perl_encoding. */
               perl_encoding = 0;
               if (texinfo_encoding)
+                perl_encoding = texinfo_encoding;
+              else
                 {
+                  int i;
+                  static char *known_encodings[] = {
+                      "shift_jis",
+                      0
+                  };
+                  for (i = 0; (known_encodings[i]); i++)
+                    {
+                      if (!strcmp (text2, known_encodings[i]))
+                        {
+                          perl_encoding = known_encodings[i];
+                          break;
+                        }
+                    }
+                }
+
+              if (perl_encoding)
+                {
                   struct encoding_map {
                       char *from; char *to;
                   };
                   static struct encoding_map map[] = {
                       "utf-8", "utf-8-strict",
-                      "us-ascii", "ascii"
+                      "us-ascii", "ascii",
+                      "shift_jis",   "shiftjis"
                   };
-                  perl_encoding = texinfo_encoding;
                   for (i = 0; i < sizeof map / sizeof *map; i++)
                     {
                       if (!strcmp (perl_encoding, map[i].from))
@@ -1589,9 +1608,6 @@
                           break;
                         }
                     }
-                }
-              if (perl_encoding)
-                {
                   add_extra_string (current, "input_perl_encoding",
                                     perl_encoding);
                 }
@@ -1621,9 +1637,9 @@
                       "ascii",       "us-ascii",
                       "shiftjis",    "shift_jis",
                       "latin-1",     "iso-8859-1",
-                      "iso-8859-1",  "iso8859_1",
-                      "iso-8859-2",  "iso8859_2",
-                      "iso-8859-15", "iso8859_15",
+                      "iso-8859-1",  "iso-8859-1",
+                      "iso-8859-2",  "iso-8859-2",
+                      "iso-8859-15", "iso-8859-15",
                       "koi8-r",      "koi8",
                       "koi8-u",      "koi8",
                   };
@@ -1647,10 +1663,8 @@
                                     input_encoding);
 
                   global_info.input_encoding_name = text; // 3210
+                  set_input_encoding (input_encoding);
                 }
-
-              // TODO: Need to convert input in input.c from this encoding.
-              // (INPUT_PERL_ENCODING in Perl version)
             }
           else if (current->cmd == CM_documentlanguage) // 3223
             {

Modified: trunk/tp/parsetexi/input.c
===================================================================
--- trunk/tp/parsetexi/input.c  2016-11-22 07:41:01 UTC (rev 7531)
+++ trunk/tp/parsetexi/input.c  2016-11-22 11:55:42 UTC (rev 7532)
@@ -31,14 +31,16 @@
 
 enum character_encoding {
     ce_latin1,
-    ce_utf8
+    ce_latin2,
+    ce_utf8,
+    ce_shiftjis
 };
 
 typedef struct {
     enum input_type type;
 
     FILE *file;
-    enum character_encoding input_encoding;
+    char *input_encoding;
     LINE_NR line_nr;
 
     char *text;  /* Input text to be parsed as Texinfo. */
@@ -92,6 +94,8 @@
 #define ICONV_CONST
 
 static iconv_t iconv_from_latin1 = (iconv_t) 0;
+static iconv_t iconv_from_latin2;
+static iconv_t iconv_from_shiftjis;
 
 /* Run iconv using text buffer as output buffer. */
 size_t
@@ -121,12 +125,13 @@
 
 /* Return conversion of S according to ENC.  This function frees S. */
 static char *
-convert_to_utf8 (char *s, enum character_encoding enc)
+convert_to_utf8 (char *s, char *input_encoding)
 {
   iconv_t our_iconv;
   static TEXT t;
   char *inptr; size_t bytes_left;
   size_t iconv_ret;
+  enum character_encoding enc;
 
   /* Convert from @documentencoding to UTF-8.
        It might be possible not to convert to UTF-8 and use an 8-bit encoding
@@ -152,15 +157,45 @@
           return s;
         }
     }
+  if (iconv_from_latin2 == (iconv_t) 0)
+    {
+      /* Initialize the conversion for the first time. */
+      iconv_from_latin2 = iconv_open ("UTF-8", "ISO-8859-2");
+      if (iconv_from_latin2 == (iconv_t) -1)
+        iconv_from_latin2 = iconv_from_latin1;
+    }
+  if (iconv_from_shiftjis == (iconv_t) 0)
+    {
+      /* Initialize the conversion for the first time. */
+      iconv_from_shiftjis = iconv_open ("UTF-8", "SHIFT-JIS");
+      if (iconv_from_shiftjis == (iconv_t) -1)
+        iconv_from_shiftjis = iconv_from_latin1;
+    }
 
+  enc = ce_latin1;
+  if (!input_encoding)
+    ;
+  else if (!strcmp (input_encoding, "utf-8"))
+    enc = ce_utf8;
+  else if (!strcmp (input_encoding, "iso-8859-2"))
+    enc = ce_latin2;
+  else if (!strcmp (input_encoding, "shift_jis"))
+    enc = ce_shiftjis;
+
   switch (enc)
     {
+    case ce_utf8:
+      return s; /* no conversion required. */
+      break;
     case ce_latin1:
       our_iconv = iconv_from_latin1;
       break;
-    case ce_utf8:
-      return s; /* no conversion required. */
+    case ce_latin2:
+      our_iconv = iconv_from_latin2;
       break;
+    case ce_shiftjis:
+      our_iconv = iconv_from_shiftjis;
+      break;
     }
 
   t.end = 0;
@@ -279,7 +314,7 @@
               i->line_nr.line_nr++;
               line_nr = i->line_nr;
 
-              return convert_to_utf8 (line, 0); // i->input_encoding);
+              return convert_to_utf8 (line, i->input_encoding);
             }
           free (line); line = 0;
           break;
@@ -319,6 +354,7 @@
   input_stack[input_number].file = 0;
   input_stack[input_number].text = text;
   input_stack[input_number].ptext = text;
+  input_stack[input_number].input_encoding = 0;
 
   if (!macro)
     line_number--;
@@ -354,6 +390,19 @@
   /* TODO: free the memory */
 }
 
+void
+set_input_encoding (char *encoding)
+{
+  int i;
+
+  /* Set encoding of top file in stack. */
+  i = input_number - 1;
+  while (i >= 0 && input_stack[i].type != IN_file)
+    i--;
+  if (i >= 0)
+    input_stack[i].input_encoding = encoding;
+}
+
 
 static char **include_dirs;
 static size_t include_dirs_number;
@@ -413,6 +462,7 @@
   input_stack[input_number].line_nr.macro = 0;
   input_stack[input_number].text = 0;
   input_stack[input_number].ptext = 0;
+  input_stack[input_number].input_encoding = 0;
   input_number++;
 
   return;
[Prev in Thread]
Current Thread
[Next in Thread]
[7532] parsetexi more on input encoding, gavinsmith0123 <=
Prev by Date: [7531] ModulePath.pm correct a condition
Next by Date: [7533] parsetexi record more global commands
Previous by thread: [7531] ModulePath.pm correct a condition
Next by thread: [7533] parsetexi record more global commands
Index(es):
- Date
- Thread