[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[7381] parsetexi more on input encoding
From: gavinsmith0123
Subject: [7381] parsetexi more on input encoding
Date: Mon, 19 Sep 2016 12:56:58 +0000 (UTC)
Revision: 7381
http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=7381
Author: gavin
Date: 2016-09-19 12:56:57 +0000 (Mon, 19 Sep 2016)
Log Message:
-----------
parsetexi more on input encoding
Modified Paths:
--------------
trunk/tp/parsetexi/end_line.c
Modified: trunk/tp/parsetexi/end_line.c
===================================================================
--- trunk/tp/parsetexi/end_line.c 2016-09-19 11:31:32 UTC (rev 7380)
+++ trunk/tp/parsetexi/end_line.c 2016-09-19 12:56:57 UTC (rev 7381)
@@ -128,7 +128,6 @@
store_value (args->contents.list[0]->text.text,
args->contents.list[1]->text.text);
- /* TODO - unless ignore_global_commands is on */
break;
set_no_name:
@@ -543,7 +542,6 @@
if (current_to != from_index)
{
- /* TODO: unless "ignore_global_commands" */
from_index->merged_in = current_to;
ADD_ARG(from);
ADD_ARG(to);
@@ -1432,15 +1430,22 @@
}
else if (current->cmd == CM_documentencoding) // 3190
{
- int i; char *p;
- // TODO: ignore_global_commands
+ int i; char *p, *text2;
+ char *texinfo_encoding, *perl_encoding, *input_encoding;
/* See tp/Texinfo/Encoding.pm (whole file) */
- text = strdup (text);
- for (p = text; *p; p++)
+ /* Three concepts of encoding:
+ texinfo_encoding -- one of the encodings supported as an
+ argument to @documentencoding, documented
+ in Texinfo manual
+ perl_encoding -- used for charset conversion within Perl
+ input_encoding -- for output within an HTML file */
+
+ text2 = strdup (text);
+ for (p = text2; *p; p++)
*p = tolower (*p);
- add_extra_string (current, "input_encoding_name", text); // 3199
+ /* Get texinfo_encoding from what was in the document */
{
static char *canonical_encodings[] = {
"us-ascii", "utf-8", "iso-8859-1",
@@ -1448,41 +1453,101 @@
0
};
+ texinfo_encoding = 0;
for (i = 0; (canonical_encodings[i]); i++)
{
- if (!strcasecmp (text, canonical_encodings[i]))
- break;
+ if (!strcmp (text2, canonical_encodings[i]))
+ {
+ texinfo_encoding = canonical_encodings[i];
+ break;
+ }
}
- if (!(canonical_encodings[i]))
+ if (!texinfo_encoding)
{
command_warn (current, "encoding `%s' is not a "
- "canonical texinfo encoding");
+ "canonical texinfo encoding", text);
}
}
- {
- struct encoding_map {
- char *from; char *to;
- };
- static struct encoding_map map[] = {
- "utf-8", "utf-8-strict"
- };
- char *perl_encoding = text;
- for (i = 0; i < sizeof map / sizeof *map; i++)
+ /* Get perl_encoding. */
+ perl_encoding = 0;
+ if (texinfo_encoding)
{
- if (!strcasecmp (text, map[i].from))
+ struct encoding_map {
+ char *from; char *to;
+ };
+ static struct encoding_map map[] = {
+ "utf-8", "utf-8-strict"
+ };
+ perl_encoding = texinfo_encoding;
+ for (i = 0; i < sizeof map / sizeof *map; i++)
{
- perl_encoding = map[i].to;
- break;
+ if (!strcmp (perl_encoding, map[i].from))
+ {
+ perl_encoding = map[i].to;
+ break;
+ }
}
}
- add_extra_string (current, "input_perl_encoding",
- perl_encoding);
- }
+ if (perl_encoding)
+ {
+ add_extra_string (current, "input_perl_encoding",
+ perl_encoding);
+ }
+ else
+ {
+ command_warn (current, "unrecognized encoding name `%s'",
+ text);
+ /* Texinfo::Encoding calls Encode::Alias, so knows
+ about more encodings than what we know about here.
+ TODO: Check when perl_encoding could be defined when
+ texinfo_encoding isn't.
+ Maybe we should check if an iconv conversion is possible
+ from this encoding to UTF-8. */
+ }
- global_info.input_encoding_name = text; // 3210
+ /* Set input_encoding from perl_encoding */
+ input_encoding = 0;
+ if (perl_encoding)
+ {
+ struct encoding_map {
+ char *from; char *to;
+ };
+ static struct encoding_map map[] = {
+ "utf8", "utf-8",
+ "utf-8-strict","utf-8",
+ "ascii", "us-ascii",
+ "shiftjis", "shift_jis",
+ "latin-1", "iso-8859-1",
+ "iso-8859-1", "iso8859_1",
+ "iso-8859-2", "iso8859_2",
+ "iso-8859-15", "iso8859_15",
+ "koi8-r", "koi8",
+ "koi8-u", "koi8",
+ };
+ input_encoding = perl_encoding;
+ for (i = 0; i < sizeof map / sizeof *map; i++)
+ {
+ /* Elements in first column map to elements in
+ second column. Elements in second column map
+ to themselves. */
+ if (!strcasecmp (input_encoding, map[i].from)
+ || !strcasecmp (input_encoding, map[i].to))
+ {
+ input_encoding = map[i].to;
+ break;
+ }
+ }
+ }
+ if (input_encoding)
+ {
+ add_extra_string (current, "input_encoding_name",
+ input_encoding);
+ global_info.input_encoding_name = text; // 3210
+ }
+
// TODO: Need to convert input in input.c from this encoding.
// (INPUT_PERL_ENCODING in Perl version)
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [7381] parsetexi more on input encoding, gavinsmith0123 <=