[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Lynx-dev] HTML5 meta charset attribute
From: |
KIHARA Hideto |
Subject: |
[Lynx-dev] HTML5 meta charset attribute |
Date: |
Thu, 29 Sep 2011 19:58:10 +0900 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
Please support the meta charset attribute of HTML5.
http://www.w3.org/TR/html5/semantics.html#the-meta-element
Because Lynx does not recognize the charset attribute on the meta element,
the following site, which contains <meta charset="UTF-8">, is not shown correctly.
http://vim-jp.org
(The information page shown by the '=' key reports "Charset: euc-jp";
the "Display character set" option is set to "Japanese (EUC-JP)".)
Screen captures:
Lynx 2.8.8dev.9:
http://www1.interq.or.jp/~deton/lynx/meta-charset-NG.png
Lynx 2.8.8dev.9 with patch:
http://www1.interq.or.jp/~deton/lynx/meta-charset-OK.png
Lynx.trace log:
SGML: Unknown attribute charset for tag META
SGML: Attribute value UTF-8 ***ignored
SGML: Start <META>
LYHandleMETA: HTTP-EQUIV="(null)" NAME="(null)" CONTENT="(null)"
Here is a patch to support the meta charset attribute of HTML5.
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/hdr_HTMLDTD.h
./WWW/Library/Implementation/hdr_HTMLDTD.h
--- ../lynx2-8-8.orig/WWW/Library/Implementation/hdr_HTMLDTD.h 2011-06-13
09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/hdr_HTMLDTD.h 2011-09-27 20:53:44.000000000
+0900
@@ -670,11 +670,12 @@ extern "C" {
#define HTML_MATH_TITLE 7
#define HTML_MATH_ATTRIBUTES 8
-#define HTML_META_CONTENT 0
-#define HTML_META_HTTP_EQUIV 1
-#define HTML_META_NAME 2
-#define HTML_META_SCHEME 3
-#define HTML_META_ATTRIBUTES 4
+#define HTML_META_CHARSET 0
+#define HTML_META_CONTENT 1
+#define HTML_META_HTTP_EQUIV 2
+#define HTML_META_NAME 3
+#define HTML_META_SCHEME 4
+#define HTML_META_ATTRIBUTES 5
#define HTML_NEXTID_N 0
#define HTML_NEXTID_ATTRIBUTES 1
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.h
./WWW/Library/Implementation/src0_HTMLDTD.h
--- ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.h 2011-06-13
09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/src0_HTMLDTD.h 2011-09-27 20:53:44.000000000
+0900
@@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] =
};
static const attr META_attr_list[] = {
+ { "CHARSET" T(N) },
{ "CONTENT" T(N) },
{ "HTTP-EQUIV" T(N) },
{ "NAME" T(N) },
@@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = {
};
static const attr META_attr[] = { /* META attributes */
+ { "CHARSET" T(N) },
{ "CONTENT" T(N) },
{ "HTTP-EQUIV" T(N) },
{ "NAME" T(N) },
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.txt
./WWW/Library/Implementation/src0_HTMLDTD.txt
--- ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.txt
2011-06-13 09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/src0_HTMLDTD.txt 2011-09-27
20:10:29.000000000 +0900
@@ -336,11 +336,12 @@
0:0:BOX
1:0:CLEAR
40:META
- 4 attributes:
- 0:0:CONTENT
- 1:0:HTTP-EQUIV
- 2:0:NAME
- 3:0:SCHEME
+ 5 attributes:
+ 0:0:CHARSET
+ 1:0:CONTENT
+ 2:0:HTTP-EQUIV
+ 3:0:NAME
+ 4:0:SCHEME
41:NEXTID
1 attributes:
0:0:N
@@ -2509,11 +2510,12 @@
flags:
75:META
justify
- 4 attributes:
- 0:0:CONTENT
- 1:0:HTTP-EQUIV
- 2:0:NAME
- 3:0:SCHEME
+ 5 attributes:
+ 0:0:CHARSET
+ 1:0:CONTENT
+ 2:0:HTTP-EQUIV
+ 3:0:NAME
+ 4:0:SCHEME
1 attr_types
META
contents: SGML_EMPTY
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.h
./WWW/Library/Implementation/src1_HTMLDTD.h
--- ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.h 2011-06-13
09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/src1_HTMLDTD.h 2011-09-27 20:53:44.000000000
+0900
@@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] =
};
static const attr META_attr_list[] = {
+ { "CHARSET" T(N) },
{ "CONTENT" T(N) },
{ "HTTP-EQUIV" T(N) },
{ "NAME" T(N) },
@@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = {
};
static const attr META_attr[] = { /* META attributes */
+ { "CHARSET" T(N) },
{ "CONTENT" T(N) },
{ "HTTP-EQUIV" T(N) },
{ "NAME" T(N) },
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.txt
./WWW/Library/Implementation/src1_HTMLDTD.txt
--- ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.txt
2011-06-13 09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/src1_HTMLDTD.txt 2011-09-27
20:11:33.000000000 +0900
@@ -336,11 +336,12 @@
0:0:BOX
1:0:CLEAR
40:META
- 4 attributes:
- 0:0:CONTENT
- 1:0:HTTP-EQUIV
- 2:0:NAME
- 3:0:SCHEME
+ 5 attributes:
+ 0:0:CHARSET
+ 1:0:CONTENT
+ 2:0:HTTP-EQUIV
+ 3:0:NAME
+ 4:0:SCHEME
41:NEXTID
1 attributes:
0:0:N
@@ -2509,11 +2510,12 @@
flags:
75:META
justify
- 4 attributes:
- 0:0:CONTENT
- 1:0:HTTP-EQUIV
- 2:0:NAME
- 3:0:SCHEME
+ 5 attributes:
+ 0:0:CHARSET
+ 1:0:CONTENT
+ 2:0:HTTP-EQUIV
+ 3:0:NAME
+ 4:0:SCHEME
1 attr_types
META
contents: SGML_EMPTY
diff -urp ../lynx2-8-8.orig/src/LYCharUtils.c ./src/LYCharUtils.c
--- ../lynx2-8-8.orig/src/LYCharUtils.c 2011-06-13 09:18:54.000000000 +0900
+++ ./src/LYCharUtils.c 2011-09-29 07:21:32.000000000 +0900
@@ -2029,7 +2029,7 @@ void LYHandleMETA(HTStructured * me, con
const char **value,
char **include GCC_UNUSED)
{
- char *http_equiv = NULL, *name = NULL, *content = NULL;
+ char *http_equiv = NULL, *name = NULL, *content = NULL, *charset = NULL;
char *href = NULL, *id_string = NULL, *temp = NULL;
char *cp, *cp0, *cp1 = NULL;
int url_type = 0;
@@ -2079,141 +2079,49 @@ void LYHandleMETA(HTStructured * me, con
FREE(content);
}
}
+ if (present[HTML_META_CHARSET] &&
+ non_empty(value[HTML_META_CHARSET])) {
+ StrAllocCopy(charset, value[HTML_META_CHARSET]);
+ convert_to_spaces(charset, TRUE);
+ LYUCTranslateHTMLString(&charset, me->tag_charset, me->tag_charset,
+ NO, NO, YES, st_other);
+ if (*charset == '\0') {
+ FREE(charset);
+ }
+ }
CTRACE((tfp,
- "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\"\n",
+ "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\"
CHARSET=\"%s\"\n",
NONNULL(http_equiv),
NONNULL(name),
- NONNULL(content)));
+ NONNULL(content),
+ NONNULL(charset)));
/*
- * Make sure we have META name/value pairs to handle. - FM
+ * Check for a text/html Content-Type with a charset directive, if we
+ * didn't already set the charset via a server's header. - AAC & FM
*/
- if (!(http_equiv || name) || !content)
- goto free_META_copies;
-
- /*
- * Check for a no-cache Pragma
- * or Cache-Control directive. - FM
- */
- if (!strcasecomp(NonNull(http_equiv), "Pragma") ||
- !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
- LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
- NO, NO, YES, st_other);
- if (!strcasecomp(content, "no-cache")) {
- me->node_anchor->no_cache = TRUE;
- HText_setNoCache(me->text);
- }
-
- /*
- * If we didn't get a Cache-Control MIME header, and the META has one,
- * convert to lowercase, store it in the anchor element, and if we
- * haven't yet set no_cache, check whether we should. - FM
- */
- if ((!me->node_anchor->cache_control) &&
- !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
- LYLowerCase(content);
- StrAllocCopy(me->node_anchor->cache_control, content);
- if (me->node_anchor->no_cache == FALSE) {
- cp0 = content;
- while ((cp = strstr(cp0, "no-cache")) != NULL) {
- cp += 8;
- while (*cp != '\0' && WHITE(*cp))
- cp++;
- if (*cp == '\0' || *cp == ';') {
- me->node_anchor->no_cache = TRUE;
- HText_setNoCache(me->text);
- break;
- }
- cp0 = cp;
- }
- if (me->node_anchor->no_cache == TRUE)
- goto free_META_copies;
- cp0 = content;
- while ((cp = strstr(cp0, "max-age")) != NULL) {
- cp += 7;
- while (*cp != '\0' && WHITE(*cp))
- cp++;
- if (*cp == '=') {
- cp++;
- while (*cp != '\0' && WHITE(*cp))
- cp++;
- if (isdigit(UCH(*cp))) {
- cp0 = cp;
- while (isdigit(UCH(*cp)))
- cp++;
- if (*cp0 == '0' && cp == (cp0 + 1)) {
- me->node_anchor->no_cache = TRUE;
- HText_setNoCache(me->text);
- break;
- }
- }
- }
- cp0 = cp;
- }
- }
- }
-
- /*
- * Check for an Expires directive. - FM
- */
- } else if (!strcasecomp(NonNull(http_equiv), "Expires")) {
- /*
- * If we didn't get an Expires MIME header, store it in the anchor
- * element, and if we haven't yet set no_cache, check whether we
- * should. Note that we don't accept a Date header via META tags,
- * because it's likely to be untrustworthy, but do check for a Date
- * header from a server when making the comparison. - FM
- */
- LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
- NO, NO, YES, st_other);
- StrAllocCopy(me->node_anchor->expires, content);
- if (me->node_anchor->no_cache == FALSE) {
- if (!strcmp(content, "0")) {
- /*
- * The value is zero, which we treat as an absolute no-cache
- * directive. - FM
- */
- me->node_anchor->no_cache = TRUE;
- HText_setNoCache(me->text);
- } else if (me->node_anchor->date != NULL) {
- /*
- * We have a Date header, so check if the value is less than or
- * equal to that. - FM
- */
- if (LYmktime(content, TRUE) <=
- LYmktime(me->node_anchor->date, TRUE)) {
- me->node_anchor->no_cache = TRUE;
- HText_setNoCache(me->text);
- }
- } else if (LYmktime(content, FALSE) == 0) {
- /*
- * We don't have a Date header, and the value is in past for
- * us. - FM
- */
- me->node_anchor->no_cache = TRUE;
- HText_setNoCache(me->text);
- }
- }
-
- /*
- * Check for a text/html Content-Type with a charset directive, if we
- * didn't already set the charset via a server's header. - AAC & FM
- */
- } else if (isEmpty(me->node_anchor->charset) &&
- !strcasecomp(NonNull(http_equiv), "Content-Type")) {
+ if (isEmpty(me->node_anchor->charset) &&
+ (charset ||
+ !strcasecomp(NonNull(http_equiv), "Content-Type") && content)) {
LYUCcharset *p_in = NULL;
LYUCcharset *p_out = NULL;
- LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
- NO, NO, YES, st_other);
- LYLowerCase(content);
+ if (charset) {
+ LYLowerCase(charset);
+ } else {
+ LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+ NO, NO, YES, st_other);
+ LYLowerCase(content);
+ }
- if ((cp1 = strstr(content, "charset")) != NULL) {
+ if ((cp1 = charset) != NULL ||
+ (cp1 = strstr(content, "charset")) != NULL) {
BOOL chartrans_ok = NO;
char *cp3 = NULL, *cp4;
int chndl;
- cp1 += 7;
+ if (!charset)
+ cp1 += 7;
while (*cp1 == ' ' || *cp1 == '=' || *cp1 == '"')
cp1++;
@@ -2378,6 +2286,117 @@ void LYHandleMETA(HTStructured * me, con
* Set the kcode element based on the charset. - FM
*/
HText_setKcode(me->text, me->node_anchor->charset, p_in);
+ }
+
+ /*
+ * Make sure we have META name/value pairs to handle. - FM
+ */
+ if (!(http_equiv || name) || !content)
+ goto free_META_copies;
+
+ /*
+ * Check for a no-cache Pragma
+ * or Cache-Control directive. - FM
+ */
+ if (!strcasecomp(NonNull(http_equiv), "Pragma") ||
+ !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
+ LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+ NO, NO, YES, st_other);
+ if (!strcasecomp(content, "no-cache")) {
+ me->node_anchor->no_cache = TRUE;
+ HText_setNoCache(me->text);
+ }
+
+ /*
+ * If we didn't get a Cache-Control MIME header, and the META has one,
+ * convert to lowercase, store it in the anchor element, and if we
+ * haven't yet set no_cache, check whether we should. - FM
+ */
+ if ((!me->node_anchor->cache_control) &&
+ !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
+ LYLowerCase(content);
+ StrAllocCopy(me->node_anchor->cache_control, content);
+ if (me->node_anchor->no_cache == FALSE) {
+ cp0 = content;
+ while ((cp = strstr(cp0, "no-cache")) != NULL) {
+ cp += 8;
+ while (*cp != '\0' && WHITE(*cp))
+ cp++;
+ if (*cp == '\0' || *cp == ';') {
+ me->node_anchor->no_cache = TRUE;
+ HText_setNoCache(me->text);
+ break;
+ }
+ cp0 = cp;
+ }
+ if (me->node_anchor->no_cache == TRUE)
+ goto free_META_copies;
+ cp0 = content;
+ while ((cp = strstr(cp0, "max-age")) != NULL) {
+ cp += 7;
+ while (*cp != '\0' && WHITE(*cp))
+ cp++;
+ if (*cp == '=') {
+ cp++;
+ while (*cp != '\0' && WHITE(*cp))
+ cp++;
+ if (isdigit(UCH(*cp))) {
+ cp0 = cp;
+ while (isdigit(UCH(*cp)))
+ cp++;
+ if (*cp0 == '0' && cp == (cp0 + 1)) {
+ me->node_anchor->no_cache = TRUE;
+ HText_setNoCache(me->text);
+ break;
+ }
+ }
+ }
+ cp0 = cp;
+ }
+ }
+ }
+
+ /*
+ * Check for an Expires directive. - FM
+ */
+ } else if (!strcasecomp(NonNull(http_equiv), "Expires")) {
+ /*
+ * If we didn't get an Expires MIME header, store it in the anchor
+ * element, and if we haven't yet set no_cache, check whether we
+ * should. Note that we don't accept a Date header via META tags,
+ * because it's likely to be untrustworthy, but do check for a Date
+ * header from a server when making the comparison. - FM
+ */
+ LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+ NO, NO, YES, st_other);
+ StrAllocCopy(me->node_anchor->expires, content);
+ if (me->node_anchor->no_cache == FALSE) {
+ if (!strcmp(content, "0")) {
+ /*
+ * The value is zero, which we treat as an absolute no-cache
+ * directive. - FM
+ */
+ me->node_anchor->no_cache = TRUE;
+ HText_setNoCache(me->text);
+ } else if (me->node_anchor->date != NULL) {
+ /*
+ * We have a Date header, so check if the value is less than or
+ * equal to that. - FM
+ */
+ if (LYmktime(content, TRUE) <=
+ LYmktime(me->node_anchor->date, TRUE)) {
+ me->node_anchor->no_cache = TRUE;
+ HText_setNoCache(me->text);
+ }
+ } else if (LYmktime(content, FALSE) == 0) {
+ /*
+ * We don't have a Date header, and the value is in past for
+ * us. - FM
+ */
+ me->node_anchor->no_cache = TRUE;
+ HText_setNoCache(me->text);
+ }
+ }
/*
* Check for a Refresh directive. - FM
@@ -2566,6 +2585,7 @@ void LYHandleMETA(HTStructured * me, con
FREE(http_equiv);
FREE(name);
FREE(content);
+ FREE(charset);
}
/*
- [Lynx-dev] HTML5 meta charset attribute,
KIHARA Hideto <=