[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
lynx-dev cleanup chartrans [patch]
From: |
Leonid Pauzner |
Subject: |
lynx-dev cleanup chartrans [patch] |
Date: |
Thu, 25 Feb 1999 10:45:17 +0300 (MSK) |
An attempt was made to remove the old-style charset declarations
in favour of chartrans-style ones. A few comments were corrected.
Further cleanups are appreciated.
Special note for those using CJK or UTF8:
please test that nothing is broken now
(it should not be a problem, but I have not been able to test it so far).
* Chartrans: old-style declarations of charsets which do not have Unicode
tables (CJK, x-transparent, also UTF-8) have been moved from LYCharSets.c to
UCdomap.h and are now set up by UCInit() in UCdomap.c in the standard way.
diff -u old/lycharse.c ./lycharse.c
--- old/lycharse.c Fri Jan 29 13:24:20 1999
+++ ./lycharse.c Thu Feb 25 09:42:06 1999
@@ -32,17 +32,20 @@
/* will be initialized in HTMLUseCharacterSet */
/*
+ * New character sets now declared with UCInit() in UCdomap.c
+ *
* INSTRUCTIONS for adding new character sets which do not have
- * Unicode tables.
+ * Unicode tables now in UCdomap.h
*
- * Currently we only declare some charset's properties here
- * (such as MIME names, etc.), it does not include real mapping.
*
- * [We hope you need not correct/add old-style mapping
+ * [We hope you need not correct/add old-style mapping below
* as in ISO_LATIN1[] or SevenBitApproximations[] any more -
* it works now via new chartrans mechanism, but kept for compatibility only:
* we should cleanup the stuff, but this is not so easy...]
*
+ * Currently we only declare some charset's properties here
+ * (such as MIME names, etc.), it does not include real mapping.
+ *
* There is a place marked "Add your new character sets HERE" in this file.
* Make up a character set and add it in the same
* style as the ISO_LATIN1 set below, giving it a unique name.
@@ -329,20 +332,6 @@
PUBLIC CONST char ** LYCharSets[MAXCHARSETS]={
ISO_Latin1, /* ISO Latin 1 */
SevenBitApproximations, /* ISO 8859-15 (Latin 9)*/
- SevenBitApproximations, /* DosLatin1 (cp850) */
- SevenBitApproximations, /* WinLatin1 (cp1252) */
- SevenBitApproximations, /* DosLatinUS (cp437) */
- SevenBitApproximations, /* DEC Multinational */
- SevenBitApproximations, /* Macintosh (8 bit) */
- SevenBitApproximations, /* NeXT character set */
- SevenBitApproximations, /* Chinese */
- SevenBitApproximations, /* Japanese (EUC-JP) */
- SevenBitApproximations, /* Japanese (Shift_JIS) */
- SevenBitApproximations, /* Korean */
- SevenBitApproximations, /* Taipei (Big5) */
- SevenBitApproximations, /* Vietnamese (VISCII) */
- SevenBitApproximations, /* 7 Bit Approximations */
- SevenBitApproximations, /* Transparent */
};
/*
@@ -352,20 +341,6 @@
PUBLIC CONST char * LYchar_set_names[MAXCHARSETS + 1]={
"Western (ISO-8859-1)",
"Western (ISO-8859-15)",
- "Western (cp850)",
- "Western (windows-1252)",
- "IBM PC US codepage (cp437)",
- "DEC Multinational",
- "Macintosh (8 bit)",
- "NeXT character set",
- "Chinese",
- "Japanese (EUC-JP)",
- "Japanese (Shift_JIS)",
- "Korean",
- "Taipei (Big5)",
- "Vietnamese (VISCII)",
- "7 bit approximations (US-ASCII)",
- "Transparent",
(char *) 0
};
@@ -394,41 +369,6 @@
* Placeholders for Unicode tables. - FM
*/
{-1,"iso-8859-15", UCT_ENC_8BIT,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"cp850", UCT_ENC_8BIT,0,
- UCT_REP_SUPERSETOF_LAT1,
- 0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"windows-1252", UCT_ENC_8BIT,0,
- UCT_REP_SUPERSETOF_LAT1,
- 0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"cp437", UCT_ENC_8BIT,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"dec-mcs", UCT_ENC_8BIT,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"macintosh", UCT_ENC_8BIT,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"next", UCT_ENC_8BIT,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
-
- /*
- * There is no strict correlation for the next five, since the transfer
- * charset gets decoded into Display Char Set by the CJK code (separate
- * from Unicode mechanism). For now we use the MIME name that describes
- * what is output to the terminal. - KW
- */
- {-1,"euc-cn", UCT_ENC_CJK,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"euc-jp", UCT_ENC_CJK,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"shift_jis", UCT_ENC_CJK,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"euc-kr", UCT_ENC_CJK,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"big5", UCT_ENC_CJK,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
-
- /*
- * Placeholders for Unicode tables. - FM
- */
- {-1,"viscii", UCT_ENC_8BIT_C0,0,0,0, UCT_R_8BIT,UCT_R_ASCII},
- {-1,"us-ascii", UCT_ENC_7BIT,0,
- UCT_REP_SUBSETOF_LAT1,
- UCT_CP_SUBSETOF_LAT1, UCT_R_ASCII,UCT_R_ASCII},
-
- /*
- * Placeholder for non-translation mode. - FM
- */
- {-1,"x-transparent", UCT_ENC_8BIT,0,0,0, UCT_R_8BIT,UCT_R_ASCII}
};
@@ -443,20 +383,6 @@
PUBLIC int LYlowest_eightbit[MAXCHARSETS]={
160, /* ISO Latin 1 */
160, /* ISO 8859-15 (Latin 9)*/
- 128, /* DosLatin1 (cp850) */
- 130, /* WinLatin1 (cp1252) */
- 128, /* DosLatinUS (cp437) */
- 160, /* DEC Multinational */
- 128, /* Macintosh (8 bit) */
- 128, /* NeXT character set */
- 128, /* Chinese */
- 128, /* Japanese (EUC) */
- 128, /* Japanese (SJIS) */
- 128, /* Korean */
- 128, /* Taipei (Big5) */
- 128, /* Vietnamese (VISCII) */
- 999, /* 7 bit approximations */
- 128 /* Transparent (???) */
};
diff -u old/ucaux.c ./ucaux.c
--- old/ucaux.c Mon Jan 18 18:57:58 1999
+++ ./ucaux.c Thu Feb 25 09:57:30 1999
@@ -17,13 +17,9 @@
return NO;
if (!strcmp(LYCharSet_UC[from].MIMEname, "x-transparent"))
return NO;
- if (from == LATIN1) {
- if (LYCharSet_UC[from].codepoints & (UCT_CP_SUPERSETOF_LAT1))
- return YES;
- }
- /* others YES, but check for lost tables to be sure */
- return (LYCharSet_UC[from].UChndl >= 0);
+ /* others YES */
+ return YES;
}
PUBLIC BOOL UCCanTranslateUniTo ARGS1(
@@ -31,6 +27,11 @@
{
if (to < 0)
return NO;
+/*???
+ if (!strcmp(LYCharSet_UC[to].MIMEname, "x-transparent"))
+ return NO;
+*/
+
return YES; /* well at least some characters... */
}
@@ -51,7 +52,7 @@
CONST char * toname = LYCharSet_UC[to].MIMEname;
if (!strcmp(fromname, "x-transparent") ||
!strcmp(toname, "x-transparent")) {
- return YES;
+ return YES; /* ??? */
} else if (!strcmp(fromname, "us-ascii")) {
return YES;
}
@@ -80,7 +81,7 @@
return NO;
}
}
- return (LYCharSet_UC[from].UChndl >= 0);
+ return YES; /* others YES */
}
/*
diff -u old/ucdomap.c ./ucdomap.c
--- old/ucdomap.c Mon Feb 8 18:58:40 1999
+++ ./ucdomap.c Thu Feb 25 10:21:00 1999
@@ -28,7 +28,7 @@
#include <LYLeaks.h>
/*
- * Include tables & parameters.
+ * Include chartrans tables:
*/
#include <cp1250_uni.h> /* WinLatin2 (cp1250) */
#include <cp1251_uni.h> /* WinCyrillic (cp1251) */
@@ -65,7 +65,7 @@
#include <mnem2_suni.h> /* RFC 1345 Mnemonic */
#include <next_uni.h> /* NeXT character set */
#include <rfc_suni.h> /* RFC 1345 w/o Intro */
-#include <utf8_uni.h> /* UNICODE UTF 8 */
+/* #include <utf8_uni.h> */ /* UNICODE UTF 8 */
#include <viscii_uni.h> /* Vietnamese (VISCII) */
#include <iso9945_uni.h> /* Ukrainian Cyrillic (ISO 9945-2) */
#include <cp866u_uni.h> /* Ukrainian Cyrillic (866) */
@@ -490,10 +490,11 @@
}
/*
* The font is always 256 characters - so far.
+ * (fake 0 for built-in charsets like CJK or x-transparent, use .num_n256)
*/
con_clear_unimap();
#endif
- for (i = 0; i < 256; i++) {
+ for (i = 0; i < UCInfo[UC_charset_in_hndl].num_n256; i++) {
if ((j = UCInfo[UC_charset_in_hndl].unicount[i])) {
ptrans[i] = *p;
for (; j; j--) {
@@ -803,10 +804,11 @@
/*
* The font is always 256 characters - so far.
+ * (fake 0 for built-in charsets like CJK or x-transparent, use .num_n256)
*/
con_clear_unimap(0);
- for (i = 0; i < 256; i++) {
+ for (i = 0; i < UCInfo[UC_charset_out_hndl].num_n256; i++) {
for (j = UCInfo[UC_charset_out_hndl].unicount[i]; j; j--) {
con_insert_unipair(*(p++), i, 0);
}
@@ -1964,6 +1966,7 @@
}
UCInfo[s].LYNXname = UC_LYNXcharset;
UCInfo[s].unicount = unicount;
+ UCInfo[s].num_n256 = (unicount == NULL) ? 0 : 256 ; /* hack */
UCInfo[s].unitable = unitable;
UCInfo[s].num_uni = nnuni;
UCInfo[s].replacedesc = replacedesc;
@@ -2005,16 +2008,18 @@
PUBLIC void UCInit NOARGS
{
+
UCreset_allocated_LYCharSets();
atexit(UCcleanup_mem);
UCconsole_map_init();
- UC_CHARSET_SETUP; /* us-ascii */ /* 7 bit approximations */
-
/*
* The order of charset names visible in Lynx Options menu
* correspond to the order of lines below,
- * except for CJK and others described in LYCharSet.c
+ * except the first two described in LYCharSet.c
+ *
+ * Entries those comment marked with *** are declared in UCdomap.h,
+ * others based on the included tables - UCdomap.c, near the top.
*/
UC_CHARSET_SETUP_iso_8859_1; /* ISO Latin 1 */
@@ -2028,7 +2033,16 @@
UC_CHARSET_SETUP_next; /* NeXT character set */
UC_CHARSET_SETUP_hp_roman8; /* HP Roman8 */
+ UC_CHARSET_SETUP_euc_cn; /*** Chinese */
+ UC_CHARSET_SETUP_euc_jp; /*** Japanese (EUC_JP) */
+ UC_CHARSET_SETUP_shift_jis; /*** Japanese (Shift_JIS) */
+ UC_CHARSET_SETUP_euc_kr; /*** Korean */
+ UC_CHARSET_SETUP_big5; /*** Taipei (Big5) */
+
UC_CHARSET_SETUP_viscii; /* Vietnamese (VISCII) */
+ UC_CHARSET_SETUP; /* us-ascii */ /* 7 bit approximations */
+
+ UC_CHARSET_SETUP_x_transparent; /*** Transparent */
UC_CHARSET_SETUP_iso_8859_2; /* ISO Latin 2 */
UC_CHARSET_SETUP_cp852; /* DosLatin2 (cp852) */
@@ -2054,7 +2068,7 @@
UC_CHARSET_SETUP_iso_8859_9; /* ISO 8859-9 (Latin 5) */
UC_CHARSET_SETUP_iso_8859_10; /* ISO 8859-10 */
- UC_CHARSET_SETUP_utf_8; /* UNICODE UTF-8 */
+ UC_CHARSET_SETUP_utf_8; /*** UNICODE UTF-8 */
UC_CHARSET_SETUP_mnemonic_ascii_0; /* RFC 1345 w/o Intro */
UC_CHARSET_SETUP_mnemonic; /* RFC 1345 Mnemonic */
UC_CHARSET_SETUP_iso_9945_2; /* Ukrainian Cyrillic (ISO 9945-2) */
diff -u old/ucdomap.h ./ucdomap.h
--- old/ucdomap.h Thu Dec 3 10:28:20 1998
+++ ./ucdomap.h Thu Feb 25 09:27:46 1999
@@ -41,6 +41,7 @@
CONST char *MIMEname;
CONST char *LYNXname;
CONST u8* unicount;
+ int num_n256; /* 256 for *.tbl, 0 for CJK and x-transparent (hack) */
CONST u16* unitable;
int num_uni;
struct unimapdesc_str replacedesc;
@@ -55,5 +56,52 @@
extern int UCNumCharsets;
extern void UCInit NOARGS;
+
+
+/*
+ * INSTRUCTIONS for adding new character sets which do not have
+ * Unicode tables.
+ *
+ * Several #defines below are declarations for charsets which need no
+ * tables for mapping to Unicode - CJK multibytes, x-transparent, UTF8 -
+ * Lynx care of them internally.
+ *
+ * The declaration's format are kept from chrtrans/*_uni.h -
+ * keep this in mind when changing ucmaketbl.c,
+ * see also UC_Charset_Setup() above for details.
+ */
+
+ /*
+ * There is no strict correlation for the next five, since the transfer
+ * charset gets decoded into Display Char Set by the CJK code (separate
+ * from Unicode mechanism). For now we use the MIME name that describes
+ * what is output to the terminal. - KW
+ */
+#define UC_CHARSET_SETUP_euc_cn UC_Charset_Setup("euc-cn","Chinese",\
+ NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+ 128,UCT_ENC_CJK,0)
+#define UC_CHARSET_SETUP_euc_jp UC_Charset_Setup("euc-jp","Japanese (EUC-JP)",\
+ NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+ 128,UCT_ENC_CJK,0)
+#define UC_CHARSET_SETUP_shift_jis UC_Charset_Setup("shift_jis","Japanese (Shift_JIS)",\
+ NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+ 128,UCT_ENC_CJK,0)
+#define UC_CHARSET_SETUP_euc_kr UC_Charset_Setup("euc-kr","Korean",\
+ NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+ 128,UCT_ENC_CJK,0)
+#define UC_CHARSET_SETUP_big5 UC_Charset_Setup("big5","Taipei (Big5)",\
+ NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+ 128,UCT_ENC_CJK,0)
+ /*
+ * Placeholder for non-translation mode. - FM
+ */
+#define UC_CHARSET_SETUP_x_transparent UC_Charset_Setup("x-transparent","Transparent",\
+ NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+ 128,1,0)
+
+#define UC_CHARSET_SETUP_utf_8 UC_Charset_Setup("utf-8","UNICODE (UTF-8)",\
+ NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+ 128,UCT_ENC_UTF8,0)
+
#endif /* UCDOMAP_H */
diff -u old/def7_uni.tbl ./def7_uni.tbl
--- old/def7_uni.tbl Fri Aug 21 06:30:14 1998
+++ ./def7_uni.tbl Thu Feb 25 00:20:00 1999
@@ -1,9 +1,12 @@
# Default 7bit replacements. If the MIME name is set to us-ascii,
-# this will be identified with the "7 bit approximations" Display
+# this will be identified with the "7 bit approximations (US-ASCII)" Display
# character set.
#The MIME name of this charset.
Mus-ascii
+
+#Name as a Display Charset (used on Options screen)
+O7 bit approximations (US-ASCII)
# Shall this become the "default" translation table? YES!
# There has to be exactly one table marked as "default".
- lynx-dev cleanup chartrans [patch],
Leonid Pauzner <=