lynx-dev cleanup chartrans [patch]

lynx-dev
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
lynx-dev cleanup chartrans [patch]

From:	Leonid Pauzner
Subject:	lynx-dev cleanup chartrans [patch]
Date:	Thu, 25 Feb 1999 10:45:17 +0300 (MSK)
An attempt was made to remove old-style charset declarations
in favour of chartrans-style ones. A few comments were corrected.
Further cleanups are appreciated.

Special note for those using CJK or UTF8:
please test whether it is not broken now
(should not be a problem but I could not test it so far).


* Chartrans: old-style declarations of charsets which do not have Unicode
  tables (CJK, x-transparent, also UTF-8) now moved from LYCharSets.c to
  UCdomap.h and now included with UCInit() in UCdomap.c in a standard way.



diff -u old/lycharse.c ./lycharse.c
--- old/lycharse.c      Fri Jan 29 13:24:20 1999
+++ ./lycharse.c        Thu Feb 25 09:42:06 1999
@@ -32,17 +32,20 @@
                              /* will be initialized in HTMLUseCharacterSet */

 /*
+ *  New character sets now declared with UCInit() in UCdomap.c
+ *
  *  INSTRUCTIONS for adding new character sets which do not have
- *              Unicode tables.
+ *              Unicode tables now in UCdomap.h
  *
- *  Currently we only declare some charset's properties here
- *  (such as MIME names, etc.), it does not include real mapping.
  *
- *  [We hope you need not correct/add old-style mapping
+ *  [We hope you need not correct/add old-style mapping below
  *  as in ISO_LATIN1[] or SevenBitApproximations[] any more -
  *  it works now via new chartrans mechanism, but kept for compatibility only:
  *  we should cleanup the stuff, but this is not so easy...]
  *
+ *  Currently we only declare some charset's properties here
+ *  (such as MIME names, etc.), it does not include real mapping.
+ *
  *  There is a place marked "Add your new character sets HERE" in this file.
  *  Make up a character set and add it in the same
  *  style as the ISO_LATIN1 set below, giving it a unique name.
@@ -329,20 +332,6 @@
 PUBLIC CONST char ** LYCharSets[MAXCHARSETS]={
        ISO_Latin1,             /* ISO Latin 1          */
        SevenBitApproximations, /* ISO 8859-15 (Latin 9)*/
-       SevenBitApproximations, /* DosLatin1 (cp850)    */
-       SevenBitApproximations, /* WinLatin1 (cp1252)   */
-       SevenBitApproximations, /* DosLatinUS (cp437)   */
-       SevenBitApproximations, /* DEC Multinational    */
-       SevenBitApproximations, /* Macintosh (8 bit)    */
-       SevenBitApproximations, /* NeXT character set   */
-       SevenBitApproximations, /* Chinese              */
-       SevenBitApproximations, /* Japanese (EUC-JP)    */
-       SevenBitApproximations, /* Japanese (Shift_JIS) */
-       SevenBitApproximations, /* Korean               */
-       SevenBitApproximations, /* Taipei (Big5)        */
-       SevenBitApproximations, /* Vietnamese (VISCII)  */
-       SevenBitApproximations, /* 7 Bit Approximations */
-       SevenBitApproximations, /* Transparent          */
 };

 /*
@@ -352,20 +341,6 @@
 PUBLIC CONST char * LYchar_set_names[MAXCHARSETS + 1]={
        "Western (ISO-8859-1)",
        "Western (ISO-8859-15)",
-       "Western (cp850)",
-       "Western (windows-1252)",
-       "IBM PC US codepage (cp437)",
-       "DEC Multinational",
-       "Macintosh (8 bit)",
-       "NeXT character set",
-       "Chinese",
-       "Japanese (EUC-JP)",
-       "Japanese (Shift_JIS)",
-       "Korean",
-       "Taipei (Big5)",
-       "Vietnamese (VISCII)",
-       "7 bit approximations (US-ASCII)",
-       "Transparent",
        (char *) 0
 };

@@ -394,41 +369,6 @@
    *  Placeholders for Unicode tables. - FM
    */
   {-1,"iso-8859-15",   UCT_ENC_8BIT,0,0,0,     UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"cp850",         UCT_ENC_8BIT,0,
-                       UCT_REP_SUPERSETOF_LAT1,
-                       0,                      UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"windows-1252",  UCT_ENC_8BIT,0,
-                       UCT_REP_SUPERSETOF_LAT1,
-                       0,                      UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"cp437",         UCT_ENC_8BIT,0,0,0,     UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"dec-mcs",       UCT_ENC_8BIT,0,0,0,     UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"macintosh",     UCT_ENC_8BIT,0,0,0,     UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"next",          UCT_ENC_8BIT,0,0,0,     UCT_R_8BIT,UCT_R_ASCII},
-
-  /*
-   *  There is no strict correlation for the next five, since the transfer
-   *  charset gets decoded into Display Char Set by the CJK code (separate
-   *  from Unicode mechanism).  For now we use the MIME name that describes
-   *  what is output to the terminal. - KW
-   */
-  {-1,"euc-cn",        UCT_ENC_CJK,0,0,0,      UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"euc-jp",        UCT_ENC_CJK,0,0,0,      UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"shift_jis",     UCT_ENC_CJK,0,0,0,      UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"euc-kr",        UCT_ENC_CJK,0,0,0,      UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"big5",          UCT_ENC_CJK,0,0,0,      UCT_R_8BIT,UCT_R_ASCII},
-
-  /*
-   *  Placeholders for Unicode tables. - FM
-   */
-  {-1,"viscii",        UCT_ENC_8BIT_C0,0,0,0,  UCT_R_8BIT,UCT_R_ASCII},
-  {-1,"us-ascii",      UCT_ENC_7BIT,0,
-                       UCT_REP_SUBSETOF_LAT1,
-                       UCT_CP_SUBSETOF_LAT1,   UCT_R_ASCII,UCT_R_ASCII},
-
-  /*
-   *  Placeholder for non-translation mode. - FM
-   */
-  {-1,"x-transparent", UCT_ENC_8BIT,0,0,0,     UCT_R_8BIT,UCT_R_ASCII}

 };

@@ -443,20 +383,6 @@
 PUBLIC int LYlowest_eightbit[MAXCHARSETS]={
        160,    /* ISO Latin 1          */
        160,    /* ISO 8859-15 (Latin 9)*/
-       128,    /* DosLatin1 (cp850)    */
-       130,    /* WinLatin1 (cp1252)   */
-       128,    /* DosLatinUS (cp437)   */
-       160,    /* DEC Multinational    */
-       128,    /* Macintosh (8 bit)    */
-       128,    /* NeXT character set   */
-       128,    /* Chinese              */
-       128,    /* Japanese (EUC)       */
-       128,    /* Japanese (SJIS)      */
-       128,    /* Korean               */
-       128,    /* Taipei (Big5)        */
-       128,    /* Vietnamese (VISCII)  */
-       999,    /* 7 bit approximations */
-       128     /* Transparent  (???)   */
 };


diff -u old/ucaux.c ./ucaux.c
--- old/ucaux.c Mon Jan 18 18:57:58 1999
+++ ./ucaux.c   Thu Feb 25 09:57:30 1999
@@ -17,13 +17,9 @@
        return NO;
     if (!strcmp(LYCharSet_UC[from].MIMEname, "x-transparent"))
        return NO;
-    if (from == LATIN1) {
-       if (LYCharSet_UC[from].codepoints & (UCT_CP_SUPERSETOF_LAT1))
-           return YES;
-    }

-    /* others YES, but check for lost tables to be sure */
-    return (LYCharSet_UC[from].UChndl >= 0);
+    /* others YES */
+    return YES;
 }

 PUBLIC BOOL UCCanTranslateUniTo ARGS1(
@@ -31,6 +27,11 @@
 {
     if (to < 0)
        return NO;
+/*???
+    if (!strcmp(LYCharSet_UC[to].MIMEname, "x-transparent"))
+       return NO;
+*/
+
     return YES;                        /* well at least some characters... */
 }

@@ -51,7 +52,7 @@
        CONST char * toname = LYCharSet_UC[to].MIMEname;
        if (!strcmp(fromname, "x-transparent") ||
            !strcmp(toname, "x-transparent")) {
-           return YES;
+           return YES; /* ??? */
        } else if (!strcmp(fromname, "us-ascii")) {
            return YES;
        }
@@ -80,7 +81,7 @@
            return NO;
        }
     }
-    return (LYCharSet_UC[from].UChndl >= 0);
+    return YES;    /* others YES */
 }

 /*
diff -u old/ucdomap.c ./ucdomap.c
--- old/ucdomap.c       Mon Feb  8 18:58:40 1999
+++ ./ucdomap.c Thu Feb 25 10:21:00 1999
@@ -28,7 +28,7 @@
 #include <LYLeaks.h>

 /*
- *  Include tables & parameters.
+ *  Include chartrans tables:
  */
 #include <cp1250_uni.h>        /* WinLatin2 (cp1250)   */
 #include <cp1251_uni.h>        /* WinCyrillic (cp1251) */
@@ -65,7 +65,7 @@
 #include <mnem2_suni.h>        /* RFC 1345 Mnemonic    */
 #include <next_uni.h>          /* NeXT character set   */
 #include <rfc_suni.h>          /* RFC 1345 w/o Intro   */
-#include <utf8_uni.h>          /* UNICODE UTF 8        */
+/* #include <utf8_uni.h> */            /* UNICODE UTF 8        */
 #include <viscii_uni.h>        /* Vietnamese (VISCII)  */
 #include <iso9945_uni.h>       /* Ukrainian Cyrillic (ISO 9945-2) */
 #include <cp866u_uni.h>                /* Ukrainian Cyrillic (866) */
@@ -490,10 +490,11 @@
   }
     /*
      * The font is always 256 characters - so far.
+     *  (fake 0 for built-in charsets like CJK or x-transparent, use .num_n256)
      */
   con_clear_unimap();
 #endif
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i < UCInfo[UC_charset_in_hndl].num_n256; i++) {
        if ((j = UCInfo[UC_charset_in_hndl].unicount[i])) {
            ptrans[i] = *p;
            for (; j; j--) {
@@ -803,10 +804,11 @@

     /*
      * The font is always 256 characters - so far.
+     *  (fake 0 for built-in charsets like CJK or x-transparent, use .num_n256)
      */
     con_clear_unimap(0);

-    for (i = 0; i < 256; i++) {
+    for (i = 0; i < UCInfo[UC_charset_out_hndl].num_n256; i++) {
        for (j = UCInfo[UC_charset_out_hndl].unicount[i]; j; j--) {
            con_insert_unipair(*(p++), i, 0);
        }
@@ -1964,6 +1966,7 @@
     }
     UCInfo[s].LYNXname = UC_LYNXcharset;
     UCInfo[s].unicount = unicount;
+    UCInfo[s].num_n256 = (unicount == NULL) ? 0 : 256 ; /* hack */
     UCInfo[s].unitable = unitable;
     UCInfo[s].num_uni = nnuni;
     UCInfo[s].replacedesc = replacedesc;
@@ -2005,16 +2008,18 @@

 PUBLIC void UCInit NOARGS
 {
+
     UCreset_allocated_LYCharSets();
     atexit(UCcleanup_mem);
     UCconsole_map_init();

-    UC_CHARSET_SETUP;  /* us-ascii */    /* 7 bit approximations */
-
 /*
  *  The order of charset names visible in Lynx Options menu
  *  correspond to the order of lines below,
- *  except for CJK and others described in LYCharSet.c
+ *  except the first two described in LYCharSet.c
+ *
+ *  Entries those comment marked with *** are declared in UCdomap.h,
+ *  others based on the included tables - UCdomap.c, near the top.
  */

     UC_CHARSET_SETUP_iso_8859_1;         /* ISO Latin 1          */
@@ -2028,7 +2033,16 @@
     UC_CHARSET_SETUP_next;               /* NeXT character set   */
     UC_CHARSET_SETUP_hp_roman8;                  /* HP Roman8            */

+    UC_CHARSET_SETUP_euc_cn;             /*** Chinese              */
+    UC_CHARSET_SETUP_euc_jp;             /*** Japanese (EUC_JP)    */
+    UC_CHARSET_SETUP_shift_jis;                  /*** Japanese (Shift_JIS) */
+    UC_CHARSET_SETUP_euc_kr;             /*** Korean               */
+    UC_CHARSET_SETUP_big5;               /*** Taipei (Big5)        */
+
     UC_CHARSET_SETUP_viscii;             /* Vietnamese (VISCII)  */
+    UC_CHARSET_SETUP;  /* us-ascii */    /* 7 bit approximations */
+
+    UC_CHARSET_SETUP_x_transparent;      /*** Transparent          */

     UC_CHARSET_SETUP_iso_8859_2;         /* ISO Latin 2          */
     UC_CHARSET_SETUP_cp852;              /* DosLatin2 (cp852)    */
@@ -2054,7 +2068,7 @@
     UC_CHARSET_SETUP_iso_8859_9;         /* ISO 8859-9 (Latin 5) */
     UC_CHARSET_SETUP_iso_8859_10;        /* ISO 8859-10          */

-    UC_CHARSET_SETUP_utf_8;              /* UNICODE UTF-8        */
+    UC_CHARSET_SETUP_utf_8;              /*** UNICODE UTF-8      */
     UC_CHARSET_SETUP_mnemonic_ascii_0;   /* RFC 1345 w/o Intro   */
     UC_CHARSET_SETUP_mnemonic;           /* RFC 1345 Mnemonic    */
     UC_CHARSET_SETUP_iso_9945_2;         /* Ukrainian Cyrillic (ISO 9945-2) */
diff -u old/ucdomap.h ./ucdomap.h
--- old/ucdomap.h       Thu Dec  3 10:28:20 1998
+++ ./ucdomap.h Thu Feb 25 09:27:46 1999
@@ -41,6 +41,7 @@
        CONST char *MIMEname;
        CONST char *LYNXname;
        CONST u8* unicount;
+       int num_n256;   /* 256 for *.tbl, 0 for CJK and x-transparent (hack) */
        CONST u16* unitable;
        int num_uni;
        struct unimapdesc_str replacedesc;
@@ -55,5 +56,52 @@
 extern int UCNumCharsets;

 extern void UCInit NOARGS;
+
+
+/*
+ *  INSTRUCTIONS for adding new character sets which do not have
+ *              Unicode tables.
+ *
+ *  Several #defines below are declarations for charsets which need no
+ *  tables for mapping to Unicode - CJK multibytes, x-transparent, UTF8 -
+ *  Lynx care of them internally.
+ *
+ *  The declaration's format are kept from chrtrans/*_uni.h -
+ *  keep this in mind when changing  ucmaketbl.c,
+ *  see also UC_Charset_Setup() above for details.
+ */
+
+  /*
+   *  There is no strict correlation for the next five, since the transfer
+   *  charset gets decoded into Display Char Set by the CJK code (separate
+   *  from Unicode mechanism).  For now we use the MIME name that describes
+   *  what is output to the terminal. - KW
+   */
+#define UC_CHARSET_SETUP_euc_cn UC_Charset_Setup("euc-cn","Chinese",\
+       NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+       128,UCT_ENC_CJK,0)
+#define UC_CHARSET_SETUP_euc_jp UC_Charset_Setup("euc-jp","Japanese (EUC-JP)",\
+       NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+       128,UCT_ENC_CJK,0)
+#define UC_CHARSET_SETUP_shift_jis UC_Charset_Setup("shift_jis","Japanese (Shift_JIS)",\
+       NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+       128,UCT_ENC_CJK,0)
+#define UC_CHARSET_SETUP_euc_kr UC_Charset_Setup("euc-kr","Korean",\
+       NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+       128,UCT_ENC_CJK,0)
+#define UC_CHARSET_SETUP_big5 UC_Charset_Setup("big5","Taipei (Big5)",\
+       NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+       128,UCT_ENC_CJK,0)
+  /*
+   *  Placeholder for non-translation mode. - FM
+   */
+#define UC_CHARSET_SETUP_x_transparent UC_Charset_Setup("x-transparent","Transparent",\
+       NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+       128,1,0)
+
+#define UC_CHARSET_SETUP_utf_8 UC_Charset_Setup("utf-8","UNICODE (UTF-8)",\
+       NULL,NULL,0,(struct unimapdesc_str){0,NULL,0,0},\
+       128,UCT_ENC_UTF8,0)
+

 #endif /* UCDOMAP_H */


diff -u old/def7_uni.tbl ./def7_uni.tbl
--- old/def7_uni.tbl    Fri Aug 21 06:30:14 1998
+++ ./def7_uni.tbl      Thu Feb 25 00:20:00 1999
@@ -1,9 +1,12 @@
 # Default 7bit replacements.  If the MIME name is set to us-ascii,
-# this will be identified with the "7 bit approximations" Display
+# this will be identified with the "7 bit approximations (US-ASCII)" Display
 # character set.

 #The MIME name of this charset.
 Mus-ascii
+
+#Name as a Display Charset (used on Options screen)
+O7 bit approximations (US-ASCII)

 # Shall this become the "default" translation table?  YES!
 # There has to be exactly one table marked as "default".
[Prev in Thread]
Current Thread
[Next in Thread]
lynx-dev cleanup chartrans [patch], Leonid Pauzner <=
- Re: lynx-dev cleanup chartrans [patch], Klaus Weide, 1999/02/25
  - Re: lynx-dev cleanup chartrans [patch], Leonid Pauzner, 1999/02/25
    - Re: lynx-dev cleanup chartrans [patch], Klaus Weide, 1999/02/25
    - Re: lynx-dev cleanup chartrans [patch], Leonid Pauzner, 1999/02/25
    - Re: lynx-dev cleanup chartrans [patch], Klaus Weide, 1999/02/25
    - Re: lynx-dev cleanup chartrans [patch], Leonid Pauzner, 1999/02/26
Prev by Date: Re: lynx-dev dev17 clue?
Next by Date: lynx-dev BUG: lynx can't view NY times articles
Previous by thread: lynx-dev mime.types file seems inactive
Next by thread: Re: lynx-dev cleanup chartrans [patch]
Index(es):
- Date
- Thread