classpath-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[cp-patches] RFC: NIO Charset aliases


From: Robert Schuster
Subject: [cp-patches] RFC: NIO Charset aliases
Date: Sun, 30 Jan 2005 03:29:27 +0100
User-agent: Mozilla/5.0 (X11; U; Linux i686; de-AT; rv:1.7.5) Gecko/20050107

Hi,
Although GNU Classpath's encoder/decoder framework needs some broader work, it was decided that we should first fix the immediate problems with the charsets for java.nio.charset.

This patch adds a bunch of aliases to the charsets. Some of them are taken from http://www.iana.org/assignments/character-sets while others have been taken from our class gnu.classpath.SystemProperties.

Furthermore the charset lookup is now case-insensitive.

2005-01-30  Robert Schuster  <address@hidden>
       * gnu/java/nio/charset/ISO_8859_1.java,
       gnu/java/nio/charset/US_ASCII.java: Fixed aliases
        according to "http://www.iana.org/assignments/character-sets"
        and "gnu/classpath/SystemProperties.java".
       * gnu/java/nio/charset/UTF_16.java,
       gnu/java/nio/charset/UTF_16LE.java,
       gnu/java/nio/charset/UTF_16BE.java,
       gnu/java/nio/charset/UTF_8.java: Added aliases according
        to "gnu/classpath/SystemProperties.java".
       * gnu/java/nio/charset/Provider.java: Made charset lookup
        case-insensitive which fixes bug #11740.

Please tell me what you think of this, or whether you have suggestions for more aliases.

cu
Robert
Index: gnu/java/nio/charset/ISO_8859_1.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/ISO_8859_1.java,v
retrieving revision 1.3
diff -u -r1.3 ISO_8859_1.java
--- gnu/java/nio/charset/ISO_8859_1.java        6 Nov 2004 22:51:40 -0000       1.3
+++ gnu/java/nio/charset/ISO_8859_1.java        30 Jan 2005 02:20:57 -0000
@@ -1,5 +1,5 @@
 /* ISO_8859_1.java -- 
-   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -53,7 +53,23 @@
 {
   ISO_8859_1 ()
   {
-    super ("ISO-8859-1", new String[]{"ISO-LATIN-1"});
+    super ("8859_1", new String[] {
+        /* These names are provided by 
+         * http://www.iana.org/assignments/character-sets
+         */
+        "iso-ir-100",
+        "ISO_8859-1",
+        "ISO-8859-1",
+        "latin1",
+        "l1",
+        "IBM819",
+        "CP819",
+        "csISOLatin1",
+        // These names are taken from gnu.classpath.SystemProperties
+        "ISO8859_1", "ISO_8859_1", "ibm-819", "ISO_8859-1:1987",
+        "819"
+        });
+
   }
 
   public boolean contains (Charset cs)
Index: gnu/java/nio/charset/Provider.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/Provider.java,v
retrieving revision 1.2
diff -u -r1.2 Provider.java
--- gnu/java/nio/charset/Provider.java  8 Nov 2002 14:05:22 -0000       1.2
+++ gnu/java/nio/charset/Provider.java  30 Jan 2005 02:20:57 -0000
@@ -1,5 +1,5 @@
 /* Provider.java -- 
-   Copyright (C) 2002 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -48,6 +48,7 @@
  * {@link Charset#charsetForName} and * {@link Charset#availableCharsets}.
  *
  * @author Jesse Rosenstock
+ * @author Robert Schuster (address@hidden)
  * @see Charset
  */
 public final class Provider extends CharsetProvider
@@ -63,12 +64,14 @@
   }
 
   /**
-   * Map from charset name to charset canonical name.
+   * Map from charset name to charset canonical name. The strings
+   * are all lower-case to allow case-insensitive retrieval of
+   * Charset instances. 
    */
   private final HashMap canonicalNames;
 
   /**
-   * Map from canonical name to Charset.
+   * Map from lower-case canonical name to Charset.
    * TODO: We may want to use soft references.  We would then need to keep
    * track of the class name to regenerate the object.
    */
@@ -76,8 +79,6 @@
 
   private Provider ()
   {
-    // FIXME: We might need to make the name comparison case insensitive.
-    // Verify this with the Sun JDK.
     canonicalNames = new HashMap ();
     charsets = new HashMap ();
 
@@ -106,24 +107,42 @@
                       .iterator ();
   }
 
+  /**
+   * Returns a Charset instance by converting the given
+   * name to lower-case, looking up the canonical charset
+   * name and finally looking up the Charset with that name.
+   * 
+   * <p>The lookup is therefore case-insensitive.</p>
+   * 
+   *  @return The Charset having <code>charsetName</code>
+   *  as its name or alias, or null if no such Charset exists.
+   */
   public Charset charsetForName (String charsetName)
   {
-    return (Charset) charsets.get (canonicalize (charsetName));
-  }
-
-  private Object canonicalize (String charsetName)
-  {
-    Object o = canonicalNames.get (charsetName);
-    return o == null ? charsetName : o;
+    return (Charset) charsets.get(canonicalNames.get(charsetName.toLowerCase()));
   }
 
+  /**
+   * Puts a Charset under its canonical name into the 'charsets' map.
+   * Then puts a mapping from all its alias names to the canonical name.
+   * 
+   * <p>All names are converted to lower-case.</p>
+   * 
+   * @param cs
+   */
   private void addCharset (Charset cs)
   {
-    String canonicalName = cs.name ();
+    String canonicalName = cs.name().toLowerCase();
     charsets.put (canonicalName, cs);
+    
+    /* Adds a mapping from the canonical name to
+     * itself, so that a lookup using that name
+     * needs no special case.
+     */
+    canonicalNames.put(canonicalName, canonicalName);
 
     for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
-      canonicalNames.put (i.next (), canonicalName);
+      canonicalNames.put (((String) i.next()).toLowerCase(), canonicalName);
   }
 
   public static synchronized Provider provider ()
Index: gnu/java/nio/charset/US_ASCII.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/US_ASCII.java,v
retrieving revision 1.3
diff -u -r1.3 US_ASCII.java
--- gnu/java/nio/charset/US_ASCII.java  6 Nov 2004 22:51:40 -0000       1.3
+++ gnu/java/nio/charset/US_ASCII.java  30 Jan 2005 02:20:57 -0000
@@ -1,5 +1,5 @@
 /* US_ASCII.java -- 
-   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -53,7 +53,24 @@
 {
   US_ASCII ()
   {
-    super ("US-ASCII", new String[]{"ISO646-US"});
+    super ("ASCII", new String[] {
+        /* These names are provided by 
+         * http://www.iana.org/assignments/character-sets
+         */
+        "iso-ir-6",
+        "ANSI_X3.4-1986",
+        "ISO_646.irv:1991",
+        "ASCII",
+        "ISO646-US",
+        "US-ASCII",
+        "us",
+        "IBM367",
+        "cp367",
+        "csASCII",
+        // These are provided by gnu.classpath.SystemProperties
+        "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646",
+        "windows-20127"
+        });
   }
 
   public boolean contains (Charset cs)
Index: gnu/java/nio/charset/UTF_16.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16.java,v
retrieving revision 1.3
diff -u -r1.3 UTF_16.java
--- gnu/java/nio/charset/UTF_16.java    13 Oct 2004 14:32:33 -0000      1.3
+++ gnu/java/nio/charset/UTF_16.java    30 Jan 2005 02:20:57 -0000
@@ -1,5 +1,5 @@
 /* UTF_16.java -- 
-   Copyright (C) 2002, 2004  Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -51,7 +51,11 @@
 {
   UTF_16 ()
   {
-    super ("UTF-16", null);
+    super ("UTF-16", new String[] {
+        // These are provided by gnu.classpath.SystemProperties
+        "UTF16", "ISO-10646-UCS-2", "unicode", "csUnicode",
+        "ucs-2"
+    });
   }
 
   public boolean contains (Charset cs)
Index: gnu/java/nio/charset/UTF_16BE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16BE.java,v
retrieving revision 1.3
diff -u -r1.3 UTF_16BE.java
--- gnu/java/nio/charset/UTF_16BE.java  13 Oct 2004 14:32:33 -0000      1.3
+++ gnu/java/nio/charset/UTF_16BE.java  30 Jan 2005 02:20:57 -0000
@@ -1,5 +1,5 @@
 /* UTF_16BE.java -- 
-   Copyright (C) 2002, 2004  Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -51,7 +51,12 @@
 {
   UTF_16BE ()
   {
-    super ("UTF-16BE", null);
+    super ("UTF-16BE",  new String[] {
+        // These are provided by gnu.classpath.SystemProperties
+        "UTF16BE", "x-utf-16be", "ibm-1200", "ibm-1201", "ibm-5297",
+        "ibm-13488", "ibm-17584", "windows-1201", "cp1200", "cp1201",
+        "UTF16_BigEndian"
+    });
   }
 
   public boolean contains (Charset cs)
Index: gnu/java/nio/charset/UTF_16LE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16LE.java,v
retrieving revision 1.3
diff -u -r1.3 UTF_16LE.java
--- gnu/java/nio/charset/UTF_16LE.java  13 Oct 2004 14:32:33 -0000      1.3
+++ gnu/java/nio/charset/UTF_16LE.java  30 Jan 2005 02:20:58 -0000
@@ -1,5 +1,5 @@
 /* UTF_16LE.java -- 
-   Copyright (C) 2002, 2004  Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -51,7 +51,11 @@
 {
   UTF_16LE ()
   {
-    super ("UTF-16LE", null);
+    super ("UTF-16LE", new String[] {
+        // These are provided by gnu.classpath.SystemProperties
+        "UTF16LE", "x-utf-16le", "ibm-1202", "ibm-13490", "ibm-17586",
+        "UTF16_LittleEndian"
+    });
   }
 
   public boolean contains (Charset cs)
Index: gnu/java/nio/charset/UTF_8.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_8.java,v
retrieving revision 1.3
diff -u -r1.3 UTF_8.java
--- gnu/java/nio/charset/UTF_8.java     6 Nov 2004 22:51:40 -0000       1.3
+++ gnu/java/nio/charset/UTF_8.java     30 Jan 2005 02:20:58 -0000
@@ -1,5 +1,5 @@
 /* UTF_8.java -- 
-   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -62,7 +62,11 @@
 {
   UTF_8 ()
   {
-    super ("UTF-8", null);
+    super ("UTF-8", new String[] {
+        // These are provided by gnu.classpath.SystemProperties
+        "UTF8", "ibm-1208", "ibm-1209", "ibm-5304", "ibm-5305",
+        "windows-65001", "cp1208"
+    });
   }
 
   public boolean contains (Charset cs)

reply via email to

[Prev in Thread] Current Thread [Next in Thread]