bug-classpath
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug classpath/24467] New: gnu_java_nio_charset_iconv_IconvDecoder.c tre


From: kaz at maczuka dot gcd dot org
Subject: [Bug classpath/24467] New: gnu_java_nio_charset_iconv_IconvDecoder.c treats normal bytes as invalid
Date: 21 Oct 2005 07:16:30 -0000

gnu_java_nio_charset_iconv_IconvDecoder.c contains this code:

      if (errno == EILSEQ || errno == EINVAL)
        retval = 1;
      else
        retval = 0;

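But errno == EINVAL is a normal status: iconv() sets it when the input
buffer ends in the middle of a multibyte sequence, i.e. more input is
needed. It should result in CoderResult.UNDERFLOW, not in
CoderResult.MALFORMED.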

Because errno == EINVAL is treated as invalid input, charset conversion
goes wrong in some cases, e.g. when a multibyte character is split
across two input chunks.

The following program demonstrates this bug.

import gnu.java.nio.charset.iconv.*;
import java.nio.*;
import java.nio.charset.*;

public class TestIconvCharset {

    public static void main(String[] args) throws Exception {

        int size = Integer.parseInt(args[0]);

        byte[] eucbytes =  new byte[11 * 100];
        for (int i = 0, j = 0; i < 100; i++, j+=11) {
            eucbytes[j] = (byte)'A';
            for (int k = j+1; k < j+9; k++) {
                eucbytes[k] = (byte)0xa1;
            }
            eucbytes[j+9] = (byte)'B';
            eucbytes[j+10] = (byte)'C';
        } 

        Charset eucjp = IconvProvider.provider().charsetForName("EUC-JP");
        CharsetDecoder decoder = eucjp.newDecoder();
        CharsetEncoder encoder = eucjp.newEncoder();
        encoder.onMalformedInput(CodingErrorAction.REPORT);
        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);

        int inpos = 0;
        while (inpos < eucbytes.length) {
            int l = eucbytes.length - inpos;
            if (l > size) l = size;
            byte[] subarray = new byte[l];
            System.arraycopy(eucbytes, inpos, subarray, 0, l);
            ByteBuffer bbuf = ByteBuffer.wrap(subarray);
            CharBuffer cbuf = CharBuffer.wrap(new char[l]);
            boolean endInput = (inpos + l >= eucbytes.length);
            decoder.reset();
            CoderResult cr = decoder.decode(bbuf, cbuf, endInput);
            System.err.println("Processed: pos=" + inpos + " " +
bbuf.position() + " bytes --> " + cbuf.position() + " characters");
            cbuf.flip();
            check(bbuf, cbuf, encoder, inpos);
            if (cr.isError()) {
                describe(cr, bbuf, inpos);
                bbuf.position(bbuf.position() + cr.length());
            }
            inpos += bbuf.position();
        }
    }

    private static void check(ByteBuffer in, CharBuffer out, CharsetEncoder
encoder, int inpos) {
        try {
            encoder.reset();
            ByteBuffer outb = encoder.encode(out);
            // System.err.println("Encoded: " + out.limit() + " --> " +
outb.limit());
            for (int i = 0; i < in.position() || i < outb.limit(); i++) {
                if (i < in.position() && i < outb.limit()) {
                    if (in.get(i) != outb.get(i)) {
                        System.err.println("Changed: pos=" + (inpos+i) + " " +
hex(in.get(i)) + "-->" + hex(outb.get(i)));
                    }
                }
                else if (i >= in.position()) {
                    System.err.println("Appeared: pos=" + (inpos+i) + " " +
hex(outb.get(i)));
                }
                else {
                        System.err.println("Lost: pos=" + (inpos+i) + " " +
hex(in.get(i)));
                }
            }
        }
        catch (Exception e) {
            System.err.println("check: " + e);
        }
    }

    private static void describe(CoderResult cr, ByteBuffer in, int inpos) {
        try {
            int len = cr.length();
            int pos = in.position();
            for (int i = pos; i < pos + len; i++) {
                System.err.println(cr.toString() + ": pos = " + (inpos+i) + " "
+ hex(in.get(i)));
            }
        }
        catch (Exception e) {
            System.err.println("describe: " + e);
        }
    }

    private static String hex(byte b) {
        return "0x" + Integer.toHexString(b & 0xff);
    }
}

For example,

$ java TestIconvCharset 600
Processed: pos=0 599 bytes --> 381 characters
MALFORMED[1]: pos = 599 0xa1
Processed: pos=600 2 bytes --> 1 characters
MALFORMED[1]: pos = 602 0xa1
Processed: pos=603 497 bytes --> 317 characters

After deleting "errno == EINVAL", this program gives the expected result.

$ java TestIconvCharset 600
Processed: pos=0 599 bytes --> 381 characters
Processed: pos=599 501 bytes --> 319 characters


-- 
           Summary: gnu_java_nio_charset_iconv_IconvDecoder.c treats normal
                    bytes as invalid
           Product: classpath
           Version: unspecified
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: classpath
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: kaz at maczuka dot gcd dot org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24467





reply via email to

[Prev in Thread] Current Thread [Next in Thread]