Index: gnu/xml/stream/CRLFReader.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/xml/stream/CRLFReader.java,v retrieving revision 1.1 diff -u -r1.1 CRLFReader.java --- gnu/xml/stream/CRLFReader.java 12 Dec 2005 11:35:38 -0000 1.1 +++ gnu/xml/stream/CRLFReader.java 28 Dec 2005 19:17:50 -0000 @@ -109,7 +109,7 @@ in.reset(); if (i != -1) { - l = in.read(b, off, i + 1); // read to CR + l = in.read(b, off, (i + 1) - off); // read to CR in.read(); // skip LF b[i] = '\n'; // fix CR as LF }
Index: gnu/xml/stream/UnicodeReader.java
===================================================================
RCS file: gnu/xml/stream/UnicodeReader.java
diff -N gnu/xml/stream/UnicodeReader.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ gnu/xml/stream/UnicodeReader.java 28 Dec 2005 19:17:50 -0000
@@ -0,0 +1,249 @@
+/* UnicodeReader.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.xml.stream;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A reader that converts UTF-16 characters to Unicode code points.
+ * <p>
+ * A high surrogate immediately followed by a low surrogate is combined
+ * into a single supplementary code point. Unpaired surrogates are passed
+ * through unchanged, one value per read.
+ *
+ * @author Chris Burdess
+ */
+class UnicodeReader
+{
+
+  /** The underlying source of UTF-16 code units. */
+  final Reader in;
+
+  /**
+   * A code unit (or -1 for end of stream) that was read while looking for
+   * the low half of a surrogate pair but did not complete the pair, and
+   * its value as saved by {@link #mark(int)}.
+   */
+  int carry, markCarry;
+
+  /** Whether <code>carry</code> / <code>markCarry</code> hold a value. */
+  boolean isCarry, isMarkCarry;
+
+  UnicodeReader(Reader in)
+  {
+    this.in = in;
+  }
+
+  /**
+   * Marks the current position in the underlying reader and saves the
+   * pending carry so that {@link #reset()} can restore both.
+   *
+   * @param limit the read-ahead limit, in UTF-16 code units, passed to
+   * the underlying reader
+   */
+  public void mark(int limit)
+    throws IOException
+  {
+    in.mark(limit);
+    markCarry = carry;
+    isMarkCarry = isCarry;
+  }
+
+  /**
+   * Returns to the position and carry state saved by the last call to
+   * {@link #mark(int)}.
+   */
+  public void reset()
+    throws IOException
+  {
+    in.reset();
+    carry = markCarry;
+    isCarry = isMarkCarry;
+  }
+
+  /**
+   * Reads a single code point.
+   *
+   * @return the code point, or -1 at the end of the stream
+   */
+  public int read()
+    throws IOException
+  {
+    int ret;
+    if (isCarry)
+      {
+        isCarry = false;
+        ret = carry;
+      }
+    else
+      ret = in.read();
+    // A carried unit goes through the same pairing logic as a fresh one,
+    // so a carried high surrogate is still joined to its low surrogate.
+    if (isHighSurrogate(ret))
+      {
+        int low = in.read();
+        if (isLowSurrogate(low))
+          ret = Character.toCodePoint((char) ret, (char) low);
+        else
+          {
+            // Not a pair: hand the look-ahead back on the next read.
+            carry = low;
+            isCarry = true;
+          }
+      }
+    return ret;
+  }
+
+  /**
+   * Reads up to <code>len</code> code points into <code>buf</code>.
+   *
+   * @param buf the destination array
+   * @param off the index in <code>buf</code> of the first code point stored
+   * @param len the maximum number of code points to read
+   * @return the number of code points stored, or -1 at the end of the
+   * stream
+   */
+  public int read(int[] buf, int off, int len)
+    throws IOException
+  {
+    if (len == 0)
+      return 0;
+    if (isCarry)
+      {
+        // Deliver the pending unit by itself; read() clears the carry and
+        // reports a carried end of stream as -1 rather than storing it.
+        int cp = read();
+        if (cp == -1)
+          return -1;
+        buf[off] = cp;
+        return 1;
+      }
+    char[] b2 = new char[len];
+    int ret = in.read(b2, 0, len);
+    if (ret <= 0)
+      return ret;
+    int j = off;
+    for (int i = 0; i < ret; i++)
+      {
+        char c = b2[i];
+        if (isHighSurrogate(c))
+          {
+            // Look for the low half in the block, or in the stream when
+            // the high half is the last unit of the block.
+            boolean last = (i == ret - 1);
+            int low = last ? in.read() : b2[i + 1];
+            if (isLowSurrogate(low))
+              {
+                buf[j++] = Character.toCodePoint(c, (char) low);
+                i++; // the low half has been consumed
+                continue;
+              }
+            if (last)
+              {
+                // The look-ahead left the stream; keep it for next time.
+                carry = low;
+                isCarry = true;
+              }
+          }
+        buf[j++] = (int) c;
+      }
+    // Number of code points stored, not the end index in buf.
+    return j - off;
+  }
+
+  /**
+   * Closes the underlying reader.
+   */
+  public void close()
+    throws IOException
+  {
+    in.close();
+  }
+
+  /**
+   * Converts the UTF-16 code units of a string to code points.
+   *
+   * @param text the string to convert
+   * @return the code points of <code>text</code>; the array is exactly as
+   * long as the number of code points
+   */
+  public static int[] toCodePointArray(String text)
+  {
+    char[] b2 = text.toCharArray();
+    int[] buf = new int[b2.length];
+    int j = 0;
+    for (int i = 0; i < b2.length; i++)
+      {
+        char c = b2[i];
+        if (isHighSurrogate(c) && i + 1 < b2.length
+            && isLowSurrogate(b2[i + 1]))
+          {
+            // Consume both halves so the low half is not emitted again.
+            buf[j++] = Character.toCodePoint(c, b2[++i]);
+          }
+        else
+          buf[j++] = (int) c;
+      }
+    if (j < buf.length)
+      {
+        // Surrogate pairs made the result shorter than the input.
+        int[] buf2 = new int[j];
+        System.arraycopy(buf, 0, buf2, 0, j);
+        buf = buf2;
+      }
+    return buf;
+  }
+
+  /**
+   * Indicates whether the value is a UTF-16 high (leading) surrogate.
+   */
+  private static boolean isHighSurrogate(int c)
+  {
+    return c >= 0xd800 && c < 0xdc00;
+  }
+
+  /**
+   * Indicates whether the value is a UTF-16 low (trailing) surrogate.
+   */
+  private static boolean isLowSurrogate(int c)
+  {
+    return c >= 0xdc00 && c < 0xe000;
+  }
+
+}
Index: gnu/xml/stream/XMLParser.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/xml/stream/XMLParser.java,v retrieving revision 1.12 diff -u -r1.12 XMLParser.java --- gnu/xml/stream/XMLParser.java 27 Dec 2005 19:56:16 -0000 1.12 +++ gnu/xml/stream/XMLParser.java 28 Dec 2005 19:17:51 -0000 @@ -130,7 +130,7 @@ private StringBuffer buf = new StringBuffer(); private StringBuffer nmtokenBuf = new StringBuffer(); private StringBuffer literalBuf = new StringBuffer(); - private char[] tmpBuf = new char[1024]; + private int[] tmpBuf = new int[1024]; private ContentModel currentContentModel; private LinkedList validationStack = new LinkedList(); @@ -800,11 +800,11 @@ { // Check for character reference or predefined entity mark(8); - char c = readCh(); - if (c == '&') + int c = readCh(); + if (c == 0x26) // '&' { c = readCh(); - if (c == '#') + if (c == 0x23) // '#' { reset(); event = readCharData(null); @@ -881,8 +881,8 @@ } else { - char c = readCh(); - error("no root element: +U"+Integer.toHexString(c)); + int c = readCh(); + error("no root element: U+" + Integer.toHexString(c)); } break; case MISC: // Comment | PI | S @@ -901,8 +901,8 @@ { if (event == XMLStreamConstants.END_DOCUMENT) throw new NoSuchElementException(); - char c = readCh(); - if (c != '\uffff') + int c = readCh(); + if (c != -1) error("Only comments and PIs may appear after " + "the root element"); event = XMLStreamConstants.END_DOCUMENT; @@ -952,7 +952,7 @@ return ret; } - private int read(char[] b, int off, int len) + private int read(int[] b, int off, int len) throws IOException { int ret = input.read(b, off, 
len); @@ -963,11 +963,11 @@ /** * Parsed character read. */ - private char readCh() + private int readCh() throws IOException, XMLStreamException { - char c = (char) read(); - if (expandPE && c == '%') + int c = read(); + if (expandPE && c == 0x25) // '%' { if (peIsError) error("PE reference within decl in internal subset."); @@ -981,11 +981,12 @@ throws IOException, XMLStreamException { mark(1); - char c = readCh(); + int c = readCh(); if (delim != c) { reset(); - error("required character (got U+"+Integer.toHexString(c)+")", new Character(delim)); + error("required character (got U+" + Integer.toHexString(c) + ")", + new Character(delim)); } } @@ -1024,7 +1025,7 @@ throws IOException, XMLStreamException { mark(1); - char c = readCh(); + int c = readCh(); if (delim != c) { reset(); @@ -1082,24 +1083,28 @@ { while (!tryRead(delim)) { - char c = readCh(); - if (c == '\uffff') + int c = readCh(); + if (c == -1) throw new EOFException(); else if (input.xml11) { - if (!isXML11Char((int) c)) - error("illegal XML 1.1 character", Character.toString(c)); + if (!isXML11Char(c)) + error("illegal XML 1.1 character", + "U+" + Integer.toHexString(c)); } else { - if (c < 32 && c != 10 && c != 9 && c != 13) - error("illegal XML character", Character.toString(c)); + if (c < 0x20 && c != 0x09 && c != 0x0a && c != 0x0d) + error("illegal XML character", + "U+" + Integer.toHexString(c)); else if (c > '\ud7ff' && c < '\ue000') - error("illegal XML character", Character.toString(c)); + error("illegal XML character", + "U+" + Integer.toHexString(c)); else if (c > '\ufffd') - error("illegal XML character", Character.toString(c)); + error("illegal XML character", + "U+" + Integer.toHexString(c)); } - buf.append(c); + buf.append(Character.toChars(c)); } } catch (EOFException e) @@ -1117,11 +1122,11 @@ do { mark(1); - char c = readCh(); - white = (c == ' ' || c == '\t' || c == '\n' || c == '\r'); + int c = readCh(); + white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d); if (white) ret 
= true; - else if (c == '\uffff') + else if (c == -1) { if (inputStack.size() > 1) popInput(); @@ -1144,13 +1149,13 @@ do { mark(1); - char c = readCh(); - while (c == '\uffff' && inputStack.size() > 1) + int c = readCh(); + while (c == -1 && inputStack.size() > 1) { popInput(); c = readCh(); } - white = (c == ' ' || c == '\t' || c == '\n' || c == '\r'); + white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d); } while (white); reset(); @@ -1487,10 +1492,10 @@ skipWhitespace(); expandPE = false; mark(1); - char c = readCh(); - if (c == '>') + int c = readCh(); + if (c == 0x3e) // '>' break; - else if (c == '\uffff') + else if (c == -1) popInput(); else { @@ -1575,18 +1580,18 @@ expandPE = false; for (int nesting = 1; nesting > 0; ) { - char c = readCh(); + int c = readCh(); switch (c) { - case '<': + case 0x3c: // '<' if (tryRead("![")) nesting++; break; - case ']': + case 0x5d: // ']' if (tryRead("]>")) nesting--; break; - case '\uffff': + case -1: throw new EOFException(); } } @@ -1668,7 +1673,7 @@ private ElementContentModel readElements(StringBuffer acc) throws IOException, XMLStreamException { - char separator; + int separator; ElementContentModel model = new ElementContentModel(); // Parse first content particle @@ -1676,27 +1681,27 @@ model.addContentParticle(readContentParticle(acc)); // End or separator skipWhitespace(); - char c = readCh(); + int c = readCh(); switch (c) { - case ')': + case 0x29: // ')' acc.append(')'); mark(1); c = readCh(); switch (c) { - case '?': - acc.append(c); + case 0x3f: // '?' 
+ acc.append('?'); model.min = 0; model.max = 1; break; - case '*': - acc.append(c); + case 0x2a: // '*' + acc.append('*'); model.min = 0; model.max = -1; break; - case '+': - acc.append(c); + case 0x2b: // '+' + acc.append('+'); model.min = 1; model.max = -1; break; @@ -1704,13 +1709,14 @@ reset(); } return model; // done - case ',': - case '|': + case 0x2c: // ',' + case 0x7c: // '|' separator = c; - acc.append(c); + acc.append(Character.toChars(c)); break; default: - error("bad separator in content model", new Character(c)); + error("bad separator in content model", + "U+" + Integer.toHexString(c)); return model; } // Parse subsequent content particles @@ -1720,14 +1726,15 @@ model.addContentParticle(readContentParticle(acc)); skipWhitespace(); c = readCh(); - if (c == ')') + if (c == 0x29) // ')' { acc.append(')'); break; } else if (c != separator) { - error("bad separator in content model", new Character(c)); + error("bad separator in content model", + "U+" + Integer.toHexString(c)); return model; } else @@ -1738,18 +1745,18 @@ c = readCh(); switch (c) { - case '?': - acc.append(c); + case 0x3f: // '?' + acc.append('?'); model.min = 0; model.max = 1; break; - case '*': - acc.append(c); + case 0x2a: // '*' + acc.append('*'); model.min = 0; model.max = -1; break; - case '+': - acc.append(c); + case 0x2b: // '+' + acc.append('+'); model.min = 1; model.max = -1; break; @@ -1774,21 +1781,21 @@ acc.append(name); cp.content = name; mark(1); - char c = readCh(); + int c = readCh(); switch (c) { - case '?': - acc.append(c); + case 0x3f: // '?' 
+ acc.append('?'); cp.min = 0; cp.max = 1; break; - case '*': - acc.append(c); + case 0x2a: // '*' + acc.append('*'); cp.min = 0; cp.max = -1; break; - case '+': - acc.append(c); + case 0x2b: // '+' + acc.append('+'); cp.min = 1; cp.max = -1; break; @@ -1974,9 +1981,9 @@ name = "%" + name; requireWhitespace(); mark(1); - char c = readCh(); + int c = readCh(); reset(); - if (c == '"' || c == '\'') + if (c == 0x22 || c == 0x27) // " | ' { // Internal entity replacement text String value = readLiteral(flags | LIT_DISABLE_EREF); @@ -2022,7 +2029,7 @@ private ExternalIds readExternalIds(boolean inNotation, boolean isSubset) throws IOException, XMLStreamException { - char c; + int c; int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; ExternalIds ids = new ExternalIds(); @@ -2036,7 +2043,7 @@ mark(1); c = readCh(); reset(); - if (c == '"' || c == '\'') + if (c == 0x22 || c == 0x27) // " | ' ids.systemId = absolutize(input.systemId, readLiteral(flags)); } else @@ -2047,15 +2054,15 @@ for (int i = 0; i < ids.publicId.length(); i++) { - c = ids.publicId.charAt(i); - if (c >= 'a' && c <= 'z') + char d = ids.publicId.charAt(i); + if (d >= 'a' && d <= 'z') continue; - if (c >= 'A' && c <= 'Z') + if (d >= 'A' && d <= 'Z') continue; - if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(c) != -1) + if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1) continue; error("illegal PUBLIC id character", - "U+" + Integer.toHexString(c)); + "U+" + Integer.toHexString(d)); } } else if (tryRead("SYSTEM")) @@ -2097,8 +2104,8 @@ // Read element content boolean white = tryWhitespace(); mark(1); - char c = readCh(); - while (c != '/' && c != '>') + int c = readCh(); + while (c != 0x2f && c != 0x3e) // '/' | '>' { // Read attribute reset(); @@ -2184,9 +2191,9 @@ stack.addLast(elementName); switch (c) { - case '>': + case 0x3e: // '>' return CONTENT; - case '/': + case 0x2f: // '/' require('>'); return EMPTY_ELEMENT; } @@ -2422,27 +2429,27 @@ } for (int i = 0; i < 
len && !done; i++) { - char c = tmpBuf[i]; + int c = tmpBuf[i]; switch (c) { - case ' ': - case '\t': - case '\n': - case '\r': - buf.append(c); + case 0x20: + case 0x09: + case 0x0a: + case 0x0d: + buf.append(Character.toChars(c)); break; // whitespace - case '&': + case 0x26: // '&' reset(); read(tmpBuf, 0, i); // character reference? mark(3); c = readCh(); // & c = readCh(); - if (c == '#') + if (c == 0x23) // '#' { mark(1); c = readCh(); - boolean hex = (c == 'x'); + boolean hex = (c == 0x78); // 'x' if (!hex) reset(); char[] ch = readCharacterRef(hex ? 16 : 10); @@ -2451,10 +2458,10 @@ { switch (ch[j]) { - case ' ': - case '\t': - case '\n': - case '\r': + case 0x20: + case 0x09: + case 0x0a: + case 0x0d: break; // whitespace default: white = false; @@ -2499,21 +2506,22 @@ } entities = true; break; // end of text sequence - case '>': + case 0x3e: // '>' int l = buf.length(); if (l > 1 && buf.charAt(l - 1) == ']' && buf.charAt(l - 2) == ']') error("Character data may not contain unescaped ']]>'"); - buf.append(c); + buf.append(Character.toChars(c)); break; - case '<': + case 0x3c: // '<' reset(); read(tmpBuf, 0, i); done = true; break; // end of text sequence default: if ((c < 0x0020 || c > 0xfffd) || + (c >= 0xd800 && c < 0xdc00) || (input.xml11 && (c >= 0x007f) && (c <= 0x009f) && (c != 0x0085))) { @@ -2521,7 +2529,7 @@ "U+" + Integer.toHexString(c)); } white = false; - buf.append(c); + buf.append(Character.toChars(c)); } } // if text buffer >= 2MB, return it as a chunk @@ -2588,11 +2596,11 @@ skipWhitespace(); } - private char literalReadCh() + private int literalReadCh() throws IOException, XMLStreamException { - char c = readCh(); - while (c == '\uffff') + int c = readCh(); + while (c == -1) { if (inputStack.size() > 1) { @@ -2611,9 +2619,9 @@ throws IOException, XMLStreamException { boolean saved = expandPE; - char delim = readCh(); - if (delim != '\'' && delim != '"') - error("expected '\"' or \"'\"", new Character(delim)); + int delim = readCh(); + if 
(delim != 0x27 && delim != 0x22) + error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim)); literalBuf.setLength(0); if ((flags & LIT_DISABLE_PE) != 0) expandPE = false; @@ -2621,35 +2629,35 @@ int inputStackSize = inputStack.size(); do { - char c = literalReadCh(); + int c = literalReadCh(); if (c == delim && inputStackSize == inputStack.size()) break; switch (c) { - case '\n': - case '\r': + case 0x0a: + case 0x0d: if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0) - c = ' '; // normalize to space + c = 0x20; // normalize to space break; - case '\t': + case 0x09: if ((flags & LIT_ATTRIBUTE) != 0) - c = ' '; // normalize to space + c = 0x20; // normalize to space break; - case '&': + case 0x26: // '&' mark(2); c = readCh(); - if (c == '#') + if (c == 0x23) // '#' { if ((flags & LIT_DISABLE_CREF) != 0) { reset(); - c = '&'; + c = 0x26; // '&' } else { mark(1); c = readCh(); - boolean hex = (c == 'x'); + boolean hex = (c == 0x78); // 'x' if (!hex) reset(); char[] ref = readCharacterRef(hex ? 
16 : 10); @@ -2657,11 +2665,11 @@ { c = ref[i]; if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0 && - (c == '\n' || c == '\r')) - c = ' '; // normalize - else if ((flags & LIT_ATTRIBUTE) != 0 && c == '\t') - c = ' '; // normalize - literalBuf.append(c); + (c == 0x0a || c == 0x0d)) + c = 0x20; // normalize + else if ((flags & LIT_ATTRIBUTE) != 0 && c == 0x09) + c = 0x20; // normalize + literalBuf.append(Character.toChars(c)); } entities = true; continue; @@ -2672,7 +2680,7 @@ if ((flags & LIT_DISABLE_EREF) != 0) { reset(); - c = '&'; + c = 0x26; // '&' } else { @@ -2697,19 +2705,25 @@ } } break; - case '<': + case 0x3c: // '<' if ((flags & LIT_ATTRIBUTE) != 0) error("attribute values may not contain '<'"); break; - case '\uffff': + case -1: if (inputStack.size() > 1) { popInput(); continue; } throw new EOFException(); + default: + if ((c < 0x0020 || c > 0xfffd) || + (c >= 0xd800 && c < 0xdc00) || + (input.xml11 && (c >= 0x007f) && + (c <= 0x009f) && (c != 0x0085))) + error("illegal character", "U+" + Integer.toHexString(c)); } - literalBuf.append(c); + literalBuf.append(Character.toChars(c)); } while (true); expandPE = saved; @@ -2802,8 +2816,8 @@ throws IOException, XMLStreamException { StringBuffer b = new StringBuffer(); - for (char c = readCh(); c != ';' && c != '\uffff'; c = readCh()) - b.append(c); + for (int c = readCh(); c != 0x3b && c != -1; c = readCh()) + b.append(Character.toChars(c)); try { int ord = Integer.parseInt(b.toString(), base); @@ -2835,7 +2849,7 @@ throws IOException, XMLStreamException { nmtokenBuf.setLength(0); - char c = readCh(); + int c = readCh(); if (isName) { if (!isNameStartCharacter(c)) @@ -2848,33 +2862,33 @@ error("not a name character", "U+" + Integer.toHexString(c)); } - nmtokenBuf.append(c); + nmtokenBuf.append(Character.toChars(c)); do { mark(1); c = readCh(); switch (c) { - case '%': - case '<': - case '>': - case '&': - case ',': - case '|': - case '*': - case '+': - case '?': - case ')': - case '=': - case '\'': - case '"': - 
case '[': - case ' ': - case '\t': - case '\n': - case '\r': - case ';': - case '/': + case 0x25: // '%' + case 0x3c: // '<' + case 0x3e: // '>' + case 0x26: // '&' + case 0x2c: // ',' + case 0x7c: // '|' + case 0x2a: // '*' + case 0x2b: // '+' + case 0x3f: // '?' + case 0x29: // ')' + case 0x3d: // '=' + case 0x27: // '\'' + case 0x22: // '"' + case 0x5b: // '[' + case 0x20: // ' ' + case 0x09: // '\t' + case 0x0a: // '\n' + case 0x0d: // '\r' + case 0x3b: // ';' + case 0x2f: // '/' reset(); return intern(nmtokenBuf.toString()); default: @@ -2882,7 +2896,7 @@ error("not a name character", "U+" + Integer.toHexString(c)); else - nmtokenBuf.append(c); + nmtokenBuf.append(Character.toChars(c)); } } while (true); @@ -2904,13 +2918,13 @@ (c >= 0x0086 && c <= 0x009F)); } - private boolean isNameStartCharacter(char c) + private boolean isNameStartCharacter(int c) { if (input.xml11) return ((c >= 0x0041 && c <= 0x005a) || (c >= 0x0061 && c <= 0x007a) || - c == ':' | - c == '_' | + c == 0x3a | + c == 0x5f | (c >= 0xC0 && c <= 0xD6) || (c >= 0xD8 && c <= 0xF6) || (c >= 0xF8 && c <= 0x2FF) || @@ -2924,19 +2938,19 @@ (c >= 0xFDF0 && c <= 0xFFFD) || (c >= 0x10000 && c <= 0xEFFFF)); else - return (c == '_' || c == ':' || isLetter(c)); + return (c == 0x5f || c == 0x3a || isLetter(c)); } - private boolean isNameCharacter(char c) + private boolean isNameCharacter(int c) { if (input.xml11) return ((c >= 0x0041 && c <= 0x005a) || (c >= 0x0061 && c <= 0x007a) || (c >= 0x0030 && c <= 0x0039) || - c == ':' | - c == '_' | - c == '-' | - c == '.' | + c == 0x3a | + c == 0x5f | + c == 0x2d | + c == 0x2e | c == 0xB7 | (c >= 0xC0 && c <= 0xD6) || (c >= 0xD8 && c <= 0xF6) || @@ -2952,12 +2966,12 @@ (c >= 0xFDF0 && c <= 0xFFFD) || (c >= 0x10000 && c <= 0xEFFFF)); else - return (c == '.' 
|| c == '-' || c == '_' || c == ':' || + return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a || isLetter(c) || isDigit(c) || isCombiningChar(c) || isExtender(c)); } - public static boolean isLetter(char c) + public static boolean isLetter(int c) { if ((c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A) || @@ -3169,7 +3183,7 @@ return false; } - public static boolean isDigit(char c) + public static boolean isDigit(int c) { return ((c >= 0x0030 && c <= 0x0039) || (c >= 0x0660 && c <= 0x0669) || @@ -3188,7 +3202,7 @@ (c >= 0x0F20 && c <= 0x0F29)); } - public static boolean isCombiningChar(char c) + public static boolean isCombiningChar(int c) { return ((c >= 0x0300 && c <= 0x0345) || (c >= 0x0360 && c <= 0x0361) || @@ -3287,7 +3301,7 @@ c == 0x309A); } - public static boolean isExtender(char c) + public static boolean isExtender(int c) { return (c == 0x00B7 || c == 0x02D0 || @@ -3889,6 +3903,7 @@ InputStream in; Reader reader; + UnicodeReader unicodeReader; boolean initialized; String inputEncoding; boolean xml11; @@ -3929,7 +3944,10 @@ this.in = in; } else - this.reader = new CRLFReader(reader); + { + this.reader = new CRLFReader(reader); + unicodeReader = new UnicodeReader(this.reader); + } initialized = false; } @@ -3972,8 +3990,8 @@ markOffset = offset; markLine = line; markColumn = column; - if (reader != null) - reader.mark(len); + if (unicodeReader != null) + unicodeReader.mark(len); else in.mark(len); } @@ -3985,11 +4003,15 @@ throws IOException { offset++; - int ret = (reader != null) ? reader.read() : in.read(); + int ret = (unicodeReader != null) ? unicodeReader.read() : in.read(); //if (ret != -1) // System.out.println(" read1:"+((char) ret)); if (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))) - ret = 0x0a; + { + // Normalize CR etc to LF + ret = 0x0a; + } + // Locator handling if (ret == 0x0a) { line++; @@ -4003,12 +4025,12 @@ /** * Block read. 
*/ - int read(char[] b, int off, int len) + int read(int[] b, int off, int len) throws IOException { int ret; - if (reader != null) - ret = reader.read(b, off, len); + if (unicodeReader != null) + ret = unicodeReader.read(b, off, len); else { byte[] b2 = new byte[len]; @@ -4016,19 +4038,21 @@ if (ret != -1) { String s = new String(b2, 0, ret, inputEncoding); - char[] c = s.toCharArray(); + int[] c = UnicodeReader.toCodePointArray(s); ret = c.length; System.arraycopy(c, 0, b, off, ret); } } if (ret != -1) { + // Locator handling //System.out.println(" read:"+new String(b, off, ret)); for (int i = 0; i < ret; i++) { - char c = b[off + i]; + int c = b[off + i]; if (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))) { + // Normalize CR etc to LF c = 0x0a; b[off + i] = c; } @@ -4048,8 +4072,8 @@ throws IOException { //System.out.println(" reset"); - if (reader != null) - reader.reset(); + if (unicodeReader != null) + unicodeReader.reset(); else in.reset(); offset = markOffset; @@ -4178,8 +4202,8 @@ { if (reader != null) return; - //reader = new XMLInputStreamReader(in, inputEncoding); reader = new BufferedReader(new InputStreamReader(in, inputEncoding)); + unicodeReader = new UnicodeReader(reader); mark(1); } Index: resource/META-INF/services/org.xml.sax.driver =================================================================== RCS file: /cvsroot/classpath/classpath/resource/META-INF/services/org.xml.sax.driver,v retrieving revision 1.1 diff -u -r1.1 org.xml.sax.driver --- resource/META-INF/services/org.xml.sax.driver 25 May 2005 22:26:30 -0000 1.1 +++ resource/META-INF/services/org.xml.sax.driver 28 Dec 2005 19:17:51 -0000 @@ -1 +1 @@ -gnu.xml.aelfred2.XmlReader +gnu.xml.stream.SAXParser