diff -urN ORIG/com/jclark/xml/tok/EUC_JPEncoding.java com/jclark/xml/tok/EUC_JPEncoding.java
--- ORIG/com/jclark/xml/tok/EUC_JPEncoding.java	Thu Jan 1 09:00:00 1970
+++ com/jclark/xml/tok/EUC_JPEncoding.java	Fri Dec 25 12:08:54 1998
@@ -0,0 +1,388 @@
+package com.jclark.xml.tok;
+
+/**
+ * An Encoding for EUC-JP.
+ * @version $Revision$ $Date$
+ */
+final class EUC_JPEncoding extends Encoding {
+  private static final byte[] euc_jpHiTypeTable = {
+    /* 0x80 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x84 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x88 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x8C */ BT_NONXML, BT_NONXML, BT_LEAD2, BT_LEAD3,
+    /* 0x90 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x94 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x98 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x9C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xA0 */ BT_NONXML, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xA4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xA8 */ BT_LEAD2, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xAC */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xB0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xB4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xB8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xBC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xEC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xF0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xF4 */ BT_LEAD2, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xFC */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML
+  };
+
+  private static final byte[] euc_jpTypeTable = new byte[256];
+
+  static {
+    System.arraycopy(asciiTypeTable, 0, euc_jpTypeTable, 0, 128);
+    System.arraycopy(euc_jpHiTypeTable, 0, euc_jpTypeTable, 128, 128);
+  }
+
+  EUC_JPEncoding() {
+    super(1);
+  }
+
+  int byteType(byte[] buf, int off) {
+    return euc_jpTypeTable[buf[off] & 0xFF];
+  }
+
+  int byteToAscii(byte[] buf, int off) {
+    return (char)buf[off];
+  }
+
+  // c is a significant ASCII character
+  boolean charMatches(byte[] buf, int off, char c) {
+    return (char)buf[off] == c;
+  }
+
+  /*
+   * Classifies the two-byte sequence at buf[off]: BT_NAME for an
+   * Extender, BT_NMSTRT for a BaseChar or Ideographic, else BT_OTHER.
+   * Each case ends in break so a lead byte is tested against its own row only.
+   */
+  int byteType2(byte[] buf, int off) {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+
+    if (b1 == 0x8e) // JIS X 0201 Kana
+      return BT_OTHER;
+    // Extender
+    else if (b1 == 0xa1 && ((b2 >= 0xb3 && b2 <= 0xb6) ||
+                            b2 == 0xb9 ||
+                            b2 == 0xbc))
+      return BT_NAME;
+    // BaseChar and Ideographic
+    switch (b1) {
+    case 0xa1:
+      if (b2 == 0xb8 || b2 == 0xbb)
+        return BT_NMSTRT;
+      break;
+    case 0xa2:
+      if (b2 == 0xf2)
+        return BT_NMSTRT;
+      break;
+    case 0xa4:
+      if (b2 >= 0xa1 && b2 <= 0xf2)
+        return BT_NMSTRT;
+      break;
+    case 0xa5:
+      if (b2 >= 0xa1 && b2 <= 0xf6)
+        return BT_NMSTRT;
+      break;
+    case 0xa6:
+      if ((b2 >= 0xa1 && b2 <= 0xb8) ||
+          (b2 >= 0xc1 && b2 <= 0xd8))
+        return BT_NMSTRT;
+      break;
+    case 0xa7:
+      if ((b2 >= 0xa1 && b2 <= 0xc1) ||
+          (b2 >= 0xd1 && b2 <= 0xf1))
+        return BT_NMSTRT;
+      break;
+    case 0xcf:
+      if (b2 >= 0xa1 && b2 <= 0xd3)
+        return BT_NMSTRT;
+      break;
+    case 0xf4:
+      if (b2 >= 0xa1 && b2 <= 0xa6)
+        return BT_NMSTRT;
+      break;
+    default:
+      if (((b1 >= 0xb0 && b1 <= 0xce) && (b2 >= 0xa1 && b2 <= 0xfe)) ||
+          ((b1 >= 0xd0 && b1 <= 0xf3) && (b2 >= 0xa1 && b2 <= 0xfe)))
+        return BT_NMSTRT;
+    }
+    return BT_OTHER;
+  }
+
+  /*
+   * Classifies the three-byte sequence at buf[off]; the first byte must
+   * be 0x8f, otherwise BT_MALFORM is returned.
+   * Each case ends in break so a row byte is tested against its own ranges only.
+   */
+  int byteType3(byte[] buf, int off) {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+    int b3 = buf[off + 2] & 0xff;
+
+    if (b1 != 0x8f)
+      return BT_MALFORM;
+    // BaseChar and Ideographic
+    switch (b2) {
+    case 0xa6:
+      if ((b3 >= 0xe1 && b3 <= 0xe5) ||
+          b3 == 0xe7 ||
+          (b3 >= 0xe9 && b3 <= 0xea) ||
+          b3 == 0xec ||
+          (b3 >= 0xf1 && b3 <= 0xfc))
+        return BT_NMSTRT;
+      break;
+    case 0xa7:
+      if ((b3 >= 0xc2 && b3 <= 0xce) ||
+          (b3 >= 0xf2 && b3 <= 0xfe))
+        return BT_NMSTRT;
+      break;
+    case 0xa9:
+      if ((b3 >= 0xa1 && b3 <= 0xa2) ||
+          b3 == 0xa4 || b3 == 0xa8 ||
+          (b3 >= 0xab && b3 <= 0xad) ||
+          (b3 >= 0xaf && b3 <= 0xb0) ||
+          (b3 >= 0xc1 && b3 <= 0xc5) ||
+          (b3 >= 0xc7 && b3 <= 0xc8) ||
+          (b3 >= 0xcb && b3 <= 0xd0))
+        return BT_NMSTRT;
+      break;
+    case 0xaa:
+      if ((b3 >= 0xa1 && b3 <= 0xb8) ||
+          (b3 >= 0xba && b3 <= 0xf7))
+        return BT_NMSTRT;
+      break;
+    case 0xab:
+      if ((b3 >= 0xa1 && b3 <= 0xbb) ||
+          (b3 >= 0xbd && b3 <= 0xc3) ||
+          (b3 >= 0xc5 && b3 <= 0xf7))
+        return BT_NMSTRT;
+      break;
+    case 0xed:
+      if (b3 >= 0xa1 && b3 <= 0xe3)
+        return BT_NMSTRT;
+      break;
+    default:
+      if ((b2 >= 0xb0 && b2 <= 0xec) && (b3 >= 0xa1 && b3 <= 0xfe))
+        return BT_NMSTRT;
+    }
+    return BT_OTHER;
+  }
+
+  /*
+   * Throws InvalidTokenException unless the two-byte sequence at buf[off]
+   * is in one of the accepted ranges for its own lead byte.
+   */
+  void check2(byte[] buf, int off) throws InvalidTokenException {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+
+    // JIS X 0201 Kana
+    if (b1 == 0x8e && (b2 >= 0xa1 && b2 <= 0xdf))
+      return;
+    // JIS X 0208
+    switch (b1) {
+    case 0xa1:
+      if (b2 >= 0xa1 && b2 <= 0xfe)
+        return;
+      break;
+    case 0xa2:
+      if ((b2 >= 0xa1 && b2 <= 0xae) ||
+          (b2 >= 0xba && b2 <= 0xc1) ||
+          (b2 >= 0xca && b2 <= 0xd0) ||
+          (b2 >= 0xdc && b2 <= 0xea) ||
+          (b2 >= 0xf2 && b2 <= 0xf9) ||
+          b2 == 0xfe)
+        return;
+      break;
+    case 0xa3:
+      if ((b2 >= 0xb0 && b2 <= 0xb9) ||
+          (b2 >= 0xc1 && b2 <= 0xda) ||
+          (b2 >= 0xe1 && b2 <= 0xfa))
+        return;
+      break;
+    case 0xa4:
+      if (b2 >= 0xa1 && b2 <= 0xf3)
+        return;
+      break;
+    case 0xa5:
+      if (b2 >= 0xa1 && b2 <= 0xf6)
+        return;
+      break;
+    case 0xa6:
+      if ((b2 >= 0xa1 && b2 <= 0xb8) ||
+          (b2 >= 0xc1 && b2 <= 0xd8))
+        return;
+      break;
+    case 0xa7:
+      if ((b2 >= 0xa1 && b2 <= 0xc1) ||
+          (b2 >= 0xd1 && b2 <= 0xf1))
+        return;
+      break;
+    case 0xa8:
+      if (b2 >= 0xa1 && b2 <= 0xc0)
+        return;
+      break;
+    case 0xcf:
+      if (b2 >= 0xa1 && b2 <= 0xd3)
+        return;
+      break;
+    case 0xf4:
+      if (b2 >= 0xa1 && b2 <= 0xa6)
+        return;
+      break;
+    default:
+      if (((b1 >= 0xb0 && b1 <= 0xce) && (b2 >= 0xa1 && b2 <= 0xfe)) ||
+          ((b1 >= 0xd0 && b1 <= 0xf3) && (b2 >= 0xa1 && b2 <= 0xfe)))
+        return;
+    }
+    throw new InvalidTokenException(off);
+  }
+
+  /*
+   * Throws InvalidTokenException unless the three-byte sequence at buf[off]
+   * starts with 0x8f and is in one of the accepted ranges for its own row.
+   */
+  void check3(byte[] buf, int off) throws InvalidTokenException {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+    int b3 = buf[off + 2] & 0xff;
+
+    // JIS X 0212
+    if (b1 != 0x8f)
+      throw new InvalidTokenException(off);
+    switch (b2) {
+    case 0xa2:
+      if ((b3 >= 0xaf && b3 <= 0xb9) ||
+          (b3 >= 0xc2 && b3 <= 0xc4) ||
+          (b3 >= 0xeb && b3 <= 0xf1))
+        return;
+      break;
+    case 0xa6:
+      if ((b3 >= 0xe1 && b3 <= 0xe5) ||
+          b3 == 0xe7 ||
+          (b3 >= 0xe9 && b3 <= 0xea) ||
+          b3 == 0xec ||
+          (b3 >= 0xf1 && b3 <= 0xfc))
+        return;
+      break;
+    case 0xa7:
+      if ((b3 >= 0xc2 && b3 <= 0xce) ||
+          (b3 >= 0xf2 && b3 <= 0xfe))
+        return;
+      break;
+    case 0xa9:
+      if ((b3 >= 0xa1 && b3 <= 0xa2) ||
+          b3 == 0xa4 || b3 == 0xa6 ||
+          (b3 >= 0xa8 && b3 <= 0xa9) ||
+          (b3 >= 0xab && b3 <= 0xad) ||
+          (b3 >= 0xaf && b3 <= 0xb0) ||
+          (b3 >= 0xc1 && b3 <= 0xd0))
+        return;
+      break;
+    case 0xaa:
+      if ((b3 >= 0xa1 && b3 <= 0xb8) ||
+          (b3 >= 0xba && b3 <= 0xf7))
+        return;
+      break;
+    case 0xab:
+      if ((b3 >= 0xa1 && b3 <= 0xbb) ||
+          (b3 >= 0xbd && b3 <= 0xc3) ||
+          (b3 >= 0xc5 && b3 <= 0xf7))
+        return;
+      break;
+    case 0xed:
+      if (b3 >= 0xa1 && b3 <= 0xe3)
+        return;
+      break;
+    default:
+      if ((b2 >= 0xb0 && b2 <= 0xec) && (b3 >= 0xa1 && b3 <= 0xfe))
+        return;
+    }
+    throw new InvalidTokenException(off);
+  }
+
+  void check4(byte[] buf, int off) throws InvalidTokenException {
+    throw new InvalidTokenException(off);
+  }
+
+  /*
+   * Decodes sourceBuf[sourceStart..sourceEnd) with the "EUCJIS" converter
+   * into targetBuf from targetStart and returns the number of chars written.
+   * Returns 0 if the runtime does not know that converter name.
+   */
+  public int convert(byte[] sourceBuf, int sourceStart, int sourceEnd,
+                     char[] targetBuf, int targetStart) {
+    String target = null;
+    try {
+      target = new String(sourceBuf, sourceStart,
+                          sourceEnd - sourceStart, "EUCJIS");
+    }
+    catch (java.io.UnsupportedEncodingException e) {
+      return 0;
+    }
+    for (int i = 0; i < target.length(); i++) {
+      targetBuf[targetStart + i] = target.charAt(i);
+    }
+    return target.length();
+  }
+
+  public int getFixedBytesPerChar() {
+    return 0;
+  }
+
+  public void movePosition(final byte[] buf, int off, int end, Position pos) {
+    /* Maintain the invariant: off - colDiff == colNumber. */
+    int colDiff = off - pos.columnNumber;
+    int lineNumber = pos.lineNumber;
+    while (off != end) {
+      byte b = buf[off];
+      if (b >= 0) {
+        ++off;
+        switch (b) {
+        case (byte)'\n':
+          lineNumber += 1;
+          colDiff = off;
+          break;
+        case (byte)'\r':
+          lineNumber += 1;
+          if (off != end && buf[off] == '\n')
+            off++;
+          colDiff = off;
+          break;
+        }
+      }
+      else {
+        switch (euc_jpTypeTable[b & 0xFF]) {
+        default:
+          off += 1;
+          break;
+        case BT_LEAD2:
+          off += 2;
+          colDiff++;
+          break;
+        case BT_LEAD3:
+          off += 3;
+          colDiff += 2;
+          break;
+        }
+      }
+    }
+    pos.columnNumber = off - colDiff;
+    pos.lineNumber = lineNumber;
+  }
+}
diff -urN ORIG/com/jclark/xml/tok/Encoding.java com/jclark/xml/tok/Encoding.java
--- ORIG/com/jclark/xml/tok/Encoding.java	Tue Jun 16 10:57:02 1998
+++ com/jclark/xml/tok/Encoding.java	Wed Dec 23 22:56:11 1998
@@ -300,6 +300,8 @@
   private static Encoding internalEncoding;
   private static Encoding iso8859_1Encoding;
   private static Encoding asciiEncoding;
+  private static Encoding euc_jpEncoding;
+  private static Encoding shift_jisEncoding;
 
   private static final byte UTF8_ENCODING = 0;
   private static final byte UTF16_LITTLE_ENDIAN_ENCODING = 1;
@@ -307,6 +309,8 @@
   private static final byte INTERNAL_ENCODING = 3;
   private static final byte ISO8859_1_ENCODING = 4;
   private static final byte ASCII_ENCODING = 5;
+  private static final byte EUC_JP_ENCODING = 16;
+  private static final byte SHIFT_JIS_ENCODING = 17;
 
   private static synchronized Encoding getEncoding(byte enc) {
     switch (enc) {
@@ -334,6 +338,14 @@
       if (asciiEncoding == null)
         asciiEncoding = new ASCIIEncoding();
       return asciiEncoding;
+    case EUC_JP_ENCODING:
+      if (euc_jpEncoding == null)
+        euc_jpEncoding = new EUC_JPEncoding();
+      return euc_jpEncoding;
+    case SHIFT_JIS_ENCODING:
+      if (shift_jisEncoding == null)
+        shift_jisEncoding = new Shift_JISEncoding();
+      return shift_jisEncoding;
     }
     return null;
   }
@@ -2067,6 +2079,10 @@
       return getEncoding(ISO8859_1_ENCODING);
     if (name.equalsIgnoreCase("US-ASCII"))
       return getEncoding(ASCII_ENCODING);
+    if (name.equalsIgnoreCase("EUC-JP"))
+      return getEncoding(EUC_JP_ENCODING);
+    if (name.equalsIgnoreCase("Shift_JIS"))
+      return getEncoding(SHIFT_JIS_ENCODING);
     return null;
   }
 
diff -urN ORIG/com/jclark/xml/tok/Shift_JISEncoding.java com/jclark/xml/tok/Shift_JISEncoding.java
--- ORIG/com/jclark/xml/tok/Shift_JISEncoding.java	Thu Jan 1 09:00:00 1970
+++ com/jclark/xml/tok/Shift_JISEncoding.java	Fri Dec 25 12:08:25 1998
@@ -0,0 +1,264 @@
+package com.jclark.xml.tok;
+
+/**
+ * An Encoding for Shift_JIS.
+ * @version $Revision$ $Date$
+ */
+final class Shift_JISEncoding extends Encoding {
+  private static final byte[] shift_jisHiTypeTable = {
+    /* 0x80 */ BT_NONXML, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x84 */ BT_LEAD2, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x88 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x8C */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x90 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x94 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x98 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x9C */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xA0 */ BT_NONXML, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xA8 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xB4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xB8 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xC0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xC4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xC8 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xCC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xD0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xD4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xD8 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xDC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xE0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_NONXML,
+    /* 0xEC */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xF0 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xF4 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xFC */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML
+  };
+
+  private static final byte[] shift_jisTypeTable = new byte[256];
+
+  static {
+    System.arraycopy(asciiTypeTable, 0, shift_jisTypeTable, 0, 128);
+    System.arraycopy(shift_jisHiTypeTable, 0, shift_jisTypeTable, 128, 128);
+  }
+
+  Shift_JISEncoding() {
+    super(1);
+  }
+
+  int byteType(byte[] buf, int off) {
+    return shift_jisTypeTable[buf[off] & 0xFF];
+  }
+
+  int byteToAscii(byte[] buf, int off) {
+    return (char)buf[off];
+  }
+
+  // c is a significant ASCII character
+  boolean charMatches(byte[] buf, int off, char c) {
+    return (char)buf[off] == c;
+  }
+
+  /*
+   * Classifies the two-byte sequence at buf[off]: BT_NAME for an
+   * Extender, BT_NMSTRT for a BaseChar or Ideographic, else BT_OTHER.
+   * Each case ends in break so a lead byte is tested against its own row only.
+   */
+  int byteType2(byte[] buf, int off) {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+
+    // Extender
+    if (b1 == 0x81 && ((b2 >= 0x52 && b2 <= 0x55) ||
+                       (b2 == 0x58) ||
+                       (b2 == 0x5b)))
+      return BT_NAME;
+
+    // BaseChar and Ideographic
+    switch (b1) {
+    case 0x81:
+      if (b2 == 0x57 || b2 == 0x5a || b2 == 0xf0)
+        return BT_NMSTRT;
+      break;
+    case 0x82:
+      if (b2 >= 0x9f && b2 <= 0xf1)
+        return BT_NMSTRT;
+      break;
+    case 0x83:
+      if ((b2 >= 0x40 && b2 <= 0x7e) ||
+          (b2 >= 0x80 && b2 <= 0x96) ||
+          (b2 >= 0x9f && b2 <= 0xb6) ||
+          (b2 >= 0xbf && b2 <= 0xd6))
+        return BT_NMSTRT;
+      break;
+    case 0x84:
+      if ((b2 >= 0x40 && b2 <= 0x60) ||
+          (b2 >= 0x70 && b2 <= 0x7e) ||
+          (b2 >= 0x80 && b2 <= 0x91))
+        return BT_NMSTRT;
+      break;
+    case 0x88:
+      if (b2 >= 0x9f && b2 <= 0xfc)
+        return BT_NMSTRT;
+      break;
+    case 0x98:
+      if ((b2 >= 0x40 && b2 <= 0x72) ||
+          (b2 >= 0x9f && b2 <= 0xfc))
+        return BT_NMSTRT;
+      break;
+    case 0xea:
+      if ((b2 >= 0x40 && b2 <= 0x7e) ||
+          (b2 >= 0x80 && b2 <= 0xa4))
+        return BT_NMSTRT;
+      break;
+    default:
+      if ((((b1 >= 0x89 && b1 <= 0x97) ||
+            (b1 >= 0x99 && b1 <= 0x9f) ||
+            (b1 >= 0xe0 && b1 <= 0xe9)) &&
+           ((b2 >= 0x40 && b2 <= 0x7e) ||
+            (b2 >= 0x80 && b2 <= 0xfc))))
+        return BT_NMSTRT;
+    }
+    return BT_OTHER;
+  }
+
+  /*
+   * Throws InvalidTokenException unless the two-byte sequence at buf[off]
+   * is in one of the accepted ranges for its own lead byte.
+   */
+  void check2(byte[] buf, int off) throws InvalidTokenException {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+
+    switch (b1) {
+    case 0x81:
+      if ((b2 >= 0x40 && b2 <= 0x7e) ||
+          (b2 >= 0x80 && b2 <= 0xac) ||
+          (b2 >= 0xb8 && b2 <= 0xbf) ||
+          (b2 >= 0xc8 && b2 <= 0xce) ||
+          (b2 >= 0xda && b2 <= 0xe8) ||
+          (b2 >= 0xf0 && b2 <= 0xf7) ||
+          b2 == 0xfc)
+        return;
+      break;
+    case 0x82:
+      if ((b2 >= 0x4f && b2 <= 0x58) ||
+          (b2 >= 0x60 && b2 <= 0x79) ||
+          (b2 >= 0x81 && b2 <= 0x9a) ||
+          (b2 >= 0x9f && b2 <= 0xf1))
+        return;
+      break;
+    case 0x83:
+      if ((b2 >= 0x40 && b2 <= 0x7e) ||
+          (b2 >= 0x80 && b2 <= 0x96) ||
+          (b2 >= 0x9f && b2 <= 0xb6) ||
+          (b2 >= 0xbf && b2 <= 0xd6))
+        return;
+      break;
+    case 0x84:
+      if ((b2 >= 0x40 && b2 <= 0x60) ||
+          (b2 >= 0x70 && b2 <= 0x7e) ||
+          (b2 >= 0x80 && b2 <= 0x91) ||
+          (b2 >= 0x9f && b2 <= 0xbe))
+        return;
+      break;
+    case 0x88:
+      if (b2 >= 0x9f && b2 <= 0xfc)
+        return;
+      break;
+    case 0x98:
+      if ((b2 >= 0x40 && b2 <= 0x72) ||
+          (b2 >= 0x9f && b2 <= 0xfc))
+        return;
+      break;
+    case 0xea:
+      if ((b2 >= 0x40 && b2 <= 0x7e) ||
+          (b2 >= 0x80 && b2 <= 0xa4))
+        return;
+      break;
+    default:
+      if (((b1 >= 0x89 && b1 <= 0x97) ||
+           (b1 >= 0x99 && b1 <= 0x9f) ||
+           (b1 >= 0xe0 && b1 <= 0xe9)) &&
+          ((b2 >= 0x40 && b2 <= 0x7e) ||
+           (b2 >= 0x80 && b2 <= 0xfc)))
+        return;
+    }
+    throw new InvalidTokenException(off);
+  }
+
+  void check3(byte[] buf, int off) throws InvalidTokenException {
+    throw new InvalidTokenException(off);
+  }
+
+  void check4(byte[] buf, int off) throws InvalidTokenException {
+    throw new InvalidTokenException(off);
+  }
+
+  /*
+   * Decodes sourceBuf[sourceStart..sourceEnd) with the "SJIS" converter
+   * into targetBuf from targetStart and returns the number of chars written.
+   * Returns 0 if the runtime does not know that converter name.
+   */
+  public int convert(byte[] sourceBuf, int sourceStart, int sourceEnd,
+                     char[] targetBuf, int targetStart) {
+    String target = null;
+    try {
+      target = new String(sourceBuf, sourceStart,
+                          sourceEnd - sourceStart, "SJIS");
+    }
+    catch (java.io.UnsupportedEncodingException e) {
+      return 0;
+    }
+    for (int i = 0; i < target.length(); i++) {
+      targetBuf[targetStart + i] = target.charAt(i);
+    }
+    return target.length();
+  }
+
+  public int getFixedBytesPerChar() {
+    return 0;
+  }
+
+  public void movePosition(final byte[] buf, int off, int end, Position pos) {
+    /* Maintain the invariant: off - colDiff == colNumber. */
+    int colDiff = off - pos.columnNumber;
+    int lineNumber = pos.lineNumber;
+    while (off != end) {
+      byte b = buf[off];
+      if (b >= 0) {
+        ++off;
+        switch (b) {
+        case (byte)'\n':
+          lineNumber += 1;
+          colDiff = off;
+          break;
+        case (byte)'\r':
+          lineNumber += 1;
+          if (off != end && buf[off] == '\n')
+            off++;
+          colDiff = off;
+          break;
+        }
+      }
+      else {
+        switch (shift_jisTypeTable[b & 0xFF]) {
+        default:
+          off += 1;
+          break;
+        case BT_LEAD2:
+          off += 2;
+          colDiff++;
+          break;
+        }
+      }
+    }
+    pos.columnNumber = off - colDiff;
+    pos.lineNumber = lineNumber;
+  }
+}