diff -urN ORIG/com/jclark/xml/tok/EUC_JPEncoding.java com/jclark/xml/tok/EUC_JPEncoding.java
--- ORIG/com/jclark/xml/tok/EUC_JPEncoding.java Thu Jan 1 09:00:00 1970
+++ com/jclark/xml/tok/EUC_JPEncoding.java Fri Dec 25 12:08:54 1998
@@ -0,0 +1,381 @@
+package com.jclark.xml.tok;
+
+/**
+ * An Encoding for EUC-JP.
+ *
+ * <p>Bytes below 0x80 are typed with the shared ASCII table. 0x8E leads a
+ * two-byte JIS X 0201 kana sequence, 0x8F leads a three-byte JIS X 0212
+ * sequence, and the other lead bytes start two-byte JIS X 0208 sequences.
+ *
+ * @version $Revision$ $Date$
+ */
+final class EUC_JPEncoding extends Encoding {
+  /** Byte types for the values 0x80 to 0xFF, four entries per row. */
+  private static final byte[] euc_jpHiTypeTable = {
+    /* 0x80 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x84 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x88 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x8C */ BT_NONXML, BT_NONXML, BT_LEAD2, BT_LEAD3,
+    /* 0x90 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x94 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x98 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x9C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xA0 */ BT_NONXML, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xA4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xA8 */ BT_LEAD2, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xAC */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xB0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xB4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xB8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xBC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xEC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xF0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xF4 */ BT_LEAD2, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xFC */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML
+  };
+
+  /** Byte types for all 256 values: ASCII below 0x80, the table above from 0x80. */
+  private static final byte[] euc_jpTypeTable = new byte[256];
+
+  static {
+    System.arraycopy(asciiTypeTable, 0, euc_jpTypeTable, 0, 128);
+    System.arraycopy(euc_jpHiTypeTable, 0, euc_jpTypeTable, 128, 128);
+  }
+
+  EUC_JPEncoding() {
+    // NOTE(review): 1 is presumably the minimum bytes per character; confirm in Encoding.
+    super(1);
+  }
+
+  /** Returns the byte type of the byte at buf[off], looked up by its unsigned value. */
+  int byteType(byte[] buf, int off) {
+    return euc_jpTypeTable[buf[off] & 0xFF];
+  }
+
+  /** Returns the byte at buf[off] cast to char; presumably only called on ASCII bytes. */
+  int byteToAscii(byte[] buf, int off) {
+    return (char)buf[off];
+  }
+
+  // c is a significant ASCII character
+  boolean charMatches(byte[] buf, int off, char c) {
+    return (char)buf[off] == c;
+  }
+
+  /**
+   * Classifies the two-byte character that starts at buf[off].
+   *
+   * @return BT_NAME for an extender, BT_NMSTRT for a base character or
+   * ideograph, and BT_OTHER for everything else
+   */
+  int byteType2(byte[] buf, int off) {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+
+    if (b1 == 0x8e) // JIS X 0201 Kana
+      return BT_OTHER;
+    // Extender
+    if (b1 == 0xa1 && ((b2 >= 0xb3 && b2 <= 0xb6) ||
+                       b2 == 0xb9 ||
+                       b2 == 0xbc))
+      return BT_NAME;
+    // BaseChar and Ideographic
+    return isJisX0208NameStart(b1, b2) ? BT_NMSTRT : BT_OTHER;
+  }
+
+  /**
+   * Tells whether the two-byte character (b1, b2) is a BaseChar or an
+   * Ideographic. Every row returns its own verdict, so the ranges of one
+   * row are never applied to another lead byte.
+   */
+  private static boolean isJisX0208NameStart(int b1, int b2) {
+    switch (b1) {
+    case 0xa1:
+      return b2 == 0xb8 || b2 == 0xbb;
+    case 0xa2:
+      return b2 == 0xf2;
+    case 0xa4:
+      // Through 0xf3, the same upper bound check2 accepts for this row;
+      // stopping at 0xf2 left the last character out of name-start.
+      return b2 >= 0xa1 && b2 <= 0xf3;
+    case 0xa5:
+      return b2 >= 0xa1 && b2 <= 0xf6;
+    case 0xa6:
+      return (b2 >= 0xa1 && b2 <= 0xb8) ||
+             (b2 >= 0xc1 && b2 <= 0xd8);
+    case 0xa7:
+      return (b2 >= 0xa1 && b2 <= 0xc1) ||
+             (b2 >= 0xd1 && b2 <= 0xf1);
+    case 0xcf:
+      return b2 >= 0xa1 && b2 <= 0xd3;
+    case 0xf4:
+      return b2 >= 0xa1 && b2 <= 0xa6;
+    default:
+      return ((b1 >= 0xb0 && b1 <= 0xce) || (b1 >= 0xd0 && b1 <= 0xf3)) &&
+             (b2 >= 0xa1 && b2 <= 0xfe);
+    }
+  }
+
+  /**
+   * Classifies the three-byte character that starts at buf[off].
+   *
+   * @return BT_MALFORM when the lead byte is not 0x8F, BT_NMSTRT for a
+   * base character or ideograph, and BT_OTHER for everything else
+   */
+  int byteType3(byte[] buf, int off) {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+    int b3 = buf[off + 2] & 0xff;
+
+    if (b1 != 0x8f)
+      return BT_MALFORM;
+    // BaseChar and Ideographic
+    return isJisX0212NameStart(b2, b3) ? BT_NMSTRT : BT_OTHER;
+  }
+
+  /**
+   * Tells whether the character (b2, b3) that follows the 0x8F lead byte is
+   * a BaseChar or an Ideographic. Every row returns its own verdict.
+   */
+  private static boolean isJisX0212NameStart(int b2, int b3) {
+    switch (b2) {
+    case 0xa6:
+      return (b3 >= 0xe1 && b3 <= 0xe5) ||
+             b3 == 0xe7 ||
+             (b3 >= 0xe9 && b3 <= 0xea) ||
+             b3 == 0xec ||
+             (b3 >= 0xf1 && b3 <= 0xfc);
+    case 0xa7:
+      return (b3 >= 0xc2 && b3 <= 0xce) ||
+             (b3 >= 0xf2 && b3 <= 0xfe);
+    case 0xa9:
+      return (b3 >= 0xa1 && b3 <= 0xa2) ||
+             b3 == 0xa4 || b3 == 0xa8 ||
+             (b3 >= 0xab && b3 <= 0xad) ||
+             (b3 >= 0xaf && b3 <= 0xb0) ||
+             (b3 >= 0xc1 && b3 <= 0xc5) ||
+             (b3 >= 0xc7 && b3 <= 0xc8) ||
+             (b3 >= 0xcb && b3 <= 0xd0);
+    case 0xaa:
+      return (b3 >= 0xa1 && b3 <= 0xb8) ||
+             (b3 >= 0xba && b3 <= 0xf7);
+    case 0xab:
+      return (b3 >= 0xa1 && b3 <= 0xbb) ||
+             (b3 >= 0xbd && b3 <= 0xc3) ||
+             (b3 >= 0xc5 && b3 <= 0xf7);
+    case 0xed:
+      return b3 >= 0xa1 && b3 <= 0xe3;
+    default:
+      return (b2 >= 0xb0 && b2 <= 0xec) && (b3 >= 0xa1 && b3 <= 0xfe);
+    }
+  }
+
+  /**
+   * Verifies that the two bytes at buf[off] lie in the accepted JIS X 0201
+   * kana range or in one of the accepted JIS X 0208 ranges.
+   *
+   * @throws InvalidTokenException if they do not
+   */
+  void check2(byte[] buf, int off) throws InvalidTokenException {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+
+    // JIS X 0201 Kana
+    if (b1 == 0x8e && (b2 >= 0xa1 && b2 <= 0xdf))
+      return;
+    // JIS X 0208
+    if (!isJisX0208Char(b1, b2))
+      throw new InvalidTokenException(off);
+  }
+
+  /**
+   * Tells whether (b1, b2) lies in one of the accepted JIS X 0208 ranges.
+   * Every row returns its own verdict, so the ranges of one row are never
+   * applied to another lead byte.
+   */
+  private static boolean isJisX0208Char(int b1, int b2) {
+    switch (b1) {
+    case 0xa1:
+      return b2 >= 0xa1 && b2 <= 0xfe;
+    case 0xa2:
+      return (b2 >= 0xa1 && b2 <= 0xae) ||
+             (b2 >= 0xba && b2 <= 0xc1) ||
+             (b2 >= 0xca && b2 <= 0xd0) ||
+             (b2 >= 0xdc && b2 <= 0xea) ||
+             (b2 >= 0xf2 && b2 <= 0xf9) ||
+             b2 == 0xfe;
+    case 0xa3:
+      return (b2 >= 0xb0 && b2 <= 0xb9) ||
+             (b2 >= 0xc1 && b2 <= 0xda) ||
+             (b2 >= 0xe1 && b2 <= 0xfa);
+    case 0xa4:
+      return b2 >= 0xa1 && b2 <= 0xf3;
+    case 0xa5:
+      return b2 >= 0xa1 && b2 <= 0xf6;
+    case 0xa6:
+      return (b2 >= 0xa1 && b2 <= 0xb8) ||
+             (b2 >= 0xc1 && b2 <= 0xd8);
+    case 0xa7:
+      return (b2 >= 0xa1 && b2 <= 0xc1) ||
+             (b2 >= 0xd1 && b2 <= 0xf1);
+    case 0xa8:
+      return b2 >= 0xa1 && b2 <= 0xc0;
+    case 0xcf:
+      return b2 >= 0xa1 && b2 <= 0xd3;
+    case 0xf4:
+      return b2 >= 0xa1 && b2 <= 0xa6;
+    default:
+      return ((b1 >= 0xb0 && b1 <= 0xce) || (b1 >= 0xd0 && b1 <= 0xf3)) &&
+             (b2 >= 0xa1 && b2 <= 0xfe);
+    }
+  }
+
+  /**
+   * Verifies that the three bytes at buf[off] are a 0x8F lead byte followed
+   * by a pair inside one of the accepted JIS X 0212 ranges.
+   *
+   * @throws InvalidTokenException if they are not
+   */
+  void check3(byte[] buf, int off) throws InvalidTokenException {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+    int b3 = buf[off + 2] & 0xff;
+
+    // JIS X 0212
+    if (b1 != 0x8f || !isJisX0212Char(b2, b3))
+      throw new InvalidTokenException(off);
+  }
+
+  /**
+   * Tells whether (b2, b3) lies in one of the accepted JIS X 0212 ranges.
+   * Every row returns its own verdict.
+   */
+  private static boolean isJisX0212Char(int b2, int b3) {
+    switch (b2) {
+    case 0xa2:
+      return (b3 >= 0xaf && b3 <= 0xb9) ||
+             (b3 >= 0xc2 && b3 <= 0xc4) ||
+             (b3 >= 0xeb && b3 <= 0xf1);
+    case 0xa6:
+      return (b3 >= 0xe1 && b3 <= 0xe5) ||
+             b3 == 0xe7 ||
+             (b3 >= 0xe9 && b3 <= 0xea) ||
+             b3 == 0xec ||
+             (b3 >= 0xf1 && b3 <= 0xfc);
+    case 0xa7:
+      return (b3 >= 0xc2 && b3 <= 0xce) ||
+             (b3 >= 0xf2 && b3 <= 0xfe);
+    case 0xa9:
+      return (b3 >= 0xa1 && b3 <= 0xa2) ||
+             b3 == 0xa4 || b3 == 0xa6 ||
+             (b3 >= 0xa8 && b3 <= 0xa9) ||
+             (b3 >= 0xab && b3 <= 0xad) ||
+             (b3 >= 0xaf && b3 <= 0xb0) ||
+             (b3 >= 0xc1 && b3 <= 0xd0);
+    case 0xaa:
+      return (b3 >= 0xa1 && b3 <= 0xb8) ||
+             (b3 >= 0xba && b3 <= 0xf7);
+    case 0xab:
+      return (b3 >= 0xa1 && b3 <= 0xbb) ||
+             (b3 >= 0xbd && b3 <= 0xc3) ||
+             (b3 >= 0xc5 && b3 <= 0xf7);
+    case 0xed:
+      return b3 >= 0xa1 && b3 <= 0xe3;
+    default:
+      return (b2 >= 0xb0 && b2 <= 0xec) && (b3 >= 0xa1 && b3 <= 0xfe);
+    }
+  }
+
+  /** Always throws: the type table of this encoding has no four-byte lead byte. */
+  void check4(byte[] buf, int off) throws InvalidTokenException {
+    throw new InvalidTokenException(off);
+  }
+
+  /**
+   * Decodes sourceBuf[sourceStart..sourceEnd) with the JDK "EUCJIS"
+   * converter and stores the characters from targetBuf[targetStart] on.
+   *
+   * @return the number of characters stored
+   */
+  public int convert(byte[] sourceBuf, int sourceStart, int sourceEnd,
+                     char[] targetBuf, int targetStart) {
+    String target;
+    try {
+      target = new String(sourceBuf, sourceStart,
+                          sourceEnd - sourceStart, "EUCJIS");
+    }
+    catch (java.io.UnsupportedEncodingException e) {
+      // NOTE(review): without the converter nothing is stored and 0 is
+      // returned, so the caller cannot tell this apart from empty input.
+      return 0;
+    }
+    int length = target.length();
+    target.getChars(0, length, targetBuf, targetStart);
+    return length;
+  }
+
+  /** Returns 0. NOTE(review): presumably means "not fixed-width"; confirm in Encoding. */
+  public int getFixedBytesPerChar() {
+    return 0;
+  }
+
+  /**
+   * Advances pos over buf[off..end). A line feed, a carriage return, or a
+   * carriage return directly followed by a line feed counts as one line
+   * break; a multi-byte character counts as one column.
+   */
+  public void movePosition(final byte[] buf, int off, int end, Position pos) {
+    /* Maintain the invariant: off - colDiff == colNumber. */
+    int colDiff = off - pos.columnNumber;
+    int lineNumber = pos.lineNumber;
+    // "<" rather than "!=" so that a character cut off at end cannot carry
+    // off past end and keep the loop reading beyond the range.
+    while (off < end) {
+      byte b = buf[off];
+      if (b >= 0) {
+        ++off;
+        switch (b) {
+        case (byte)'\n':
+          lineNumber += 1;
+          colDiff = off;
+          break;
+        case (byte)'\r':
+          lineNumber += 1;
+          if (off < end && buf[off] == '\n')
+            off++;
+          colDiff = off;
+          break;
+        }
+      }
+      else {
+        switch (euc_jpTypeTable[b & 0xFF]) {
+        default:
+          off += 1;
+          break;
+        case BT_LEAD2:
+          off += 2;
+          colDiff++;
+          break;
+        case BT_LEAD3:
+          off += 3;
+          colDiff += 2;
+          break;
+        }
+      }
+    }
+    pos.columnNumber = off - colDiff;
+    pos.lineNumber = lineNumber;
+  }
+}
diff -urN ORIG/com/jclark/xml/tok/Encoding.java com/jclark/xml/tok/Encoding.java
--- ORIG/com/jclark/xml/tok/Encoding.java Tue Jun 16 10:57:02 1998
+++ com/jclark/xml/tok/Encoding.java Wed Dec 23 22:56:11 1998
@@ -300,6 +300,8 @@
private static Encoding internalEncoding;
private static Encoding iso8859_1Encoding;
private static Encoding asciiEncoding;
+ private static Encoding euc_jpEncoding;
+ private static Encoding shift_jisEncoding;
private static final byte UTF8_ENCODING = 0;
private static final byte UTF16_LITTLE_ENDIAN_ENCODING = 1;
@@ -307,6 +309,8 @@
private static final byte INTERNAL_ENCODING = 3;
private static final byte ISO8859_1_ENCODING = 4;
private static final byte ASCII_ENCODING = 5;
+ private static final byte EUC_JP_ENCODING = 16;
+ private static final byte SHIFT_JIS_ENCODING = 17;
private static synchronized Encoding getEncoding(byte enc) {
switch (enc) {
@@ -334,6 +338,14 @@
if (asciiEncoding == null)
asciiEncoding = new ASCIIEncoding();
return asciiEncoding;
+ case EUC_JP_ENCODING:
+ if (euc_jpEncoding == null)
+ euc_jpEncoding = new EUC_JPEncoding();
+ return euc_jpEncoding;
+ case SHIFT_JIS_ENCODING:
+ if (shift_jisEncoding == null)
+ shift_jisEncoding = new Shift_JISEncoding();
+ return shift_jisEncoding;
}
return null;
}
@@ -2067,6 +2079,10 @@
return getEncoding(ISO8859_1_ENCODING);
if (name.equalsIgnoreCase("US-ASCII"))
return getEncoding(ASCII_ENCODING);
+ if (name.equalsIgnoreCase("EUC-JP"))
+ return getEncoding(EUC_JP_ENCODING);
+ if (name.equalsIgnoreCase("Shift_JIS"))
+ return getEncoding(SHIFT_JIS_ENCODING);
return null;
}
diff -urN ORIG/com/jclark/xml/tok/Shift_JISEncoding.java com/jclark/xml/tok/Shift_JISEncoding.java
--- ORIG/com/jclark/xml/tok/Shift_JISEncoding.java Thu Jan 1 09:00:00 1970
+++ com/jclark/xml/tok/Shift_JISEncoding.java Fri Dec 25 12:08:25 1998
@@ -0,0 +1,276 @@
+package com.jclark.xml.tok;
+
+/**
+ * An Encoding for Shift_JIS.
+ *
+ * <p>Bytes below 0x80 are typed with the shared ASCII table, 0xA1 to 0xDF
+ * are single-byte characters, and the lead bytes 0x81 to 0x84, 0x88 to 0x9F
+ * and 0xE0 to 0xEA start two-byte characters.
+ *
+ * @version $Revision$ $Date$
+ */
+final class Shift_JISEncoding extends Encoding {
+  /** Byte types for the values 0x80 to 0xFF, four entries per row. */
+  private static final byte[] shift_jisHiTypeTable = {
+    /* 0x80 */ BT_NONXML, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x84 */ BT_LEAD2, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0x88 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x8C */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x90 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x94 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x98 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0x9C */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xA0 */ BT_NONXML, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xA8 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xB4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xB8 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xC0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xC4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xC8 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xCC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xD0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xD4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xD8 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xDC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+    /* 0xE0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+    /* 0xE8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_NONXML,
+    /* 0xEC */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xF0 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xF4 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+    /* 0xFC */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML
+  };
+
+  /** Byte types for all 256 values: ASCII below 0x80, the table above from 0x80. */
+  private static final byte[] shift_jisTypeTable = new byte[256];
+
+  static {
+    System.arraycopy(asciiTypeTable, 0, shift_jisTypeTable, 0, 128);
+    System.arraycopy(shift_jisHiTypeTable, 0, shift_jisTypeTable, 128, 128);
+  }
+
+  Shift_JISEncoding() {
+    // NOTE(review): 1 is presumably the minimum bytes per character; confirm in Encoding.
+    super(1);
+  }
+
+  /** Returns the byte type of the byte at buf[off], looked up by its unsigned value. */
+  int byteType(byte[] buf, int off) {
+    return shift_jisTypeTable[buf[off] & 0xFF];
+  }
+
+  /** Returns the byte at buf[off] cast to char; presumably only called on ASCII bytes. */
+  int byteToAscii(byte[] buf, int off) {
+    return (char)buf[off];
+  }
+
+  // c is a significant ASCII character
+  boolean charMatches(byte[] buf, int off, char c) {
+    return (char)buf[off] == c;
+  }
+
+  /**
+   * Classifies the two-byte character that starts at buf[off].
+   *
+   * @return BT_NAME for an extender, BT_NMSTRT for a base character or
+   * ideograph, and BT_OTHER for everything else
+   */
+  int byteType2(byte[] buf, int off) {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+
+    // Extender
+    if (b1 == 0x81 && ((b2 >= 0x52 && b2 <= 0x55) ||
+                       (b2 == 0x58) ||
+                       (b2 == 0x5b)))
+      return BT_NAME;
+
+    // BaseChar and Ideographic
+    return isShiftJisNameStart(b1, b2) ? BT_NMSTRT : BT_OTHER;
+  }
+
+  /**
+   * Tells whether the two-byte character (b1, b2) is a BaseChar or an
+   * Ideographic. Every lead byte returns its own verdict, so the ranges of
+   * one lead byte are never applied to another.
+   */
+  private static boolean isShiftJisNameStart(int b1, int b2) {
+    switch (b1) {
+    case 0x81:
+      return b2 == 0x57 || b2 == 0x5a || b2 == 0xf0;
+    case 0x82:
+      return b2 >= 0x9f && b2 <= 0xf1;
+    case 0x83:
+      return (b2 >= 0x40 && b2 <= 0x7e) ||
+             (b2 >= 0x80 && b2 <= 0x96) ||
+             (b2 >= 0x9f && b2 <= 0xb6) ||
+             (b2 >= 0xbf && b2 <= 0xd6);
+    case 0x84:
+      return (b2 >= 0x40 && b2 <= 0x60) ||
+             (b2 >= 0x70 && b2 <= 0x7e) ||
+             (b2 >= 0x80 && b2 <= 0x91);
+    case 0x88:
+      return b2 >= 0x9f && b2 <= 0xfc;
+    case 0x98:
+      return (b2 >= 0x40 && b2 <= 0x72) ||
+             (b2 >= 0x9f && b2 <= 0xfc);
+    case 0xea:
+      return (b2 >= 0x40 && b2 <= 0x7e) ||
+             (b2 >= 0x80 && b2 <= 0xa4);
+    default:
+      return ((b1 >= 0x89 && b1 <= 0x97) ||
+              (b1 >= 0x99 && b1 <= 0x9f) ||
+              (b1 >= 0xe0 && b1 <= 0xe9)) &&
+             ((b2 >= 0x40 && b2 <= 0x7e) ||
+              (b2 >= 0x80 && b2 <= 0xfc));
+    }
+  }
+
+  /**
+   * Verifies that the two bytes at buf[off] lie in one of the accepted
+   * two-byte ranges.
+   *
+   * @throws InvalidTokenException if they do not
+   */
+  void check2(byte[] buf, int off) throws InvalidTokenException {
+    int b1 = buf[off] & 0xff;
+    int b2 = buf[off + 1] & 0xff;
+
+    if (!isShiftJisChar(b1, b2))
+      throw new InvalidTokenException(off);
+  }
+
+  /**
+   * Tells whether (b1, b2) lies in one of the accepted two-byte ranges.
+   * Every lead byte returns its own verdict, so the ranges of one lead byte
+   * are never applied to another.
+   */
+  private static boolean isShiftJisChar(int b1, int b2) {
+    switch (b1) {
+    case 0x81:
+      return (b2 >= 0x40 && b2 <= 0x7e) ||
+             (b2 >= 0x80 && b2 <= 0xac) ||
+             (b2 >= 0xb8 && b2 <= 0xbf) ||
+             (b2 >= 0xc8 && b2 <= 0xce) ||
+             (b2 >= 0xda && b2 <= 0xe8) ||
+             (b2 >= 0xf0 && b2 <= 0xf7) ||
+             b2 == 0xfc;
+    case 0x82:
+      return (b2 >= 0x4f && b2 <= 0x58) ||
+             (b2 >= 0x60 && b2 <= 0x79) ||
+             (b2 >= 0x81 && b2 <= 0x9a) ||
+             (b2 >= 0x9f && b2 <= 0xf1);
+    case 0x83:
+      return (b2 >= 0x40 && b2 <= 0x7e) ||
+             (b2 >= 0x80 && b2 <= 0x96) ||
+             (b2 >= 0x9f && b2 <= 0xb6) ||
+             (b2 >= 0xbf && b2 <= 0xd6);
+    case 0x84:
+      return (b2 >= 0x40 && b2 <= 0x60) ||
+             (b2 >= 0x70 && b2 <= 0x7e) ||
+             (b2 >= 0x80 && b2 <= 0x91) ||
+             (b2 >= 0x9f && b2 <= 0xbe);
+    case 0x88:
+      return b2 >= 0x9f && b2 <= 0xfc;
+    case 0x98:
+      return (b2 >= 0x40 && b2 <= 0x72) ||
+             (b2 >= 0x9f && b2 <= 0xfc);
+    case 0xea:
+      return (b2 >= 0x40 && b2 <= 0x7e) ||
+             (b2 >= 0x80 && b2 <= 0xa4);
+    default:
+      return ((b1 >= 0x89 && b1 <= 0x97) ||
+              (b1 >= 0x99 && b1 <= 0x9f) ||
+              (b1 >= 0xe0 && b1 <= 0xe9)) &&
+             ((b2 >= 0x40 && b2 <= 0x7e) ||
+              (b2 >= 0x80 && b2 <= 0xfc));
+    }
+  }
+
+  /** Always throws: the type table of this encoding has no three-byte lead byte. */
+  void check3(byte[] buf, int off) throws InvalidTokenException {
+    throw new InvalidTokenException(off);
+  }
+
+  /** Always throws: the type table of this encoding has no four-byte lead byte. */
+  void check4(byte[] buf, int off) throws InvalidTokenException {
+    throw new InvalidTokenException(off);
+  }
+
+  /**
+   * Decodes sourceBuf[sourceStart..sourceEnd) with the JDK "SJIS"
+   * converter and stores the characters from targetBuf[targetStart] on.
+   *
+   * @return the number of characters stored
+   */
+  public int convert(byte[] sourceBuf, int sourceStart, int sourceEnd,
+                     char[] targetBuf, int targetStart) {
+    String target;
+    try {
+      target = new String(sourceBuf, sourceStart,
+                          sourceEnd - sourceStart, "SJIS");
+    }
+    catch (java.io.UnsupportedEncodingException e) {
+      // NOTE(review): without the converter nothing is stored and 0 is
+      // returned, so the caller cannot tell this apart from empty input.
+      return 0;
+    }
+    int length = target.length();
+    target.getChars(0, length, targetBuf, targetStart);
+    return length;
+  }
+
+  /** Returns 0. NOTE(review): presumably means "not fixed-width"; confirm in Encoding. */
+  public int getFixedBytesPerChar() {
+    return 0;
+  }
+
+  /**
+   * Advances pos over buf[off..end). A line feed, a carriage return, or a
+   * carriage return directly followed by a line feed counts as one line
+   * break; a two-byte character counts as one column.
+   */
+  public void movePosition(final byte[] buf, int off, int end, Position pos) {
+    /* Maintain the invariant: off - colDiff == colNumber. */
+    int colDiff = off - pos.columnNumber;
+    int lineNumber = pos.lineNumber;
+    // "<" rather than "!=" so that a character cut off at end cannot carry
+    // off past end and keep the loop reading beyond the range.
+    while (off < end) {
+      byte b = buf[off];
+      if (b >= 0) {
+        ++off;
+        switch (b) {
+        case (byte)'\n':
+          lineNumber += 1;
+          colDiff = off;
+          break;
+        case (byte)'\r':
+          lineNumber += 1;
+          if (off < end && buf[off] == '\n')
+            off++;
+          colDiff = off;
+          break;
+        }
+      }
+      else {
+        switch (shift_jisTypeTable[b & 0xFF]) {
+        default:
+          off += 1;
+          break;
+        case BT_LEAD2:
+          off += 2;
+          colDiff++;
+          break;
+        }
+      }
+    }
+    pos.columnNumber = off - colDiff;
+    pos.lineNumber = lineNumber;
+  }
+}