HTMLOutputHandler: escape non-ASCII characters more carefully.

Both hunks replace the single "c <= maxRepresentableChar" test in the
default branch with four cases: chars up to U+007F are written directly;
a UTF-16 surrogate pair is written as one decimal character reference
(SAXException if the pair is truncated or malformed); other chars up to
maxRepresentableChar are trial-encoded in the output encoding and handed
to getCharString(c) when the encoding cannot carry them; everything else
still goes to getCharString(c).

The indentation of the context lines was reconstructed; apply with
"patch -l" if the whitespace does not match the target file.

--- com/jclark/xsl/sax/HTMLOutputHandler.java.org	Sat Oct 23 18:19:50 1999
+++ com/jclark/xsl/sax/HTMLOutputHandler.java	Wed Feb 20 16:27:19 2002
@@ -101,8 +101,47 @@
         write(" ");
         break;
       default:
-        if (c <= maxRepresentableChar)
+        if (c <= '\u007F')
           write(c);
+        else if ((c & 0xFC00) == 0xD800) {
+          // High surrogate: a low surrogate must follow inside the buffer.
+          // NOTE(review): assumes len still counts the current char, as
+          // the paired ++off / len-- below implies -- confirm against the
+          // enclosing loop.
+          if (len < 2 || off + 1 >= ch.length) {
+            throw new SAXException("invalid surrogate pair");
+          }
+          char c2 = ch[++off];
+          len--;
+          if ((c2 & 0xFC00) != 0xDC00) {
+            throw new SAXException("invalid surrogate pair");
+          }
+          // Join the two 10-bit halves, add the 0x10000 offset, and emit
+          // the result as a decimal character reference.
+          int u32 = ((c & 0x3FF) << 10) | (c2 & 0x3FF);
+          u32 += 0x10000;
+          write("&#" + Integer.toString(u32) + ";");
+        }
+        else if (c <= maxRepresentableChar) {
+          char[] cs = { c };
+          String s = new String(cs);
+          try {
+            byte[] bs = s.getBytes(EncodingName.toJava(encoding));
+            // c is above U+007F here, so a lone '?' byte cannot be c itself
+            // (presumably the encoder's substitute).  3-byte EUC-JP output
+            // led by 0x8F is also routed to getCharString(c).
+            if ((bs.length == 1 && bs[0] == '?') ||
+                (bs.length == 3 && bs[0] == (byte)0x8f && encoding.equalsIgnoreCase("EUC-JP")))
+              write(getCharString(c));
+            else
+              write(c);
+          }
+          catch (java.io.UnsupportedEncodingException e) {
+            // Report through SAXException and keep the cause, instead of
+            // printing a stack trace and throwing a bare RuntimeException.
+            throw new SAXException("unsupported encoding: " + encoding, e);
+          }
+        }
         else
           write(getCharString(c));
         break;
@@ -214,8 +253,44 @@
         write(" ");
         break;
       default:
-        if (c <= maxRepresentableChar)
+        if (c <= '\u007F')
           write(c);
+        else if ((c & 0xFC00) == 0xD800) {
+          // High surrogate: a low surrogate must follow while still inside
+          // value.
+          if (i + 1 >= value.length()) {
+            throw new SAXException("invalid surrogate pair");
+          }
+          char c2 = value.charAt(++i);
+          if ((c2 & 0xFC00) != 0xDC00) {
+            throw new SAXException("invalid surrogate pair");
+          }
+          // Join the two 10-bit halves, add the 0x10000 offset, and emit
+          // the result as a decimal character reference.
+          int u32 = ((c & 0x3FF) << 10) | (c2 & 0x3FF);
+          u32 += 0x10000;
+          write("&#" + Integer.toString(u32) + ";");
+        }
+        else if (c <= maxRepresentableChar) {
+          char[] cs = { c };
+          String s = new String(cs);
+          try {
+            byte[] bs = s.getBytes(EncodingName.toJava(encoding));
+            // c is above U+007F here, so a lone '?' byte cannot be c itself
+            // (presumably the encoder's substitute).  3-byte EUC-JP output
+            // led by 0x8F is also routed to getCharString(c).
+            if ((bs.length == 1 && bs[0] == '?') ||
+                (bs.length == 3 && bs[0] == (byte)0x8f && encoding.equalsIgnoreCase("EUC-JP")))
+              write(getCharString(c));
+            else
+              write(c);
+          }
+          catch (java.io.UnsupportedEncodingException e) {
+            // Report through SAXException and keep the cause, instead of
+            // printing a stack trace and throwing a bare RuntimeException.
+            throw new SAXException("unsupported encoding: " + encoding, e);
+          }
+        }
         else
           write(getCharString(c));
         break;