//
// System.Xml.XmlInputStream
// encoding-specification-wise XML input stream and reader
//
// Author:
// Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
//
// (C)2003 Atsushi Enomoto
//

//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
|
|
using System;
|
|
using System.IO;
|
|
using System.Text;
|
|
using System.Runtime.InteropServices;
|
|
|
|
namespace System.Xml
|
|
{
|
|
#region XmlStreamReader
|
|
// Text reader over an XmlInputStream: the stream works out the document
// encoding from its first bytes and this reader decodes with it.
internal class XmlStreamReader : NonBlockingStreamReader
{
	// The encoding-detecting stream this reader pulls bytes from.
	XmlInputStream input;

	// Wraps an already constructed XmlInputStream.
	XmlStreamReader (XmlInputStream input)
		: base (input, SelectEncoding (input))
	{
		this.input = input;
	}

	// Wraps a raw byte stream; encoding detection happens inside the
	// XmlInputStream constructor.
	public XmlStreamReader (Stream input)
		: this (new XmlInputStream (input))
	{
	}

	// Returns the encoding reported by the stream, or strict UTF-8 when
	// the stream reports none.
	static Encoding SelectEncoding (XmlInputStream source)
	{
		Encoding detected = source.ActualEncoding;
		if (detected == null)
			return XmlInputStream.StrictUTF8;
		return detected;
	}

	// Closes the underlying input stream only. It deliberately does not
	// call base.Close (): that calls Dispose (true), which is overridden
	// below and calls Close () again.
	public override void Close ()
	{
		this.input.Close ();
	}

	// Same contract as the base Read, except that any ArgumentException
	// raised while reading (this includes the base class' own argument
	// checks) is rethrown as an XmlException carrying the original
	// exception as its inner exception.
	public override int Read ([In, Out] char[] dest_buffer, int index, int count)
	{
		int charsRead;
		try {
			charsRead = base.Read (dest_buffer, index, count);
		}
		catch (System.ArgumentException failure) {
			throw new XmlException ("Invalid data", failure);
		}
		return charsRead;
	}

	// Releases the base reader first, then closes the input stream when
	// called from an explicit dispose.
	protected override void Dispose (bool disposing)
	{
		base.Dispose (disposing);
		if (!disposing)
			return;
		Close ();
	}
}
|
|
#endregion
|
|
|
|
#region NonBlockingStreamReader
|
|
// mostly copied from StreamReader, removing BOM checks, ctor
|
|
// parameter checks and some extra public members.
|
|
// A TextReader over a byte stream that decodes with a fixed Encoding and
// performs no BOM detection. Peek () avoids touching the stream when the
// previous read came back short (see mayBlock).
internal class NonBlockingStreamReader : TextReader {

	const int DefaultBufferSize = 1024;
	const int DefaultFileBufferSize = 4096;
	const int MinimumBufferSize = 128;

	// Returned by FindNextEOL when a '\r' that ended the previous decoded
	// buffer is not followed by '\n' in the current one: the line being
	// accumulated is complete and nothing from the current buffer
	// belongs to it.
	const int PendingLineComplete = -2;

	//
	// The input buffer
	//
	byte [] input_buffer;

	//
	// The decoded buffer from the above input buffer
	//
	char [] decoded_buffer;

	//
	// Number of decoded chars in decoded_buffer.
	//
	int decoded_count;

	//
	// Current position in the decoded_buffer
	//
	int pos;

	//
	// The buffer size that we are using
	//
	int buffer_size;

	Encoding encoding;
	Decoder decoder;

	Stream base_stream;

	// True when the last stream read returned fewer bytes than requested.
	bool mayBlock;

	// Reused across ReadLine calls for lines spanning several buffers.
	StringBuilder line_builder;

	// True while the last char examined by FindNextEOL was a '\r'.
	bool foundCR;

	// Creates a reader over 'stream' decoding with 'encoding'.
	// No parameter checks are performed.
	public NonBlockingStreamReader(Stream stream, Encoding encoding)
	{
		this.buffer_size = DefaultBufferSize;
		base_stream = stream;
		input_buffer = new byte [this.buffer_size];
		this.encoding = encoding;
		decoder = encoding.GetDecoder ();

		decoded_buffer = new char [encoding.GetMaxCharCount (this.buffer_size)];
		decoded_count = 0;
		pos = 0;
	}

	public Encoding Encoding {
		get { return encoding; }
	}

	public override void Close ()
	{
		Dispose (true);
	}

	// Closes the base stream (on explicit dispose) and drops all buffers.
	// base_stream == null afterwards marks the reader as closed.
	protected override void Dispose (bool disposing)
	{
		if (disposing && base_stream != null)
			base_stream.Close ();

		input_buffer = null;
		decoded_buffer = null;
		encoding = null;
		decoder = null;
		base_stream = null;
		base.Dispose (disposing);
	}

	// Forgets everything decoded so far and resets the decoder state.
	public void DiscardBufferedData ()
	{
		pos = decoded_count = 0;
		mayBlock = false;
		// A '\r' seen in the discarded data must not affect later lines.
		foundCR = false;
#if NET_2_0
		decoder.Reset ();
#else
		decoder = encoding.GetDecoder ();
#endif
	}

	// The buffer is empty, fill it again.
	// Returns the number of decoded chars, or 0 when the stream returns
	// no more bytes.
	private int ReadBuffer ()
	{
		pos = 0;
		decoded_count = 0;

		// keep looping until the decoder gives us some chars
		do {
			int cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
			if (cbEncoded == 0)
				return 0;

			mayBlock = (cbEncoded < buffer_size);
			decoded_count += decoder.GetChars (input_buffer, 0, cbEncoded, decoded_buffer, 0);
		} while (decoded_count == 0);

		return decoded_count;
	}

	// Returns the next char without consuming it, or -1. When the decoded
	// buffer is empty and the previous stream read was short, -1 is
	// returned without reading from the stream.
	public override int Peek ()
	{
		if (base_stream == null)
			throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
		if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
			return -1;

		return decoded_buffer [pos];
	}

	// Returns the next char, or -1 when the stream returns no more data.
	public override int Read ()
	{
		if (base_stream == null)
			throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
		if (pos >= decoded_count && ReadBuffer () == 0)
			return -1;

		return decoded_buffer [pos++];
	}

	// Copies up to 'count' chars into dest_buffer starting at 'index'.
	// At most one decoded buffer is consumed per call, so fewer than
	// 'count' chars may be returned; 0 means no more data.
	public override int Read ([In, Out] char[] dest_buffer, int index, int count)
	{
		if (base_stream == null)
			throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
		if (dest_buffer == null)
			throw new ArgumentNullException ("dest_buffer");
		if (index < 0)
			throw new ArgumentOutOfRangeException ("index", "< 0");
		if (count < 0)
			throw new ArgumentOutOfRangeException ("count", "< 0");
		// re-ordered to avoid possible integer overflow
		if (index > dest_buffer.Length - count)
			throw new ArgumentException ("index + count > dest_buffer.Length");

		if (pos >= decoded_count && ReadBuffer () == 0)
			return 0;

		int cch = Math.Min (decoded_count - pos, count);
		Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
		pos += cch;
		return cch;
	}

	// Scans decoded_buffer from pos for the end of the current line.
	// Returns the index one past the last char of the line (pos is left
	// after the line terminator), -1 when the buffer ran out first, or
	// PendingLineComplete when the very first char of this buffer shows
	// that a '\r' ending the previous buffer was a line terminator by
	// itself.
	int FindNextEOL ()
	{
		char c = '\0';
		for (; pos < decoded_count; pos++) {
			c = decoded_buffer [pos];
			if (c == '\n') {
				pos++;
				int res = (foundCR) ? (pos - 2) : (pos - 1);
				if (res < 0)
					res = 0; // if a new buffer starts with a \n and there was a \r at
						// the end of the previous one, we get here.
				foundCR = false;
				return res;
			} else if (foundCR) {
				foundCR = false;
				if (pos == 0)
					// pos - 1 would be -1 here, indistinguishable
					// from "no EOL found"; report it explicitly.
					return PendingLineComplete;
				return pos - 1;
			}

			foundCR = (c == '\r');
		}

		return -1;
	}

	// Returns the text accumulated in line_builder; an oversized builder
	// is dropped instead of being kept for reuse.
	string TakeBuiltLine ()
	{
		StringBuilder sb = line_builder;
		if (sb.Capacity > 32768)
			line_builder = null;
		return sb.ToString (0, sb.Length);
	}

	// Reads one line terminated by "\n", "\r" or "\r\n" (terminator not
	// included). Returns null when there is no more data.
	public override string ReadLine()
	{
		if (base_stream == null)
			throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

		if (pos >= decoded_count && ReadBuffer () == 0)
			return null;

		int begin = pos;
		int end = FindNextEOL ();
		if (end == PendingLineComplete)
			// Leftover '\r' state from a line that is no longer being
			// built; the flag is cleared now, so scan again.
			end = FindNextEOL ();
		if (end < decoded_count && end >= begin)
			return new string (decoded_buffer, begin, end - begin);

		if (line_builder == null)
			line_builder = new StringBuilder ();
		else
			line_builder.Length = 0;

		while (true) {
			if (foundCR) // don't include the trailing CR if present
				decoded_count--;

			line_builder.Append (decoded_buffer, begin, decoded_count - begin);
			if (ReadBuffer () == 0) {
				// The line was ended by running out of data; a trailing
				// '\r' has been handled and must not leak into later calls.
				foundCR = false;
				return TakeBuiltLine ();
			}

			begin = pos;
			end = FindNextEOL ();
			if (end == PendingLineComplete)
				// Previous buffer ended with a lone '\r': the line is
				// complete and the new buffer starts the next line.
				return TakeBuiltLine ();
			if (end < decoded_count && end >= begin) {
				line_builder.Append (decoded_buffer, begin, end - begin);
				return TakeBuiltLine ();
			}
		}
	}

	// Reads everything that remains and returns it as one string.
	public override string ReadToEnd()
	{
		if (base_stream == null)
			throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

		StringBuilder text = new StringBuilder ();

		int size = decoded_buffer.Length;
		char [] buffer = new char [size];
		int len;

		while ((len = Read (buffer, 0, size)) != 0)
			text.Append (buffer, 0, len);

		return text.ToString ();
	}
}
|
|
#endregion
|
|
|
|
// A read-only stream wrapper that inspects the first bytes of an XML
// byte stream (BOM, zero-byte patterns, or the encoding pseudo-attribute
// of the XML declaration) to pick an Encoding. The inspected bytes are
// kept in a prefetch buffer and replayed to readers.
class XmlInputStream : Stream
{
	internal static readonly Encoding StrictUTF8, Strict1234UTF32, StrictBigEndianUTF16, StrictUTF16;

	static XmlInputStream ()
	{
		StrictUTF8 = new UTF8Encoding (false, true);
		Strict1234UTF32 = new UTF32Encoding (true, false, true);
		StrictBigEndianUTF16 = new UnicodeEncoding (true, false, true);
		StrictUTF16 = new UnicodeEncoding (false, false, true);
	}

	Encoding enc;      // detected encoding
	Stream stream;     // wrapped stream
	byte[] buffer;     // prefetched bytes
	int bufLength;     // number of valid bytes in buffer
	int bufPos;        // next byte of buffer to hand out

	static XmlException encodingException = new XmlException ("invalid encoding specification.");

	public XmlInputStream (Stream stream)
	{
		Initialize (stream);
	}

	// Returns buffer [index .. index + count) as an ASCII string, pulling
	// more bytes into the prefetch buffer if needed.
	// this returns null, instead of throwing ArgumentOutOfRangeException
	string GetStringFromBytes (int index, int count)
	{
		int posBak = bufPos;
		while (bufPos < index + count) {
			if (ReadByteSpecial () < 0) {
				// Leave the read position untouched on failure too.
				bufPos = posBak;
				return null;
			}
		}
		bufPos = posBak;
		return Encoding.ASCII.GetString (buffer, index, count);
	}

	// Prefetches the head of the stream and sets 'enc'. On exit bufPos
	// points at the first byte to hand to readers: just past a
	// recognized BOM, otherwise at 0.
	private void Initialize (Stream stream)
	{
		buffer = new byte [6];
		this.stream = stream;
		enc = StrictUTF8; // Default to UTF8 if we can't guess it
		bufLength = stream.Read (buffer, 0, buffer.Length);
		if (bufLength <= 0)
			return;

		int c = ReadByteSpecial ();
		switch (c) {
		case 0xFF:
			c = ReadByteSpecial ();
			if (c == 0xFE) {
				// BOM-ed little endian utf-16
				enc = Encoding.Unicode;
			} else {
				// It doesn't start from "<?xml" then its encoding is utf-8
				bufPos = 0;
			}
			break;
		case 0xFE:
			c = ReadByteSpecial ();
			if (c == 0xFF) {
				// BOM-ed big endian utf-16
				enc = Encoding.BigEndianUnicode;
			} else {
				// It doesn't start from "<?xml" then its encoding is utf-8
				bufPos = 0;
			}
			break;
		case 0xEF:
			c = ReadByteSpecial ();
			if (c == 0xBB) {
				c = ReadByteSpecial ();
				if (c != 0xBF) {
					bufPos = 0;
				}
			} else {
				// Not a UTF-8 BOM: replay everything from the start.
				// (Writing 0xEF at --bufPos, as before, overwrote the
				// second byte when one had been read.)
				bufPos = 0;
			}
			break;
		case 0:
			// It could still be 1234/2143/3412 variants of UTF32, but only 1234 version is available on .NET.
			c = ReadByteSpecial ();
			if (c == 0)
				enc = Strict1234UTF32;
			else
				enc = StrictBigEndianUTF16;
			// No BOM was consumed: the bytes examined are document
			// content and must be replayed.
			bufPos = 0;
			break;
		case '<':
			c = ReadByteSpecial ();
			if (c == 0) {
				if (ReadByteSpecial () == 0)
					enc = Encoding.UTF32; // little endian UTF32
				else
					enc = Encoding.Unicode; // little endian UTF16
			} else if (bufLength >= 4 && GetStringFromBytes (1, 4) == "?xml") {
				// try to get encoding name from XMLDecl.
				bufPos += 4;
				ReadEncodingFromXmlDecl ();
			}
			bufPos = 0;
			break;
		default:
			// c == 0 is handled by its own case above, so nothing
			// else can be inferred here.
			bufPos = 0;
			break;
		}
	}

	// Parses the rest of an XML declaration, starting right after the
	// bytes skipped by the caller, and sets 'enc' when an encoding
	// pseudo-attribute is present. Throws encodingException when the
	// encoding specification is malformed.
	void ReadEncodingFromXmlDecl ()
	{
		int c = SkipWhitespace ();

		// version. It is optional here.
		if (c == 'v') {
			// Skip to the opening quote, then to the matching closing
			// quote, instead of looking for the '0' of "1.0": a value
			// without a '0' would otherwise be scanned past.
			int quote;
			do {
				quote = ReadByteSpecial ();
			} while (quote >= 0 && quote != '"' && quote != '\'');
			if (quote >= 0) {
				do {
					c = ReadByteSpecial ();
				} while (c >= 0 && c != quote);
			}
			c = SkipWhitespace ();
		}

		if (c != 'e' || GetStringFromBytes (bufPos, 7) != "ncoding")
			return;

		bufPos += 7;
		c = SkipWhitespace ();
		if (c != '=')
			throw encodingException;
		int quoteChar = SkipWhitespace ();
		// The value must be quoted; anything else (including end of
		// data) is a malformed specification.
		if (quoteChar != '"' && quoteChar != '\'')
			throw encodingException;

		StringBuilder sb = new StringBuilder ();
		while (true) {
			c = ReadByteSpecial ();
			if (c == quoteChar)
				break;
			if (c < 0)
				throw encodingException;
			sb.Append ((char) c);
		}
		string encodingName = sb.ToString ();
		if (!XmlChar.IsValidIANAEncoding (encodingName))
			throw encodingException;
		enc = Encoding.GetEncoding (encodingName);
	}

	// Just like readbyte, but grows the buffer too.
	// Returns -1 when the wrapped stream returns no more bytes.
	int ReadByteSpecial ()
	{
		if (bufLength > bufPos)
			return buffer [bufPos++];

		// Double the buffer and read up to one old-buffer-length of
		// fresh bytes after the bytes already held.
		byte [] newbuf = new byte [buffer.Length * 2];
		Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
		int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
		if (nbytes == -1 || nbytes == 0)
			return -1;

		bufLength += nbytes;
		buffer = newbuf;
		return buffer [bufPos++];
	}

	// skips whitespace and returns misc char that was read from stream
	// (-1 when there are no more bytes)
	private int SkipWhitespace ()
	{
		int c;
		do {
			c = ReadByteSpecial ();
		} while (c == ' ' || c == '\t' || c == '\r' || c == '\n');
		return c;
	}

	public Encoding ActualEncoding {
		get { return enc; }
	}

	#region Public Overrides
	public override bool CanRead {
		get {
			if (bufLength > bufPos)
				return true;
			else
				return stream.CanRead;
		}
	}

	// FIXME: It should support base stream's CanSeek.
	public override bool CanSeek {
		get { return false; } // stream.CanSeek; }
	}

	public override bool CanWrite {
		get { return false; }
	}

	public override long Length {
		get {
			return stream.Length;
		}
	}

	// Position in terms of the wrapped stream, corrected for prefetched
	// bytes that have not been handed out yet. The setter treats the
	// prefetch buffer as holding wrapped-stream bytes [0, bufLength).
	public override long Position {
		get {
			return stream.Position - bufLength + bufPos;
		}
		set {
			if (value < 0)
				throw new ArgumentOutOfRangeException ("value");
			if (value < bufLength) {
				bufPos = (int) value;
				// Reads continue from the wrapped stream once the
				// buffer is exhausted, so it must sit right after
				// the prefetched bytes.
				if (stream.CanSeek && stream.Position != bufLength)
					stream.Position = bufLength;
			} else {
				// Mark the prefetch buffer as consumed so that reads
				// come from the repositioned stream.
				bufPos = bufLength;
				stream.Position = value;
			}
		}
	}

	public override void Close ()
	{
		stream.Close ();
	}

	public override void Flush ()
	{
		stream.Flush ();
	}

	// Serves bytes from the prefetch buffer first and reads the
	// remainder of the request from the wrapped stream.
	public override int Read (byte[] buffer, int offset, int count)
	{
		int bufRest = bufLength - bufPos;
		if (count <= bufRest) { // all from buffer
			Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
			bufPos += count;
			return count;
		}

		if (bufRest > 0) {
			Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
			bufPos += bufRest;
		}
		return bufRest + stream.Read (buffer, offset + bufRest, count - bufRest);
	}

	public override int ReadByte ()
	{
		if (bufLength > bufPos) {
			return buffer [bufPos++];
		}
		return stream.ReadByte ();
	}

	// Moves to the requested position and returns the new position.
	// Requires the wrapped stream to report Position (and Length for
	// SeekOrigin.End).
	public override long Seek (long offset, System.IO.SeekOrigin origin)
	{
		long target;
		switch (origin) {
		case SeekOrigin.Begin:
			target = offset;
			break;
		case SeekOrigin.Current:
			target = Position + offset;
			break;
		case SeekOrigin.End:
			target = Length + offset;
			break;
		default:
			throw new ArgumentException ("Invalid seek origin.", "origin");
		}
		if (target < 0)
			throw new IOException ("An attempt was made to move the position before the beginning of the stream.");

		// The Position setter keeps the prefetch buffer and the wrapped
		// stream consistent with each other.
		Position = target;
		return target;
	}

	public override void SetLength (long value)
	{
		stream.SetLength (value);
	}

	public override void Write (byte[] buffer, int offset, int count)
	{
		throw new NotSupportedException ();
	}
	#endregion
}
|
|
}
|