// Jo Shields 8b9b85e7f5 Imported Upstream version 3.10.0
// Former-commit-id: 172c8e3c300b39d5785c7a3e8dfb08ebdbc1a99b
// 2014-10-04 11:27:48 +01:00
//
// 616 lines
// 14 KiB
// C#

//
// System.Xml.XmlInputStream
// encoding-specification-wise XML input stream and reader
//
// Author:
// Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
//
// (C)2003 Atsushi Enomoto
//
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.IO;
using System.Text;
using System.Runtime.InteropServices;
namespace System.Xml
{
#region XmlStreamReader
// Text reader that decodes a byte stream using the encoding detected by
// XmlInputStream, and reports decoder argument errors as XmlException.
internal class XmlStreamReader : NonBlockingStreamReader
{
	// The encoding-detecting stream this reader pulls bytes from.
	XmlInputStream input;

	// Falls back to strict UTF-8 when the input stream reports no encoding.
	XmlStreamReader (XmlInputStream input)
		: base (input, input.ActualEncoding == null ? XmlInputStream.StrictUTF8 : input.ActualEncoding)
	{
		this.input = input;
	}

	// Wraps the given stream in an XmlInputStream and reads from that.
	public XmlStreamReader (Stream input)
		: this (new XmlInputStream (input))
	{
	}

	// Closes the wrapped XmlInputStream only; it does not go through
	// base.Close (), which would call back into Dispose (true).
	public override void Close ()
	{
		input.Close ();
	}

	// Same contract as the base Read, except that an ArgumentException raised
	// while reading is rethrown as XmlException ("Invalid data") with the
	// original exception attached as inner exception.
	public override int Read ([In, Out] char[] dest_buffer, int index, int count)
	{
		int charsRead;
		try {
			charsRead = base.Read (dest_buffer, index, count);
		} catch (ArgumentException invalid) {
			throw new XmlException ("Invalid data", invalid);
		}
		return charsRead;
	}

	// Releases the base reader first, then closes the input stream when
	// disposing managed state.
	protected override void Dispose (bool disposing)
	{
		base.Dispose (disposing);
		if (!disposing)
			return;
		Close ();
	}
}
#endregion
#region NonBlockingStreamReader
// mostly copied from StreamReader, removing BOM checks, ctor
// parameter checks and some extra public members.
internal class NonBlockingStreamReader : TextReader {
// Size, in bytes, of the input buffer allocated by the constructor.
const int DefaultBufferSize = 1024;
// The next two constants are not referenced anywhere in this class.
const int DefaultFileBufferSize = 4096;
const int MinimumBufferSize = 128;
//
// The input buffer: raw bytes most recently read from base_stream.
//
byte [] input_buffer;
//
// The decoded buffer: chars produced by the decoder from input_buffer.
//
char [] decoded_buffer;
//
// Number of valid decoded chars currently held in decoded_buffer.
//
int decoded_count;
//
// Current read position in decoded_buffer.
//
int pos;
//
// The maximum number of bytes requested from base_stream per read.
//
int buffer_size;
// Encoding supplied to the constructor, and the decoder obtained from it.
Encoding encoding;
Decoder decoder;
// Underlying byte stream; set to null by Dispose, which the read methods
// use to detect a closed reader.
Stream base_stream;
// True when the last stream read returned fewer bytes than requested.
// Peek () then returns -1 instead of reading again.
bool mayBlock;
// Scratch builder reused by ReadLine () for lines that span buffers.
StringBuilder line_builder;
// Creates a reader over the given stream that decodes with the given
// encoding. Neither argument is validated here.
public NonBlockingStreamReader(Stream stream, Encoding encoding)
{
	base_stream = stream;

	// Byte-side buffer, always DefaultBufferSize bytes.
	buffer_size = DefaultBufferSize;
	input_buffer = new byte [buffer_size];

	// Char-side buffer, sized for the worst case the encoding reports.
	this.encoding = encoding;
	decoder = encoding.GetDecoder ();
	int maxChars = encoding.GetMaxCharCount (buffer_size);
	decoded_buffer = new char [maxChars];

	decoded_count = 0;
	pos = 0;
}
// The encoding this reader was constructed with (null after Dispose).
public Encoding Encoding {
	get {
		return this.encoding;
	}
}
// Closing is the same as disposing with disposing == true.
public override void Close ()
{
	this.Dispose (true);
}
// Closes the underlying stream (only when disposing) and drops every
// reference held by the reader. A null base_stream is what the read
// methods treat as "closed".
protected override void Dispose (bool disposing)
{
	if (disposing) {
		if (base_stream != null)
			base_stream.Close ();
	}

	input_buffer = null;
	decoded_buffer = null;
	encoding = null;
	decoder = null;
	base_stream = null;

	base.Dispose (disposing);
}
// Forgets any decoded chars not yet consumed and resets the decoder state,
// so the next read starts from whatever the stream delivers next.
public void DiscardBufferedData ()
{
	decoded_count = 0;
	pos = 0;
	mayBlock = false;
#if NET_2_0
	decoder.Reset ();
#else
	decoder = encoding.GetDecoder ();
#endif
}
// Refills decoded_buffer from the stream. Resets pos and decoded_count,
// then keeps reading until the decoder yields at least one char.
// Returns the number of decoded chars, or 0 when the stream read returns 0.
private int ReadBuffer ()
{
	pos = 0;
	decoded_count = 0;

	while (true) {
		int bytesRead = base_stream.Read (input_buffer, 0, buffer_size);
		if (bytesRead == 0)
			return 0;

		// A short read is remembered so that Peek () will not try again.
		mayBlock = bytesRead < buffer_size;

		decoded_count += decoder.GetChars (input_buffer, 0, bytesRead, decoded_buffer, 0);
		if (decoded_count != 0)
			return decoded_count;
	}
}
// Returns the next char without consuming it, or -1 when nothing is
// available. When the decoded buffer is empty and the previous stream read
// was short (mayBlock), no further read is attempted and -1 is returned.
public override int Peek ()
{
	if (base_stream == null)
		throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

	if (pos < decoded_count)
		return decoded_buffer [pos];

	if (mayBlock || ReadBuffer () == 0)
		return -1;

	return decoded_buffer [pos];
}
// Consumes and returns the next char, refilling the buffer when it is
// exhausted. Returns -1 when the refill produces nothing.
public override int Read ()
{
	if (base_stream == null)
		throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

	bool exhausted = pos >= decoded_count;
	if (exhausted && ReadBuffer () == 0)
		return -1;

	char next = decoded_buffer [pos];
	pos++;
	return next;
}
// Copies up to count chars into dest_buffer starting at index and returns
// how many were copied. At most one buffer refill is performed, so fewer
// than count chars may be returned even before end of stream; 0 means the
// refill produced nothing.
//
// Throws ObjectDisposedException when the reader is closed,
// ArgumentNullException for a null dest_buffer, ArgumentOutOfRangeException
// for a negative index or count, and ArgumentException when the requested
// range does not fit in dest_buffer.
public override int Read ([In, Out] char[] dest_buffer, int index, int count)
{
	if (base_stream == null)
		throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
	if (dest_buffer == null)
		throw new ArgumentNullException ("dest_buffer");
	if (index < 0)
		throw new ArgumentOutOfRangeException ("index", "< 0");
	if (count < 0)
		throw new ArgumentOutOfRangeException ("count", "< 0");
	// Written as a subtraction so that index + count cannot overflow.
	if (index > dest_buffer.Length - count)
		throw new ArgumentException ("index + count > dest_buffer.Length");

	if (pos >= decoded_count && ReadBuffer () == 0)
		return 0;

	int available = decoded_count - pos;
	int toCopy = count < available ? count : available;
	Array.Copy (decoded_buffer, pos, dest_buffer, index, toCopy);
	pos += toCopy;
	return toCopy;
}
// True when the most recently scanned char was '\r'. Kept as a field so the
// state carries over from one decoded buffer to the next.
bool foundCR;
// Scans decoded_buffer from pos for the end of the current line.
// Returns the index just past the last char of the line's text, or -1 when
// no line end is found before decoded_count. On success pos is left after
// the terminator for "\n" and "\r\n", and on the char following a lone "\r".
//
// NOTE(review): when the previous buffer ended with '\r' and the current
// one starts with a char other than '\n', pos is 0 and the second branch
// returns pos - 1 == -1, which callers cannot tell apart from "not found".
int FindNextEOL ()
{
char c = '\0';
for (; pos < decoded_count; pos++) {
c = decoded_buffer [pos];
if (c == '\n') {
pos++;
// Exclude the '\n' and, if present, the '\r' just before it.
int res = (foundCR) ? (pos - 2) : (pos - 1);
if (res < 0)
res = 0; // if a new buffer starts with a \n and there was a \r at
// the end of the previous one, we get here.
foundCR = false;
return res;
} else if (foundCR) {
// Previous char was a lone '\r': the line ended there.
foundCR = false;
return pos - 1;
}
foundCR = (c == '\r');
}
return -1;
}
// Reads one line, without its terminator. Returns null when the decoded
// buffer is empty and a refill produces nothing; otherwise returns the text
// up to the next line end, or up to end of stream if there is none.
// Throws ObjectDisposedException when the reader is closed.
public override string ReadLine()
{
if (base_stream == null)
throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
if (pos >= decoded_count && ReadBuffer () == 0)
return null;
int begin = pos;
int end = FindNextEOL ();
// Fast path: the whole line lies inside the current buffer.
if (end < decoded_count && end >= begin)
return new string (decoded_buffer, begin, end - begin);
// Slow path: accumulate pieces across buffer refills in line_builder.
if (line_builder == null)
line_builder = new StringBuilder ();
else
line_builder.Length = 0;
while (true) {
if (foundCR) // don't include the trailing CR if present
decoded_count--;
// Append everything left in the current buffer.
line_builder.Append (new string (decoded_buffer, begin, decoded_count - begin));
if (ReadBuffer () == 0) {
// Nothing more to read: return what has been collected.
// A builder that grew past 32768 chars of capacity is not kept for
// reuse (presumably to avoid holding on to a large allocation).
if (line_builder.Capacity > 32768) {
StringBuilder sb = line_builder;
line_builder = null;
return sb.ToString (0, sb.Length);
}
return line_builder.ToString (0, line_builder.Length);
}
begin = pos;
end = FindNextEOL ();
if (end < decoded_count && end >= begin) {
// The line ends inside the freshly read buffer.
line_builder.Append (new string (decoded_buffer, begin, end - begin));
if (line_builder.Capacity > 32768) {
StringBuilder sb = line_builder;
line_builder = null;
return sb.ToString (0, sb.Length);
}
return line_builder.ToString (0, line_builder.Length);
}
}
}
// Reads until Read (char[], int, int) returns 0 and returns everything
// read as a single string.
// Throws ObjectDisposedException when the reader is closed.
public override string ReadToEnd()
{
	if (base_stream == null)
		throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

	int chunkSize = decoded_buffer.Length;
	char [] chunk = new char [chunkSize];
	StringBuilder result = new StringBuilder ();

	for (int n = Read (chunk, 0, chunkSize); n != 0; n = Read (chunk, 0, chunkSize))
		result.Append (chunk, 0, n);

	return result.ToString ();
}
}
#endregion
class XmlInputStream : Stream
{
// Encodings constructed with their last constructor argument set to true,
// i.e. they throw on invalid input instead of substituting characters.
internal static readonly Encoding StrictUTF8;
internal static readonly Encoding Strict1234UTF32;
internal static readonly Encoding StrictBigEndianUTF16;
internal static readonly Encoding StrictUTF16;

static XmlInputStream ()
{
	// UTF-8, no BOM emitted.
	StrictUTF8 = new UTF8Encoding (false, true);
	// UTF-32, big endian, no BOM.
	Strict1234UTF32 = new UTF32Encoding (true, false, true);
	// UTF-16, big endian, no BOM.
	StrictBigEndianUTF16 = new UnicodeEncoding (true, false, true);
	// UTF-16, little endian, no BOM.
	StrictUTF16 = new UnicodeEncoding (false, false, true);
}
// Encoding chosen by Initialize (); exposed through ActualEncoding.
Encoding enc;
// The wrapped stream.
Stream stream;
// Bytes read from the wrapped stream while detecting the encoding. They
// are handed out again by Read/ReadByte before the stream is read further.
byte[] buffer;
// Number of valid bytes in buffer.
int bufLength;
// Index of the next unread byte in buffer.
int bufPos;
// Single shared exception instance thrown for any malformed or invalid
// encoding declaration.
static XmlException encodingException = new XmlException ("invalid encoding specification.");
// Wraps the stream and immediately runs encoding detection on its first
// bytes (see Initialize).
public XmlInputStream (Stream stream)
{
	this.Initialize (stream);
}
// Returns buffer [index .. index + count) decoded as ASCII, first pulling
// more bytes into the buffer if it does not reach that far yet.
// Returns null (rather than throwing ArgumentOutOfRangeException) when the
// stream ends before enough bytes are available; in that case bufPos is
// left where the read-ahead stopped. Otherwise bufPos is unchanged.
string GetStringFromBytes (int index, int count)
{
	int savedPos = bufPos;
	int required = index + count;

	while (bufPos < required) {
		if (ReadByteSpecial () < 0)
			return null;
	}

	bufPos = savedPos;
	return Encoding.ASCII.GetString (buffer, index, count);
}
// Reads the first bytes of the stream and guesses the document encoding
// from a byte order mark, from the byte pattern around a leading '<' or
// zero byte, or from the encoding pseudo-attribute of an XML declaration.
// The result is stored in enc (strict UTF-8 when nothing is recognised).
//
// Every byte consumed while sniffing stays in buffer. On return bufPos is
// just past a recognised BOM, or 0 in every other case, so that no document
// byte is hidden from later reads.
//
// Throws XmlException when the encoding declaration lacks '=', is cut off
// by end of stream inside the quoted value, or names an encoding rejected
// by XmlChar.IsValidIANAEncoding.
private void Initialize (Stream stream)
{
	buffer = new byte [6];
	this.stream = stream;
	enc = StrictUTF8; // Default to UTF8 if we can't guess it
	bufLength = stream.Read (buffer, 0, buffer.Length);
	if (bufLength == -1 || bufLength == 0) {
		return;
	}

	int c = ReadByteSpecial ();
	switch (c) {
	case 0xFF:
		c = ReadByteSpecial ();
		if (c == 0xFE) {
			// BOM-ed little endian utf-16; bufPos stays past the BOM.
			enc = Encoding.Unicode;
		} else {
			// Not a BOM: keep the utf-8 default and rewind.
			bufPos = 0;
		}
		break;
	case 0xFE:
		c = ReadByteSpecial ();
		if (c == 0xFF) {
			// BOM-ed big endian utf-16; bufPos stays past the BOM.
			enc = Encoding.BigEndianUnicode;
			return;
		} else {
			// Not a BOM: keep the utf-8 default and rewind.
			bufPos = 0;
		}
		break;
	case 0xEF:
		c = ReadByteSpecial ();
		if (c == 0xBB) {
			c = ReadByteSpecial ();
			if (c != 0xBF) {
				bufPos = 0;
			}
			// else: utf-8 BOM; bufPos stays past its three bytes.
		} else {
			// Not a utf-8 BOM. Rewind so both bytes read so far are
			// delivered unchanged. (Previously the second byte was
			// overwritten with 0xEF and reading resumed at index 1,
			// which dropped that byte from the document.)
			bufPos = 0;
		}
		break;
	case 0:
		// It could still be 1234/2143/3412 variants of UTF32, but only 1234 version is available on .NET.
		c = ReadByteSpecial ();
		if (c == 0)
			enc = Strict1234UTF32;
		else
			enc = StrictBigEndianUTF16;
		// The two bytes just examined are document data (or, for UTF-32,
		// half of a code unit), so rewind; otherwise decoding would start
		// two bytes into the stream.
		bufPos = 0;
		break;
	case '<':
		c = ReadByteSpecial ();
		if (c == 0) {
			if (ReadByteSpecial () == 0)
				enc = Encoding.UTF32; // little endian UTF32
			else
				enc = Encoding.Unicode; // little endian UTF16
		} else if (bufLength >= 4 && GetStringFromBytes (1, 4) == "?xml") {
			// try to get encoding name from XMLDecl.
			// bufPos is 2 here, so this moves it to index 6, i.e. it also
			// steps over the single byte that follows "<?xml".
			bufPos += 4;
			c = SkipWhitespace ();

			// version. It is optional here.
			if (c == 'v') {
				// Skip ahead to the '0' of "1.0" plus the byte after it
				// (or stop at end of stream).
				while (c >= 0) {
					c = ReadByteSpecial ();
					if (c == '0') { // 0 of 1.0
						ReadByteSpecial ();
						break;
					}
				}
				c = SkipWhitespace ();
			}

			if (c == 'e') {
				if (GetStringFromBytes (bufPos, 7) == "ncoding") {
					bufPos += 7;
					c = SkipWhitespace();
					if (c != '=')
						throw encodingException;
					c = SkipWhitespace ();
					// Whatever byte follows is taken as the quote character;
					// the name runs until that byte occurs again.
					int quoteChar = c;
					StringBuilder sb = new StringBuilder ();
					while (true) {
						c = ReadByteSpecial ();
						if (c == quoteChar)
							break;
						else if (c < 0)
							throw encodingException;
						sb.Append ((char) c);
					}
					string encodingName = sb.ToString ();
					if (!XmlChar.IsValidIANAEncoding (encodingName))
						throw encodingException;
					enc = Encoding.GetEncoding (encodingName);
				}
			}
		}
		// Everything read above is document content: rewind.
		bufPos = 0;
		break;
	default:
		// Any other first byte: keep the utf-8 default. (A first byte of 0
		// never reaches this label because it has its own case above.)
		bufPos = 0;
		break;
	}
}
// Returns the next byte from the buffer and advances bufPos. When the
// buffer is exhausted it is replaced by one twice the size, the existing
// bytes are copied over and more bytes are read from the stream behind
// them. Returns -1 when the stream has nothing more to give; the buffer is
// left untouched in that case.
int ReadByteSpecial ()
{
	if (bufPos >= bufLength) {
		int oldSize = buffer.Length;
		byte [] grown = new byte [oldSize * 2];
		Buffer.BlockCopy (buffer, 0, grown, 0, bufLength);

		int got = stream.Read (grown, bufLength, oldSize);
		if (got == 0 || got == -1)
			return -1;

		buffer = grown;
		bufLength += got;
	}
	return buffer [bufPos++];
}
// Reads bytes until one is not a space, tab, CR or LF, and returns that
// byte (already consumed). Returns -1 if the stream ends first.
private int SkipWhitespace ()
{
	int c;
	do {
		c = ReadByteSpecial ();
	} while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
	return c;
}
// The encoding selected during initialization.
public Encoding ActualEncoding {
	get {
		return this.enc;
	}
}
#region Public Overrides
// Readable while buffered bytes remain; otherwise whatever the wrapped
// stream reports.
public override bool CanRead {
	get {
		return bufLength > bufPos || stream.CanRead;
	}
}
// FIXME: It should support base stream's CanSeek.
// Always reports false for now, regardless of the wrapped stream.
public override bool CanSeek {
	get {
		return false; // stream.CanSeek;
	}
}
// This stream is read-only.
public override bool CanWrite {
	get {
		return false;
	}
}
// Length of the wrapped stream.
public override long Length {
	get { return stream.Length; }
}
// Logical read position: the wrapped stream's position minus the buffered
// bytes that have not been handed out yet.
//
// The setter keeps that relation intact. A target that lies within the
// unread part of the buffer just advances bufPos. Any other target is
// applied to the wrapped stream directly and the buffer is marked consumed,
// so that reading the property back yields the value that was set.
// (Previously the setter moved the wrapped stream to value - bufLength and
// left the buffer live, so the getter no longer returned the value set.)
public override long Position {
	get {
		return stream.Position - bufLength + bufPos;
	}
	set {
		long delta = value - Position;
		if (delta >= 0 && delta <= bufLength - bufPos) {
			// Forward move inside the unread buffered bytes.
			bufPos += (int) delta;
		} else {
			// The unread buffered bytes are always the ones just before
			// the wrapped stream's position, so once they are discarded
			// the logical and the underlying position coincide.
			stream.Position = value;
			bufPos = bufLength;
		}
	}
}
// Closes the wrapped stream.
public override void Close ()
{
	this.stream.Close ();
}
// Forwards to the wrapped stream.
public override void Flush ()
{
	this.stream.Flush ();
}
// Fills the caller's array from the sniffing buffer first and, if that
// does not cover the request, continues with a read from the wrapped
// stream. Returns the total number of bytes delivered.
public override int Read (byte[] buffer, int offset, int count)
{
	int buffered = bufLength - bufPos;

	// The request can be served entirely from the buffer.
	if (count <= buffered) {
		Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
		bufPos += count;
		return count;
	}

	// Drain what is left in the buffer, then read the remainder.
	if (buffered > 0) {
		Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, buffered);
		bufPos += buffered;
	}
	return buffered + stream.Read (buffer, offset + buffered, count - buffered);
}
// Returns the next buffered byte if there is one, otherwise reads a byte
// from the wrapped stream.
public override int ReadByte ()
{
	if (bufPos < bufLength)
		return buffer [bufPos++];
	return stream.ReadByte ();
}
// Moves the logical read position and returns the new position.
//
// A forward seek relative to the current position that stays within the
// unread buffered bytes only advances bufPos. Every other seek is passed to
// the wrapped stream (relative offsets are first corrected by the number of
// unread buffered bytes, which the wrapped stream has already moved past),
// and the buffer is then marked consumed so stale bytes are not returned by
// later reads.
//
// (Previously the in-buffer case returned the byte value found at the
// target index instead of moving, and accepted negative offsets that index
// before bufPos.)
public override long Seek (long offset, System.IO.SeekOrigin origin)
{
	if (origin == SeekOrigin.Current) {
		int bufRest = bufLength - bufPos;
		if (offset >= 0 && offset <= bufRest) {
			bufPos += (int) offset;
			return Position;
		}
		offset -= bufRest;
	}

	long newPosition = stream.Seek (offset, origin);
	// Nothing buffered is valid for the new location.
	bufPos = bufLength;
	return newPosition;
}
// Forwards to the wrapped stream.
public override void SetLength (long value)
{
	this.stream.SetLength (value);
}
// Writing is not supported; always throws NotSupportedException.
public override void Write (byte[] buffer, int offset, int count)
{
	throw new NotSupportedException ();
}
#endregion
}
}