572 lines
19 KiB
C#
572 lines
19 KiB
C#
|
//------------------------------------------------------------------------------
// <copyright file="XmlEncoding.cs" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
// <owner current="true" primary="true">[....]</owner>
//------------------------------------------------------------------------------
|
||
|
|
||
|
using System.Text;
|
||
|
using System.Diagnostics;
|
||
|
|
||
|
namespace System.Xml {
|
||
|
|
||
|
// Decodes UTF-16 bytes of either byte order into chars. A call may end in the middle
// of a two-byte code unit; the dangling byte is kept in lastByte and paired with the
// first byte supplied by the next call.
internal class UTF16Decoder : System.Text.Decoder {
    private bool bigEndian;
    // First byte of a code unit that was split across calls, or -1 when none is pending.
    private int lastByte;
    private const int CharSize = 2;

    // bigEndian: true when the high-order byte of each code unit comes first in the input.
    public UTF16Decoder( bool bigEndian ) {
        this.lastByte = -1;
        this.bigEndian = bigEndian;
    }

    // Char count for the given bytes; an incomplete final code unit is tolerated.
    public override int GetCharCount( byte[] bytes, int index, int count ) {
        return GetCharCount( bytes, index, count, false );
    }

    // Returns how many chars 'count' bytes plus the pending byte (if any) decode to.
    // When 'flush' is set, an odd total is rejected with an ArgumentException because
    // the final code unit would be incomplete.
    public override int GetCharCount( byte[] bytes, int index, int count, bool flush ) {
        int byteCount = count + ( ( lastByte >= 0 ) ? 1 : 0 );
        if ( flush && ( byteCount % CharSize != 0 ) ) {
            throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { -1 } ), (string)null );
        }
        return byteCount / CharSize;
    }

    // Decodes byteCount bytes starting at bytes[byteIndex] into chars starting at
    // chars[charIndex] and returns the number of chars written. A trailing odd byte
    // is stored for the next call.
    public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        int charCount = GetCharCount( bytes, byteIndex, byteCount );

        if ( lastByte >= 0 ) {
            if ( byteCount == 0 ) {
                return charCount;
            }
            // complete the code unit that was split by the previous call
            int nextByte = bytes[byteIndex++];
            byteCount--;

            chars[charIndex++] = bigEndian
                ? (char)( lastByte << 8 | nextByte )
                : (char)( nextByte << 8 | lastByte );
            lastByte = -1;
        }

        if ( ( byteCount & 1 ) != 0 ) {
            // odd number of bytes left: hold the last one back for the next call
            lastByte = bytes[byteIndex + --byteCount];
        }

        // use the fast BlockCopy if possible
        if ( bigEndian == BitConverter.IsLittleEndian ) {
            // input byte order differs from the machine's byte order: combine each pair by hand
            int byteEnd = byteIndex + byteCount;
            if ( bigEndian ) {
                while ( byteIndex < byteEnd ) {
                    int hi = bytes[byteIndex++];
                    int lo = bytes[byteIndex++];
                    chars[charIndex++] = (char)( hi << 8 | lo );
                }
            }
            else {
                while ( byteIndex < byteEnd ) {
                    int lo = bytes[byteIndex++];
                    int hi = bytes[byteIndex++];
                    chars[charIndex++] = (char)( hi << 8 | lo );
                }
            }
        }
        else {
            Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, byteCount );
        }
        return charCount;
    }

    // Decodes as many bytes as fit into charCount chars.
    //   bytesUsed - number of input bytes consumed (including a trailing byte that was cached)
    //   charsUsed - number of chars written
    //   completed - false when the input had to be cut short because the output was too small
    // The 'flush' argument is not consulted by this implementation.
    public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
        charsUsed = 0;
        bytesUsed = 0;

        if ( lastByte >= 0 ) {
            if ( byteCount == 0 ) {
                completed = true;
                return;
            }
            if ( charCount <= 0 ) {
                // No room for the char the pending byte would complete. Writing it anyway
                // would store outside the requested range and make the remaining counts
                // negative, so report "not completed" and consume nothing.
                completed = false;
                return;
            }
            // complete the code unit that was split by the previous call
            int nextByte = bytes[byteIndex++];
            byteCount--;
            bytesUsed++;

            chars[charIndex++] = bigEndian
                ? (char)( lastByte << 8 | nextByte )
                : (char)( nextByte << 8 | lastByte );
            charCount--;
            charsUsed++;
            lastByte = -1;
        }

        // Limit the input to what fits into the output; the comparison is done in long
        // so that a very large charCount cannot overflow.
        if ( (long)charCount * CharSize < byteCount ) {
            byteCount = charCount * CharSize;
            completed = false;
        }
        else {
            completed = true;
        }

        if ( bigEndian == BitConverter.IsLittleEndian ) {
            // input byte order differs from the machine's byte order: combine each pair by hand
            int i = byteIndex;
            int byteEnd = i + ( byteCount & ~0x1 );
            if ( bigEndian ) {
                while ( i < byteEnd ) {
                    int hi = bytes[i++];
                    int lo = bytes[i++];
                    chars[charIndex++] = (char)( hi << 8 | lo );
                }
            }
            else {
                while ( i < byteEnd ) {
                    int lo = bytes[i++];
                    int hi = bytes[i++];
                    chars[charIndex++] = (char)( hi << 8 | lo );
                }
            }
        }
        else {
            Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, (int)(byteCount & ~0x1) );
        }
        charsUsed += byteCount / CharSize;
        bytesUsed += byteCount;

        if ( ( byteCount & 1 ) != 0 ) {
            // odd number of bytes: remember the last one for the next call
            lastByte = bytes[byteIndex + byteCount - 1];
        }
    }
}
|
||
|
|
||
|
// Stateless decoder that widens every input byte to the char with the same numeric
// value; no byte is rejected or replaced.
internal class SafeAsciiDecoder : Decoder {

    public SafeAsciiDecoder() {
    }

    // One char is produced per byte, so the char count equals the byte count.
    public override int GetCharCount( byte[] bytes, int index, int count ) {
        return count;
    }

    // Copies byteCount bytes starting at bytes[byteIndex] into chars starting at
    // chars[charIndex] and returns byteCount.
    public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        for ( int offset = 0; offset < byteCount; offset++ ) {
            chars[charIndex + offset] = (char)bytes[byteIndex + offset];
        }
        return byteCount;
    }

    // Copies at most charCount bytes. 'completed' is false when the input was cut
    // short to fit the output; bytesUsed and charsUsed are always equal.
    public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
        completed = !( charCount < byteCount );
        int toCopy = completed ? byteCount : charCount;

        for ( int offset = 0; offset < toCopy; offset++ ) {
            chars[charIndex + offset] = (char)bytes[byteIndex + offset];
        }

        bytesUsed = toCopy;
        charsUsed = toCopy;
    }
}
|
||
|
|
||
|
#if !SILVERLIGHT
|
||
|
// Base class of the UCS-4 encodings. Only decoding is implemented: the byte-producing
// members (GetBytes, GetMaxByteCount, GetEncoder) are placeholders that return
// null or 0. Derived classes assign the decoder for their byte order to ucs4Decoder.
internal class Ucs4Encoding : Encoding {
    // Decoder for the concrete byte order; set by the derived class constructors.
    internal Ucs4Decoder ucs4Decoder;

    public override string WebName {
        get {
            return this.EncodingName;
        }
    }

    // Returns the decoder instance owned by this encoding (the same object on every
    // call), so any bytes it has cached are shared by all users of this encoding object.
    public override Decoder GetDecoder() {
        return ucs4Decoder;
    }

    public override int GetByteCount( char[] chars, int index, int count ) {
        return checked( count * 4 );
    }

    // Overflow-checked like the three-argument overload instead of silently wrapping.
    public override int GetByteCount( char[] chars ) {
        return checked( chars.Length * 4 );
    }

    // Encoding is not implemented.
    public override byte[] GetBytes( string s ) {
        return null;
    }

    // Encoding is not implemented; nothing is written to 'bytes'.
    public override int GetBytes( char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex ) {
        return 0;
    }

    // Encoding is not implemented.
    public override int GetMaxByteCount( int charCount ) {
        return 0;
    }

    public override int GetCharCount( byte[] bytes, int index, int count ) {
        return ucs4Decoder.GetCharCount( bytes, index, count );
    }

    public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        return ucs4Decoder.GetChars( bytes, byteIndex, byteCount, chars, charIndex );
    }

    // One char per (possibly partial) group of four bytes.
    // NOTE(review): the decoders in this file emit two chars (a surrogate pair) for a
    // group above 0xFFFF, which this bound does not account for - confirm whether any
    // caller sizes buffers from this value.
    public override int GetMaxCharCount( int byteCount ) {
        return ( byteCount + 3 ) / 4;
    }

    public override int CodePage {
        get {
            return 0;
        }
    }

    public override int GetCharCount( byte[] bytes ) {
        return bytes.Length / 4;
    }

    // Encoding is not implemented.
    public override Encoder GetEncoder() {
        return null;
    }

    // Each of the following properties creates a new encoding object (with its own
    // decoder) on every access.

    internal static Encoding UCS4_Littleendian {
        get {
            return new Ucs4Encoding4321();
        }
    }

    internal static Encoding UCS4_Bigendian {
        get {
            return new Ucs4Encoding1234();
        }
    }

    internal static Encoding UCS4_2143 {
        get {
            return new Ucs4Encoding2143();
        }
    }

    internal static Encoding UCS4_3412 {
        get {
            return new Ucs4Encoding3412();
        }
    }
}
|
||
|
|
||
|
// UCS-4 encoding with byte order 1234 (big-endian); decoding is done by Ucs4Decoder1234.
internal class Ucs4Encoding1234 : Ucs4Encoding {

    public Ucs4Encoding1234() {
        this.ucs4Decoder = new Ucs4Decoder1234();
    }

    // Four-byte byte order mark that identifies this byte order.
    public override byte[] GetPreamble() {
        byte[] byteOrderMark = new byte[4];
        byteOrderMark[0] = 0x00;
        byteOrderMark[1] = 0x00;
        byteOrderMark[2] = 0xfe;
        byteOrderMark[3] = 0xff;
        return byteOrderMark;
    }

    public override string EncodingName {
        get {
            return "ucs-4 (Bigendian)";
        }
    }
}
|
||
|
|
||
|
// UCS-4 encoding with byte order 4321 (little-endian); decoding is done by Ucs4Decoder4321.
internal class Ucs4Encoding4321 : Ucs4Encoding {

    public Ucs4Encoding4321() {
        this.ucs4Decoder = new Ucs4Decoder4321();
    }

    // Four-byte byte order mark that identifies this byte order.
    public override byte[] GetPreamble() {
        byte[] byteOrderMark = new byte[4];
        byteOrderMark[0] = 0xff;
        byteOrderMark[1] = 0xfe;
        byteOrderMark[2] = 0x00;
        byteOrderMark[3] = 0x00;
        return byteOrderMark;
    }

    public override string EncodingName {
        get {
            return "ucs-4";
        }
    }
}
|
||
|
|
||
|
// UCS-4 encoding with the unusual byte order 2143; decoding is done by Ucs4Decoder2143.
internal class Ucs4Encoding2143 : Ucs4Encoding {

    public Ucs4Encoding2143() {
        this.ucs4Decoder = new Ucs4Decoder2143();
    }

    // Four-byte byte order mark that identifies this byte order.
    public override byte[] GetPreamble() {
        byte[] byteOrderMark = new byte[4];
        byteOrderMark[0] = 0x00;
        byteOrderMark[1] = 0x00;
        byteOrderMark[2] = 0xff;
        byteOrderMark[3] = 0xfe;
        return byteOrderMark;
    }

    public override string EncodingName {
        get {
            return "ucs-4 (order 2143)";
        }
    }
}
|
||
|
|
||
|
// UCS-4 encoding with the unusual byte order 3412; decoding is done by Ucs4Decoder3412.
internal class Ucs4Encoding3412 : Ucs4Encoding {

    public Ucs4Encoding3412() {
        this.ucs4Decoder = new Ucs4Decoder3412();
    }

    // Four-byte byte order mark that identifies this byte order.
    public override byte[] GetPreamble() {
        byte[] byteOrderMark = new byte[4];
        byteOrderMark[0] = 0xfe;
        byteOrderMark[1] = 0xff;
        byteOrderMark[2] = 0x00;
        byteOrderMark[3] = 0x00;
        return byteOrderMark;
    }

    public override string EncodingName {
        get {
            return "ucs-4 (order 3412)";
        }
    }
}
|
||
|
|
||
|
// Shared logic of the UCS-4 decoders. Input is processed in groups of four bytes;
// bytes of an incomplete trailing group are cached in lastBytes and completed by the
// next call. The byte order specific decoding of complete groups is supplied by the
// derived classes through GetFullChars.
internal abstract class Ucs4Decoder : Decoder {

    // Bytes of a group that was split across calls, and how many of them are valid.
    internal byte [] lastBytes = new byte[4];
    internal int lastBytesCount = 0;

    // Number of complete four-byte groups formed by the cached bytes plus 'count' new bytes.
    // NOTE(review): a group that is written through Ucs4ToUTF16 occupies two chars,
    // which this count does not reflect - confirm that callers size their buffers accordingly.
    public override int GetCharCount( byte[] bytes, int index, int count ) {
        return ( count + lastBytesCount ) / 4;
    }

    // Decodes the complete four-byte groups found in bytes[byteIndex .. byteIndex + byteCount)
    // into chars starting at charIndex and returns the number of chars written.
    internal abstract int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex );

    // Decodes all supplied bytes and returns the number of chars written. Returns 0 when
    // the cached bytes plus the new bytes still do not make up one complete group.
    public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        // finish a character from the bytes that were cached last time
        int i = 0;
        if ( lastBytesCount > 0 ) {
            // copy remaining bytes into the cache
            for ( ; lastBytesCount < 4 && byteCount > 0; lastBytesCount++ ) {
                lastBytes[lastBytesCount] = bytes[byteIndex];
                byteIndex++;
                byteCount--;
            }
            // still not enough bytes -> return
            if ( lastBytesCount < 4 ) {
                return 0;
            }
            // decode 1 character from the byte cache; it takes two chars when it is
            // written as a surrogate pair
            i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
            Debug.Assert( i == 1 || i == 2 );
            charIndex += i;
            lastBytesCount = 0;
        }

        // decode block of byte quadruplets
        i = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

        // cache remaining bytes that do not make up a character
        int bytesLeft = ( byteCount & 0x3 );
        for( int j = 0; j < bytesLeft; j++ ) {
            lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
        }
        lastBytesCount = bytesLeft;
        return i;
    }

    // Decodes as many four-byte groups as charCount allows (one group per requested char).
    //   bytesUsed - number of input bytes consumed, including bytes moved into the cache
    //   charsUsed - number of chars written
    //   completed - false when input was left unprocessed because of the charCount limit
    // The 'flush' argument is not consulted by this implementation.
    public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
        bytesUsed = 0;
        charsUsed = 0;
        // finish a character from the bytes that were cached last time
        int i = 0;
        int lbc = lastBytesCount;
        if ( lbc > 0 ) {
            // copy remaining bytes into the cache
            for ( ; lbc < 4 && byteCount > 0; lbc++ ) {
                lastBytes[lbc] = bytes[byteIndex];
                byteIndex++;
                byteCount--;
                bytesUsed++;
            }
            // still not enough bytes -> return
            if ( lbc < 4 ) {
                lastBytesCount = lbc;
                completed = true;
                return;
            }
            // decode 1 character from the byte cache; it takes two chars when it is
            // written as a surrogate pair
            i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
            Debug.Assert( i == 1 || i == 2 );
            charIndex += i;
            charCount -= i;
            charsUsed = i;

            lastBytesCount = 0;

            // if that's all that was requested -> return
            if ( charCount == 0 ) {
                completed = ( byteCount == 0 );
                return;
            }
        }

        // modify the byte count for GetFullChars depending on how many characters were requested
        if ( charCount * 4 < byteCount ) {
            byteCount = charCount * 4;
            completed = false;
        }
        else {
            completed = true;
        }
        bytesUsed += byteCount;

        // decode block of byte quadruplets
        charsUsed = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

        // cache remaining bytes that do not make up a character
        int bytesLeft = ( byteCount & 0x3 );
        for( int j = 0; j < bytesLeft; j++ ) {
            lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
        }
        lastBytesCount = bytesLeft;
    }

    // Writes 'code' as a UTF-16 surrogate pair to chars[charIndex] and chars[charIndex + 1].
    // 'code' is expected to be in the range 0x10000..0x10FFFF; the GetFullChars
    // implementations in this file only call it for such values.
    internal void Ucs4ToUTF16(uint code, char[] chars, int charIndex) {
        // UTF-16 splits the 20-bit value (code - 0x10000) into its upper and lower
        // ten bits. The upper ten bits must be taken as a whole: adding the plane
        // number without shifting it produced a wrong high surrogate for every code
        // point at or above 0x20000.
        uint offset = code - 0x10000;
        chars[charIndex] = (char)(XmlCharType.SurHighStart + (int)(offset >> 10));
        chars[charIndex + 1] = (char)(XmlCharType.SurLowStart + (int)(offset & 0x3FF));
    }
}
|
||
|
|
||
|
// UCS-4 decoder for byte order 4321: the first byte of each group is the least
// significant one.
internal class Ucs4Decoder4321 : Ucs4Decoder {

    // Decodes every complete four-byte group in bytes[byteIndex .. byteIndex + byteCount)
    // into chars starting at charIndex and returns the number of chars written
    // (two for a value above 0xFFFF, one otherwise). Trailing bytes that do not
    // form a complete group are ignored.
    // Throws ArgumentException for a value above 0x10FFFF and XmlException for a
    // value that XmlCharType.IsSurrogate reports as a surrogate.
    internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        int byteEnd = byteIndex + byteCount;
        int src = byteIndex;
        int dst = charIndex;

        while ( src + 3 < byteEnd ) {
            uint code = (uint)( ( bytes[src+3] << 24 ) | ( bytes[src+2] << 16 ) | ( bytes[src+1] << 8 ) | bytes[src] );

            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { src } ), (string)null );
            }

            if ( code > 0xFFFF ) {
                // needs a surrogate pair
                Ucs4ToUTF16( code, chars, dst );
                dst += 2;
            }
            else {
                if ( XmlCharType.IsSurrogate( (int)code ) ) {
                    throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                }
                chars[dst] = (char)code;
                dst++;
            }
            src += 4;
        }
        return dst - charIndex;
    }
}
|
||
|
|
||
|
// UCS-4 decoder for byte order 1234: the first byte of each group is the most
// significant one.
internal class Ucs4Decoder1234 : Ucs4Decoder {

    // Decodes every complete four-byte group in bytes[byteIndex .. byteIndex + byteCount)
    // into chars starting at charIndex and returns the number of chars written
    // (two for a value above 0xFFFF, one otherwise). Trailing bytes that do not
    // form a complete group are ignored.
    // Throws ArgumentException for a value above 0x10FFFF and XmlException for a
    // value that XmlCharType.IsSurrogate reports as a surrogate.
    internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        int byteEnd = byteIndex + byteCount;
        int src = byteIndex;
        int dst = charIndex;

        while ( src + 3 < byteEnd ) {
            uint code = (uint)( ( bytes[src] << 24 ) | ( bytes[src+1] << 16 ) | ( bytes[src+2] << 8 ) | bytes[src+3] );

            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { src } ), (string)null );
            }

            if ( code > 0xFFFF ) {
                // needs a surrogate pair
                Ucs4ToUTF16( code, chars, dst );
                dst += 2;
            }
            else {
                if ( XmlCharType.IsSurrogate( (int)code ) ) {
                    throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                }
                chars[dst] = (char)code;
                dst++;
            }
            src += 4;
        }
        return dst - charIndex;
    }
}
|
||
|
|
||
|
|
||
|
// UCS-4 decoder for byte order 2143: within each group the bytes arrive as
// second-most significant, most significant, least significant, second-least significant.
internal class Ucs4Decoder2143 : Ucs4Decoder {

    // Decodes every complete four-byte group in bytes[byteIndex .. byteIndex + byteCount)
    // into chars starting at charIndex and returns the number of chars written
    // (two for a value above 0xFFFF, one otherwise). Trailing bytes that do not
    // form a complete group are ignored.
    // Throws ArgumentException for a value above 0x10FFFF and XmlException for a
    // value that XmlCharType.IsSurrogate reports as a surrogate.
    internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        int byteEnd = byteIndex + byteCount;
        int src = byteIndex;
        int dst = charIndex;

        while ( src + 3 < byteEnd ) {
            uint code = (uint)( ( bytes[src+1] << 24 ) | ( bytes[src] << 16 ) | ( bytes[src+3] << 8 ) | bytes[src+2] );

            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { src } ), (string)null );
            }

            if ( code > 0xFFFF ) {
                // needs a surrogate pair
                Ucs4ToUTF16( code, chars, dst );
                dst += 2;
            }
            else {
                if ( XmlCharType.IsSurrogate( (int)code ) ) {
                    throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                }
                chars[dst] = (char)code;
                dst++;
            }
            src += 4;
        }
        return dst - charIndex;
    }
}
|
||
|
|
||
|
|
||
|
// UCS-4 decoder for byte order 3412: within each group the bytes arrive as
// second-least significant, least significant, most significant, second-most significant.
internal class Ucs4Decoder3412 : Ucs4Decoder {

    // Decodes every complete four-byte group in bytes[byteIndex .. byteIndex + byteCount)
    // into chars starting at charIndex and returns the number of chars written
    // (two for a value above 0xFFFF, one otherwise). Trailing bytes that do not
    // form a complete group are ignored.
    // Throws ArgumentException for a value above 0x10FFFF and XmlException for a
    // value that XmlCharType.IsSurrogate reports as a surrogate.
    internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        int byteEnd = byteIndex + byteCount;
        int src = byteIndex;
        int dst = charIndex;

        while ( src + 3 < byteEnd ) {
            uint code = (uint)( ( bytes[src+2] << 24 ) | ( bytes[src+3] << 16 ) | ( bytes[src] << 8 ) | bytes[src+1] );

            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { src } ), (string)null );
            }

            if ( code > 0xFFFF ) {
                // needs a surrogate pair
                Ucs4ToUTF16( code, chars, dst );
                dst += 2;
            }
            else {
                if ( XmlCharType.IsSurrogate( (int)code ) ) {
                    throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                }
                chars[dst] = (char)code;
                dst++;
            }
            src += 4;
        }
        return dst - charIndex;
    }
}
|
||
|
#endif
|
||
|
}
|