//------------------------------------------------------------------------------
//
//     Copyright (c) Microsoft Corporation.  All rights reserved.
//
// Microsoft
//------------------------------------------------------------------------------

using System.Text;
using System.Diagnostics;

namespace System.Xml {

    // Decodes a UTF-16 byte stream of either byte order into chars, two bytes per char.
    // When a call ends on an odd byte, that byte is kept in lastByte and combined with
    // the first byte of the next call.
    internal class UTF16Decoder : System.Text.Decoder {
        private bool bigEndian;
        // Pending odd byte carried over from the previous call, or -1 when there is none.
        private int lastByte;
        private const int CharSize = 2;

        public UTF16Decoder( bool bigEndian ) {
            this.lastByte = -1;
            this.bigEndian = bigEndian;
        }

        // Number of chars that decoding 'count' bytes would produce (no flush).
        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return GetCharCount( bytes, index, count, false );
        }

        // Number of chars that decoding 'count' bytes would produce, including a pending
        // carried-over byte. Throws ArgumentException when flushing with an odd total,
        // because the final byte could never be completed into a char.
        public override int GetCharCount( byte[] bytes, int index, int count, bool flush ) {
            int byteCount = count + ( ( lastByte >= 0 ) ? 1 : 0 );
            if ( flush && ( byteCount % CharSize != 0 ) ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { -1 } ), (string)null );
            }
            return byteCount / CharSize;
        }

        // Decodes byteCount bytes starting at byteIndex into chars starting at charIndex
        // and returns the number of chars produced.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int charCount = GetCharCount( bytes, byteIndex, byteCount );

            // complete the char started by the byte left over from the previous call
            if ( lastByte >= 0 ) {
                if ( byteCount == 0 ) {
                    return charCount;
                }
                int nextByte = bytes[byteIndex++];
                byteCount--;

                chars[charIndex++] = bigEndian
                    ? (char)( lastByte << 8 | nextByte )
                    : (char)( nextByte << 8 | lastByte );
                lastByte = -1;
            }

            // an odd trailing byte is saved for the next call
            if ( ( byteCount & 1 ) != 0 ) {
                lastByte = bytes[byteIndex + --byteCount];
            }

            // use the fast BlockCopy if possible (stream byte order == machine byte order)
            if ( bigEndian == BitConverter.IsLittleEndian ) {
                int byteEnd = byteIndex + byteCount;
                if ( bigEndian ) {
                    while ( byteIndex < byteEnd ) {
                        int hi = bytes[byteIndex++];
                        int lo = bytes[byteIndex++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
                else {
                    while ( byteIndex < byteEnd ) {
                        int lo = bytes[byteIndex++];
                        int hi = bytes[byteIndex++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
            }
            else {
                Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, byteCount );
            }
            return charCount;
        }

        // Decodes as many bytes as fit into charCount chars. Reports the bytes consumed and
        // chars produced; 'completed' is false when the input had to be truncated to fit
        // the output. The 'flush' argument is not consulted by this implementation.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount,
                                      char[] chars, int charIndex, int charCount, bool flush,
                                      out int bytesUsed, out int charsUsed, out bool completed ) {
            charsUsed = 0;
            bytesUsed = 0;

            // complete the char started by the byte left over from the previous call
            if ( lastByte >= 0 ) {
                if ( byteCount == 0 ) {
                    completed = true;
                    return;
                }
                int nextByte = bytes[byteIndex++];
                byteCount--;
                bytesUsed++;

                chars[charIndex++] = bigEndian
                    ? (char)( lastByte << 8 | nextByte )
                    : (char)( nextByte << 8 | lastByte );
                charCount--;
                charsUsed++;
                lastByte = -1;
            }

            // limit the input to what the output buffer can hold
            if ( charCount * CharSize < byteCount ) {
                byteCount = charCount * CharSize;
                completed = false;
            }
            else {
                completed = true;
            }

            // swap bytes manually when stream and machine byte order differ, block-copy otherwise
            if ( bigEndian == BitConverter.IsLittleEndian ) {
                int i = byteIndex;
                int byteEnd = i + ( byteCount & ~0x1 );
                if ( bigEndian ) {
                    while ( i < byteEnd ) {
                        int hi = bytes[i++];
                        int lo = bytes[i++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
                else {
                    while ( i < byteEnd ) {
                        int lo = bytes[i++];
                        int hi = bytes[i++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
            }
            else {
                Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, (int)(byteCount & ~0x1) );
            }
            charsUsed += byteCount / CharSize;
            bytesUsed += byteCount;

            // an odd trailing byte is counted as used and saved for the next call
            if ( ( byteCount & 1 ) != 0 ) {
                lastByte = bytes[byteIndex + byteCount - 1];
            }
        }
    }

    // Decoder that widens every input byte to a char without any validation or
    // replacement, so one byte always yields exactly one char.
    internal class SafeAsciiDecoder : Decoder {

        public SafeAsciiDecoder() {
        }

        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return count;
        }

        // Copies byteCount bytes to chars one-to-one and returns byteCount.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int i = byteIndex;
            int j = charIndex;
            while ( i < byteIndex + byteCount ) {
                chars[j++] = (char)bytes[i++];
            }
            return byteCount;
        }

        // Copies at most charCount bytes to chars one-to-one; 'completed' is false when
        // the input was truncated to fit the output.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount,
                                      char[] chars, int charIndex, int charCount, bool flush,
                                      out int bytesUsed, out int charsUsed, out bool completed ) {
            if ( charCount < byteCount ) {
                byteCount = charCount;
                completed = false;
            }
            else {
                completed = true;
            }

            int i = byteIndex;
            int j = charIndex;
            int byteEndIndex = byteIndex + byteCount;

            while ( i < byteEndIndex ) {
                chars[j++] = (char)bytes[i++];
            }

            charsUsed = byteCount;
            bytesUsed = byteCount;
        }
    }

#if !SILVERLIGHT
    // Base class for the UCS-4 encodings. Only decoding is implemented: it is delegated
    // to ucs4Decoder, which each subclass sets to the decoder for its byte order. The
    // encoding members are stubs (GetBytes returns null/0, GetEncoder returns null,
    // GetMaxByteCount returns 0).
    internal class Ucs4Encoding : Encoding {
        internal Ucs4Decoder ucs4Decoder;

        public override string WebName {
            get {
                return this.EncodingName;
            }
        }

        // Returns the decoder instance owned by this encoding (the same object on every call).
        public override Decoder GetDecoder() {
            return ucs4Decoder;
        }

        public override int GetByteCount( char[] chars, int index, int count ) {
            return checked( count * 4 );
        }

        public override int GetByteCount( char[] chars ) {
            return chars.Length * 4;
        }

        // Encoding is not implemented.
        public override byte[] GetBytes( string s ) {
            return null; //ucs4Decoder.GetByteCount(chars, index, count);
        }

        // Encoding is not implemented.
        public override int GetBytes( char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex ) {
            return 0;
        }

        // Encoding is not implemented.
        public override int GetMaxByteCount( int charCount ) {
            return 0;
        }

        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return ucs4Decoder.GetCharCount( bytes, index, count );
        }

        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            return ucs4Decoder.GetChars( bytes, byteIndex, byteCount, chars, charIndex );
        }

        public override int GetMaxCharCount( int byteCount ) {
            return ( byteCount + 3 ) / 4;
        }

        public override int CodePage {
            get {
                return 0;
            }
        }

        public override int GetCharCount( byte[] bytes ) {
            return bytes.Length / 4;
        }

        // Encoding is not implemented.
        public override Encoder GetEncoder() {
            return null;
        }

        // Each of the following properties creates a new encoding instance on every access.
        internal static Encoding UCS4_Littleendian {
            get {
                return new Ucs4Encoding4321();
            }
        }

        internal static Encoding UCS4_Bigendian {
            get {
                return new Ucs4Encoding1234();
            }
        }

        internal static Encoding UCS4_2143 {
            get {
                return new Ucs4Encoding2143();
            }
        }

        internal static Encoding UCS4_3412 {
            get {
                return new Ucs4Encoding3412();
            }
        }
    }

    // UCS-4 with octets in most-significant-first (1234) order.
    internal class Ucs4Encoding1234 : Ucs4Encoding {

        public Ucs4Encoding1234() {
            ucs4Decoder = new Ucs4Decoder1234();
        }

        public override string EncodingName {
            get {
                return "ucs-4 (Bigendian)";
            }
        }

        public override byte[] GetPreamble() {
            return new byte[4] { 0x00, 0x00, 0xfe, 0xff };
        }
    }

    // UCS-4 with octets in least-significant-first (4321) order.
    internal class Ucs4Encoding4321 : Ucs4Encoding {

        public Ucs4Encoding4321() {
            ucs4Decoder = new Ucs4Decoder4321();
        }

        public override string EncodingName {
            get {
                return "ucs-4";
            }
        }

        public override byte[] GetPreamble() {
            return new byte[4] { 0xff, 0xfe, 0x00, 0x00 };
        }
    }

    // UCS-4 with octets in 2143 order.
    internal class Ucs4Encoding2143 : Ucs4Encoding {

        public Ucs4Encoding2143() {
            ucs4Decoder = new Ucs4Decoder2143();
        }

        public override string EncodingName {
            get {
                return "ucs-4 (order 2143)";
            }
        }

        public override byte[] GetPreamble() {
            return new byte[4] { 0x00, 0x00, 0xff, 0xfe };
        }
    }

    // UCS-4 with octets in 3412 order.
    internal class Ucs4Encoding3412 : Ucs4Encoding {

        public Ucs4Encoding3412() {
            ucs4Decoder = new Ucs4Decoder3412();
        }

        public override string EncodingName {
            get {
                return "ucs-4 (order 3412)";
            }
        }

        public override byte[] GetPreamble() {
            return new byte[4] { 0xfe, 0xff, 0x00, 0x00 };
        }
    }

    // Common part of the UCS-4 decoders. Subclasses supply GetFullChars, which decodes
    // whole 4-byte groups for one byte order; this class handles groups that are split
    // across calls by caching the incomplete tail in lastBytes.
    internal abstract class Ucs4Decoder : Decoder {

        // Bytes of an incomplete 4-byte group left over from the previous call.
        internal byte [] lastBytes = new byte[4];
        // Number of valid bytes in lastBytes (0..3 between calls).
        internal int lastBytesCount = 0;

        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return ( count + lastBytesCount ) / 4;
        }

        // Decodes the complete 4-byte groups in bytes[byteIndex .. byteIndex + byteCount)
        // into chars starting at charIndex and returns the number of chars written.
        // Trailing bytes that do not form a complete group are ignored.
        internal abstract int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex );

        // Decodes the given bytes, first completing any group cached by the previous call,
        // and returns the number of chars written.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            // finish a character from the bytes that were cached last time
            int i = lastBytesCount;
            if ( lastBytesCount > 0 ) {
                // copy remaining bytes into the cache
                for ( ; lastBytesCount < 4 && byteCount > 0; lastBytesCount++ ) {
                    lastBytes[lastBytesCount] = bytes[byteIndex];
                    byteIndex++;
                    byteCount--;
                }
                // still not enough bytes -> return
                if ( lastBytesCount < 4 ) {
                    return 0;
                }
                // decode 1 character from the byte cache
                i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
                Debug.Assert( i == 1 );
                charIndex += i;
                lastBytesCount = 0;
            }
            else {
                i = 0;
            }

            // decode block of byte quadruplets
            i = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

            // cache the remaining bytes that do not make up a character
            // (byteCount & 0x3 is never negative, so no guard is needed)
            int bytesLeft = ( byteCount & 0x3 );
            for( int j = 0; j < bytesLeft; j++ ) {
                lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
            }
            lastBytesCount = bytesLeft;

            return i;
        }

        // Decodes as many bytes as the charCount limit allows. Reports the bytes consumed
        // and chars produced; 'completed' is false when the input was truncated to fit.
        // The 'flush' argument is not consulted by this implementation.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount,
                                      char[] chars, int charIndex, int charCount, bool flush,
                                      out int bytesUsed, out int charsUsed, out bool completed ) {
            bytesUsed = 0;
            charsUsed = 0;

            // finish a character from the bytes that were cached last time
            int i = 0;
            int lbc = lastBytesCount;
            if ( lbc > 0 ) {
                // copy remaining bytes into the cache
                for ( ; lbc < 4 && byteCount > 0; lbc++ ) {
                    lastBytes[lbc] = bytes[byteIndex];
                    byteIndex++;
                    byteCount--;
                    bytesUsed++;
                }
                // still not enough bytes -> return
                if ( lbc < 4 ) {
                    lastBytesCount = lbc;
                    completed = true;
                    return;
                }
                // decode 1 character from the byte cache
                i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
                Debug.Assert( i == 1 );
                charIndex += i;
                charCount -= i;
                charsUsed = i;

                lastBytesCount = 0;

                // if that's all that was requested -> return
                if ( charCount == 0 ) {
                    completed = ( byteCount == 0 );
                    return;
                }
            }
            else {
                i = 0;
            }

            // modify the byte count for GetFullChars depending on how many characters were requested
            // NOTE(review): this allows one 4-byte group per requested char, but a group above
            // 0xFFFF is written as two chars, so the output may exceed charCount - confirm
            // that callers leave enough room.
            if ( charCount * 4 < byteCount ) {
                byteCount = charCount * 4;
                completed = false;
            }
            else {
                completed = true;
            }
            bytesUsed += byteCount;

            // decode block of byte quadruplets
            charsUsed = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

            // cache the remaining bytes that do not make up a character
            // (byteCount & 0x3 is never negative, so no guard is needed)
            int bytesLeft = ( byteCount & 0x3 );
            for( int j = 0; j < bytesLeft; j++ ) {
                lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
            }
            lastBytesCount = bytesLeft;
        }

        // Writes the UTF-16 surrogate pair for 'code' to chars[charIndex] and
        // chars[charIndex + 1]. The GetFullChars implementations in this file only call
        // this for code in the range 0x10000..0x10FFFF.
        internal void Ucs4ToUTF16(uint code, char[] chars, int charIndex) {
            // UTF-16: subtract 0x10000 to get a 20-bit value; its upper 10 bits select the
            // high surrogate and its lower 10 bits select the low surrogate.
            uint offset = code - 0x10000;
            chars[charIndex] = (char)(XmlCharType.SurHighStart + (int)(offset >> 10));
            chars[charIndex + 1] = (char)(XmlCharType.SurLowStart + (int)(offset & 0x3FF));
        }
    }

    // UCS-4 decoder for octet order 4321 (least significant octet first).
    internal class Ucs4Decoder4321 : Ucs4Decoder {

        // Decodes complete 4-byte groups; returns the number of chars written.
        // Throws ArgumentException for values above 0x10FFFF and XmlException for
        // values that XmlCharType.IsSurrogate reports as surrogates.
        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            uint code;
            int i, j;

            // from here on byteCount is the exclusive end index
            byteCount += byteIndex;

            for ( i = byteIndex, j = charIndex; i + 3 < byteCount; ) {
                code = (uint)( ( bytes[i+3] << 24 ) | ( bytes[i+2] << 16 ) | ( bytes[i+1] << 8 ) | bytes[i] );
                if ( code > 0x10FFFF ) {
                    throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { i } ), (string)null );
                }
                else if ( code > 0xFFFF ) {
                    // supplementary code point -> two chars
                    Ucs4ToUTF16(code, chars, j);
                    j++;
                }
                else {
                    if ( XmlCharType.IsSurrogate( (int)code ) ) {
                        throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                    }
                    else {
                        chars[j] = (char)code;
                    }
                }
                j++;
                i += 4;
            }
            return j - charIndex;
        }
    }

    // UCS-4 decoder for octet order 1234 (most significant octet first).
    internal class Ucs4Decoder1234 : Ucs4Decoder {

        // Decodes complete 4-byte groups; returns the number of chars written.
        // Throws ArgumentException for values above 0x10FFFF and XmlException for
        // values that XmlCharType.IsSurrogate reports as surrogates.
        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            uint code;
            int i,j;

            // from here on byteCount is the exclusive end index
            byteCount += byteIndex;

            for ( i = byteIndex, j = charIndex; i+3 < byteCount; ) {
                code = (uint)( ( bytes[i] << 24 ) | ( bytes[i+1] << 16 ) | ( bytes[i+2] << 8 ) | bytes[i+3] );
                if ( code > 0x10FFFF ) {
                    throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { i } ), (string)null );
                }
                else if ( code > 0xFFFF ) {
                    // supplementary code point -> two chars
                    Ucs4ToUTF16(code, chars, j);
                    j++;
                }
                else {
                    if ( XmlCharType.IsSurrogate( (int)code ) ) {
                        throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                    }
                    else {
                        chars[j] = (char)code;
                    }
                }
                j++;
                i += 4;
            }
            return j - charIndex;
        }
    }

    // UCS-4 decoder for octet order 2143.
    internal class Ucs4Decoder2143 : Ucs4Decoder {

        // Decodes complete 4-byte groups; returns the number of chars written.
        // Throws ArgumentException for values above 0x10FFFF and XmlException for
        // values that XmlCharType.IsSurrogate reports as surrogates.
        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            uint code;
            int i,j;

            // from here on byteCount is the exclusive end index
            byteCount += byteIndex;

            for ( i = byteIndex, j = charIndex; i+3 < byteCount; ) {
                code = (uint)( ( bytes[i+1] << 24 ) | ( bytes[i] << 16 ) | ( bytes[i+3] << 8 ) | bytes[i+2] );
                if ( code > 0x10FFFF ) {
                    throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { i } ), (string)null );
                }
                else if ( code > 0xFFFF ) {
                    // supplementary code point -> two chars
                    Ucs4ToUTF16(code, chars, j);
                    j++;
                }
                else {
                    if ( XmlCharType.IsSurrogate( (int)code ) ) {
                        throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                    }
                    else {
                        chars[j] = (char)code;
                    }
                }
                j++;
                i += 4;
            }
            return j - charIndex;
        }
    }

    // UCS-4 decoder for octet order 3412.
    internal class Ucs4Decoder3412 : Ucs4Decoder {

        // Decodes complete 4-byte groups; returns the number of chars written.
        // Throws ArgumentException for values above 0x10FFFF and XmlException for
        // values that XmlCharType.IsSurrogate reports as surrogates.
        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            uint code;
            int i,j;

            // from here on byteCount is the exclusive end index
            byteCount += byteIndex;

            for ( i = byteIndex, j = charIndex; i+3 < byteCount; ) {
                code = (uint)( ( bytes[i+2] << 24 ) | ( bytes[i+3] << 16 ) | ( bytes[i] << 8 ) | bytes[i+1] );
                if ( code > 0x10FFFF ) {
                    throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { i } ), (string)null );
                }
                else if ( code > 0xFFFF ) {
                    // supplementary code point -> two chars
                    Ucs4ToUTF16(code, chars, j);
                    j++;
                }
                else {
                    if ( XmlCharType.IsSurrogate( (int)code ) ) {
                        throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                    }
                    else {
                        chars[j] = (char)code;
                    }
                }
                j++;
                i += 4;
            }
            return j - charIndex;
        }
    }
#endif
}