Imported Upstream version 4.0.0~alpha1

Former-commit-id: 806294f5ded97629b74c85c09952f2a74fe182d9
This commit is contained in:
Jo Shields
2015-04-07 09:35:12 +01:00
parent 283343f570
commit 3c1f479b9d
22469 changed files with 2931443 additions and 869343 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,355 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
#if FEATURE_CODEPAGES_FILE
namespace System.Text
{
using System;
using System.Diagnostics.Contracts;
using System.Globalization;
using System.Runtime.InteropServices;
using System.Security;
using System.Collections;
using System.Runtime.CompilerServices;
using System.Runtime.Serialization;
using System.Runtime.Versioning;
using System.Security.Permissions;
using Microsoft.Win32.SafeHandles;
// Our input file data structures look like:
//
// Header Structure Looks Like:
// struct NLSPlusHeader
// {
// WORD[16] filename; // 32 bytes
// WORD[4] version; // 8 bytes = 40 // I.e: 3, 2, 0, 0
// WORD count; // 2 bytes = 42 // Number of code page index entries that follow
// }
//
// Each code page section looks like:
// struct NLSCodePageIndex
// {
// WORD[16] codePageName; // 32 bytes
// WORD codePage; // +2 bytes = 34
// WORD byteCount; // +2 bytes = 36
// DWORD offset; // +4 bytes = 40 // Bytes from beginning of FILE.
// }
//
// Each code page then has its own header
// struct NLSCodePage
// {
// WORD[16] codePageName; // 32 bytes
// WORD[4] version; // 8 bytes = 40 // I.e: 3.2.0.0
// WORD codePage; // 2 bytes = 42
// WORD byteCount; // 2 bytes = 44 // 1 or 2 byte code page (SBCS or DBCS)
// WORD unicodeReplace; // 2 bytes = 46 // default replacement unicode character
// WORD byteReplace; // 2 bytes = 48 // default replacement byte(s)
// BYTE[] data; // data section
// }
[Serializable]
internal abstract class BaseCodePageEncoding : EncodingNLS, ISerializable
{
// Static & Const stuff
// Name of the resource that holds all code page tables; it is the resource requested
// when m_pCodePageFileHeader is initialized.
internal const String CODE_PAGE_DATA_FILE_NAME = "codepages.nlp";
// Code page whose table LoadCodePageTables() looks up. Assigned by the constructor and
// allowed to differ from the encoding's own CodePage.
[NonSerialized]
protected int dataTableCodePage;
// Variables to help us allocate/mark our memory section correctly
// When true, GetMemorySectionName() names the section after dataTableCodePage;
// when false it uses CodePage instead.
[NonSerialized]
protected bool bFlagDataTable = true;
// Not read anywhere in this class; presumably extra bytes requested by derived
// classes when sizing their memory section -- TODO confirm.
[NonSerialized]
protected int iExtraBytes = 0;
// Our private unicode to bytes best fit array and vice versa.
// Both start as null; the GetBestFit...Data() accessors call ReadBestFitTable() on first
// use and expect it to populate them.
[NonSerialized]
protected char[] arrayUnicodeBestFit = null;
[NonSerialized]
protected char[] arrayBytesBestFit = null;
// This is used to help ISCII, EUCJP and ISO2022 figure out they're MlangEncodings
// When true, GetObjectData() writes "m_maxByteSize" and proxies through
// MLangCodePageEncoding; otherwise it writes "maxCharSize" and uses CodePageEncoding.
[NonSerialized]
protected bool m_bUseMlangTypeForSerialization = false;
// Explicit, intentionally empty type initializer.
// NOTE(review): presumably declared so the static field initializer for
// m_pCodePageFileHeader runs with precise (non-beforefieldinit) timing -- confirm.
[System.Security.SecuritySafeCritical] // static constructors should be safe to call
static BaseCodePageEncoding() { }
//
// Managed overlay for the header found at the start of the native data table that we load
// from CODE_PAGE_DATA_FILE_NAME.
//
// Explicit layout is required because a fixed-size member such as char[16] cannot be
// expressed in a sequential layout; each WORD[n] array is therefore declared as a single
// element and the next field's offset leaves room for the rest of the array.
[StructLayout(LayoutKind.Explicit)]
internal unsafe struct CodePageDataFileHeader
{
    [FieldOffset(0x00)] internal char TableName;        // WORD[16]
    [FieldOffset(0x20)] internal ushort Version;        // WORD[4]
    [FieldOffset(0x28)] internal short CodePageCount;   // WORD

    // Unused WORD of padding so that CodePages starts on a DWORD boundary.
    // Otherwise, the 64-bit version will fail.
    [FieldOffset(0x2A)] internal short unused1;

    // First entry of the code page index; FindCodePage() walks forward from here.
    [FieldOffset(0x2C)] internal CodePageIndex CodePages;
}
// One entry of the code page index that follows the file header. FindCodePage() compares
// CodePage against the requested id and uses Offset (bytes from the start of the file
// data) to locate the matching CodePageHeader.
[StructLayout(LayoutKind.Explicit, Pack=2)]
internal unsafe struct CodePageIndex
{
    [FieldOffset(0x00)] internal char CodePageName;     // WORD[16]
    [FieldOffset(0x20)] internal short CodePage;        // WORD
    [FieldOffset(0x22)] internal short ByteCount;       // WORD
    [FieldOffset(0x24)] internal int Offset;            // DWORD
}
// Managed overlay for the header that starts each individual code page table.
// The four version words are used by GetMemorySectionName() to build the section name;
// ByteCount is what GetCodePageByteSize() reports.
[StructLayout(LayoutKind.Explicit)]
internal unsafe struct CodePageHeader
{
    [FieldOffset(0x00)] internal char CodePageName;         // WORD[16]

    // Version, one WORD per component
    [FieldOffset(0x20)] internal ushort VersionMajor;       // WORD
    [FieldOffset(0x22)] internal ushort VersionMinor;       // WORD
    [FieldOffset(0x24)] internal ushort VersionRevision;    // WORD
    [FieldOffset(0x26)] internal ushort VersionBuild;       // WORD

    [FieldOffset(0x28)] internal short CodePage;            // WORD
    [FieldOffset(0x2a)] internal short ByteCount;           // WORD // 1 or 2 byte code page (SBCS or DBCS)
    [FieldOffset(0x2c)] internal char UnicodeReplace;       // WORD // default replacement unicode character
    [FieldOffset(0x2e)] internal ushort ByteReplace;        // WORD // default replacement bytes

    // First word of the variable-length data section that follows the header.
    [FieldOffset(0x30)] internal short FirstDataWord;       // WORD[]
}
// Initialize our global stuff
// Pointer to the start of the code page data file, obtained once from the resource named
// CODE_PAGE_DATA_FILE_NAME in the assembly that contains CharUnicodeInfo. FindCodePage()
// dereferences it without a null check.
[SecurityCritical]
unsafe static CodePageDataFileHeader* m_pCodePageFileHeader =
(CodePageDataFileHeader*)GlobalizationAssembly.GetGlobalizationResourceBytePtr(
typeof(CharUnicodeInfo).Assembly, CODE_PAGE_DATA_FILE_NAME);
// Real variables
// Header of the code page table selected for this instance; set by LoadCodePageTables().
[NonSerialized]
[SecurityCritical]
unsafe protected CodePageHeader* pCodePage = null;
// Safe handle wrapper around section map view
// Only assigned by GetSharedMemory() when a mapped-file handle was returned; stays null otherwise.
[System.Security.SecurityCritical] // auto-generated
[NonSerialized]
protected SafeViewOfFileHandle safeMemorySectionHandle = null;
// Safe handle wrapper around mapped file handle
// Assigned together with safeMemorySectionHandle in GetSharedMemory().
[System.Security.SecurityCritical] // auto-generated
[NonSerialized]
protected SafeFileMappingHandle safeFileMappingHandle = null;
// Convenience constructor: the data table is looked up under the same number as the
// encoding's code page.
[System.Security.SecurityCritical] // auto-generated
internal BaseCodePageEncoding(int codepage)
    : this(codepage, codepage)
{
}
// Creates the encoding for `codepage` (0 is replaced by the value of
// Win32Native.GetACP()) and loads its tables from the entry for `dataCodePage`.
// Throws NotSupportedException (via LoadCodePageTables) if no such table exists.
[System.Security.SecurityCritical] // auto-generated
internal BaseCodePageEncoding(int codepage, int dataCodePage)
    : base(codepage != 0 ? codepage : Microsoft.Win32.Win32Native.GetACP())
{
    // The table number must be recorded before the tables are loaded, because
    // LoadCodePageTables() reads it.
    this.dataTableCodePage = dataCodePage;
    LoadCodePageTables();
}
// Deserialization constructor. It always throws: GetObjectData() redirects serialization
// to a proxy type (CodePageEncoding or MLangCodePageEncoding), so this type is never
// expected to be materialized directly.
[System.Security.SecurityCritical] // auto-generated
internal BaseCodePageEncoding(SerializationInfo info, StreamingContext context)
    : base(0)
{
    // We cannot ever call this, we've proxied ourselves to CodePageEncoding
    throw new ArgumentNullException("this");
}
// ISerializable implementation
#if FEATURE_SERIALIZATION
// Writes the base encoding data plus a single size value, then redirects deserialization
// to a proxy type. The value name and the proxy type both depend on
// m_bUseMlangTypeForSerialization.
[System.Security.SecurityCritical] // auto-generated_required
void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
{
    // Make sure to get the base stuff too. This throws if info is null.
    SerializeEncoding(info, context);
    Contract.Assert(info!=null, "[BaseCodePageEncoding.GetObjectData] Expected null info to throw");

    // Just need Everett maxCharSize (BaseCodePageEncoding) or m_maxByteSize (MLangBaseCodePageEncoding),
    // and this class's proxy or the MLang proxy as our deserializer.
    String sizeFieldName;
    Type proxyType;
    if (m_bUseMlangTypeForSerialization)
    {
        sizeFieldName = "m_maxByteSize";
        proxyType = typeof(MLangCodePageEncoding);
    }
    else
    {
        sizeFieldName = "maxCharSize";
        proxyType = typeof(CodePageEncoding);
    }

    int sizeValue = this.IsSingleByte ? 1 : 2;
    info.AddValue(sizeFieldName, sizeValue);
    info.SetType(proxyType);
}
#endif
// Locates the table for dataTableCodePage, remembers its header in pCodePage and asks the
// derived class to load it. Throws NotSupportedException when the data file has no entry
// for that code page.
[System.Security.SecurityCritical] // auto-generated
private unsafe void LoadCodePageTables()
{
    CodePageHeader* header = FindCodePage(dataTableCodePage);
    if (header != null)
    {
        // Remember our code page, then load it
        this.pCodePage = header;
        LoadManagedCodePage();
        return;
    }

    // Didn't have one
    throw new NotSupportedException(
        Environment.GetResourceString("NotSupported_NoCodepageData", CodePage));
}
// Returns a pointer to the CodePageHeader for `codePage`, or null if the index in the data
// file has no entry with that number. The index is scanned linearly from the first entry
// (it is not a binary search), so lookups are not especially fast.
[System.Security.SecurityCritical] // auto-generated
private static unsafe CodePageHeader* FindCodePage(int codePage)
{
    CodePageDataFileHeader* fileHeader = m_pCodePageFileHeader;
    int entryCount = fileHeader->CodePageCount;
    CodePageIndex* entry = &(fileHeader->CodePages);

    for (int remaining = entryCount; remaining > 0; remaining--, entry++)
    {
        if (entry->CodePage != codePage)
            continue;

        // Found it! Offset is measured in bytes from the start of the file data.
        return (CodePageHeader*)((byte*)fileHeader + entry->Offset);
    }

    // Couldn't find it
    return null;
}
// Returns the ByteCount recorded in the table header for `codePage`, or 0 when the data
// file has no table for it. Debug builds assert that the recorded value is 1 or 2.
[System.Security.SecurityCritical] // auto-generated
internal static unsafe int GetCodePageByteSize(int codePage)
{
    CodePageHeader* header = FindCodePage(codePage);
    if (header != null)
    {
        Contract.Assert(header->ByteCount == 1 || header->ByteCount == 2,
            "[BaseCodePageEncoding] Code page (" + codePage + ") has invalid byte size (" + header->ByteCount + ") in table");

        // Return what it says for byte count
        return header->ByteCount;
    }

    // No table for this code page
    return 0;
}
// We have a managed code page entry, so load our tables
// Implemented by derived classes. Called by LoadCodePageTables() after pCodePage has been
// set, and again by CheckMemorySection() when the memory section handle has been zeroed.
[System.Security.SecurityCritical]
protected abstract unsafe void LoadManagedCodePage();
// Obtains an iSize-byte memory section for this code page, named by GetMemorySectionName().
// Returns the section pointer; throws OutOfMemoryException if none could be obtained.
// The safe handles are only created when a mapped-file handle comes back.
[System.Security.SecurityCritical] // auto-generated
[ResourceExposure(ResourceScope.None)]
[ResourceConsumption(ResourceScope.Machine, ResourceScope.Machine)]
protected unsafe byte* GetSharedMemory(int iSize)
{
    // This gets shared memory for our map. If it can't, it gives us clean memory.
    IntPtr hFileMapping;
    byte* pSection = EncodingTable.nativeCreateOpenFileMapping(
        GetMemorySectionName(), iSize, out hFileMapping);

    Contract.Assert(pSection != null,
        "[BaseCodePageEncoding.GetSharedMemory] Expected non-null memory section to be opened");

    // If that failed, we have to die.
    if (pSection == null)
    {
        throw new OutOfMemoryException(
            Environment.GetResourceString("Arg_OutOfMemoryException"));
    }

    // A null file handle means the memory was allocated after failing to open the mapped
    // file, so there is nothing for the safe handles to own.
    if (hFileMapping == IntPtr.Zero)
        return pSection;

    safeMemorySectionHandle = new SafeViewOfFileHandle((IntPtr) pSection, true);
    safeFileMappingHandle = new SafeFileMappingHandle(hFileMapping, true);
    return pSection;
}
// Builds the memory section name "NLS_CodePage_<cp>_<major>_<minor>_<revision>_<build>".
// <cp> is dataTableCodePage when bFlagDataTable is set, otherwise CodePage; the version
// numbers come from the header pCodePage points at, so pCodePage must already be set.
[System.Security.SecurityCritical] // auto-generated
protected unsafe virtual String GetMemorySectionName()
{
    int sectionCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;
    CodePageHeader* header = this.pCodePage;

    return String.Format(CultureInfo.InvariantCulture, "NLS_CodePage_{0}_{1}_{2}_{3}_{4}",
        sectionCodePage, header->VersionMajor, header->VersionMinor,
        header->VersionRevision, header->VersionBuild);
}
// Implemented by derived classes. The GetBestFit...Data() accessors call this when their
// array is still null and assert afterwards that the array has been assigned.
[System.Security.SecurityCritical]
protected abstract unsafe void ReadBestFitTable();
// Returns the unicode -> bytes best fit table, reading it via ReadBestFitTable() the first
// time it is requested.
[System.Security.SecuritySafeCritical] //
internal override char[] GetBestFitUnicodeToBytesData()
{
    char[] table = arrayUnicodeBestFit;
    if (table == null)
    {
        // Read in our best fit table if necessary
        ReadBestFitTable();
        table = arrayUnicodeBestFit;
    }

    Contract.Assert(table != null,
        "[BaseCodePageEncoding.GetBestFitUnicodeToBytesData]Expected non-null arrayUnicodeBestFit");

    // Normally we don't have any best fit data.
    return table;
}
// Returns the bytes -> unicode best fit table, reading it via ReadBestFitTable() the first
// time it is requested.
[System.Security.SecuritySafeCritical] //
internal override char[] GetBestFitBytesToUnicodeData()
{
    char[] table = arrayBytesBestFit;
    if (table == null)
    {
        // Read in our best fit table if necessary
        ReadBestFitTable();
        table = arrayBytesBestFit;
    }

    Contract.Assert(table != null,
        "[BaseCodePageEncoding.GetBestFitBytesToUnicodeData]Expected non-null arrayBytesBestFit");

    // Normally we don't have any best fit data.
    return table;
}
// During AppDomain shutdown the Encoding class may already have been finalized, leaving the
// memory section invalid. We detect that by checking whether the section handle has been
// zeroed and, if so, re-initialize the section through LoadManagedCodePage(); the mapped
// file handle and section pointer will then be finalized one more time.
// Nothing happens when no section handle was ever created.
[System.Security.SecurityCritical] // auto-generated
internal unsafe void CheckMemorySection()
{
    SafeViewOfFileHandle section = safeMemorySectionHandle;
    if (section == null)
        return;

    if (section.DangerousGetHandle() != IntPtr.Zero)
        return;

    LoadManagedCodePage();
}
}
}
#endif // FEATURE_CODEPAGES_FILE

View File

@@ -0,0 +1,144 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// WARNING:
//
// This is just an IObjectReference proxy for the Code Page Encodings.
namespace System.Text
{
using System;
using System.Runtime.Serialization;
using System.Security.Permissions;
using System.Diagnostics.Contracts;
/*=================================CodePageEncoding==================================
** This class is here only to deserialize the Code Page classes from Everett (V1.1) into
** Appropriate Whidbey (V2.0) objects. We also serialize the Whidbey classes
** using this proxy since we pretty much need one anyway and that solves Whidbey
** to Everett compatibility as well.
==============================================================================*/
[Serializable]
internal sealed class CodePageEncoding : ISerializable, IObjectReference
{
// Temp stuff
// Values captured by the deserialization constructor and consumed by GetRealObject().
// Code page number read from the "m_codePage" entry.
[NonSerialized]
private int m_codePage;
// Read from "m_isReadOnly"; forced to true when the newer fields are missing.
[NonSerialized]
private bool m_isReadOnly;
// Set when reading the newer (Whidbey) fields raised SerializationException.
[NonSerialized]
private bool m_deserializedFromEverett = false;
// Fallbacks read from the stream; applied only to a writable, non-Everett encoding.
[NonSerialized]
private EncoderFallback encoderFallback = null;
[NonSerialized]
private DecoderFallback decoderFallback = null;
// Might need this when GetRealObjecting
[NonSerialized]
private Encoding realEncoding = null;
// Constructor called by serialization.
// Reads the code page and, when present, the read-only flag and fallbacks.
// Throws ArgumentNullException when info is null.
internal CodePageEncoding(SerializationInfo info, StreamingContext context)
{
// Any info?
if (info==null) throw new ArgumentNullException("info");
Contract.EndContractBlock();
// All versions have a code page
this.m_codePage = (int)info.GetValue("m_codePage", typeof(int));
// See if we have the newer fields
try
{
//
// Try Whidbey V2.0 Fields
//
// Each field is assigned as soon as it is read, so fields read before a failing
// GetValue call keep their values when the catch block runs.
this.m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
this.encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
this.decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
}
catch (SerializationException)
{
//
// Didn't have Whidbey things, must be Everett
//
this.m_deserializedFromEverett = true;
// May as well be read only (this overrides any m_isReadOnly value read above)
this.m_isReadOnly = true;
}
}
// IObjectReference implementation: replaces this proxy with the real Encoding.
// The encoding comes from Encoding.GetEncoding(m_codePage). For Everett data or a read-only
// encoding that instance is returned as is (default fallbacks); otherwise a clone carrying
// the deserialized encoder/decoder fallbacks is returned.
[System.Security.SecurityCritical] // auto-generated
public Object GetRealObject(StreamingContext context)
{
    // Get our encoding (Note: This has default fallbacks for readonly and everett cases)
    this.realEncoding = Encoding.GetEncoding(this.m_codePage);

    bool keepDefaultFallbacks = this.m_deserializedFromEverett || this.m_isReadOnly;
    if (keepDefaultFallbacks)
        return this.realEncoding;

    // Writable case: work on a clone so the fallbacks can be replaced.
    Encoding writable = (Encoding)this.realEncoding.Clone();
    this.realEncoding = writable;
    writable.EncoderFallback = this.encoderFallback;
    writable.DecoderFallback = this.decoderFallback;
    return writable;
}
#if FEATURE_SERIALIZATION
// ISerializable implementation.
// The proxy itself is never expected to be serialized: debug builds assert and every build
// throws ArgumentException.
[System.Security.SecurityCritical] // auto-generated_required
void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
{
    // We cannot ever call this.
    Contract.Assert(false, "Didn't expect to make it to CodePageEncoding ISerializable.GetObjectData");

    String message = Environment.GetResourceString("Arg_ExecutionEngineException");
    throw new ArgumentException(message);
}
#endif
// Same problem with the Decoder, this only happens with Everett Decoders.
// Proxy that reads the serialized "encoding" entry and resolves to that encoding's decoder.
[Serializable]
internal sealed class Decoder : ISerializable, IObjectReference
{
    // Encoding read from the stream; GetRealObject() asks it for the decoder.
    [NonSerialized]
    private Encoding realEncoding = null;

    // Constructor called by serialization, have to handle deserializing from Everett.
    // Throws ArgumentNullException when info is null.
    internal Decoder(SerializationInfo info, StreamingContext context)
    {
        // Any info?
        if (info==null) throw new ArgumentNullException("info");
        Contract.EndContractBlock();

        Object stored = info.GetValue("encoding", typeof(Encoding));
        this.realEncoding = (Encoding)stored;
    }

    // Just get it from GetDecoder
    [System.Security.SecurityCritical] // auto-generated
    public Object GetRealObject(StreamingContext context)
    {
        Encoding encoding = this.realEncoding;
        return encoding.GetDecoder();
    }

#if FEATURE_SERIALIZATION
    // ISerializable implementation, get data for this object.
    // Never expected to run: asserts in debug builds and always throws ArgumentException.
    [System.Security.SecurityCritical] // auto-generated_required
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        // We cannot ever call this.
        Contract.Assert(false, "Didn't expect to make it to CodePageEncoding.Decoder.GetObjectData");

        String message = Environment.GetResourceString("Arg_ExecutionEngineException");
        throw new ArgumentException(message);
    }
#endif
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,351 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
namespace System.Text
{
using System.Runtime.Serialization;
using System.Text;
using System;
using System.Diagnostics.Contracts;
// A Decoder is used to decode a sequence of blocks of bytes into a
// sequence of blocks of characters. Following instantiation of a decoder,
// sequential blocks of bytes are converted into blocks of characters through
// calls to the GetChars method. The decoder maintains state between the
// conversions, allowing it to correctly decode byte sequences that span
// adjacent blocks.
//
// Instances of specific implementations of the Decoder abstract base
// class are typically obtained through calls to the GetDecoder method
// of Encoding objects.
//
[System.Runtime.InteropServices.ComVisible(true)]
[Serializable]
public abstract class Decoder
{
// Fallback strategy for this decoder; may be null, in which case FallbackBuffer uses
// DecoderFallback.ReplacementFallback. Written out by SerializeDecoder().
internal DecoderFallback m_fallback = null;
// Buffer created on demand by the FallbackBuffer property and dropped whenever Fallback
// is reassigned. Not serialized.
[NonSerialized]
internal DecoderFallbackBuffer m_fallbackBuffer = null;
// Adds this decoder's fallback object to `info` under the name "m_fallback".
internal void SerializeDecoder(SerializationInfo info)
{
    DecoderFallback fallback = this.m_fallback;
    info.AddValue("m_fallback", fallback);
}
// Deliberately empty: Reset() is not called here because the default reset probably isn't
// good if we aren't initialized yet.
protected Decoder()
{
}
// Gets or sets the fallback object used by this decoder.
// Setting throws ArgumentNullException for null, and ArgumentException while the current
// fallback buffer still has characters remaining. A successful set discards the current
// fallback buffer so the next FallbackBuffer access creates one from the new fallback.
[System.Runtime.InteropServices.ComVisible(false)]
public DecoderFallback Fallback
{
    get { return m_fallback; }

    set
    {
        if (value == null)
            throw new ArgumentNullException("value");
        Contract.EndContractBlock();

        // Can't change fallback if buffer is wrong
        DecoderFallbackBuffer pending = m_fallbackBuffer;
        bool bufferHasData = pending != null && pending.Remaining > 0;
        if (bufferHasData)
        {
            throw new ArgumentException(
                Environment.GetResourceString("Argument_FallbackBufferNotEmpty"), "value");
        }

        m_fallbackBuffer = null;
        m_fallback = value;
    }
}
// Returns the fallback buffer for this decoder, creating it on first use from m_fallback,
// or from DecoderFallback.ReplacementFallback when no fallback has been set.
// Note: we don't test for threading here because async access to Encoders and Decoders
// doesn't work anyway.
[System.Runtime.InteropServices.ComVisible(false)]
public DecoderFallbackBuffer FallbackBuffer
{
    get
    {
        if (m_fallbackBuffer != null)
            return m_fallbackBuffer;

        DecoderFallback source = (m_fallback != null)
            ? m_fallback
            : DecoderFallback.ReplacementFallback;

        m_fallbackBuffer = source.CreateFallbackBuffer();
        return m_fallbackBuffer;
    }
}
// True once a fallback buffer exists. Unlike FallbackBuffer, reading this never creates one.
internal bool InternalHasFallbackBuffer
{
    get { return null != m_fallbackBuffer; }
}
// Reset the Decoder
//
// Normally if we call GetChars() and an error is thrown we don't change the state of the
// Decoder, so the caller can correct the error condition and try again (for example with a
// bigger buffer). A caller that does not want to retry must call Reset() instead.
//
// This default implementation flushes by decoding an empty byte array with flush == true
// into a buffer sized by GetCharCount for that same call, then resets the fallback buffer
// if one exists. We avoid GetMaxCharCount() because a) we can't call the base encoder and
// b) it might be really big.
[System.Runtime.InteropServices.ComVisible(false)]
public virtual void Reset()
{
    byte[] noBytes = new byte[0];

    int flushedCharCount = GetCharCount(noBytes, 0, 0, true);
    char[] scratch = new char[flushedCharCount];
    GetChars(noBytes, 0, 0, scratch, 0, true);

    // Read the field only now: the calls above may have created the buffer.
    DecoderFallbackBuffer buffer = m_fallbackBuffer;
    if (buffer != null)
    {
        buffer.Reset();
    }
}
// Returns the number of characters the next call to GetChars will
// produce if presented with the given range of bytes. The returned value
// takes into account the state in which the decoder was left following the
// last call to GetChars. The state of the decoder is not affected
// by a call to this method.
//
// bytes - array containing the bytes to examine
// index - position of the first byte to examine
// count - number of bytes to examine
//
public abstract int GetCharCount(byte[] bytes, int index, int count);
// Flush-aware overload. This default implementation ignores `flush` and simply forwards to
// the three-argument overload; decoders that keep state should override it.
[System.Runtime.InteropServices.ComVisible(false)]
public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
{
    int charCount = GetCharCount(bytes, index, count);
    return charCount;
}
// We expect this to be the workhorse for NLS Encodings, but for existing
// ones we need a working (if slow) default implementation.
//
// Copies `count` bytes from `bytes` into a temporary array and counts the characters
// through the array-based overload.
//
// bytes - pointer to the first byte to examine (must not be null)
// count - number of bytes to examine (must not be negative)
// flush - forwarded to GetCharCount(byte[], int, int, bool), so the count is computed
//         under the same flush setting that the pointer-based GetChars() passes on.
//         Decoders that do not override the flush-aware array overload see no change,
//         because its default implementation ignores flush.
//
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a
// negative count.
[System.Security.SecurityCritical] // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
{
    // Validate input parameters
    if (bytes == null)
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    if (count < 0)
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // Get the byte array to count
    byte[] arrbyte = new byte[count];
    for (int index = 0; index < count; index++)
        arrbyte[index] = bytes[index];

    // Honor the caller's flush request instead of silently dropping it.
    return GetCharCount(arrbyte, 0, count, flush);
}
// Decodes a range of bytes in a byte array into a range of characters
// in a character array. The method decodes byteCount bytes from
// bytes starting at index byteIndex, storing the resulting
// characters in chars starting at index charIndex. The
// decoding takes into account the state in which the decoder was left
// following the last call to this method.
//
// An exception occurs if the character array is not large enough to
// hold the complete decoding of the bytes. The GetCharCount method
// can be used to determine the exact number of characters that will be
// produced for a given range of bytes. Alternatively, the
// GetMaxCharCount method of the Encoding that produced this
// decoder can be used to determine the maximum number of characters that
// will be produced for a given number of bytes, regardless of the actual
// byte values.
//
// Callers in this class use the return value as the number of characters written.
//
public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
char[] chars, int charIndex);
// Flush-aware overload. This default implementation ignores `flush` and forwards to the
// five-argument overload; decoders that keep state should override it.
public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
    char[] chars, int charIndex, bool flush)
{
    int charsWritten = GetChars(bytes, byteIndex, byteCount, chars, charIndex);
    return charsWritten;
}
// We expect this to be the workhorse for NLS Encodings, but for existing
// ones we need a working (if slow) default implementation.
//
// Copies the input into a managed byte[], decodes into a managed char[] of charCount
// elements through the array-based GetChars, and copies the produced characters back out.
// Returns the number of characters copied to `chars`.
//
// WARNING WARNING WARNING
//
// WARNING: If this breaks it could be a security threat. Obviously we
// call this internally, so you need to make sure that your pointers, counts
// and indexes are correct when you call this method.
//
// In addition, we have internal code, which will be marked as "safe" calling
// this code. However this code is dependent upon the implementation of an
// external GetChars() method, which could be overridden by a third party and
// the results of which cannot be guaranteed. We use that result to copy
// the char[] to our char* output buffer. If the result count was wrong, we
// could easily overflow our output buffer. Therefore the copy is clamped so that
// it never exceeds charCount either.
[System.Security.SecurityCritical] // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetChars(byte* bytes, int byteCount,
    char* chars, int charCount, bool flush)
{
    // Validate input parameters
    if (chars == null || bytes == null)
        throw new ArgumentNullException(chars == null ? "chars" : "bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    if (byteCount < 0 || charCount < 0)
        throw new ArgumentOutOfRangeException((byteCount<0 ? "byteCount" : "charCount"),
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // Get the byte array to convert
    byte[] managedBytes = new byte[byteCount];
    for (int i = 0; i < byteCount; i++)
    {
        managedBytes[i] = bytes[i];
    }

    // Get the char array to fill, then do the work
    char[] managedChars = new char[charCount];
    int produced = GetChars(managedBytes, 0, byteCount, managedChars, 0, flush);

    // The only way this could fail is a bug in GetChars
    Contract.Assert(produced <= charCount, "Returned more chars than we have space for");

    // WARNING: We MUST make sure that we don't copy too many chars. We can't rely on
    // `produced` because it could come from a 3rd party implementation, so the number
    // copied is the smaller of `produced` and charCount no matter what was reported.
    int toCopy = Math.Min(produced, charCount);
    for (int i = 0; i < toCopy; i++)
    {
        chars[i] = managedChars[i];
    }

    return toCopy;
}
// This method is used when the output buffer might not be large enough.
// It tries to decode all byteCount bytes; whenever GetCharCount reports more than charCount
// characters it halves the number of bytes and tries again (with flush turned off, since
// not all of the input will be read). On success:
//   bytesUsed - number of input bytes that were decoded
//   charsUsed - value returned by GetChars for that call
//   completed - true only if every input byte was decoded and the fallback buffer
//               (if any) has nothing remaining
// We're done processing this buffer only if completed returns true.
//
// An ArgumentException (conversion overflow) is thrown when not even a single byte fits
// into chars (like 0 or maybe 1 or 4 chars for some encodings). Note that the retry loop is
// never entered for byteCount == 0, so that case also ends in the overflow exception.
//
// Might consider checking Max...Count to avoid the extra counting step.
//
// Note that if all of the input bytes are not consumed, then we'll do a /2, which means
// that it's likely that we didn't consume as many bytes as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
[System.Runtime.InteropServices.ComVisible(false)]
public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
    char[] chars, int charIndex, int charCount, bool flush,
    out int bytesUsed, out int charsUsed, out bool completed)
{
    // Validate parameters
    if (bytes == null || chars == null)
        throw new ArgumentNullException((bytes == null ? "bytes" : "chars"),
            Environment.GetResourceString("ArgumentNull_Array"));
    if (byteIndex < 0 || byteCount < 0)
        throw new ArgumentOutOfRangeException((byteIndex<0 ? "byteIndex" : "byteCount"),
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (charIndex < 0 || charCount < 0)
        throw new ArgumentOutOfRangeException((charIndex<0 ? "charIndex" : "charCount"),
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (bytes.Length - byteIndex < byteCount)
        throw new ArgumentOutOfRangeException("bytes",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    if (chars.Length - charIndex < charCount)
        throw new ArgumentOutOfRangeException("chars",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    Contract.EndContractBlock();

    // Start with everything and halve the attempt each time it would overrun our buffer.
    for (bytesUsed = byteCount; bytesUsed > 0; bytesUsed /= 2)
    {
        int charsNeeded = GetCharCount(bytes, byteIndex, bytesUsed, flush);
        if (charsNeeded > charCount)
        {
            // Try again with 1/2 the count, won't flush then 'cause won't read it all
            flush = false;
            continue;
        }

        charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);

        bool consumedEverything = (bytesUsed == byteCount);
        completed = consumedEverything &&
            (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0);
        return;
    }

    // Oops, we didn't have anything, we'll have to throw an overflow
    throw new ArgumentException(Environment.GetResourceString("Argument_ConversionOverflow"));
}
// This is the version that uses pointers.
// It tries to decode all byteCount bytes; whenever GetCharCount reports more than charCount
// characters it halves the number of bytes and tries again with flush turned off. On
// success bytesUsed is the number of bytes decoded, charsUsed is the value returned by
// GetChars, and completed is true only if every byte was decoded and the fallback buffer
// (if any) has nothing remaining.
// We're done processing this buffer only if completed returns true.
//
// Throws ArgumentException (conversion overflow) when not even one byte fits; the retry
// loop is never entered for byteCount == 0, so that case throws as well.
//
// Might consider checking Max...Count to avoid the extra counting step.
//
// Note that if all of the input bytes are not consumed, then we'll do a /2, which means
// that it's likely that we didn't consume as many bytes as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
[System.Security.SecurityCritical] // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe void Convert(byte* bytes, int byteCount,
    char* chars, int charCount, bool flush,
    out int bytesUsed, out int charsUsed, out bool completed)
{
    // Validate input parameters
    if (chars == null || bytes == null)
        throw new ArgumentNullException(chars == null ? "chars" : "bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    if (byteCount < 0 || charCount < 0)
        throw new ArgumentOutOfRangeException((byteCount<0 ? "byteCount" : "charCount"),
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // Start with everything and halve the attempt each time it would overrun our buffer.
    for (bytesUsed = byteCount; bytesUsed > 0; bytesUsed /= 2)
    {
        int charsNeeded = GetCharCount(bytes, bytesUsed, flush);
        if (charsNeeded > charCount)
        {
            // Try again with 1/2 the count, won't flush then 'cause won't read it all
            flush = false;
            continue;
        }

        charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);

        bool consumedEverything = (bytesUsed == byteCount);
        completed = consumedEverything &&
            (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0);
        return;
    }

    // Oops, we didn't have anything, we'll have to throw an overflow
    throw new ArgumentException(Environment.GetResourceString("Argument_ConversionOverflow"));
}
}
}

View File

@@ -0,0 +1,249 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// DecoderBestFitFallback.cs
//
// This is used internally to create best fit behavior as per the original windows best fit behavior.
//
namespace System.Text
{
using System;
using System.Text;
using System.Threading;
using System.Diagnostics.Contracts;
// Decoder fallback that substitutes a "best fit" character for bytes that cannot be
// decoded. The best fit table itself is loaded by the fallback buffer the first time one
// is constructed for this fallback.
[Serializable]
internal sealed class InternalDecoderBestFitFallback : DecoderFallback
{
    // Encoding whose best fit (bytes -> unicode) data is used.
    internal Encoding encoding = null;

    // Best fit table; null until a fallback buffer populates it.
    internal char[] arrayBestFit = null;

    // Character produced when no best fit entry matches.
    internal char cReplacement = '?';

    internal InternalDecoderBestFitFallback(Encoding encoding)
    {
        // The replacement characters table is loaded later, by the buffer.
        this.encoding = encoding;
        this.bIsMicrosoftBestFitFallback = true;
    }

    // Creates a new buffer bound to this fallback.
    public override DecoderFallbackBuffer CreateFallbackBuffer()
    {
        return new InternalDecoderBestFitFallbackBuffer(this);
    }

    // Maximum number of characters that this instance of this fallback could return
    public override int MaxCharCount
    {
        get { return 1; }
    }

    // Two best fit fallbacks are equal when their encodings have the same code page.
    public override bool Equals(Object value)
    {
        InternalDecoderBestFitFallback that = value as InternalDecoderBestFitFallback;
        if (that == null)
        {
            return false;
        }

        return this.encoding.CodePage == that.encoding.CodePage;
    }

    // Hash code is the code page, consistent with Equals.
    public override int GetHashCode()
    {
        return this.encoding.CodePage;
    }
}
internal sealed class InternalDecoderBestFitFallbackBuffer : DecoderFallbackBuffer
{
// Our variables
// Character handed out by GetNextChar(); chosen by Fallback().
internal char cBestFit = '\0';
// Countdown of characters still to return: Fallback() sets it to 1, GetNextChar()
// decrements it, and Reset() puts it back to -1 (nothing buffered).
internal int iCount = -1;
// Number of characters produced by the last Fallback() call (always 1); used by
// MovePrevious() as the upper bound when backing up.
internal int iSize;
// The fallback that created this buffer; supplies the best fit table and replacement char.
private InternalDecoderBestFitFallback oFallback;
// Private object for locking instead of locking on a public type for SQL reliability work.
// Created on first use; Interlocked.CompareExchange only stores the new object while the
// field is still null, so every caller ends up returning the same instance.
private static Object s_InternalSyncObject;
private static Object InternalSyncObject
{
    get
    {
        if (s_InternalSyncObject != null)
            return s_InternalSyncObject;

        Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, new Object(), null);
        return s_InternalSyncObject;
    }
}
// Constructor
// Binds the buffer to its fallback and makes sure the fallback's best fit table has been
// loaded from the encoding. The table is checked once without the lock and once more
// inside it, so it is only fetched while it is still missing.
public InternalDecoderBestFitFallbackBuffer(InternalDecoderBestFitFallback fallback)
{
    this.oFallback = fallback;

    if (fallback.arrayBestFit != null)
        return;

    // Lock so we don't confuse ourselves.
    lock (InternalSyncObject)
    {
        // Double check before we do it again.
        if (fallback.arrayBestFit == null)
        {
            fallback.arrayBestFit = fallback.encoding.GetBestFitBytesToUnicodeData();
        }
    }
}
// Fallback methods
// Prepares exactly one fallback character for the undecodable bytes: the best fit
// character if TryBestFit() finds one, otherwise the fallback's replacement character.
// `index` is not used. Always returns true, because there is always one character to
// fetch through GetNextChar().
public override bool Fallback(byte[] bytesUnknown, int index)
{
    // We expect no previous fallback in our buffer
    Contract.Assert(iCount < 1, "[InternalDecoderBestFitFallbackBuffer.Fallback] Calling fallback without a previously empty buffer");

    cBestFit = TryBestFit(bytesUnknown);

    // '\0' means no best fit entry matched
    if (cBestFit == '\0')
        cBestFit = oFallback.cReplacement;

    // One character buffered, one character total
    iCount = iSize = 1;
    return true;
}
// Returns the buffered best fit character, or '\0' when nothing is left.
// The counter is decremented on every call and is allowed to go below zero: a value of
// exactly 0 means the current/last character is a fallback, which is how recursion is
// detected. We could have a flag but we already have this counter.
public override char GetNextChar()
{
    int remaining = --iCount;

    // 0 is now the last fallback char, negative is nothing left
    if (remaining < 0)
        return '\0';

    // Normal case: hand out the best fit character
    if (remaining != int.MaxValue)
        return cBestFit;

    // The decrement wrapped around; put the counter back to "empty"
    iCount = -1;
    return '\0';
}
// Backs up one position in the buffer. Returns false (without changing anything) when the
// buffer is empty (negative counter); otherwise the counter is incremented and the result
// says whether it still lies within 0..iSize.
public override bool MovePrevious()
{
    // Nothing buffered, nowhere to back up to
    if (iCount < 0)
        return false;

    iCount++;

    // Return true if we could do it.
    return (iCount >= 0 && iCount <= iSize);
}
// Number of fallback characters still to be read; never negative.
public override int Remaining
{
    get
    {
        if (iCount > 0)
        {
            return iCount;
        }
        return 0;
    }
}
// Empties the buffer and drops the remembered start-of-input pointer.
[System.Security.SecuritySafeCritical] // overrides public transparent member
public override unsafe void Reset()
{
    byteStart = null;
    iCount = -1;    // -1 == nothing buffered
}
// Counting-only variant: reports how many characters a fallback would produce without
// copying anything. The signature carries both the byte[] (there may be extra bytes)
// and the byte* (the index may be needed), but this implementation reads neither.
[System.Security.SecurityCritical] // auto-generated
internal unsafe override int InternalFallback(byte[] bytes, byte* pBytes)
{
    // The result is always a single character: either a best-fit char or the replacement.
    const int fallbackLength = 1;
    return fallbackLength;
}
// private helper methods

// Looks up the best-fit character for a 1- or 2-byte sequence.
//   bytesCheck - the undecodable bytes; for two bytes the first one is the high-order byte.
// Returns the mapped character, or '\0' when the table is empty, the sequence is not
// 1 or 2 bytes long, or no entry matches.
// The table (oFallback.arrayBestFit) is read as consecutive pairs: key at the even index,
// replacement at the following odd index. The search assumes the keys are in ascending order.
private char TryBestFit(byte[] bytesCheck)
{
    // Need to figure out our best fit character, low is beginning of array, high is 1 AFTER end of array
    int lowBound = 0;
    int highBound = oFallback.arrayBestFit.Length;
    int index;
    char cCheck;

    // Check trivial case first (no best fit)
    if (highBound == 0)
        return '\0';

    // If our array is too small or too big we can't check
    if (bytesCheck.Length == 0 || bytesCheck.Length > 2)
        return '\0';

    // Fold the byte(s) into a single lookup key.
    if (bytesCheck.Length == 1)
        cCheck = unchecked((char)bytesCheck[0]);
    else
        cCheck = unchecked((char)((bytesCheck[0] << 8) + bytesCheck[1]));

    // Check trivial out of range case (highBound - 2 is the key of the last pair)
    if (cCheck < oFallback.arrayBestFit[0] || cCheck > oFallback.arrayBestFit[highBound - 2])
        return '\0';

    // Binary search the array
    int iDiff;
    while ((iDiff = (highBound - lowBound)) > 6)
    {
        // Look in the middle, which is complicated by the fact that we have 2 #s for each pair,
        // so we don't want index to be odd because it must be word aligned.
        // Also note that index can never == highBound (because diff is rounded down).
        // Only the lowest bit is cleared; a 16-bit mask would also discard the upper bits
        // of the index and derail the search on tables longer than 65535 entries.
        index = ((iDiff / 2) + lowBound) & ~1;

        char cTest = oFallback.arrayBestFit[index];
        if (cTest == cCheck)
        {
            // We found it
            Contract.Assert(index + 1 < oFallback.arrayBestFit.Length,
                "[InternalDecoderBestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");
            return oFallback.arrayBestFit[index + 1];
        }
        else if (cTest < cCheck)
        {
            // We weren't high enough
            lowBound = index;
        }
        else
        {
            // We weren't low enough
            highBound = index;
        }
    }

    // Few enough pairs remain: scan them linearly, stepping one pair at a time.
    for (index = lowBound; index < highBound; index += 2)
    {
        if (oFallback.arrayBestFit[index] == cCheck)
        {
            // We found it
            Contract.Assert(index + 1 < oFallback.arrayBestFit.Length,
                "[InternalDecoderBestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");
            return oFallback.arrayBestFit[index + 1];
        }
    }

    // Char wasn't in our table
    return '\0';
}
}
}

View File

@@ -0,0 +1,158 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// DecoderExceptionFallback.cs
//
namespace System.Text
{
using System;
using System.Runtime.Serialization;
using System.Globalization;
// Decoder fallback whose buffers are DecoderExceptionFallbackBuffer instances.
// The type carries no state, so all instances are interchangeable.
[Serializable]
public sealed class DecoderExceptionFallback : DecoderFallback
{
    // Construction: nothing to initialise.
    public DecoderExceptionFallback()
    {
    }

    // Creates a new buffer for this fallback.
    public override DecoderFallbackBuffer CreateFallbackBuffer()
    {
        DecoderFallbackBuffer buffer = new DecoderExceptionFallbackBuffer();
        return buffer;
    }

    // Maximum number of characters that this instance of this fallback could return
    public override int MaxCharCount
    {
        get { return 0; }
    }

    // Any other DecoderExceptionFallback is equal to this one; null and other types are not.
    public override bool Equals(Object value)
    {
        return value is DecoderExceptionFallback;
    }

    // All instances compare equal, so they all share one fixed hash code.
    public override int GetHashCode()
    {
        const int sharedHash = 879;
        return sharedHash;
    }
}
// Fallback buffer that never produces characters: every Fallback() call throws a
// DecoderFallbackException describing the bytes that could not be decoded.
public sealed class DecoderExceptionFallbackBuffer : DecoderFallbackBuffer
{
    // Reports the unknown bytes by throwing.
    //   bytesUnknown - the bytes that could not be decoded.
    //   index        - position of those bytes in the input.
    // Throws DecoderFallbackException; the return statement only satisfies the compiler.
    public override bool Fallback(byte[] bytesUnknown, int index)
    {
        Throw(bytesUnknown, index);
        return true;
    }

    // There is never a buffered character.
    public override char GetNextChar()
    {
        return (char)0;
    }

    public override bool MovePrevious()
    {
        // Exception fallback doesn't have anywhere to back up to.
        return false;
    }

    // Exceptions are always empty
    public override int Remaining
    {
        get
        {
            return 0;
        }
    }

    // Builds the "[XX][XX]..." listing of the offending bytes and throws.
    private void Throw(byte[] bytesUnknown, int index)
    {
        // At most this many bytes are echoed in the message.
        const int maxBytesShown = 20;

        // Create a string representation of our bytes.
        // Each byte is written as "[XX]" (4 chars); leave room for the " ..." suffix.
        StringBuilder strBytes = new StringBuilder(
            Math.Min(bytesUnknown.Length, maxBytesShown) * 4 + 4);

        for (int i = 0; i < bytesUnknown.Length && i < maxBytesShown; i++)
        {
            strBytes.Append("[");
            strBytes.Append(bytesUnknown[i].ToString("X2", CultureInfo.InvariantCulture));
            strBytes.Append("]");
        }

        // Mark the listing as truncated only when bytes were actually left out
        // (an input of exactly maxBytesShown bytes is shown in full).
        if (bytesUnknown.Length > maxBytesShown)
            strBytes.Append(" ...");

        // Known index
        throw new DecoderFallbackException(
            Environment.GetResourceString("Argument_InvalidCodePageBytesIndex",
                strBytes, index), bytesUnknown, index);
    }
}
// Exception for decoding unknown byte sequences.
// Optionally carries the offending bytes and their position in the input.
[Serializable]
public sealed class DecoderFallbackException : ArgumentException
{
    // Bytes that could not be decoded; stays null unless the (message, bytes, index) constructor is used.
    byte[] bytesUnknown;
    // Position of those bytes in the input; stays 0 unless the (message, bytes, index) constructor is used.
    int index;

    // Default message.
    public DecoderFallbackException()
        : base(Environment.GetResourceString("Arg_ArgumentException"))
    {
        SetErrorCode(__HResults.COR_E_ARGUMENT);
    }

    // Caller-supplied message.
    public DecoderFallbackException(String message)
        : base(message)
    {
        SetErrorCode(__HResults.COR_E_ARGUMENT);
    }

    // Caller-supplied message plus the exception that led to this one.
    public DecoderFallbackException(String message, Exception innerException)
        : base(message, innerException)
    {
        SetErrorCode(__HResults.COR_E_ARGUMENT);
    }

    // Deserialization constructor; all work is done by the base class.
    internal DecoderFallbackException(SerializationInfo info, StreamingContext context)
        : base(info, context)
    {
    }

    // Message plus the undecodable bytes and where they were found.
    // The array is stored as passed in; no copy is made.
    public DecoderFallbackException(String message, byte[] bytesUnknown, int index)
        : base(message)
    {
        this.index = index;
        this.bytesUnknown = bytesUnknown;
    }

    // The bytes that could not be decoded (the stored array itself, or null).
    public byte[] BytesUnknown
    {
        get { return this.bytesUnknown; }
    }

    // Position in the input of the bytes that could not be decoded.
    public int Index
    {
        get { return index; }
    }
}
}

View File

@@ -0,0 +1,285 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
//
using System;
using System.Security;
using System.Threading;
using System.Globalization;
using System.Diagnostics.Contracts;
namespace System.Text
{
// Base class for decoder fallbacks: a factory for DecoderFallbackBuffer objects plus
// two lazily created, process-wide default instances.
[Serializable]
public abstract class DecoderFallback
{
    // Exposed read-only through IsMicrosoftBestFitFallback; nothing in this class sets it to true.
    internal bool bIsMicrosoftBestFitFallback = false;

    private static volatile DecoderFallback replacementFallback; // Default fallback, uses no best fit & "?"
    private static volatile DecoderFallback exceptionFallback;

    // Private object for locking instead of locking on a internal type for SQL reliability work.
    private static Object s_InternalSyncObject;
    private static Object InternalSyncObject
    {
        get
        {
            if (s_InternalSyncObject != null)
            {
                return s_InternalSyncObject;
            }

            // Only the first object ever published is kept; losers of the race are discarded.
            Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, new Object(), null);
            return s_InternalSyncObject;
        }
    }

    // Get each of our generic fallbacks.

    // Shared DecoderReplacementFallback, created on first access.
    public static DecoderFallback ReplacementFallback
    {
        get
        {
            DecoderFallback existing = replacementFallback;
            if (existing != null)
            {
                return existing;
            }

            // Re-test under the lock so only one instance is ever stored.
            lock (InternalSyncObject)
            {
                if (replacementFallback == null)
                {
                    replacementFallback = new DecoderReplacementFallback();
                }
                return replacementFallback;
            }
        }
    }

    // Shared DecoderExceptionFallback, created on first access.
    public static DecoderFallback ExceptionFallback
    {
        get
        {
            DecoderFallback existing = exceptionFallback;
            if (existing != null)
            {
                return existing;
            }

            // Re-test under the lock so only one instance is ever stored.
            lock (InternalSyncObject)
            {
                if (exceptionFallback == null)
                {
                    exceptionFallback = new DecoderExceptionFallback();
                }
                return exceptionFallback;
            }
        }
    }

    // Fallback
    //
    // Return the appropriate unicode string alternative to the character that need to fall back.
    // Most implementations will be:
    //      return new MyCustomDecoderFallbackBuffer(this);
    public abstract DecoderFallbackBuffer CreateFallbackBuffer();

    // Maximum number of characters that this instance of this fallback could return
    public abstract int MaxCharCount { get; }

    internal bool IsMicrosoftBestFitFallback
    {
        get { return bIsMicrosoftBestFitFallback; }
    }
}
// Base class for the buffers that hand out replacement characters for bytes a decoder
// could not convert. Subclasses supply Fallback/GetNextChar/MovePrevious/Remaining; the
// internal helpers below drive them and validate the surrogate pairing of what they return.
public abstract class DecoderFallbackBuffer
{
    // Most implementations will probably need an implementation-specific constructor

    // internal methods that cannot be overriden that let us do our fallback thing
    // These wrap the internal methods so that we can check for people doing stuff that's incorrect

    // Prepare the fallback for the given unknown bytes found at 'index'.
    // The internal helpers below read characters only when this returns true.
    public abstract bool Fallback(byte[] bytesUnknown, int index);

    // Get next character ((char)0 signals that nothing is left)
    public abstract char GetNextChar();

    // Back up a character
    public abstract bool MovePrevious();

    // How many chars left in this fallback?
    public abstract int Remaining { get; }

    // Clear the buffer
    // Default implementation simply drains GetNextChar() until it reports (char)0.
    public virtual void Reset()
    {
        while (GetNextChar() != (char)0);
    }

    // Internal items to help us figure out what we're doing as far as error messages, etc.
    // These help us with our performance and messages internally
    // byteStart: start of the input being decoded; used to turn a pointer into an index.
    [SecurityCritical]
    internal unsafe byte* byteStart;
    // charEnd: one past the last writable position of the output buffer.
    [SecurityCritical]
    internal unsafe char* charEnd;

    // Internal Reset
    [System.Security.SecurityCritical]  // auto-generated
    internal unsafe void InternalReset()
    {
        byteStart = null;
        Reset();
    }

    // Set the above values
    // This can't be part of the constructor because DecoderFallbacks would have to know how to implement these.
    [System.Security.SecurityCritical]  // auto-generated
    internal unsafe void InternalInitialize(byte* byteStart, char* charEnd)
    {
        this.byteStart = byteStart;
        this.charEnd = charEnd;
    }

    // Fallback the current byte by sticking it into the remaining char buffer.
    // This can only be called by our encodings (other have to use the public fallback methods), so
    // we can use our DecoderNLS here too (except we don't).
    // Returns true if we are successful, false if we can't fallback the character (no buffer space)
    // So caller needs to throw buffer space if return false.
    // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
    // array, and we might need the index, hence the byte*
    // Don't touch ref chars unless we succeed
    // Throws ArgumentException when the fallback characters contain an unpaired surrogate.
    [System.Security.SecurityCritical]  // auto-generated
    internal unsafe virtual bool InternalFallback(byte[] bytes, byte* pBytes, ref char* chars)
    {
        Contract.Assert(byteStart != null, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");

        // See if there's a fallback character and we have an output buffer then copy our string.
        // The index passed on is the offset of the first unknown byte from byteStart.
        if (this.Fallback(bytes, (int)(pBytes - byteStart - bytes.Length)))
        {
            // Copy the chars to our output
            char ch;
            char* charTemp = chars;
            bool bHighSurrogate = false;
            while ((ch = GetNextChar()) != 0)
            {
                // Make sure no mixed up surrogates
                if (Char.IsSurrogate(ch))
                {
                    if (Char.IsHighSurrogate(ch))
                    {
                        // High Surrogate: two in a row is invalid
                        if (bHighSurrogate)
                            throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"));
                        bHighSurrogate = true;
                    }
                    else
                    {
                        // Low surrogate: must directly follow a high surrogate
                        if (bHighSurrogate == false)
                            throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"));
                        bHighSurrogate = false;
                    }
                }

                if (charTemp >= charEnd)
                {
                    // No buffer space
                    return false;
                }

                *(charTemp++) = ch;
            }

            // Need to make sure that bHighSurrogate isn't true
            if (bHighSurrogate)
                throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"));

            // Now we aren't going to be false, so its OK to update chars
            chars = charTemp;
        }

        return true;
    }

    // This version just counts the fallback and doesn't actually copy anything.
    // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
    // array, and we might need the index, hence the byte*
    // Returns the number of fallback characters, or 0 when Fallback() declines.
    // Throws ArgumentException when the fallback characters contain an unpaired surrogate.
    [System.Security.SecurityCritical]  // auto-generated
    internal unsafe virtual int InternalFallback(byte[] bytes, byte* pBytes)
    {
        Contract.Assert(byteStart != null, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");

        // See if there's a fallback character and we have an output buffer then copy our string.
        if (this.Fallback(bytes, (int)(pBytes - byteStart - bytes.Length)))
        {
            int count = 0;

            char ch;
            bool bHighSurrogate = false;
            while ((ch = GetNextChar()) != 0)
            {
                // Make sure no mixed up surrogates
                if (Char.IsSurrogate(ch))
                {
                    if (Char.IsHighSurrogate(ch))
                    {
                        // High Surrogate: two in a row is invalid
                        if (bHighSurrogate)
                            throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"));
                        bHighSurrogate = true;
                    }
                    else
                    {
                        // Low surrogate: must directly follow a high surrogate
                        if (bHighSurrogate == false)
                            throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"));
                        bHighSurrogate = false;
                    }
                }

                count++;
            }

            // Need to make sure that bHighSurrogate isn't true
            if (bHighSurrogate)
                throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"));

            return count;
        }

        // If no fallback return 0
        return 0;
    }

    // private helper methods

    // Throws the ArgumentException used to report a recursive fallback, listing up to the
    // first 20 offending bytes as "\xNN" separated by spaces.
    internal void ThrowLastBytesRecursive(byte[] bytesUnknown)
    {
        // At most this many bytes are echoed in the message.
        const int maxBytesShown = 20;

        // Create a string representation of our bytes.
        StringBuilder strBytes = new StringBuilder(bytesUnknown.Length * 3);
        for (int i = 0; i < bytesUnknown.Length && i < maxBytesShown; i++)
        {
            if (strBytes.Length > 0)
                strBytes.Append(" ");
            strBytes.Append(String.Format(CultureInfo.InvariantCulture, "\\x{0:X2}", bytesUnknown[i]));
        }

        // Mark the listing as truncated only when bytes were actually left out
        // (an input of exactly maxBytesShown bytes is shown in full).
        if (bytesUnknown.Length > maxBytesShown)
            strBytes.Append(" ...");

        // Throw it, using our complete bytes
        throw new ArgumentException(
            Environment.GetResourceString("Argument_RecursiveFallbackBytes",
                strBytes.ToString()), "bytesUnknown");
    }
}
}

View File

@@ -0,0 +1,301 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
namespace System.Text
{
using System.Runtime.Serialization;
using System.Security.Permissions;
using System.Text;
using System;
using System.Diagnostics.Contracts;
// A Decoder is used to decode a sequence of blocks of bytes into a
// sequence of blocks of characters. Following instantiation of a decoder,
// sequential blocks of bytes are converted into blocks of characters through
// calls to the GetChars method. The decoder maintains state between the
// conversions, allowing it to correctly decode byte sequences that span
// adjacent blocks.
//
// Instances of specific implementations of the Decoder abstract base
// class are typically obtained through calls to the GetDecoder method
// of Encoding objects.
//
// Decoder that validates its arguments, records the per-call options (flush,
// throw-on-overflow, bytes used) and then hands the real work to its Encoding.
[Serializable]
internal class DecoderNLS : Decoder, ISerializable
{
    // Remember our encoding
    protected Encoding m_encoding;
    // Per-call state; not serialized.
    [NonSerialized] protected bool m_mustFlush;
    [NonSerialized] internal bool m_throwOnOverflow;
    [NonSerialized] internal int m_bytesUsed;

#region Serialization

    // Constructor called by serialization. called during deserialization.
    // This type cannot be rebuilt this way, so it always throws NotSupportedException.
    internal DecoderNLS(SerializationInfo info, StreamingContext context)
    {
        throw new NotSupportedException(
            String.Format(
                System.Globalization.CultureInfo.CurrentCulture,
                Environment.GetResourceString("NotSupported_TypeCannotDeserialized"),
                this.GetType()));
    }

#if FEATURE_SERIALIZATION
    // ISerializable implementation. called during serialization.
    // Stores the base decoder data and the encoding, and records Encoding.DefaultDecoder as the type.
    [System.Security.SecurityCritical]  // auto-generated_required
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        SerializeDecoder(info);
        info.AddValue("encoding", this.m_encoding);
        info.SetType(typeof(Encoding.DefaultDecoder));
    }
#endif

#endregion Serialization

    // Normal construction: adopt the encoding and its decoder fallback.
    internal DecoderNLS( Encoding encoding )
    {
        m_encoding = encoding;
        m_fallback = encoding.DecoderFallback;
        Reset();
    }

    // This is used by our child deserializers
    internal DecoderNLS( )
    {
        m_encoding = null;
        Reset();
    }

    // Clears any pending fallback characters.
    public override void Reset()
    {
        if (m_fallbackBuffer == null)
        {
            return;
        }
        m_fallbackBuffer.Reset();
    }

    // Counts the chars for a byte range without flushing.
    public override unsafe int GetCharCount(byte[] bytes, int index, int count)
    {
        const bool flush = false;
        return GetCharCount(bytes, index, count, flush);
    }

    // Array overload: validates the range, pins the array and forwards to the pointer overload.
    [System.Security.SecuritySafeCritical]  // auto-generated
    public override unsafe int GetCharCount(byte[] bytes, int index, int count, bool flush)
    {
        // Validate Parameters
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (index < 0)
        {
            throw new ArgumentOutOfRangeException("index",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (bytes.Length - index < count)
        {
            throw new ArgumentOutOfRangeException("bytes",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        }
        Contract.EndContractBlock();

        // Avoid null fixed problem: pinning an empty array yields a null pointer.
        if (bytes.Length == 0)
        {
            bytes = new byte[1];
        }

        // Just call pointer version
        fixed (byte* pinnedBytes = bytes)
        {
            return GetCharCount(pinnedBytes + index, count, flush);
        }
    }

    // Pointer overload: records the call options and lets the encoding count.
    [System.Security.SecurityCritical]  // auto-generated
    public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
    {
        // Validate parameters
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        Contract.EndContractBlock();

        // Remember the flush
        m_mustFlush = flush;
        m_throwOnOverflow = true;

        // By default just call the encoding version, no flush by default
        return m_encoding.GetCharCount(bytes, count, this);
    }

    // Decodes a byte range into chars without flushing.
    public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                        char[] chars, int charIndex)
    {
        const bool flush = false;
        return GetChars(bytes, byteIndex, byteCount, chars, charIndex, flush);
    }

    // Array overload: validates the ranges, pins both arrays and forwards to the pointer overload.
    [System.Security.SecuritySafeCritical]  // auto-generated
    public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                        char[] chars, int charIndex, bool flush)
    {
        // Validate Parameters
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (chars == null)
        {
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (byteIndex < 0)
        {
            throw new ArgumentOutOfRangeException("byteIndex",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (byteCount < 0)
        {
            throw new ArgumentOutOfRangeException("byteCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (bytes.Length - byteIndex < byteCount)
        {
            throw new ArgumentOutOfRangeException("bytes",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        }
        if (charIndex < 0 || charIndex > chars.Length)
        {
            throw new ArgumentOutOfRangeException("charIndex",
                Environment.GetResourceString("ArgumentOutOfRange_Index"));
        }
        Contract.EndContractBlock();

        // Room left in the caller's array; must be computed before chars is possibly swapped below.
        int charCount = chars.Length - charIndex;

        // Avoid empty input fixed problem: pinning an empty array yields a null pointer.
        if (bytes.Length == 0)
        {
            bytes = new byte[1];
        }
        if (chars.Length == 0)
        {
            chars = new char[1];
        }

        // Just call pointer version
        fixed (byte* pinnedBytes = bytes)
        {
            fixed (char* pinnedChars = chars)
            {
                // Remember that charCount is # to decode, not size of array
                return GetChars(pinnedBytes + byteIndex, byteCount,
                                pinnedChars + charIndex, charCount, flush);
            }
        }
    }

    // Pointer overload: records the call options and lets the encoding decode.
    [System.Security.SecurityCritical]  // auto-generated
    public unsafe override int GetChars(byte* bytes, int byteCount,
                                        char* chars, int charCount, bool flush)
    {
        // Validate parameters (chars is tested first, matching the reported parameter name)
        if (chars == null)
        {
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (byteCount < 0)
        {
            throw new ArgumentOutOfRangeException("byteCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (charCount < 0)
        {
            throw new ArgumentOutOfRangeException("charCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        Contract.EndContractBlock();

        // Remember our flush
        m_mustFlush = flush;
        m_throwOnOverflow = true;

        // By default just call the encoding's version
        return m_encoding.GetChars(bytes, byteCount, chars, charCount, this);
    }

    // This method is used when the output buffer might not be big enough.
    // Just call the pointer version.  (This gets chars)
    [System.Security.SecuritySafeCritical]  // auto-generated
    public override unsafe void Convert(byte[] bytes, int byteIndex, int byteCount,
                                        char[] chars, int charIndex, int charCount, bool flush,
                                        out int bytesUsed, out int charsUsed, out bool completed)
    {
        // Validate parameters
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (chars == null)
        {
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (byteIndex < 0)
        {
            throw new ArgumentOutOfRangeException("byteIndex",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (byteCount < 0)
        {
            throw new ArgumentOutOfRangeException("byteCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (charIndex < 0)
        {
            throw new ArgumentOutOfRangeException("charIndex",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (charCount < 0)
        {
            throw new ArgumentOutOfRangeException("charCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (bytes.Length - byteIndex < byteCount)
        {
            throw new ArgumentOutOfRangeException("bytes",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        }
        if (chars.Length - charIndex < charCount)
        {
            throw new ArgumentOutOfRangeException("chars",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        }
        Contract.EndContractBlock();

        // Avoid empty input problem: pinning an empty array yields a null pointer.
        if (bytes.Length == 0)
        {
            bytes = new byte[1];
        }
        if (chars.Length == 0)
        {
            chars = new char[1];
        }

        // Just call the pointer version (public overrides can't do this)
        fixed (byte* pinnedBytes = bytes)
        fixed (char* pinnedChars = chars)
        {
            Convert(pinnedBytes + byteIndex, byteCount, pinnedChars + charIndex, charCount, flush,
                    out bytesUsed, out charsUsed, out completed);
        }
    }

    // This is the version that used pointers.  We call the base encoding worker function
    // after setting our appropriate internal variables.  This is getting chars
    [System.Security.SecurityCritical]  // auto-generated
    public unsafe override void Convert(byte* bytes, int byteCount,
                                        char* chars, int charCount, bool flush,
                                        out int bytesUsed, out int charsUsed, out bool completed)
    {
        // Validate input parameters (chars is tested first, matching the reported parameter name)
        if (chars == null)
        {
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        }
        if (byteCount < 0)
        {
            throw new ArgumentOutOfRangeException("byteCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        if (charCount < 0)
        {
            throw new ArgumentOutOfRangeException("charCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        }
        Contract.EndContractBlock();

        // We don't want to throw
        m_mustFlush = flush;
        m_throwOnOverflow = false;
        m_bytesUsed = 0;

        // Do conversion
        charsUsed = m_encoding.GetChars(bytes, byteCount, chars, charCount, this);
        bytesUsed = m_bytesUsed;

        // Its completed if they've used what they wanted AND if they didn't want flush or if we are flushed
        // AND no fallback characters are still waiting. Each test runs only if the previous one passed.
        completed = false;
        if (bytesUsed == byteCount)
        {
            if (!flush || !this.HasState)
            {
                completed = (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0);
            }
        }

        // Our data thingys are now full, we can return
    }

    // The flush value recorded by the most recent call (or cleared by ClearMustFlush).
    public bool MustFlush
    {
        get { return m_mustFlush; }
    }

    // Anything left in our decoder?
    // This base implementation always answers false.
    internal virtual bool HasState
    {
        get { return false; }
    }

    // Allow encoding to clear our must flush instead of throwing (in ThrowCharsOverflow)
    internal void ClearMustFlush()
    {
        m_mustFlush = false;
    }
}
}

View File

@@ -0,0 +1,212 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// DecoderReplacementFallback.cs
//
namespace System.Text
{
using System;
using System.Diagnostics.Contracts;
// Decoder fallback that substitutes a fixed replacement string for bytes that
// cannot be decoded.
[Serializable]
public sealed class DecoderReplacementFallback : DecoderFallback
{
    // Our variables
    // The replacement string, validated by the constructor.
    private String strDefault;

    // Construction.  Default replacement fallback uses no best fit and ? replacement string
    public DecoderReplacementFallback() : this("?")
    {
    }

    // Creates a fallback that emits 'replacement'.
    //   replacement - replacement text; may be empty, must not be null, and every
    //                 surrogate in it must be part of a well-formed high/low pair.
    // Throws ArgumentNullException when replacement is null.
    // Throws ArgumentException (ParamName "replacement") when a surrogate is unpaired.
    public DecoderReplacementFallback(String replacement)
    {
        if (replacement == null)
            throw new ArgumentNullException("replacement");
        Contract.EndContractBlock();

        // Make sure it doesn't have bad surrogate pairs.
        // bFoundHigh doubles as the error flag: if it is still set after the loop,
        // the string is rejected.
        bool bFoundHigh = false;
        for (int i = 0; i < replacement.Length; i++)
        {
            // Found a surrogate?
            if (Char.IsSurrogate(replacement, i))
            {
                // High or Low?
                if (Char.IsHighSurrogate(replacement, i))
                {
                    // if already had a high one, stop
                    if (bFoundHigh)
                        break;  // break & throw at the bFoundHigh below
                    bFoundHigh = true;
                }
                else
                {
                    // Low, did we have a high?
                    if (!bFoundHigh)
                    {
                        // Didn't have one, make it fail when we stop
                        bFoundHigh = true;
                        break;
                    }

                    // Clear flag
                    bFoundHigh = false;
                }
            }
            // If last was high we're in trouble (not surrogate so not low surrogate, so break)
            else if (bFoundHigh)
                break;
        }

        // The parameter name belongs to ArgumentException, not to the resource lookup:
        // the message is used elsewhere without format arguments, so passing the name
        // to GetResourceString left the exception without a ParamName.
        if (bFoundHigh)
            throw new ArgumentException(
                Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"), "replacement");

        strDefault = replacement;
    }

    // The replacement string this fallback emits.
    public String DefaultString
    {
        get
        {
            return strDefault;
        }
    }

    // Creates a new buffer bound to this fallback.
    public override DecoderFallbackBuffer CreateFallbackBuffer()
    {
        return new DecoderReplacementFallbackBuffer(this);
    }

    // Maximum number of characters that this instance of this fallback could return
    public override int MaxCharCount
    {
        get
        {
            return strDefault.Length;
        }
    }

    // Two replacement fallbacks are equal when their replacement strings are equal.
    public override bool Equals(Object value)
    {
        DecoderReplacementFallback that = value as DecoderReplacementFallback;
        if (that != null)
        {
            return (this.strDefault == that.strDefault);
        }
        return (false);
    }

    // Hash of the replacement string, consistent with Equals.
    public override int GetHashCode()
    {
        return strDefault.GetHashCode();
    }
}
// Buffer that plays back the replacement string of a DecoderReplacementFallback,
// one character per GetNextChar() call.
public sealed class DecoderReplacementFallbackBuffer : DecoderFallbackBuffer
{
    // Store our default string
    private String strDefault;
    // Characters still to hand out; -1 when idle.
    int fallbackCount = -1;
    // Index into strDefault of the character most recently handed out; -1 before the first.
    int fallbackIndex = -1;

    // Construction
    public DecoderReplacementFallbackBuffer(DecoderReplacementFallback fallback)
    {
        strDefault = fallback.DefaultString;
    }

    // Fallback Methods

    // Arms the buffer for one playback of the replacement string.
    // Returns false when the replacement string is empty (nothing to emit).
    public override bool Fallback(byte[] bytesUnknown, int index)
    {
        // We expect no previous fallback in our buffer
        // We can't call recursively but others might (note, we don't test on last char!!!)
        bool stillPlaying = fallbackCount >= 1;
        if (stillPlaying)
        {
            ThrowLastBytesRecursive(bytesUnknown);
        }

        // Go ahead and get our fallback
        int replacementLength = strDefault.Length;
        if (replacementLength == 0)
        {
            return false;
        }

        fallbackIndex = -1;
        fallbackCount = replacementLength;
        return true;
    }

    // Returns the next replacement character, or '\0' when none are left.
    public override char GetNextChar()
    {
        // We want it to get < 0 because == 0 means that the current/last character is a fallback
        // and we need to detect recursion.  We could have a flag but we already have this counter.
        fallbackIndex++;
        fallbackCount--;

        // Do we have anything left? 0 is now last fallback char, negative is nothing left
        if (fallbackCount < 0)
        {
            return '\0';
        }

        // Need to get it out of the buffer.
        // Make sure it didn't wrap from the fast count-- path
        if (fallbackCount == int.MaxValue)
        {
            fallbackCount = -1;
            return '\0';
        }

        // Now make sure its in the expected range
        Contract.Assert(fallbackIndex < strDefault.Length && fallbackIndex >= 0,
                        "Index exceeds buffer range");

        return strDefault[fallbackIndex];
    }

    // Steps back one character; returns false when that is not possible.
    public override bool MovePrevious()
    {
        // Back up one, only if we just processed the last character (or earlier)
        bool cannotBackUp = fallbackCount < -1 || fallbackIndex < 0;
        if (cannotBackUp)
        {
            // Return false 'cause we couldn't do it.
            return false;
        }

        fallbackCount++;
        fallbackIndex--;
        return true;
    }

    // How many characters left to output?
    public override int Remaining
    {
        get
        {
            // Our count is 0 for 1 character left.
            if (fallbackCount < 0)
            {
                return 0;
            }
            return fallbackCount;
        }
    }

    // Clear the buffer
    [System.Security.SecuritySafeCritical]  // auto-generated
    public override unsafe void Reset()
    {
        byteStart = null;
        fallbackIndex = -1;
        fallbackCount = -1;
    }

    // This version just counts the fallback and doesn't actually copy anything.
    // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
    // array, and we might need the index, hence the byte*
    [System.Security.SecurityCritical]  // auto-generated
    internal unsafe override int InternalFallback(byte[] bytes, byte* pBytes)
    {
        // return our replacement string Length
        int replacementLength = strDefault.Length;
        return replacementLength;
    }
}
}

View File

@@ -0,0 +1,344 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
namespace System.Text
{
using System.Runtime.Serialization;
using System.Text;
using System;
using System.Diagnostics.Contracts;
// An Encoder is used to encode a sequence of blocks of characters into
// a sequence of blocks of bytes. Following instantiation of an encoder,
// sequential blocks of characters are converted into blocks of bytes through
// calls to the GetBytes method. The encoder maintains state between the
// conversions, allowing it to correctly encode character sequences that span
// adjacent blocks.
//
// Instances of specific implementations of the Encoder abstract base
// class are typically obtained through calls to the GetEncoder method
// of Encoding objects.
//
[System.Runtime.InteropServices.ComVisible(true)]
[Serializable]
public abstract class Encoder
{
// Fallback strategy for this encoder; written out by SerializeEncoder() and
// replaced through the Fallback property.
internal EncoderFallback m_fallback = null;
// Buffer created on demand by the FallbackBuffer property; excluded from
// serialization and discarded whenever Fallback is assigned.
[NonSerialized]
internal EncoderFallbackBuffer m_fallbackBuffer = null;
// Adds this encoder's fallback to the serialization data under the key "m_fallback".
internal void SerializeEncoder(SerializationInfo info)
{
    EncoderFallback fallbackToStore = m_fallback;
    info.AddValue("m_fallback", fallbackToStore);
}
// Base constructor for derived encoders. Intentionally does nothing: in particular
// it does not call Reset(), so m_fallback and m_fallbackBuffer keep their null defaults.
protected Encoder()
{
// We don't call default reset because default reset probably isn't good if we aren't initialized.
}
// Gets or sets the fallback strategy of this encoder.
// Setting it throws ArgumentNullException for null, and ArgumentException while the
// current fallback buffer still holds unread characters. A successful set also
// discards the current fallback buffer.
[System.Runtime.InteropServices.ComVisible(false)]
public EncoderFallback Fallback
{
    get { return m_fallback; }

    set
    {
        if (value == null)
        {
            throw new ArgumentNullException("value");
        }
        Contract.EndContractBlock();

        // Can't change fallback if buffer is wrong
        bool bufferHasPendingChars = m_fallbackBuffer != null && m_fallbackBuffer.Remaining > 0;
        if (bufferHasPendingChars)
        {
            throw new ArgumentException(
                Environment.GetResourceString("Argument_FallbackBufferNotEmpty"), "value");
        }

        m_fallbackBuffer = null;
        m_fallback = value;
    }
}
// Returns this encoder's fallback buffer, creating it on first use from m_fallback or,
// when no fallback is set, from EncoderFallback.ReplacementFallback.
// Note: we don't test for threading here because async access to Encoders and Decoders
// doesn't work anyway.
[System.Runtime.InteropServices.ComVisible(false)]
public EncoderFallbackBuffer FallbackBuffer
{
    get
    {
        if (m_fallbackBuffer != null)
        {
            return m_fallbackBuffer;
        }

        EncoderFallback source = (m_fallback != null)
            ? m_fallback
            : EncoderFallback.ReplacementFallback;
        m_fallbackBuffer = source.CreateFallbackBuffer();

        return m_fallbackBuffer;
    }
}
// True once a fallback buffer has been created; reading this never creates one.
internal bool InternalHasFallbackBuffer
{
    get
    {
        if (m_fallbackBuffer == null)
        {
            return false;
        }
        return true;
    }
}
// Reset the Encoder
//
// Normally if we call GetBytes() and an error is thrown we don't change the state of the encoder.  This
// would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
//
// If the caller doesn't want to try again after GetBytes() throws an error, then they need to call Reset().
//
// Virtual implementation has to call GetBytes with flush and a big enough buffer to clear a 0 char string
// We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big.
[System.Runtime.InteropServices.ComVisible(false)]
public virtual void Reset()
{
    // Encode zero characters with flush == true into a buffer sized by GetByteCount,
    // so that whatever the encoder was still holding gets written out and dropped.
    char[] noChars = new char[0];
    int flushedSize = GetByteCount(noChars, 0, 0, true);
    byte[] discard = new byte[flushedSize];
    GetBytes(noChars, 0, 0, discard, 0, true);

    if (m_fallbackBuffer == null)
    {
        return;
    }
    m_fallbackBuffer.Reset();
}
// Returns the number of bytes the next call to GetBytes will
// produce if presented with the given range of characters and the given
// value of the flush parameter. The returned value takes into
// account the state in which the encoder was left following the last call
// to GetBytes. The state of the encoder is not affected by a call
// to this method.
//
//   chars - array holding the characters to measure
//   index - position in chars of the first character of the range
//   count - number of characters in the range
//   flush - the flush value the subsequent GetBytes call would use
//
public abstract int GetByteCount(char[] chars, int index, int count, bool flush);
// We expect this to be the workhorse for NLS encodings
// unfortunately for existing overrides, it has to call the [] version,
// which is really slow, so avoid this method if you might be calling external encodings.
//
// Default implementation: copies the 'count' characters at 'chars' into a temporary
// array and forwards to the array-based GetByteCount.
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException
// for a negative count.
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetByteCount(char* chars, int count, bool flush)
{
    // Validate input parameters
    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }
    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    // Copy the unmanaged characters into a managed array the abstract overload can take.
    char[] managedCopy = new char[count];
    char* source = chars;
    for (int i = 0; i < count; i++)
    {
        managedCopy[i] = *source;
        source++;
    }

    return GetByteCount(managedCopy, 0, count, flush);
}
// Encodes a range of characters in a character array into a range of bytes
// in a byte array. The method encodes charCount characters from
// chars starting at index charIndex, storing the resulting
// bytes in bytes starting at index byteIndex. The encoding
// takes into account the state in which the encoder was left following the
// last call to this method. The flush parameter indicates whether
// the encoder should flush any shift-states and partial characters at the
// end of the conversion. To ensure correct termination of a sequence of
// blocks of encoded bytes, the last call to GetBytes should specify
// a value of true for the flush parameter.
//
// An exception occurs if the byte array is not large enough to hold the
// complete encoding of the characters. The GetByteCount method can
// be used to determine the exact number of bytes that will be produced for
// a given range of characters. Alternatively, the GetMaxByteCount
// method of the Encoding that produced this encoder can be used to
// determine the maximum number of bytes that will be produced for a given
// number of characters, regardless of the actual character values.
//
//   chars     - source characters
//   charIndex - position in chars of the first character to encode
//   charCount - number of characters to encode
//   bytes     - destination array
//   byteIndex - position in bytes at which to start writing
//   flush     - true on the final block so that pending state is written out
//
public abstract int GetBytes(char[] chars, int charIndex, int charCount,
byte[] bytes, int byteIndex, bool flush);
// Pointer-based overload of GetBytes. Derived NLS encoders are expected to
// override it; this default is a working (if slow) implementation that copies
// the input into a managed char[], calls the abstract array-based GetBytes
// overload with a temporary byte[] of byteCount elements, and copies the
// produced bytes back out to the caller's buffer.
//
// SECURITY WARNING: callers inside the framework treat this method as safe, so
// the pointers and counts passed in must be correct. The array-based GetBytes
// it delegates to may be a third-party override whose return value cannot be
// trusted. For that reason the number of bytes copied to the output pointer is
// always capped at byteCount, whatever the override reports, so the caller's
// buffer can never be overrun.
//
// Returns the number of bytes copied to bytes (the smaller of the reported
// result and byteCount).
//
// Throws ArgumentNullException when either pointer is null and
// ArgumentOutOfRangeException when either count is negative.
[System.Security.SecurityCritical] // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetBytes(char* chars, int charCount,
    byte* bytes, int byteCount, bool flush)
{
    // Pointer checks: the output pointer is reported first, then the input pointer.
    if (bytes == null)
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    if (chars == null)
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));

    // Count checks: charCount is reported first, then byteCount.
    if (charCount < 0)
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // Managed copy of the input characters.
    char[] managedChars = new char[charCount];
    for (int i = 0; i < charCount; i++)
    {
        managedChars[i] = chars[i];
    }

    // Managed scratch buffer exactly as large as the caller's output window.
    byte[] managedBytes = new byte[byteCount];

    int produced = GetBytes(managedChars, 0, charCount, managedBytes, 0, flush);

    // A result larger than the buffer can only come from a bug in the array overload.
    Contract.Assert(produced <= byteCount, "Returned more bytes than we have space for");

    // Never trust the reported count beyond the size of the caller's buffer.
    int bytesToCopy = (produced < byteCount) ? produced : byteCount;
    for (int i = 0; i < bytesToCopy; i++)
    {
        bytes[i] = managedBytes[i];
    }

    return bytesToCopy;
}
// Encodes as many characters as fit into the caller's output window without
// overrunning it.
//
// The method first asks GetByteCount whether all charCount characters fit in
// byteCount bytes. If they do not, it halves the number of characters and
// tries again (with flush forced to false, because the whole input is no
// longer being consumed) until an attempt fits. Because of the halving, a
// partial conversion may consume fewer characters than would actually have
// fit, which can be slow for callers trying to fill a buffer exactly.
//
// Outputs:
//   charsUsed - number of characters consumed by the successful attempt.
//   bytesUsed - value returned by GetBytes for that attempt.
//   completed - true only when every requested character was consumed and the
//               fallback buffer (if any) has nothing remaining.
//
// Throws ArgumentException (Argument_ConversionOverflow) when no attempt fits,
// which includes the case where the loop never runs because charCount is 0.
//
// Only the array-based overloads are called here; the unsafe overloads might be
// untrusted third-party overrides and are deliberately avoided.
//
// Might consider checking Max...Count to avoid the extra counting step.
[System.Runtime.InteropServices.ComVisible(false)]
public virtual void Convert(char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex, int byteCount, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    // Null checks, input array first.
    if (chars == null)
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    if (bytes == null)
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));

    // Sign checks on every index and count.
    if (charIndex < 0)
        throw new ArgumentOutOfRangeException("charIndex",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (charCount < 0)
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (byteIndex < 0)
        throw new ArgumentOutOfRangeException("byteIndex",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

    // The requested windows must lie inside their arrays.
    if (chars.Length - charIndex < charCount)
        throw new ArgumentOutOfRangeException("chars",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    if (bytes.Length - byteIndex < byteCount)
        throw new ArgumentOutOfRangeException("bytes",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    Contract.EndContractBlock();

    // Start with the whole input and halve the attempt size until it fits.
    for (charsUsed = charCount; charsUsed > 0; charsUsed /= 2)
    {
        if (GetByteCount(chars, charIndex, charsUsed, flush) <= byteCount)
        {
            bytesUsed = GetBytes(chars, charIndex, charsUsed, bytes, byteIndex, flush);

            // The fallback buffer is only consulted when the full input was consumed.
            if (charsUsed != charCount)
            {
                completed = false;
            }
            else
            {
                completed = (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0);
            }
            return;
        }

        // A shorter attempt will not read the whole input, so it must not flush.
        flush = false;
    }

    // Nothing fit at all: report an overflow.
    throw new ArgumentException(Environment.GetResourceString("Argument_ConversionOverflow"));
}
// Pointer-based counterpart of the array Convert overload.
//
// Asks the pointer-based GetByteCount whether all charCount characters fit in
// byteCount bytes; if not, halves the number of characters (forcing flush to
// false) and retries until an attempt fits, then encodes that many characters
// with the pointer-based GetBytes. Because of the halving, a partial
// conversion may consume fewer characters than would actually have fit.
//
// Outputs:
//   charsUsed - number of characters consumed by the successful attempt.
//   bytesUsed - value returned by GetBytes for that attempt.
//   completed - true only when every requested character was consumed and the
//               fallback buffer (if any) has nothing remaining.
//
// Throws ArgumentNullException for a null pointer, ArgumentOutOfRangeException
// for a negative count, and ArgumentException (Argument_ConversionOverflow)
// when no attempt fits, including when charCount is 0.
//
// Might consider checking Max...Count to avoid the extra counting step.
[System.Security.SecurityCritical] // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe void Convert(char* chars, int charCount,
    byte* bytes, int byteCount, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    // Pointer checks: output pointer first, then input pointer.
    if (bytes == null)
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    if (chars == null)
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));

    // Count checks: charCount first, then byteCount.
    if (charCount < 0)
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // Start with the whole input and halve the attempt size until it fits.
    for (charsUsed = charCount; charsUsed > 0; charsUsed /= 2)
    {
        if (GetByteCount(chars, charsUsed, flush) <= byteCount)
        {
            bytesUsed = GetBytes(chars, charsUsed, bytes, byteCount, flush);

            // The fallback buffer is only consulted when the full input was consumed.
            if (charsUsed != charCount)
            {
                completed = false;
            }
            else
            {
                completed = (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0);
            }
            return;
        }

        // A shorter attempt will not read the whole input, so it must not flush.
        flush = false;
    }

    // Nothing fit at all: report an overflow.
    throw new ArgumentException(Environment.GetResourceString("Argument_ConversionOverflow"));
}
}
}

View File

@@ -0,0 +1,249 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// EncoderBestFitFallback.cs
//
// This is used internally to create best fit behavior as per the original windows best fit behavior.
//
namespace System.Text
{
using System;
using System.Globalization;
using System.Text;
using System.Threading;
using System.Diagnostics.Contracts;
// Encoder fallback that substitutes a "best fit" character taken from a table
// supplied by the owning encoding. Two instances compare equal when their
// encodings have the same code page.
[Serializable]
internal class InternalEncoderBestFitFallback : EncoderFallback
{
    // Encoding whose best-fit data this fallback uses.
    internal Encoding encoding = null;

    // Best-fit table. It is left null here and filled in by the first
    // InternalEncoderBestFitFallbackBuffer created for this fallback.
    internal char[] arrayBestFit = null;

    // Remembers the encoding and flags this instance as the Microsoft best-fit fallback.
    internal InternalEncoderBestFitFallback(Encoding encoding)
    {
        this.bIsMicrosoftBestFitFallback = true;
        this.encoding = encoding;
    }

    // Creates a new buffer bound to this fallback.
    public override EncoderFallbackBuffer CreateFallbackBuffer()
    {
        InternalEncoderBestFitFallbackBuffer buffer = new InternalEncoderBestFitFallbackBuffer(this);
        return buffer;
    }

    // Maximum number of characters that this instance of this fallback could return
    public override int MaxCharCount
    {
        get { return 1; }
    }

    // Equal to another InternalEncoderBestFitFallback with the same code page; unequal to anything else.
    public override bool Equals(Object value)
    {
        InternalEncoderBestFitFallback other = value as InternalEncoderBestFitFallback;
        if (other == null)
        {
            return false;
        }

        return this.encoding.CodePage == other.encoding.CodePage;
    }

    // The code page doubles as the hash code, which keeps it consistent with Equals.
    public override int GetHashCode()
    {
        return this.encoding.CodePage;
    }
}
// Fallback buffer for InternalEncoderBestFitFallback. For a single unknown
// character it looks up a replacement in the fallback's best-fit table and
// uses '?' when the table has no entry; for a surrogate pair it always uses
// '?' (once per char of the pair).
internal sealed class InternalEncoderBestFitFallbackBuffer : EncoderFallbackBuffer
{
    // Replacement character handed out by GetNextChar for the current fallback.
    private char cBestFit = '\0';
    // Owning fallback; supplies the encoding and caches the best-fit table.
    private InternalEncoderBestFitFallback oFallback;
    // Number of replacement chars still to return. Negative means "not falling back";
    // 0 means the last replacement char has just been returned.
    private int iCount = -1;
    // Value iCount started at for the current fallback (1 for a char, 2 for a surrogate pair).
    private int iSize;

    // Private object for locking instead of locking on a public type for SQL reliability work.
    private static Object s_InternalSyncObject;
    private static Object InternalSyncObject
    {
        get
        {
            if (s_InternalSyncObject == null)
            {
                // Only the first thread's object is published; later ones are discarded.
                Object o = new Object();
                Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, o, null);
            }
            return s_InternalSyncObject;
        }
    }

    // Constructor. Loads the best-fit table into the owning fallback the first time
    // a buffer is created for it.
    public InternalEncoderBestFitFallbackBuffer(InternalEncoderBestFitFallback fallback)
    {
        this.oFallback = fallback;

        if (oFallback.arrayBestFit == null)
        {
            // Lock so we don't confuse ourselves.
            lock (InternalSyncObject)
            {
                // Double check before we do it again.
                if (oFallback.arrayBestFit == null)
                    oFallback.arrayBestFit = fallback.encoding.GetBestFitUnicodeToBytesData();
            }
        }
    }

    // Fallback methods

    // Starts a fallback for a single unknown character. Always returns true:
    // one replacement char (best fit, or '?') becomes available via GetNextChar.
    public override bool Fallback(char charUnknown, int index)
    {
        // If we had a buffer already we're being recursive, throw, it's probably at the suspect
        // character in our array.
        // Shouldn't be able to get here for all of our code pages, table would have to be messed up.
        Contract.Assert(iCount < 1, "[InternalEncoderBestFitFallbackBuffer.Fallback(non surrogate)] Fallback char " + ((int)cBestFit).ToString("X4", CultureInfo.InvariantCulture) + " caused recursive fallback");

        iCount = iSize = 1;
        cBestFit = TryBestFit(charUnknown);
        if (cBestFit == '\0')
            cBestFit = '?';

        return true;
    }

    // Starts a fallback for an unknown surrogate pair. Always returns true:
    // two '?' replacement chars become available via GetNextChar.
    // Throws ArgumentOutOfRangeException when either char is not the expected surrogate half.
    public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index)
    {
        // Double check input surrogate pair
        if (!Char.IsHighSurrogate(charUnknownHigh))
            throw new ArgumentOutOfRangeException("charUnknownHigh",
                Environment.GetResourceString("ArgumentOutOfRange_Range",
                0xD800, 0xDBFF));

        // The reported parameter name must match the actual parameter (camel case).
        if (!Char.IsLowSurrogate(charUnknownLow))
            throw new ArgumentOutOfRangeException("charUnknownLow",
                Environment.GetResourceString("ArgumentOutOfRange_Range",
                0xDC00, 0xDFFF));
        Contract.EndContractBlock();

        // If we had a buffer already we're being recursive, throw, it's probably at the suspect
        // character in our array. 0 is processing last character, < 0 is not falling back
        // Shouldn't be able to get here, table would have to be messed up.
        Contract.Assert(iCount < 1, "[InternalEncoderBestFitFallbackBuffer.Fallback(surrogate)] Fallback char " + ((int)cBestFit).ToString("X4", CultureInfo.InvariantCulture) + " caused recursive fallback");

        // Go ahead and get our fallback, surrogates don't have best fit
        cBestFit = '?';
        iCount = iSize = 2;

        return true;
    }

    // Returns the next replacement char, or '\0' when none is left.
    public override char GetNextChar()
    {
        // We want it to get < 0 because == 0 means that the current/last character is a fallback
        // and we need to detect recursion.  We could have a flag but we already have this counter.
        iCount--;

        // Do we have anything left? 0 is now last fallback char, negative is nothing left
        if (iCount < 0)
            return '\0';

        // Need to get it out of the buffer.
        // Make sure it didn't wrap from the fast count-- path
        if (iCount == int.MaxValue)
        {
            iCount = -1;
            return '\0';
        }

        // Return the best fit character
        return cBestFit;
    }

    // Steps back one replacement char. Only possible while a fallback is in progress
    // (iCount >= 0); returns true when the resulting position is still within the
    // range started by the last Fallback call.
    public override bool MovePrevious()
    {
        if (iCount >= 0)
            iCount++;

        // Return true if we could do it.
        return (iCount >= 0 && iCount <= iSize);
    }

    // How many characters left to output?
    public override int Remaining
    {
        get
        {
            return (iCount > 0) ? iCount : 0;
        }
    }

    // Clear the buffer
    [System.Security.SecuritySafeCritical] // overrides public transparent member
    public override unsafe void Reset()
    {
        iCount = -1;
        charStart = null;
        bFallingBack = false;
    }

    // private helper methods

    // Looks cUnknown up in the best-fit table, which is read as consecutive
    // (unknown char, replacement char) pairs and searched as if sorted by the
    // unknown char. Returns the replacement, or '\0' when there is no entry.
    private char TryBestFit(char cUnknown)
    {
        // Need to figure out our best fit character, low is beginning of array, high is 1 AFTER end of array
        int lowBound = 0;
        int highBound = oFallback.arrayBestFit.Length;
        int index;

        // Binary search the array
        int iDiff;
        while ((iDiff = (highBound - lowBound)) > 6)
        {
            // Look in the middle, which is complicated by the fact that we have 2 #s for each pair,
            // so we don't want index to be odd because we want to be on word boundaries.
            // Also note that index can never == highBound (because diff is rounded down)
            // Clear only the low bit: masking with 0xFFFE would also discard bits 16 and up
            // and send the search to the wrong place for tables of 65536 or more chars.
            index = ((iDiff / 2) + lowBound) & ~1;

            char cTest = oFallback.arrayBestFit[index];
            if (cTest == cUnknown)
            {
                // We found it
                Contract.Assert(index + 1 < oFallback.arrayBestFit.Length,
                    "[InternalEncoderBestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");
                return oFallback.arrayBestFit[index + 1];
            }
            else if (cTest < cUnknown)
            {
                // We weren't high enough
                lowBound = index;
            }
            else
            {
                // We weren't low enough
                highBound = index;
            }
        }

        // Few enough entries left: scan the remaining pairs linearly.
        for (index = lowBound; index < highBound; index += 2)
        {
            if (oFallback.arrayBestFit[index] == cUnknown)
            {
                // We found it
                Contract.Assert(index + 1 < oFallback.arrayBestFit.Length,
                    "[InternalEncoderBestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");
                return oFallback.arrayBestFit[index + 1];
            }
        }

        // Char wasn't in our table
        return '\0';
    }
}
}

View File

@@ -0,0 +1,205 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// EncoderExceptionFallback.cs
namespace System.Text
{
using System;
using System.Runtime.Serialization;
using System.Diagnostics.Contracts;
// Encoder fallback whose buffers throw instead of producing replacement
// characters. The class carries no state, so every instance is equal to every
// other instance and all share one hash code.
[Serializable]
public sealed class EncoderExceptionFallback : EncoderFallback
{
    // Construction
    public EncoderExceptionFallback()
    {
    }

    // Creates a new exception-throwing fallback buffer.
    public override EncoderFallbackBuffer CreateFallbackBuffer()
    {
        EncoderFallbackBuffer buffer = new EncoderExceptionFallbackBuffer();
        return buffer;
    }

    // Maximum number of characters that this instance of this fallback could return
    public override int MaxCharCount
    {
        get { return 0; }
    }

    // True for any EncoderExceptionFallback instance, false for null or any other type.
    public override bool Equals(Object value)
    {
        return value is EncoderExceptionFallback;
    }

    // Constant hash code, consistent with Equals treating all instances as equal.
    public override int GetHashCode()
    {
        return 654;
    }
}
// Fallback buffer used by EncoderExceptionFallback. Both Fallback overloads
// throw EncoderFallbackException, so the buffer never holds any replacement
// characters: GetNextChar returns 0, MovePrevious returns false and Remaining is 0.
public sealed class EncoderExceptionFallbackBuffer : EncoderFallbackBuffer
{
    public EncoderExceptionFallbackBuffer(){}

    // Always throws EncoderFallbackException describing the unknown char and its index.
    public override bool Fallback(char charUnknown, int index)
    {
        // Fall back our char
        throw new EncoderFallbackException(
            Environment.GetResourceString("Argument_InvalidCodePageConversionIndex",
                (int)charUnknown, index), charUnknown, index);
    }

    // Validates the surrogate pair, then always throws EncoderFallbackException
    // describing the code point formed by the pair and its index.
    // Throws ArgumentOutOfRangeException when either char is not the expected surrogate half.
    public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index)
    {
        if (!Char.IsHighSurrogate(charUnknownHigh))
        {
            throw new ArgumentOutOfRangeException("charUnknownHigh",
                Environment.GetResourceString("ArgumentOutOfRange_Range",
                0xD800, 0xDBFF));
        }
        if (!Char.IsLowSurrogate(charUnknownLow))
        {
            // The reported parameter name must match the actual parameter (camel case).
            throw new ArgumentOutOfRangeException("charUnknownLow",
                Environment.GetResourceString("ArgumentOutOfRange_Range",
                0xDC00, 0xDFFF));
        }
        Contract.EndContractBlock();

        // Combine the pair into one UTF-32 value for the message.
        int iTemp = Char.ConvertToUtf32(charUnknownHigh, charUnknownLow);

        // Fall back our char
        throw new EncoderFallbackException(
            Environment.GetResourceString("Argument_InvalidCodePageConversionIndex",
                iTemp, index), charUnknownHigh, charUnknownLow, index);
    }

    // There is never a replacement character to return.
    public override char GetNextChar()
    {
        return (char)0;
    }

    public override bool MovePrevious()
    {
        // Exception fallback doesn't have anywhere to back up to.
        return false;
    }

    // Exceptions are always empty
    public override int Remaining
    {
        get
        {
            return 0;
        }
    }
}
// Exception thrown when an encoder fallback refuses to encode a character.
// Depending on which internal constructor created it, it records either a
// single unknown char (CharUnknown) or an unknown surrogate pair
// (CharUnknownHigh / CharUnknownLow), together with the index passed in.
[Serializable]
public sealed class EncoderFallbackException : ArgumentException
{
    // Single unknown char; stays '\0' when the exception describes a surrogate pair.
    char charUnknown;
    // Halves of the unknown surrogate pair; both stay '\0' for a single unknown char.
    char charUnknownHigh;
    char charUnknownLow;
    // Index supplied by the fallback that raised the exception.
    int index;

    public EncoderFallbackException()
        : base(Environment.GetResourceString("Arg_ArgumentException"))
    {
        SetErrorCode(__HResults.COR_E_ARGUMENT);
    }

    public EncoderFallbackException(String message)
        : base(message)
    {
        SetErrorCode(__HResults.COR_E_ARGUMENT);
    }

    public EncoderFallbackException(String message, Exception innerException)
        : base(message, innerException)
    {
        SetErrorCode(__HResults.COR_E_ARGUMENT);
    }

    // Serialization constructor; only the base class state is restored.
    internal EncoderFallbackException(SerializationInfo info, StreamingContext context) : base(info, context)
    {
    }

    // Describes a single unknown char.
    internal EncoderFallbackException(
        String message, char charUnknown, int index) : base(message)
    {
        this.charUnknown = charUnknown;
        this.index = index;
    }

    // Describes an unknown surrogate pair.
    // Throws ArgumentOutOfRangeException when either char is not the expected surrogate half.
    internal EncoderFallbackException(
        String message, char charUnknownHigh, char charUnknownLow, int index) : base(message)
    {
        if (!Char.IsHighSurrogate(charUnknownHigh))
        {
            throw new ArgumentOutOfRangeException("charUnknownHigh",
                Environment.GetResourceString("ArgumentOutOfRange_Range",
                0xD800, 0xDBFF));
        }
        if (!Char.IsLowSurrogate(charUnknownLow))
        {
            // The reported parameter name must match the actual parameter (camel case).
            throw new ArgumentOutOfRangeException("charUnknownLow",
                Environment.GetResourceString("ArgumentOutOfRange_Range",
                0xDC00, 0xDFFF));
        }
        Contract.EndContractBlock();

        this.charUnknownHigh = charUnknownHigh;
        this.charUnknownLow = charUnknownLow;
        this.index = index;
    }

    public char CharUnknown
    {
        get
        {
            return (charUnknown);
        }
    }

    public char CharUnknownHigh
    {
        get
        {
            return (charUnknownHigh);
        }
    }

    public char CharUnknownLow
    {
        get
        {
            return (charUnknownLow);
        }
    }

    public int Index
    {
        get
        {
            return index;
        }
    }

    // Return true if the unknown character is a surrogate pair.
    // Detected by the high surrogate field being non-zero, which only the
    // surrogate-pair constructor sets.
    public bool IsUnknownSurrogate()
    {
        return (this.charUnknownHigh != '\0');
    }
}
}

View File

@@ -0,0 +1,237 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
using System;
using System.Security;
using System.Threading;
using System.Diagnostics.Contracts;
namespace System.Text
{
// Base class for encoder fallbacks. A fallback is a factory for
// EncoderFallbackBuffer objects and reports the largest number of characters
// such a buffer could return. Two shared instances are exposed through the
// static ReplacementFallback and ExceptionFallback properties.
[Serializable]
public abstract class EncoderFallback
{
// disable csharp compiler warning #0414: field assigned unused value
#pragma warning disable 0414
    // Set to true by InternalEncoderBestFitFallback's constructor.
    internal bool bIsMicrosoftBestFitFallback = false;
#pragma warning restore 0414

    // Lazily created shared instances behind the two static properties below.
    private static volatile EncoderFallback replacementFallback; // Default fallback, uses no best fit & "?"
    private static volatile EncoderFallback exceptionFallback;

    // Private object for locking instead of locking on a public type for SQL reliability work.
    private static Object s_InternalSyncObject;
    private static Object InternalSyncObject
    {
        get
        {
            if (s_InternalSyncObject == null)
            {
                // If another thread published its object first, this new one is simply dropped.
                Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, new Object(), null);
            }
            return s_InternalSyncObject;
        }
    }

    // Shared EncoderReplacementFallback, created on first use. The cached field is
    // checked, and if it is still empty the lock is taken and the field re-checked
    // before the instance is created.
    public static EncoderFallback ReplacementFallback
    {
        get
        {
            if (replacementFallback == null)
            {
                lock (InternalSyncObject)
                {
                    if (replacementFallback == null)
                    {
                        replacementFallback = new EncoderReplacementFallback();
                    }
                }
            }
            return replacementFallback;
        }
    }

    // Shared EncoderExceptionFallback, created on first use in the same way as
    // ReplacementFallback.
    public static EncoderFallback ExceptionFallback
    {
        get
        {
            if (exceptionFallback == null)
            {
                lock (InternalSyncObject)
                {
                    if (exceptionFallback == null)
                    {
                        exceptionFallback = new EncoderExceptionFallback();
                    }
                }
            }
            return exceptionFallback;
        }
    }

    // Fallback
    //
    // Creates the buffer that supplies the replacement characters for input that
    // needs to fall back. Most implementations will be:
    //      return new MyCustomEncoderFallbackBuffer(this);
    public abstract EncoderFallbackBuffer CreateFallbackBuffer();

    // Maximum number of characters that this instance of this fallback could return
    public abstract int MaxCharCount { get; }
}
// Base class for the buffers created by an EncoderFallback. A buffer is told
// about an unencodable character (or surrogate pair) through Fallback and then
// hands out replacement characters one at a time through GetNextChar.
// The internal members at the bottom are bookkeeping used by the framework's own
// encodings (input pointers, the calling EncoderNLS, recursion detection).
public abstract class EncoderFallbackBuffer
{
// Most implementations will probably need an implementation-specific constructor
// Abstract fallback entry points that every concrete buffer must implement.
// The framework's encodings reach them through InternalFallback below, which
// adds surrogate-pair handling and recursion detection.
public abstract bool Fallback(char charUnknown, int index);
public abstract bool Fallback(char charUnknownHigh, char charUnknownLow, int index);
// Get next character
public abstract char GetNextChar();
// Back up a character
public abstract bool MovePrevious();
// How many chars left in this fallback?
public abstract int Remaining { get; }
// Not sure if this should be public or not.
// Clear the buffer
// The default implementation drains the buffer by calling GetNextChar until it returns 0.
public virtual void Reset()
{
while (GetNextChar() != (char)0);
}
// Internal items to help us figure out what we're doing as far as error messages, etc.
// These help us with our performance and messages internally
// charStart / charEnd: bounds of the input being encoded, set by InternalInitialize.
[SecurityCritical]
internal unsafe char* charStart;
[SecurityCritical]
internal unsafe char* charEnd;
// Encoder supplied to InternalInitialize; may be null (InternalFallback checks for that).
internal EncoderNLS encoder;
// True when InternalFallback is allowed to store a left-over high surrogate in the encoder.
internal bool setEncoder;
// Set to true when InternalFallback actually stored a left-over char in the encoder.
internal bool bUsedEncoder;
// True while replacement chars from a previous fallback are still being handed out.
internal bool bFallingBack = false;
// Number of fallbacks requested while already falling back; reset when the buffer runs dry.
internal int iRecursionCount = 0;
// Nested-fallback limit; exceeding it makes InternalFallback throw.
private const int iMaxRecursion = 250;
// Internal Reset
// For example, what if someone fails a conversion and wants to reset one of our fallback buffers?
// Clears the internal bookkeeping and then calls the (possibly overridden) public Reset.
[System.Security.SecurityCritical] // auto-generated
internal unsafe void InternalReset()
{
charStart = null;
bFallingBack = false;
iRecursionCount = 0;
Reset();
}
// Set the above values
// This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
[System.Security.SecurityCritical] // auto-generated
internal unsafe void InternalInitialize(char* charStart, char* charEnd, EncoderNLS encoder, bool setEncoder)
{
this.charStart = charStart;
this.charEnd = charEnd;
this.encoder = encoder;
this.setEncoder = setEncoder;
this.bUsedEncoder = false;
this.bFallingBack = false;
this.iRecursionCount = 0;
}
// Wrapper around GetNextChar that keeps bFallingBack in sync (true while a non-zero
// char is returned) and clears the recursion counter once the buffer is empty.
internal char InternalGetNextChar()
{
char ch = GetNextChar();
bFallingBack = (ch != 0);
if (ch == 0) iRecursionCount = 0;
return ch;
}
// Fallback the current character using the remaining buffer and encoder if necessary
// This can only be called by our encodings (other have to use the public fallback methods), so
// we can use our EncoderNLS here too.
// setEncoder is true if we're calling from a GetBytes method, false if we're calling from a GetByteCount
//
// Note that this could also change the contents of this.encoder, which is the same
// object that the caller is using, so the caller could mess up the encoder for us
// if they aren't careful.
//
// ch is the character to fall back; chars points just past it in the input and is
// advanced by one more when ch and the next char are consumed as a surrogate pair.
// Returns the result of the Fallback call, or false when a trailing high surrogate
// is held back because the encoder is not flushing.
[System.Security.SecurityCritical] // auto-generated
internal unsafe virtual bool InternalFallback(char ch, ref char* chars)
{
// Shouldn't have null charStart
Contract.Assert(charStart != null,
"[EncoderFallback.InternalFallbackBuffer]Fallback buffer is not initialized");
// Get our index, remember chars was preincremented to point at next char, so have to -1
int index = (int)(chars - charStart) - 1;
// See if it was a high surrogate
if (Char.IsHighSurrogate(ch))
{
// See if there's a low surrogate to go with it
if (chars >= this.charEnd)
{
// Nothing left in input buffer
// No input, return 0 if mustflush is false
if (this.encoder != null && !this.encoder.MustFlush)
{
// Done, nothing to fallback
if (this.setEncoder)
{
// Remember the high surrogate in the encoder so a later call can pair it up.
bUsedEncoder = true;
this.encoder.charLeftOver = ch;
}
bFallingBack = false;
return false;
}
// Otherwise (no encoder, or it must flush) fall through and fall back the lone high surrogate.
}
else
{
// Might have a low surrogate
char cNext = *chars;
if (Char.IsLowSurrogate(cNext))
{
// If already falling back then fail
// (the counter is only incremented while bFallingBack is true)
if (bFallingBack && iRecursionCount++ > iMaxRecursion)
ThrowLastCharRecursive(Char.ConvertToUtf32(ch, cNext));
// Next is a surrogate, add it as surrogate pair, and increment chars
chars++;
bFallingBack = Fallback(ch, cNext, index);
return bFallingBack;
}
// Next isn't a low surrogate, just fallback the high surrogate
}
}
// If already falling back then fail
// (the counter is only incremented while bFallingBack is true)
if (bFallingBack && iRecursionCount++ > iMaxRecursion)
ThrowLastCharRecursive((int)ch);
// Fall back our char
bFallingBack = Fallback(ch, index);
return bFallingBack;
}
// private helper methods
// Throws ArgumentException (Argument_RecursiveFallback) for the given UTF-32 value.
internal void ThrowLastCharRecursive(int charRecursive)
{
// Throw it, using our complete character
throw new ArgumentException(
Environment.GetResourceString("Argument_RecursiveFallback",
charRecursive), "chars");
}
}
}

View File

@@ -0,0 +1,299 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
namespace System.Text
{
using System.Runtime.Serialization;
using System.Security.Permissions;
using System.Text;
using System;
using System.Diagnostics.Contracts;
// An Encoder is used to encode a sequence of blocks of characters into
// a sequence of blocks of bytes. Following instantiation of an encoder,
// sequential blocks of characters are converted into blocks of bytes through
// calls to the GetBytes method. The encoder maintains state between the
// conversions, allowing it to correctly encode character sequences that span
// adjacent blocks.
//
// Instances of specific implementations of the Encoder abstract base
// class are typically obtained through calls to the GetEncoder method
// of Encoding objects.
//
[Serializable]
internal class EncoderNLS : Encoder, ISerializable
{
// Need a place for the last left over character, most of our encodings use this
internal char charLeftOver;
protected Encoding m_encoding;
[NonSerialized] protected bool m_mustFlush;
[NonSerialized] internal bool m_throwOnOverflow;
[NonSerialized] internal int m_charsUsed;
#region Serialization
// Constructor called by serialization. called during deserialization.
internal EncoderNLS(SerializationInfo info, StreamingContext context)
{
throw new NotSupportedException(
String.Format(
System.Globalization.CultureInfo.CurrentCulture,
Environment.GetResourceString("NotSupported_TypeCannotDeserialized"), this.GetType()));
}
#if FEATURE_SERIALIZATION
// ISerializable implementation. called during serialization.
[System.Security.SecurityCritical] // auto-generated_required
void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
{
SerializeEncoder(info);
info.AddValue("encoding", this.m_encoding);
info.AddValue("charLeftOver", this.charLeftOver);
info.SetType(typeof(Encoding.DefaultEncoder));
}
#endif
#endregion Serialization
internal EncoderNLS(Encoding encoding)
{
this.m_encoding = encoding;
this.m_fallback = this.m_encoding.EncoderFallback;
this.Reset();
}
// This one is used when deserializing (like UTF7Encoding.Encoder)
internal EncoderNLS()
{
this.m_encoding = null;
this.Reset();
}
public override void Reset()
{
this.charLeftOver = (char)0;
if (m_fallbackBuffer != null)
m_fallbackBuffer.Reset();
}
[System.Security.SecuritySafeCritical] // auto-generated
public override unsafe int GetByteCount(char[] chars, int index, int count, bool flush)
{
// Validate input parameters
if (chars == null)
throw new ArgumentNullException( "chars",
Environment.GetResourceString("ArgumentNull_Array"));
if (index < 0 || count < 0)
throw new ArgumentOutOfRangeException((index<0 ? "index" : "count"),
Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
if (chars.Length - index < count)
throw new ArgumentOutOfRangeException("chars",
Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
Contract.EndContractBlock();
// Avoid empty input problem
if (chars.Length == 0)
chars = new char[1];
// Just call the pointer version
int result = -1;
fixed (char* pChars = chars)
{
result = GetByteCount(pChars + index, count, flush);
}
return result;
}
[System.Security.SecurityCritical] // auto-generated
public unsafe override int GetByteCount(char* chars, int count, bool flush)
{
// Validate input parameters
if (chars == null)
throw new ArgumentNullException( "chars",
Environment.GetResourceString("ArgumentNull_Array"));
if (count < 0)
throw new ArgumentOutOfRangeException("count",
Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
Contract.EndContractBlock();
this.m_mustFlush = flush;
this.m_throwOnOverflow = true;
return m_encoding.GetByteCount(chars, count, this);
}
[System.Security.SecuritySafeCritical] // auto-generated
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
byte[] bytes, int byteIndex, bool flush)
{
// Validate parameters
if (chars == null || bytes == null)
throw new ArgumentNullException((chars == null ? "chars" : "bytes"),
Environment.GetResourceString("ArgumentNull_Array"));
if (charIndex < 0 || charCount < 0)
throw new ArgumentOutOfRangeException((charIndex<0 ? "charIndex" : "charCount"),
Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
if (chars.Length - charIndex < charCount)
throw new ArgumentOutOfRangeException("chars",
Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
if (byteIndex < 0 || byteIndex > bytes.Length)
throw new ArgumentOutOfRangeException("byteIndex",
Environment.GetResourceString("ArgumentOutOfRange_Index"));
Contract.EndContractBlock();
if (chars.Length == 0)
chars = new char[1];
int byteCount = bytes.Length - byteIndex;
if (bytes.Length == 0)
bytes = new byte[1];
// Just call pointer version
fixed (char* pChars = chars)
fixed (byte* pBytes = bytes)
// Remember that charCount is # to decode, not size of array.
return GetBytes(pChars + charIndex, charCount,
pBytes + byteIndex, byteCount, flush);
}
[System.Security.SecurityCritical] // auto-generated
public unsafe override int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, bool flush)
{
// Validate parameters
if (chars == null || bytes == null)
throw new ArgumentNullException((chars == null ? "chars" : "bytes"),
Environment.GetResourceString("ArgumentNull_Array"));
if (byteCount < 0 || charCount < 0)
throw new ArgumentOutOfRangeException((byteCount<0 ? "byteCount" : "charCount"),
Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
Contract.EndContractBlock();
this.m_mustFlush = flush;
this.m_throwOnOverflow = true;
return m_encoding.GetBytes(chars, charCount, bytes, byteCount, this);
}
// This method is used when your output buffer might not be large enough for the entire result.
// Just call the pointer version. (This gets bytes)
[System.Security.SecuritySafeCritical] // auto-generated
public override unsafe void Convert(char[] chars, int charIndex, int charCount,
byte[] bytes, int byteIndex, int byteCount, bool flush,
out int charsUsed, out int bytesUsed, out bool completed)
{
// Validate parameters
if (chars == null || bytes == null)
throw new ArgumentNullException((chars == null ? "chars" : "bytes"),
Environment.GetResourceString("ArgumentNull_Array"));
if (charIndex < 0 || charCount < 0)
throw new ArgumentOutOfRangeException((charIndex<0 ? "charIndex" : "charCount"),
Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
if (byteIndex < 0 || byteCount < 0)
throw new ArgumentOutOfRangeException((byteIndex<0 ? "byteIndex" : "byteCount"),
Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
if (chars.Length - charIndex < charCount)
throw new ArgumentOutOfRangeException("chars",
Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
if (bytes.Length - byteIndex < byteCount)
throw new ArgumentOutOfRangeException("bytes",
Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
Contract.EndContractBlock();
// Avoid empty input problem
if (chars.Length == 0)
chars = new char[1];
if (bytes.Length == 0)
bytes = new byte[1];
// Just call the pointer version (can't do this for non-msft encoders)
fixed (char* pChars = chars)
{
fixed (byte* pBytes = bytes)
{
Convert(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, flush,
out charsUsed, out bytesUsed, out completed);
}
}
}
// Pointer-based Convert overload (chars -> bytes). Stores the flush request on this
// encoder, turns off throw-on-overflow, and calls the encoding's GetBytes worker with
// this encoder as its state; the worker reports consumed chars through m_charsUsed.
[System.Security.SecurityCritical]  // auto-generated
public override unsafe void Convert(char* chars, int charCount,
                                    byte* bytes, int byteCount, bool flush,
                                    out int charsUsed, out int bytesUsed, out bool completed)
{
    // Null pointers: "bytes" is reported in preference to "chars".
    if (bytes == null)
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    if (chars == null)
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));

    // Negative lengths: "charCount" is reported in preference to "byteCount".
    if (charCount < 0)
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // Set up the per-call state; we don't want the worker to throw on overflow.
    this.m_mustFlush = flush;
    this.m_throwOnOverflow = false;
    this.m_charsUsed = 0;

    // Run the conversion and pick up how much input it consumed.
    bytesUsed = this.m_encoding.GetBytes(chars, charCount, bytes, byteCount, this);
    charsUsed = this.m_charsUsed;

    // Completed means: all requested chars were consumed, AND (no flush was requested or
    // no state is left in the encoder), AND the fallback buffer (if any) is drained.
    if (charsUsed != charCount)
    {
        completed = false;
    }
    else if (flush && this.HasState)
    {
        completed = false;
    }
    else
    {
        completed = (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0);
    }
}
// Returns the encoding stored in m_encoding (the one Convert calls GetBytes on).
public Encoding Encoding
{
    get { return this.m_encoding; }
}
// Returns the current value of the must-flush flag (m_mustFlush).
public bool MustFlush
{
    get { return this.m_mustFlush; }
}
// True when the encoder is holding a left-over character (charLeftOver is non-zero).
internal virtual bool HasState
{
    get { return this.charLeftOver != '\0'; }
}
// Resets the must-flush flag. Lets the encoding clear the flag instead of throwing
// (in ThrowBytesOverflow).
internal void ClearMustFlush()
{
    this.m_mustFlush = false;
}
}
}

View File

@@ -0,0 +1,242 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// EncoderReplacementFallback.cs
//
namespace System.Text
{
using System;
using System.Runtime;
using System.Diagnostics.Contracts;
[Serializable]
public sealed class EncoderReplacementFallback : EncoderFallback
{
    // The replacement string. The field name is left unchanged because the type is
    // [Serializable].
    private String strDefault;

    // Default construction: the replacement string is "?".
    public EncoderReplacementFallback() : this("?")
    {
    }

    // Creates a fallback that uses the given replacement string.
    // Throws ArgumentNullException when replacement is null and ArgumentException when it
    // contains a surrogate that is not part of a well-formed high/low pair.
    public EncoderReplacementFallback(String replacement)
    {
        if (replacement == null)
            throw new ArgumentNullException("replacement");
        Contract.EndContractBlock();

        // Scan for malformed surrogate sequences. The flag is true while a high surrogate
        // is waiting for its low partner; leaving the loop with it set means "invalid".
        bool awaitingLow = false;
        for (int pos = 0; pos < replacement.Length; pos++)
        {
            if (!Char.IsSurrogate(replacement, pos))
            {
                // Ordinary character: only a problem if a high surrogate is still pending.
                if (awaitingLow)
                    break;
                continue;
            }

            if (Char.IsHighSurrogate(replacement, pos))
            {
                // A second high surrogate in a row is invalid; the flag is already set.
                if (awaitingLow)
                    break;
                awaitingLow = true;
                continue;
            }

            // Low surrogate from here on.
            if (!awaitingLow)
            {
                // No high surrogate before it: set the flag so the check below throws.
                awaitingLow = true;
                break;
            }

            // Pair completed.
            awaitingLow = false;
        }

        if (awaitingLow)
            throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex", "replacement"));

        strDefault = replacement;
    }

    // The replacement string this fallback was constructed with.
    public String DefaultString
    {
        get { return strDefault; }
    }

    // Creates a new buffer bound to this fallback.
    public override EncoderFallbackBuffer CreateFallbackBuffer()
    {
        return new EncoderReplacementFallbackBuffer(this);
    }

    // Maximum number of characters that this instance of this fallback could return:
    // the length of the replacement string.
    public override int MaxCharCount
    {
#if !FEATURE_CORECLR
        [TargetedPatchingOptOut("Performance critical to inline across NGen image boundaries")]
#endif
        get { return strDefault.Length; }
    }

    // Two replacement fallbacks are equal when their replacement strings are equal.
#if !FEATURE_CORECLR
    [TargetedPatchingOptOut("Performance critical to inline across NGen image boundaries")]
#endif
    public override bool Equals(Object value)
    {
        EncoderReplacementFallback other = value as EncoderReplacementFallback;
        return other != null && this.strDefault == other.strDefault;
    }

    // Hash code of the replacement string, consistent with Equals.
    public override int GetHashCode()
    {
        return strDefault.GetHashCode();
    }
}
// Fallback buffer for EncoderReplacementFallback: after a call to Fallback it hands out
// the replacement characters one at a time through GetNextChar.
public sealed class EncoderReplacementFallbackBuffer : EncoderFallbackBuffer
{
    // The replacement string stored twice back to back: a surrogate pair is answered with
    // the whole doubled string, a single char with the first half only.
    private String strDefault;

    // Number of replacement chars GetNextChar still has to return. 0 means the char most
    // recently returned was the last one; negative means nothing is pending.
    int fallbackCount = -1;

    // Index into strDefault of the char most recently returned by GetNextChar.
    int fallbackIndex = -1;

    // Construction
    public EncoderReplacementFallbackBuffer(EncoderReplacementFallback fallback)
    {
        // 2X in case we're a surrogate pair
        this.strDefault = fallback.DefaultString + fallback.DefaultString;
    }

    // Starts a fallback for a single unencodable char.
    //   charUnknown - the char that could not be encoded.
    //   index       - not used by this implementation.
    // Returns true when there is at least one replacement char to hand out.
    public override bool Fallback(char charUnknown, int index)
    {
        // If we had a buffer already we're being recursive (the replacement itself cannot
        // be encoded); report it, it's probably at the suspect character in our array.
        if (fallbackCount >= 1)
        {
            // If we're recursive we may still have something in our buffer that makes this
            // a surrogate pair. fallbackCount >= 1 guarantees fallbackIndex+1 is in range.
            if (char.IsHighSurrogate(charUnknown) &&
                char.IsLowSurrogate(strDefault[fallbackIndex+1]))
                ThrowLastCharRecursive(Char.ConvertToUtf32(charUnknown, strDefault[fallbackIndex+1]));

            // Nope, just one character
            ThrowLastCharRecursive(unchecked((int)charUnknown));
        }

        // Go ahead and get our fallback.
        // Divide by 2 because we aren't a surrogate pair (strDefault holds two copies).
        fallbackCount = strDefault.Length/2;
        fallbackIndex = -1;

        return fallbackCount != 0;
    }

    // Starts a fallback for an unencodable surrogate pair.
    //   charUnknownHigh - must be a high surrogate (0xD800-0xDBFF).
    //   charUnknownLow  - must be a low surrogate (0xDC00-0xDFFF).
    //   index           - not used by this implementation.
    // Throws ArgumentOutOfRangeException when either char is outside its surrogate range.
    // Returns true when there is at least one replacement char to hand out.
    public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index)
    {
        // Double check input surrogate pair
        if (!Char.IsHighSurrogate(charUnknownHigh))
            throw new ArgumentOutOfRangeException("charUnknownHigh",
                Environment.GetResourceString("ArgumentOutOfRange_Range",
                0xD800, 0xDBFF));

        // The reported name must match the actual parameter name (camelCase).
        if (!Char.IsLowSurrogate(charUnknownLow))
            throw new ArgumentOutOfRangeException("charUnknownLow",
                Environment.GetResourceString("ArgumentOutOfRange_Range",
                0xDC00, 0xDFFF));
        Contract.EndContractBlock();

        // If we had a buffer already we're being recursive, throw, it's probably at the
        // suspect character in our array.
        if (fallbackCount >= 1)
            ThrowLastCharRecursive(Char.ConvertToUtf32(charUnknownHigh, charUnknownLow));

        // Go ahead and get our fallback: both copies, one per input char of the pair.
        fallbackCount = strDefault.Length;
        fallbackIndex = -1;

        return fallbackCount != 0;
    }

    // Returns the next replacement char, or '\0' when nothing is left.
    public override char GetNextChar()
    {
        // We want it to get < 0 because == 0 means that the current/last character is a
        // fallback and we need to detect recursion. We could have a flag but we already
        // have this counter.
        fallbackCount--;
        fallbackIndex++;

        // Do we have anything left? 0 is now last fallback char, negative is nothing left
        if (fallbackCount < 0)
            return '\0';

        // Need to get it out of the buffer.
        // Make sure it didn't wrap from the fast count-- path
        if (fallbackCount == int.MaxValue)
        {
            fallbackCount = -1;
            return '\0';
        }

        // Now make sure its in the expected range
        Contract.Assert(fallbackIndex < strDefault.Length && fallbackIndex >= 0,
                        "Index exceeds buffer range");

        return strDefault[fallbackIndex];
    }

    // Steps back one char so the next GetNextChar returns the same char again.
    // Returns false when there is nothing to step back over.
    public override bool MovePrevious()
    {
        // Back up one, only if we just processed the last character (or earlier)
        if (fallbackCount >= -1 && fallbackIndex >= 0)
        {
            fallbackIndex--;
            fallbackCount++;
            return true;
        }

        // Return false 'cause we couldn't do it.
        return false;
    }

    // How many characters left to output?
    public override int Remaining
    {
        get
        {
            // A negative count means nothing is pending; report that as 0.
            return (fallbackCount < 0) ? 0 : fallbackCount;
        }
    }

    // Clear the buffer
    [System.Security.SecuritySafeCritical]  // auto-generated
    public override unsafe void Reset()
    {
        fallbackCount = -1;
        // NOTE(review): the index is reset to 0 here although the field initializer uses
        // -1; kept as-is to preserve existing behavior - confirm whether this is intended.
        fallbackIndex = 0;
        charStart = null;
        bFallingBack = false;
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,75 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
namespace System.Text
{
using System;
using System.Text;
// Describes one encoding: its code page number, short name and display name.
[Serializable]
public sealed class EncodingInfo
{
    // Field names are left unchanged because the type is [Serializable].
    int iCodePage;              // Code Page #
    String strEncodingName;     // Short name (web name)
    String strDisplayName;      // Full localized name

    // Stores the three values verbatim; no validation is performed.
    internal EncodingInfo(int codePage, string name, string displayName)
    {
        iCodePage = codePage;
        strEncodingName = name;
        strDisplayName = displayName;
    }

    // The code page number.
    public int CodePage
    {
        get { return iCodePage; }
    }

    // The short (web) name.
    public String Name
    {
        get { return strEncodingName; }
    }

    // The display name.
    public String DisplayName
    {
        get { return strDisplayName; }
    }

    // Looks up the Encoding for this code page via Encoding.GetEncoding.
    public Encoding GetEncoding()
    {
        return Encoding.GetEncoding(iCodePage);
    }

    // Two EncodingInfo objects are equal when their code pages match; the names are
    // not compared.
    public override bool Equals(Object value)
    {
        EncodingInfo other = value as EncodingInfo;
        return other != null && this.CodePage == other.CodePage;
    }

    // The code page number doubles as the hash code, consistent with Equals.
    public override int GetHashCode()
    {
        return this.CodePage;
    }
}
}

View File

@@ -0,0 +1,375 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
#if FEATURE_ENCODINGNLS
namespace System.Text
{
using System;
using System.Diagnostics.Contracts;
using System.Collections;
using System.Runtime.Remoting;
using System.Globalization;
using System.Threading;
using Win32Native = Microsoft.Win32.Win32Native;
// This class overrides Encoding with the things we need for our NLS Encodings
//
// All of the GetBytes/Chars GetByte/CharCount methods are just wrappers for the pointer
// plus decoder/encoder method that is our real workhorse. Note that this is an internal
// class, so our public classes cannot derive from this class. Because of this, all of the
// GetBytes/Chars GetByte/CharCount wrapper methods are duplicated in all of our public
// encodings, which currently include:
//
// EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, & UnicodeEncoding
//
// So if you change the wrappers in this class, you must change the wrappers in the other classes
// as well because they should have the same behavior.
//
[System.Runtime.InteropServices.ComVisible(true)]
[Serializable]
internal abstract class EncodingNLS : Encoding
{
    // MAINTENANCE NOTE: every public wrapper below validates its arguments and forwards to
    // the pointer + encoder/decoder worker. The same wrappers are duplicated in the public
    // encodings that cannot derive from this class, so if you fix one here, fix the others:
    //     EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding

    protected EncodingNLS(int codePage) : base(codePage)
    {
    }

    // Returns the number of bytes required to encode count characters of chars,
    // starting at index. (Parent method is safe.)
    [System.Security.SecuritySafeCritical] // overrides public transparent member
    public override unsafe int GetByteCount(char[] chars, int index, int count)
    {
        if (chars == null)
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (index < 0)
            throw new ArgumentOutOfRangeException("index",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (count < 0)
            throw new ArgumentOutOfRangeException("count",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (chars.Length - index < count)
            throw new ArgumentOutOfRangeException("chars",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        Contract.EndContractBlock();

        // Nothing to count; also avoids pinning an empty array.
        if (chars.Length == 0)
            return 0;

        fixed (char* charBase = chars)
        {
            return GetByteCount(charBase + index, count, null);
        }
    }

    // Returns the number of bytes required to encode the whole string.
    // (Parent method is safe.)
    [System.Security.SecuritySafeCritical] // overrides public transparent member
    public override unsafe int GetByteCount(String s)
    {
        if (s == null)
            throw new ArgumentNullException("s");
        Contract.EndContractBlock();

        fixed (char* charBase = s)
        {
            return GetByteCount(charBase, s.Length, null);
        }
    }

    // Pointer version of GetByteCount; forwards to the worker with no encoder.
    [System.Security.SecurityCritical] // auto-generated
    public override unsafe int GetByteCount(char* chars, int count)
    {
        if (chars == null)
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (count < 0)
            throw new ArgumentOutOfRangeException("count",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        Contract.EndContractBlock();

        // Call it with empty encoder
        return GetByteCount(chars, count, null);
    }

    // Encodes charCount characters of s, starting at charIndex, into bytes starting at
    // byteIndex. All space from byteIndex to the end of bytes is offered as output.
    // (Parent method is safe.)
    [System.Security.SecuritySafeCritical] // overrides public transparent member
    public override unsafe int GetBytes(String s, int charIndex, int charCount,
                                        byte[] bytes, int byteIndex)
    {
        if (s == null)
            throw new ArgumentNullException("s",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (bytes == null)
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (charIndex < 0)
            throw new ArgumentOutOfRangeException("charIndex",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (charCount < 0)
            throw new ArgumentOutOfRangeException("charCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (s.Length - charIndex < charCount)
            throw new ArgumentOutOfRangeException("s",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCount"));
        if (byteIndex < 0 || byteIndex > bytes.Length)
            throw new ArgumentOutOfRangeException("byteIndex",
                Environment.GetResourceString("ArgumentOutOfRange_Index"));
        Contract.EndContractBlock();

        // Output room is measured on the caller's array, before any dummy substitution.
        int room = bytes.Length - byteIndex;

        // Fixed doesn't like empty arrays
        if (bytes.Length == 0)
            bytes = new byte[1];

        fixed (char* charBase = s)
        fixed (byte* byteBase = bytes)
        {
            return GetBytes(charBase + charIndex, charCount,
                            byteBase + byteIndex, room, null);
        }
    }

    // Encodes a range of characters in a character array into a range of bytes in a byte
    // array. An exception occurs if the byte array is not large enough to hold the
    // complete encoding of the characters. GetByteCount gives the exact number of bytes
    // that will be produced for a given range of characters; GetMaxByteCount gives the
    // maximum for a given number of characters regardless of their values.
    // (Parent method is safe.)
    [System.Security.SecuritySafeCritical] // overrides public transparent member
    public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                        byte[] bytes, int byteIndex)
    {
        if (chars == null)
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (bytes == null)
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (charIndex < 0)
            throw new ArgumentOutOfRangeException("charIndex",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (charCount < 0)
            throw new ArgumentOutOfRangeException("charCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (chars.Length - charIndex < charCount)
            throw new ArgumentOutOfRangeException("chars",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        if (byteIndex < 0 || byteIndex > bytes.Length)
            throw new ArgumentOutOfRangeException("byteIndex",
                Environment.GetResourceString("ArgumentOutOfRange_Index"));
        Contract.EndContractBlock();

        // If nothing to encode return 0, avoid fixed problem
        if (chars.Length == 0)
            return 0;

        // The byte count handed to the worker is the room left after byteIndex,
        // not the size of the array.
        int room = bytes.Length - byteIndex;

        // Fixed doesn't like empty arrays
        if (bytes.Length == 0)
            bytes = new byte[1];

        fixed (char* charBase = chars)
        fixed (byte* byteBase = bytes)
        {
            return GetBytes(charBase + charIndex, charCount,
                            byteBase + byteIndex, room, null);
        }
    }

    // Pointer version of GetBytes; forwards to the worker with no encoder.
    [System.Security.SecurityCritical] // auto-generated
    public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
    {
        if (bytes == null)
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (chars == null)
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (charCount < 0)
            throw new ArgumentOutOfRangeException("charCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (byteCount < 0)
            throw new ArgumentOutOfRangeException("byteCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        Contract.EndContractBlock();

        return GetBytes(chars, charCount, bytes, byteCount, null);
    }

    // Returns the number of characters produced by decoding count bytes of bytes,
    // starting at index. (Parent method is safe.)
    [System.Security.SecuritySafeCritical] // overrides public transparent member
    public override unsafe int GetCharCount(byte[] bytes, int index, int count)
    {
        if (bytes == null)
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (index < 0)
            throw new ArgumentOutOfRangeException("index",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (count < 0)
            throw new ArgumentOutOfRangeException("count",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (bytes.Length - index < count)
            throw new ArgumentOutOfRangeException("bytes",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        Contract.EndContractBlock();

        // If no input just return 0, fixed doesn't like 0 length arrays
        if (bytes.Length == 0)
            return 0;

        fixed (byte* byteBase = bytes)
        {
            return GetCharCount(byteBase + index, count, null);
        }
    }

    // Pointer version of GetCharCount; forwards to the worker with no decoder.
    [System.Security.SecurityCritical] // auto-generated
    public override unsafe int GetCharCount(byte* bytes, int count)
    {
        if (bytes == null)
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (count < 0)
            throw new ArgumentOutOfRangeException("count",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        Contract.EndContractBlock();

        return GetCharCount(bytes, count, null);
    }

    // Decodes byteCount bytes of bytes, starting at byteIndex, into chars starting at
    // charIndex. All space from charIndex to the end of chars is offered as output.
    // (Parent method is safe.)
    [System.Security.SecuritySafeCritical] // overrides public transparent member
    public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                        char[] chars, int charIndex)
    {
        if (bytes == null)
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (chars == null)
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (byteIndex < 0)
            throw new ArgumentOutOfRangeException("byteIndex",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (byteCount < 0)
            throw new ArgumentOutOfRangeException("byteCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (bytes.Length - byteIndex < byteCount)
            throw new ArgumentOutOfRangeException("bytes",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        if (charIndex < 0 || charIndex > chars.Length)
            throw new ArgumentOutOfRangeException("charIndex",
                Environment.GetResourceString("ArgumentOutOfRange_Index"));
        Contract.EndContractBlock();

        // If no input, return 0 & avoid fixed problem
        if (bytes.Length == 0)
            return 0;

        // The char count handed to the worker is the room left after charIndex,
        // not the size of the array.
        int room = chars.Length - charIndex;

        // Fixed doesn't like empty arrays
        if (chars.Length == 0)
            chars = new char[1];

        fixed (byte* byteBase = bytes)
        fixed (char* charBase = chars)
        {
            return GetChars(byteBase + byteIndex, byteCount,
                            charBase + charIndex, room, null);
        }
    }

    // Pointer version of GetChars; forwards to the worker with no decoder.
    [System.Security.SecurityCritical] // auto-generated
    public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
    {
        if (bytes == null)
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (chars == null)
            throw new ArgumentNullException("chars",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (charCount < 0)
            throw new ArgumentOutOfRangeException("charCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (byteCount < 0)
            throw new ArgumentOutOfRangeException("byteCount",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        Contract.EndContractBlock();

        return GetChars(bytes, byteCount, chars, charCount, null);
    }

    // Returns a string containing the decoded representation of count bytes of bytes,
    // starting at index. (Parent method is safe.)
    [System.Security.SecuritySafeCritical] // overrides public transparent member
    public override unsafe String GetString(byte[] bytes, int index, int count)
    {
        if (bytes == null)
            throw new ArgumentNullException("bytes",
                Environment.GetResourceString("ArgumentNull_Array"));
        if (index < 0)
            throw new ArgumentOutOfRangeException("index",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (count < 0)
            throw new ArgumentOutOfRangeException("count",
                Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
        if (bytes.Length - index < count)
            throw new ArgumentOutOfRangeException("bytes",
                Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
        Contract.EndContractBlock();

        // Avoid problems with empty input buffer
        if (bytes.Length == 0)
            return String.Empty;

        fixed (byte* byteBase = bytes)
        {
            return String.CreateStringFromEncoding(byteBase + index, count, this);
        }
    }

    // Returns a new DecoderNLS bound to this encoding.
    public override Decoder GetDecoder()
    {
        return new DecoderNLS(this);
    }

    // Returns a new EncoderNLS bound to this encoding.
    public override Encoder GetEncoder()
    {
        return new EncoderNLS(this);
    }
}
}
#endif // FEATURE_ENCODINGNLS

View File

@@ -0,0 +1,187 @@
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncoding
namespace System.Text
{
using System.Text;
using System.Globalization;
// EUCJPEncoding
//
// EUC-JP Encoding (51932)
//
// EUC-JP has the following code points:
// 00-7F - ASCII
// 80-8D & 90-9F - Control. (Like Unicode, except for 8e and 8f)
// A1-FE, A1-FE - 2 byte JIS X 0208 range.
// 8E, A1-DF - 2 byte half-width Katakana
// 8F, A1-FE, A1-FE - 3 byte JIS X 0212 range. WE DON'T USE JIS 0212!!!
//
// New thoughts:
// Fixing windows 20932 code page so that all characters can be looked up there.
//
// Old thoughts:
// Windows NLS uses a special CP20932 for EUC-JP, but it is not used by mlang. Windows
// Maps the 3 byte ranges to the 2 byte CP20932 by masking the 2nd byte with & 0x7F.
// MLang uses the native windows 932 code page, which is more reliable, however the code points
// don't line up as nicely as the 20932 code page, however it doesn't have JIS X 0212 support.
//
// So what we do is:
// 1. For ASCII, leave it alone
// 2. For half-width Katakana, use the leading byte and convert with 20932 code page.
// 3. For JIS X 0208, Use the leading & trailing bytes with 20932 code page
// 4. For JIS X 0212, Remove the lead byte, & 0xFF7F, and use the CP20932 table to convert.
//
// Regarding Normalization:
// Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
// Form D is precluded because of 0x00a8, which changes to space + diaeresis.
//
// I think that IsAlwaysNormalized should probably return true for form C (but not certain)
//
// NOTE: We don't use JIS 0212 so we are basically a DBCS code page, we just have to modify
// the 932 table we're basing this on.
//
using System;
[Serializable]
internal class EUCJPEncoding : DBCSCodePageEncoding
{
    // Code page 51932, built on the data table of code page 932: this pretends to be
    // CP 932 as far as memory tables are concerned.
    [System.Security.SecurityCritical]  // auto-generated
    public EUCJPEncoding() : base(51932, 932)
    {
        this.m_bUseMlangTypeForSerialization = true;
    }

    // Builds the memory section name from the code page in use and the four version
    // fields of the code page header, with an "_EUCJP" suffix.
    [System.Security.SecurityCritical]  // auto-generated
    protected unsafe override String GetMemorySectionName()
    {
        int tablePage;
        if (this.bFlagDataTable)
            tablePage = dataTableCodePage;
        else
            tablePage = CodePage;

        return String.Format(CultureInfo.InvariantCulture, "CodePage_{0}_{1}_{2}_{3}_{4}_EUCJP",
                             tablePage, this.pCodePage->VersionMajor, this.pCodePage->VersionMinor,
                             this.pCodePage->VersionRevision, this.pCodePage->VersionBuild);
    }

    // Clean up characters for EUC-JP code pages, etc.
    // Rewrites a CP 932 byte value in place into the value this encoding uses and returns
    // whether the result is usable. Note that bytes may be modified even when false is
    // returned.
    protected override bool CleanUpBytes(ref int bytes)
    {
        // ---------- Single-byte values ----------
        if (bytes < 0x100)
        {
            // For 51932 1/2 Katakana gets a 0x8E lead byte
            if (bytes >= 0xa1 && bytes <= 0xdf)
            {
                bytes |= 0x8E00;
                return true;
            }

            // 0x81-0x9f and 0xe0-0xfc   CP 932
            // 0x8e and 0xa1-0xfe        CP 20932 (we don't use 8e though)
            // So 81-9f & e0-fc are 932 lead bytes, a1-fe are our lead bytes;
            // ignore everything above 0x80 except 0xa0 and 0xff.
            // We set different lead bytes later, so those are simply rejected here.
            bool rejected = bytes >= 0x81 && bytes != 0xa0 && bytes != 0xff;
            return !rejected;
        }

        // ---------- Double-byte values ----------

        // Map extended chars (0xfa40-0xfc4b) to a special range (ported from mlang).
        if (bytes >= 0xfa40 && bytes <= 0xfa5b)
        {
            if (bytes <= 0xfa49)
                bytes -= 0x0b51;
            else if (bytes <= 0xfa53)
                bytes -= 0x072f6;
            else if (bytes <= 0xfa57)
                bytes -= 0x0b5b;
            else
            {
                // 0xfa58-0xfa5b each have an individual target.
                switch (bytes)
                {
                    case 0xfa58: bytes = 0x878a; break;
                    case 0xfa59: bytes = 0x8782; break;
                    case 0xfa5a: bytes = 0x8784; break;
                    case 0xfa5b: bytes = 0x879a; break;
                }
            }
        }
        else if (bytes >= 0xfa5c && bytes <= 0xfc4b)
        {
            // The shift depends on the trail byte only.
            byte low = unchecked((byte)bytes);
            if (low < 0x5c)
                bytes -= 0x0d5f;
            else if (low >= 0x80 && low <= 0x9B)
                bytes -= 0x0d1d;
            else
                bytes -= 0x0d1c;
        }

        // Convert 932 code page to 20932 like code page range (also ported from mlang).
        // The arithmetic is done on bytes on purpose, so intermediate results wrap.
        byte lead = unchecked((byte)(bytes >> 8));
        byte trail = unchecked((byte)bytes);

        if (lead > (byte)0x9f)
            lead -= (byte)0xb1;
        else
            lead -= (byte)0x71;
        lead = (byte)((lead << 1) + 1);

        if (trail > (byte)0x9e)
        {
            trail -= (byte)0x7e;
            lead++;
        }
        else
        {
            if (trail > (byte)0x7e)
                trail--;
            trail -= (byte)0x1f;
        }

        bytes = ((int)lead) << 8 | (int)trail | 0x8080;

        // Don't step out of our allocated lead byte area.
        // All DBCS lead and trail bytes should be >= 0xa1 and <= 0xfe.
        // WARNING: Our funky mapping allows illegal values, which we continue to use
        // so that we're compatible with Everett.
        int hi = bytes & 0xFF00;
        int lo = bytes & 0xFF;
        return hi >= 0xa100 && hi <= 0xfe00 && lo >= 0xa1 && lo <= 0xfe;
    }

    // Marks the lead bytes specific to CP 51932 in the given table:
    // A1-FE are DBCS code points, and 8E is the lead byte for Katakana.
    [System.Security.SecurityCritical]  // auto-generated
    protected override unsafe void CleanUpEndBytes(char* chars)
    {
        for (int leadByte = 0xA1; leadByte <= 0xFE; leadByte++)
        {
            chars[leadByte] = LEAD_BYTE_CHAR;
        }

        // And 8E is lead byte for Katakana (already set)
        chars[0x8e] = LEAD_BYTE_CHAR;
    }
}
}
#endif // FEATURE_CODEPAGES_FILE

Some files were not shown because too many files have changed in this diff Show More