1208 lines
52 KiB
C#
1208 lines
52 KiB
C#
|
// ==++==
|
||
|
//
|
||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||
|
//
|
||
|
// ==--==
|
||
|
#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding
|
||
|
namespace System.Text
|
||
|
{
|
||
|
using System;
|
||
|
using System.Diagnostics.Contracts;
|
||
|
using System.Text;
|
||
|
using System.Threading;
|
||
|
using System.Runtime.Serialization;
|
||
|
using System.Security;
|
||
|
using System.Security.Permissions;
|
||
|
|
||
|
// DBCSCodePageEncoding
|
||
|
//
|
||
|
[Serializable]
|
||
|
internal class DBCSCodePageEncoding : BaseCodePageEncoding, ISerializable
|
||
|
{
|
||
|
// Pointers to our memory section parts
|
||
|
[NonSerialized]
|
||
|
[SecurityCritical]
|
||
|
protected unsafe char* mapBytesToUnicode = null; // char 65536
|
||
|
[NonSerialized]
|
||
|
[SecurityCritical]
|
||
|
protected unsafe ushort* mapUnicodeToBytes = null; // byte 65536
|
||
|
[NonSerialized]
|
||
|
[SecurityCritical]
|
||
|
protected unsafe int* mapCodePageCached = null; // to remember which CP is cached
|
||
|
|
||
|
[NonSerialized]
|
||
|
protected const char UNKNOWN_CHAR_FLAG=(char)0x0;
|
||
|
[NonSerialized]
|
||
|
protected const char UNICODE_REPLACEMENT_CHAR=(char)0xFFFD;
|
||
|
[NonSerialized]
|
||
|
protected const char LEAD_BYTE_CHAR=(char)0xFFFE; // For lead bytes
|
||
|
|
||
|
// Note that even though we provide bytesUnknown and byteCountUnknown,
|
||
|
// They aren't actually used because of the fallback mechanism. (char is though)
|
||
|
[NonSerialized]
|
||
|
ushort bytesUnknown;
|
||
|
[NonSerialized]
|
||
|
int byteCountUnknown;
|
||
|
[NonSerialized]
|
||
|
protected char charUnknown = (char)0;
|
||
|
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
public DBCSCodePageEncoding(int codePage) : this(codePage, codePage)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal DBCSCodePageEncoding(int codePage, int dataCodePage) : base(codePage, dataCodePage)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
// Constructor called by serialization.
|
||
|
// Note: We use the base GetObjectData however
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal DBCSCodePageEncoding(SerializationInfo info, StreamingContext context) : base(0)
|
||
|
{
|
||
|
// Actually this can't ever get called, CodePageEncoding is our proxy
|
||
|
Contract.Assert(false, "Didn't expect to make it to DBCSCodePageEncoding serialization constructor");
|
||
|
throw new ArgumentNullException("this");
|
||
|
}
|
||
|
|
||
|
// MBCS data section:
|
||
|
//
|
||
|
// We treat each multibyte pattern as 2 bytes in our table. If its a single byte, then the high byte
|
||
|
// for that position will be 0. When the table is loaded, leading bytes are flagged with 0xFFFE, so
|
||
|
// when reading the table look up with each byte. If the result is 0xFFFE, then use 2 bytes to read
|
||
|
// further data. FFFF is a special value indicating that the unicode code is the same as the
|
||
|
// character code (this helps us support code points < 0x20). FFFD is used as replacement character.
|
||
|
//
|
||
|
// Normal table:
|
||
|
// WCHAR* - Starting with MB code point 0.
|
||
|
// FFFF indicates we are to use the multibyte value for our code point.
|
||
|
// FFFE is the lead byte mark. (This should only appear in positions < 0x100)
|
||
|
// FFFD is the replacement (unknown character) mark.
|
||
|
// 2-20 means to advance the pointer 2-0x20 characters.
|
||
|
// 1 means that to advance to the multibyte position contained in the next char.
|
||
|
// 0 nothing special (I don't think its possible.)
|
||
|
//
|
||
|
// Table ends when multibyte position has advanced to 0xFFFF.
|
||
|
//
|
||
|
// Bytes->Unicode Best Fit table:
|
||
|
// WCHAR* - Same as normal table, except first wchar is byte position to start at.
|
||
|
//
|
||
|
// Unicode->Bytes Best Fit Table:
|
||
|
// WCHAR* - Same as normal table, except first wchar is char position to start at and
|
||
|
// we loop through unicode code points and the table has the byte points that
|
||
|
// corrospond to those unicode code points.
|
||
|
// We have a managed code page entry, so load our tables
|
||
|
//
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
protected override unsafe void LoadManagedCodePage()
|
||
|
{
|
||
|
// Should be loading OUR code page
|
||
|
Contract.Assert(pCodePage->CodePage == this.dataTableCodePage,
|
||
|
"[DBCSCodePageEncoding.LoadManagedCodePage]Expected to load data table code page");
|
||
|
|
||
|
// Make sure we're really a 1 byte code page
|
||
|
if (pCodePage->ByteCount != 2)
|
||
|
throw new NotSupportedException(
|
||
|
Environment.GetResourceString("NotSupported_NoCodepageData", CodePage));
|
||
|
// Remember our unknown bytes & chars
|
||
|
bytesUnknown = pCodePage->ByteReplace;
|
||
|
charUnknown = pCodePage->UnicodeReplace;
|
||
|
|
||
|
// Need to make sure the fallback buffer's fallback char is correct
|
||
|
if (this.DecoderFallback.IsMicrosoftBestFitFallback)
|
||
|
{
|
||
|
((InternalDecoderBestFitFallback)(this.DecoderFallback)).cReplacement = charUnknown;
|
||
|
}
|
||
|
|
||
|
// Is our replacement bytesUnknown a single or double byte character?
|
||
|
byteCountUnknown = 1;
|
||
|
if (bytesUnknown > 0xff)
|
||
|
byteCountUnknown++;
|
||
|
|
||
|
// We use fallback encoder, which uses ?, which so far all of our tables do as well
|
||
|
Contract.Assert(bytesUnknown == 0x3f,
|
||
|
"[DBCSCodePageEncoding.LoadManagedCodePage]Expected 0x3f (?) as unknown byte character");
|
||
|
|
||
|
// Get our mapped section (bytes to allocate = 2 bytes per 65536 Unicode chars + 2 bytes per 65536 DBCS chars)
|
||
|
// Plus 4 byte to remember CP # when done loading it. (Don't want to get IA64 or anything out of alignment)
|
||
|
byte *pMemorySection = GetSharedMemory(65536 * 2 * 2 + 4 + this.iExtraBytes);
|
||
|
|
||
|
mapBytesToUnicode = (char*)pMemorySection;
|
||
|
mapUnicodeToBytes = (ushort*)(pMemorySection + 65536 * 2);
|
||
|
mapCodePageCached = (int*)(pMemorySection + 65536 * 2 * 2 + this.iExtraBytes);
|
||
|
|
||
|
// If its cached (& filled in) we don't have to do anything else
|
||
|
if (*mapCodePageCached != 0)
|
||
|
{
|
||
|
Contract.Assert(((*mapCodePageCached == this.dataTableCodePage && this.bFlagDataTable) ||
|
||
|
(*mapCodePageCached == this.CodePage && !this.bFlagDataTable)),
|
||
|
"[DBCSCodePageEncoding.LoadManagedCodePage]Expected mapped section cached page flag to be set to data table or regular code page.");
|
||
|
|
||
|
// Special case for GB18030 because it mangles its own code page after this function
|
||
|
if ((*mapCodePageCached != this.dataTableCodePage && this.bFlagDataTable) ||
|
||
|
(*mapCodePageCached != this.CodePage && !this.bFlagDataTable))
|
||
|
throw new OutOfMemoryException(
|
||
|
Environment.GetResourceString("Arg_OutOfMemoryException"));
|
||
|
|
||
|
// If its cached (& filled in) we don't have to do anything else
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// Need to read our data file and fill in our section.
|
||
|
// WARNING: Multiple code pieces could do this at once (so we don't have to lock machine-wide)
|
||
|
// so be careful here. Only stick legal values in here, don't stick temporary values.
|
||
|
|
||
|
// Move to the beginning of the data section
|
||
|
char* pData = (char*)&(pCodePage->FirstDataWord);
|
||
|
|
||
|
// We start at bytes position 0
|
||
|
int bytePosition = 0;
|
||
|
int useBytes = 0;
|
||
|
|
||
|
while (bytePosition < 0x10000)
|
||
|
{
|
||
|
// Get the next byte
|
||
|
char input = *pData;
|
||
|
pData++;
|
||
|
|
||
|
// build our table:
|
||
|
if (input == 1)
|
||
|
{
|
||
|
// Use next data as our byte position
|
||
|
bytePosition = (int)(*pData);
|
||
|
pData++;
|
||
|
continue;
|
||
|
}
|
||
|
else if (input < 0x20 && input > 0)
|
||
|
{
|
||
|
// Advance input characters
|
||
|
bytePosition += input;
|
||
|
continue;
|
||
|
}
|
||
|
else if (input == 0xFFFF)
|
||
|
{
|
||
|
// Same as our bytePosition
|
||
|
useBytes = bytePosition;
|
||
|
input = unchecked((char)bytePosition);
|
||
|
}
|
||
|
else if (input == LEAD_BYTE_CHAR) // 0xfffe
|
||
|
{
|
||
|
// Lead byte mark
|
||
|
Contract.Assert(bytePosition < 0x100, "[DBCSCodePageEncoding.LoadManagedCodePage]expected lead byte to be < 0x100");
|
||
|
useBytes = bytePosition;
|
||
|
// input stays 0xFFFE
|
||
|
}
|
||
|
else if (input == UNICODE_REPLACEMENT_CHAR)
|
||
|
{
|
||
|
// Replacement char is already done
|
||
|
bytePosition++;
|
||
|
continue;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Use this character
|
||
|
useBytes = bytePosition;
|
||
|
// input == input;
|
||
|
}
|
||
|
|
||
|
// We may need to clean up the selected character & position
|
||
|
if (CleanUpBytes(ref useBytes))
|
||
|
{
|
||
|
// Use this selected character at the selected position, don't do this if not supposed to.
|
||
|
if (input != LEAD_BYTE_CHAR)
|
||
|
{
|
||
|
// Don't do this for lead byte marks.
|
||
|
mapUnicodeToBytes[input] = unchecked((ushort)useBytes);
|
||
|
}
|
||
|
mapBytesToUnicode[useBytes] = input;
|
||
|
}
|
||
|
bytePosition++;
|
||
|
}
|
||
|
|
||
|
// See if we have any clean up junk to do
|
||
|
CleanUpEndBytes(mapBytesToUnicode);
|
||
|
|
||
|
// We're done with our mapped section, set our flag so others don't have to rebuild table.
|
||
|
// We only do this if we're flagging(using) the data table as our primary mechanism
|
||
|
if (this.bFlagDataTable)
|
||
|
*mapCodePageCached = this.dataTableCodePage;
|
||
|
}
|
||
|
|
||
|
// Any special processing for this code page
|
||
|
protected virtual bool CleanUpBytes(ref int bytes)
|
||
|
{
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
// Any special processing for this code page
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
protected virtual unsafe void CleanUpEndBytes(char* chars)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
// Private object for locking instead of locking on a public type for SQL reliability work.
|
||
|
private static Object s_InternalSyncObject;
|
||
|
private static Object InternalSyncObject
|
||
|
{
|
||
|
get
|
||
|
{
|
||
|
if (s_InternalSyncObject == null)
|
||
|
{
|
||
|
Object o = new Object();
|
||
|
Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, o, null);
|
||
|
}
|
||
|
return s_InternalSyncObject;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Read in our best fit table
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
protected unsafe override void ReadBestFitTable()
|
||
|
{
|
||
|
// Lock so we don't confuse ourselves.
|
||
|
lock(InternalSyncObject)
|
||
|
{
|
||
|
// If we got a best fit array already then don't do this
|
||
|
if (arrayUnicodeBestFit == null)
|
||
|
{
|
||
|
//
|
||
|
// Read in Best Fit table.
|
||
|
//
|
||
|
|
||
|
// First we have to advance past original character mapping table
|
||
|
// Move to the beginning of the data section
|
||
|
char* pData = (char*)&(pCodePage->FirstDataWord);
|
||
|
|
||
|
// We start at bytes position 0
|
||
|
int bytesPosition = 0;
|
||
|
|
||
|
while (bytesPosition < 0x10000)
|
||
|
{
|
||
|
// Get the next byte
|
||
|
char input = *pData;
|
||
|
pData++;
|
||
|
|
||
|
// build our table:
|
||
|
if (input == 1)
|
||
|
{
|
||
|
// Use next data as our byte position
|
||
|
bytesPosition = (int)(*pData);
|
||
|
pData++;
|
||
|
}
|
||
|
else if (input < 0x20 && input > 0)
|
||
|
{
|
||
|
// Advance input characters
|
||
|
bytesPosition += input;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// All other cases add 1 to bytes position
|
||
|
bytesPosition++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Now bytesPosition is at start of bytes->unicode best fit table
|
||
|
char* pBytes2Unicode = pData;
|
||
|
|
||
|
// Now pData should be pointing to first word of bytes -> unicode best fit table
|
||
|
// (which we're also not using at the moment)
|
||
|
int iBestFitCount = 0;
|
||
|
bytesPosition = *pData;
|
||
|
pData++;
|
||
|
|
||
|
while (bytesPosition < 0x10000)
|
||
|
{
|
||
|
// Get the next byte
|
||
|
char input = *pData;
|
||
|
pData++;
|
||
|
|
||
|
// build our table:
|
||
|
if (input == 1)
|
||
|
{
|
||
|
// Use next data as our byte position
|
||
|
bytesPosition = (int)(*pData);
|
||
|
pData++;
|
||
|
}
|
||
|
else if (input < 0x20 && input > 0)
|
||
|
{
|
||
|
// Advance input characters
|
||
|
bytesPosition += input;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Use this character (unless its unknown, unk just skips 1)
|
||
|
if (input != UNICODE_REPLACEMENT_CHAR)
|
||
|
{
|
||
|
int correctedChar = bytesPosition;
|
||
|
if (CleanUpBytes(ref correctedChar))
|
||
|
{
|
||
|
// Sometimes correction makes them same as no best fit, skip those.
|
||
|
if (mapBytesToUnicode[correctedChar] != input)
|
||
|
{
|
||
|
iBestFitCount++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Position gets incremented in any case.
|
||
|
bytesPosition++;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
// Now we know how big the best fit table has to be
|
||
|
char[] arrayTemp = new char[iBestFitCount * 2];
|
||
|
|
||
|
// Now we know how many best fits we have, so go back & read them in
|
||
|
iBestFitCount = 0;
|
||
|
pData = pBytes2Unicode;
|
||
|
bytesPosition = *pData;
|
||
|
pData++;
|
||
|
bool bOutOfOrder = false;
|
||
|
|
||
|
// Read it all in again
|
||
|
while (bytesPosition < 0x10000)
|
||
|
{
|
||
|
// Get the next byte
|
||
|
char input = *pData;
|
||
|
pData++;
|
||
|
|
||
|
// build our table:
|
||
|
if (input == 1)
|
||
|
{
|
||
|
// Use next data as our byte position
|
||
|
bytesPosition = (int)(*pData);
|
||
|
pData++;
|
||
|
}
|
||
|
else if (input < 0x20 && input > 0)
|
||
|
{
|
||
|
// Advance input characters
|
||
|
bytesPosition += input;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Use this character (unless its unknown, unk just skips 1)
|
||
|
if (input != UNICODE_REPLACEMENT_CHAR)
|
||
|
{
|
||
|
int correctedChar = bytesPosition;
|
||
|
if (CleanUpBytes(ref correctedChar))
|
||
|
{
|
||
|
// Sometimes correction makes them same as no best fit, skip those.
|
||
|
if (mapBytesToUnicode[correctedChar] != input)
|
||
|
{
|
||
|
if (correctedChar != bytesPosition)
|
||
|
bOutOfOrder = true;
|
||
|
|
||
|
arrayTemp[iBestFitCount++] = unchecked((char)correctedChar);
|
||
|
arrayTemp[iBestFitCount++] = input;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Position gets incremented in any case.
|
||
|
bytesPosition++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// If they're out of order we need to sort them.
|
||
|
if (bOutOfOrder)
|
||
|
{
|
||
|
Contract.Assert((arrayTemp.Length / 2) < 20,
|
||
|
"[DBCSCodePageEncoding.ReadBestFitTable]Expected small best fit table < 20 for code page " + CodePage + ", not " + arrayTemp.Length / 2);
|
||
|
|
||
|
for (int i = 0; i < arrayTemp.Length - 2; i+=2)
|
||
|
{
|
||
|
int iSmallest = i;
|
||
|
char cSmallest = arrayTemp[i];
|
||
|
|
||
|
for (int j = i + 2; j < arrayTemp.Length; j+=2)
|
||
|
{
|
||
|
// Find smallest one for front
|
||
|
if (cSmallest > arrayTemp[j])
|
||
|
{
|
||
|
cSmallest = arrayTemp[j];
|
||
|
iSmallest = j;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// If smallest one is something else, switch them
|
||
|
if (iSmallest != i)
|
||
|
{
|
||
|
char temp = arrayTemp[iSmallest];
|
||
|
arrayTemp[iSmallest] = arrayTemp[i];
|
||
|
arrayTemp[i] = temp;
|
||
|
temp = arrayTemp[iSmallest+1];
|
||
|
arrayTemp[iSmallest+1] = arrayTemp[i+1];
|
||
|
arrayTemp[i+1] = temp;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Remember our array
|
||
|
arrayBytesBestFit = arrayTemp;
|
||
|
|
||
|
// Now were at beginning of Unicode -> Bytes best fit table, need to count them
|
||
|
char* pUnicode2Bytes = pData;
|
||
|
int unicodePosition = *(pData++);
|
||
|
iBestFitCount = 0;
|
||
|
|
||
|
while (unicodePosition < 0x10000)
|
||
|
{
|
||
|
// Get the next byte
|
||
|
char input = *pData;
|
||
|
pData++;
|
||
|
|
||
|
// build our table:
|
||
|
if (input == 1)
|
||
|
{
|
||
|
// Use next data as our byte position
|
||
|
unicodePosition = (int)*pData;
|
||
|
pData++;
|
||
|
}
|
||
|
else if (input < 0x20 && input > 0)
|
||
|
{
|
||
|
// Advance input characters
|
||
|
unicodePosition += input;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Same as our unicodePosition or use this character
|
||
|
if (input > 0)
|
||
|
iBestFitCount++;
|
||
|
unicodePosition++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Allocate our table
|
||
|
arrayTemp = new char[iBestFitCount*2];
|
||
|
|
||
|
// Now do it again to fill the array with real values
|
||
|
pData = pUnicode2Bytes;
|
||
|
unicodePosition = *(pData++);
|
||
|
iBestFitCount = 0;
|
||
|
|
||
|
while (unicodePosition < 0x10000)
|
||
|
{
|
||
|
// Get the next byte
|
||
|
char input = *pData;
|
||
|
pData++;
|
||
|
|
||
|
// build our table:
|
||
|
if (input == 1)
|
||
|
{
|
||
|
// Use next data as our byte position
|
||
|
unicodePosition = (int)*pData;
|
||
|
pData++;
|
||
|
}
|
||
|
else if (input < 0x20 && input > 0)
|
||
|
{
|
||
|
// Advance input characters
|
||
|
unicodePosition += input;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (input > 0)
|
||
|
{
|
||
|
// Use this character, may need to clean it up
|
||
|
int correctedChar = (int)input;
|
||
|
if (CleanUpBytes(ref correctedChar))
|
||
|
{
|
||
|
arrayTemp[iBestFitCount++] = unchecked((char)unicodePosition);
|
||
|
// Have to map it to Unicode because best fit will need unicode value of best fit char.
|
||
|
arrayTemp[iBestFitCount++] = mapBytesToUnicode[correctedChar];
|
||
|
|
||
|
// This won't work if it won't round trip.
|
||
|
// We can't do this assert for CP 51932 & 50220 because they aren't
|
||
|
// calling CleanUpBytes() for best fit. All the string stuff here
|
||
|
// also makes this assert slow.
|
||
|
// Contract.Assert(arrayTemp[iBestFitCount-1] != (char)0xFFFD, String.Format(
|
||
|
// "[DBCSCodePageEncoding.ReadBestFitTable] No valid Unicode value {0:X4} for round trip bytes {1:X4}, encoding {2}",
|
||
|
// (int)mapBytesToUnicode[input], (int)input, CodePage));
|
||
|
}
|
||
|
}
|
||
|
unicodePosition++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Remember our array
|
||
|
arrayUnicodeBestFit = arrayTemp;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// GetByteCount
|
||
|
// Note: We start by assuming that the output will be the same as count. Having
|
||
|
// an encoder or fallback may change that assumption
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
|
||
|
{
|
||
|
// Just need to ASSERT, this is called by something else internal that checked parameters already
|
||
|
Contract.Assert(count >= 0, "[DBCSCodePageEncoding.GetByteCount]count is negative");
|
||
|
Contract.Assert(chars != null, "[DBCSCodePageEncoding.GetByteCount]chars is null");
|
||
|
|
||
|
// Assert because we shouldn't be able to have a null encoder.
|
||
|
Contract.Assert(encoderFallback != null, "[DBCSCodePageEncoding.GetByteCount]Attempting to use null fallback");
|
||
|
|
||
|
CheckMemorySection();
|
||
|
|
||
|
// Get any left over characters
|
||
|
char charLeftOver = (char)0;
|
||
|
if (encoder != null)
|
||
|
{
|
||
|
charLeftOver = encoder.charLeftOver;
|
||
|
|
||
|
// Only count if encoder.m_throwOnOverflow
|
||
|
if (encoder.InternalHasFallbackBuffer && encoder.FallbackBuffer.Remaining > 0)
|
||
|
throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
|
||
|
this.EncodingName, encoder.Fallback.GetType()));
|
||
|
}
|
||
|
|
||
|
// prepare our end
|
||
|
int byteCount = 0;
|
||
|
char* charEnd = chars + count;
|
||
|
|
||
|
// For fallback we will need a fallback buffer
|
||
|
EncoderFallbackBuffer fallbackBuffer = null;
|
||
|
|
||
|
// We may have a left over character from last time, try and process it.
|
||
|
if (charLeftOver > 0)
|
||
|
{
|
||
|
Contract.Assert(Char.IsHighSurrogate(charLeftOver), "[DBCSCodePageEncoding.GetByteCount]leftover character should be high surrogate");
|
||
|
Contract.Assert(encoder != null,
|
||
|
"[DBCSCodePageEncoding.GetByteCount]Expect to have encoder if we have a charLeftOver");
|
||
|
|
||
|
// Since left over char was a surrogate, it'll have to be fallen back.
|
||
|
// Get Fallback
|
||
|
fallbackBuffer = encoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
|
||
|
// This will fallback a pair if *chars is a low surrogate
|
||
|
fallbackBuffer.InternalFallback(charLeftOver, ref chars);
|
||
|
}
|
||
|
|
||
|
// Now we may have fallback char[] already (from the encoder)
|
||
|
|
||
|
// We have to use fallback method.
|
||
|
char ch;
|
||
|
while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
|
||
|
chars < charEnd)
|
||
|
{
|
||
|
// First unwind any fallback
|
||
|
if (ch == 0)
|
||
|
{
|
||
|
// No fallback, just get next char
|
||
|
ch = *chars;
|
||
|
chars++;
|
||
|
}
|
||
|
|
||
|
// get byte for this char
|
||
|
ushort sTemp = mapUnicodeToBytes[ch];
|
||
|
|
||
|
// Check for fallback, this'll catch surrogate pairs too.
|
||
|
if (sTemp == 0 && ch != (char)0)
|
||
|
{
|
||
|
if (fallbackBuffer == null)
|
||
|
{
|
||
|
// Initialize the buffer
|
||
|
if (encoder == null)
|
||
|
fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
|
||
|
else
|
||
|
fallbackBuffer = encoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(charEnd - count, charEnd, encoder, false);
|
||
|
}
|
||
|
|
||
|
// Get Fallback
|
||
|
fallbackBuffer.InternalFallback(ch, ref chars);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
// We'll use this one
|
||
|
byteCount++;
|
||
|
if (sTemp >= 0x100)
|
||
|
byteCount++;
|
||
|
}
|
||
|
|
||
|
return (int)byteCount;
|
||
|
}
|
||
|
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal override unsafe int GetBytes(char* chars, int charCount,
|
||
|
byte* bytes, int byteCount, EncoderNLS encoder)
|
||
|
{
|
||
|
// Just need to ASSERT, this is called by something else internal that checked parameters already
|
||
|
Contract.Assert(bytes != null, "[DBCSCodePageEncoding.GetBytes]bytes is null");
|
||
|
Contract.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetBytes]byteCount is negative");
|
||
|
Contract.Assert(chars != null, "[DBCSCodePageEncoding.GetBytes]chars is null");
|
||
|
Contract.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetBytes]charCount is negative");
|
||
|
|
||
|
// Assert because we shouldn't be able to have a null encoder.
|
||
|
Contract.Assert(encoderFallback != null, "[DBCSCodePageEncoding.GetBytes]Attempting to use null encoder fallback");
|
||
|
|
||
|
CheckMemorySection();
|
||
|
|
||
|
// For fallback we will need a fallback buffer
|
||
|
EncoderFallbackBuffer fallbackBuffer = null;
|
||
|
|
||
|
// prepare our end
|
||
|
char* charEnd = chars + charCount;
|
||
|
char* charStart = chars;
|
||
|
byte* byteStart = bytes;
|
||
|
byte* byteEnd = bytes + byteCount;
|
||
|
|
||
|
// Get any left over characters
|
||
|
char charLeftOver = (char)0;
|
||
|
if (encoder != null)
|
||
|
{
|
||
|
charLeftOver = encoder.charLeftOver;
|
||
|
Contract.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver),
|
||
|
"[DBCSCodePageEncoding.GetBytes]leftover character should be high surrogate");
|
||
|
|
||
|
// Go ahead and get the fallback buffer (need leftover fallback if converting)
|
||
|
fallbackBuffer = encoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
|
||
|
|
||
|
// If we're not converting we must not have a fallback buffer
|
||
|
if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
|
||
|
throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
|
||
|
this.EncodingName, encoder.Fallback.GetType()));
|
||
|
|
||
|
// We may have a left over character from last time, try and process it.
|
||
|
if (charLeftOver > 0)
|
||
|
{
|
||
|
Contract.Assert(encoder != null,
|
||
|
"[DBCSCodePageEncoding.GetBytes]Expect to have encoder if we have a charLeftOver");
|
||
|
|
||
|
// Since left over char was a surrogate, it'll have to be fallen back.
|
||
|
// Get Fallback
|
||
|
fallbackBuffer.InternalFallback(charLeftOver, ref chars);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Now we may have fallback char[] already from the encoder
|
||
|
|
||
|
// Go ahead and do it, including the fallback.
|
||
|
char ch;
|
||
|
while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
|
||
|
chars < charEnd)
|
||
|
{
|
||
|
// First unwind any fallback
|
||
|
if (ch == 0)
|
||
|
{
|
||
|
// No fallback, just get next char
|
||
|
ch = *chars;
|
||
|
chars++;
|
||
|
}
|
||
|
|
||
|
// get byte for this char
|
||
|
ushort sTemp = mapUnicodeToBytes[ch];
|
||
|
|
||
|
// Check for fallback, this'll catch surrogate pairs too.
|
||
|
if (sTemp == 0 && ch != (char)0)
|
||
|
{
|
||
|
if (fallbackBuffer == null)
|
||
|
{
|
||
|
// Initialize the buffer
|
||
|
Contract.Assert(encoder == null,
|
||
|
"[DBCSCodePageEncoding.GetBytes]Expected delayed create fallback only if no encoder.");
|
||
|
fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
|
||
|
fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
|
||
|
}
|
||
|
|
||
|
// Get Fallback
|
||
|
fallbackBuffer.InternalFallback(ch, ref chars);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
// We'll use this one (or two)
|
||
|
// Bounds check
|
||
|
|
||
|
// Go ahead and add it, lead byte 1st if necessary
|
||
|
if (sTemp >= 0x100)
|
||
|
{
|
||
|
if (bytes + 1 >= byteEnd)
|
||
|
{
|
||
|
// didn't use this char, we'll throw or use buffer
|
||
|
if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
|
||
|
{
|
||
|
Contract.Assert(chars > charStart,
|
||
|
"[DBCSCodePageEncoding.GetBytes]Expected chars to have advanced (double byte case)");
|
||
|
chars--; // don't use last char
|
||
|
}
|
||
|
else
|
||
|
fallbackBuffer.MovePrevious(); // don't use last fallback
|
||
|
ThrowBytesOverflow(encoder, chars == charStart); // throw ?
|
||
|
break; // don't throw, stop
|
||
|
}
|
||
|
|
||
|
*bytes = unchecked((byte)(sTemp >> 8));
|
||
|
bytes++;
|
||
|
}
|
||
|
// Single byte
|
||
|
else if (bytes >= byteEnd)
|
||
|
{
|
||
|
// didn't use this char, we'll throw or use buffer
|
||
|
if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
|
||
|
{
|
||
|
Contract.Assert(chars > charStart,
|
||
|
"[DBCSCodePageEncoding.GetBytes]Expected chars to have advanced (single byte case)");
|
||
|
chars--; // don't use last char
|
||
|
}
|
||
|
else
|
||
|
fallbackBuffer.MovePrevious(); // don't use last fallback
|
||
|
ThrowBytesOverflow(encoder, chars == charStart); // throw ?
|
||
|
break; // don't throw, stop
|
||
|
}
|
||
|
|
||
|
*bytes = unchecked((byte)(sTemp & 0xff));
|
||
|
bytes++;
|
||
|
}
|
||
|
|
||
|
// encoder stuff if we have one
|
||
|
if (encoder != null)
|
||
|
{
|
||
|
// Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
|
||
|
if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
|
||
|
// Clear it in case of MustFlush
|
||
|
encoder.charLeftOver = (char)0;
|
||
|
|
||
|
// Set our chars used count
|
||
|
encoder.m_charsUsed = (int)(chars - charStart);
|
||
|
}
|
||
|
|
||
|
// If we're not converting we must not have a fallback buffer
|
||
|
// (We don't really have a way to clear none-encoder using fallbacks however)
|
||
|
// Contract.Assert((encoder == null || encoder.m_throwOnOverflow) &&
|
||
|
// (fallbackBuffer == null || fallbackBuffer.Remaining == 0),
|
||
|
// "[DBCSEncoding.GetBytes]Expected empty fallback buffer at end if not converting");
|
||
|
|
||
|
return (int)(bytes - byteStart);
|
||
|
}
|
||
|
|
||
|
// This is internal and called by something else,
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
|
||
|
{
|
||
|
// Just assert, we're called internally so these should be safe, checked already
|
||
|
Contract.Assert(bytes != null, "[DBCSCodePageEncoding.GetCharCount]bytes is null");
|
||
|
Contract.Assert(count >= 0, "[DBCSCodePageEncoding.GetCharCount]byteCount is negative");
|
||
|
|
||
|
CheckMemorySection();
|
||
|
|
||
|
// Fix our decoder
|
||
|
DBCSDecoder decoder = (DBCSDecoder)baseDecoder;
|
||
|
|
||
|
// Get our fallback
|
||
|
DecoderFallbackBuffer fallbackBuffer = null;
|
||
|
|
||
|
// We'll need to know where the end is
|
||
|
byte* byteEnd = bytes + count;
|
||
|
int charCount = count; // Assume 1 char / byte
|
||
|
|
||
|
// Shouldn't have anything in fallback buffer for GetCharCount
|
||
|
// (don't have to check m_throwOnOverflow for count)
|
||
|
Contract.Assert(decoder == null ||
|
||
|
!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
|
||
|
"[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at start");
|
||
|
|
||
|
// If we have a left over byte, use it
|
||
|
if (decoder != null && decoder.bLeftOver > 0)
|
||
|
{
|
||
|
// We have a left over byte?
|
||
|
if (count == 0)
|
||
|
{
|
||
|
// No input though
|
||
|
if (!decoder.MustFlush)
|
||
|
{
|
||
|
// Don't have to flush
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
Contract.Assert(fallbackBuffer == null,
|
||
|
"[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer");
|
||
|
fallbackBuffer = decoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(bytes, null);
|
||
|
|
||
|
byte[] byteBuffer = new byte[] { unchecked((byte)decoder.bLeftOver) };
|
||
|
return fallbackBuffer.InternalFallback(byteBuffer, bytes);
|
||
|
}
|
||
|
|
||
|
// Get our full info
|
||
|
int iBytes = decoder.bLeftOver << 8;
|
||
|
iBytes |= (*bytes);
|
||
|
bytes++;
|
||
|
|
||
|
// This is either 1 known char or fallback
|
||
|
// Already counted 1 char
|
||
|
// Look up our bytes
|
||
|
char cDecoder = mapBytesToUnicode[iBytes];
|
||
|
if (cDecoder == 0 && iBytes != 0)
|
||
|
{
|
||
|
// Deallocate preallocated one
|
||
|
charCount--;
|
||
|
|
||
|
// We'll need a fallback
|
||
|
Contract.Assert(fallbackBuffer == null,
|
||
|
"[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer for unknown pair");
|
||
|
fallbackBuffer = decoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(byteEnd - count, null);
|
||
|
|
||
|
// Do fallback, we know there're 2 bytes
|
||
|
byte[] byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) };
|
||
|
charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
|
||
|
}
|
||
|
// else we already reserved space for this one.
|
||
|
}
|
||
|
|
||
|
// Loop, watch out for fallbacks
|
||
|
while (bytes < byteEnd)
|
||
|
{
|
||
|
// Faster if don't use *bytes++;
|
||
|
int iBytes = *bytes;
|
||
|
bytes++;
|
||
|
char c = mapBytesToUnicode[iBytes];
|
||
|
|
||
|
// See if it was a double byte character
|
||
|
if (c == LEAD_BYTE_CHAR)
|
||
|
{
|
||
|
// Its a lead byte
|
||
|
charCount--; // deallocate preallocated lead byte
|
||
|
if (bytes < byteEnd)
|
||
|
{
|
||
|
// Have another to use, so use it
|
||
|
iBytes <<= 8;
|
||
|
iBytes |= *bytes;
|
||
|
bytes++;
|
||
|
c = mapBytesToUnicode[iBytes];
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// No input left
|
||
|
if (decoder == null || decoder.MustFlush)
|
||
|
{
|
||
|
// have to flush anyway, set to unknown so we use fallback in a 'sec
|
||
|
charCount++; // reallocate deallocated lead byte
|
||
|
c = UNKNOWN_CHAR_FLAG;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// We'll stick it in decoder
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// See if it was unknown.
|
||
|
// Unknown and known chars already allocated, but fallbacks aren't
|
||
|
if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
|
||
|
{
|
||
|
if (fallbackBuffer == null)
|
||
|
{
|
||
|
if (decoder == null)
|
||
|
fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
|
||
|
else
|
||
|
fallbackBuffer = decoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(byteEnd - count, null);
|
||
|
}
|
||
|
|
||
|
// Do fallback
|
||
|
charCount--; // Get rid of preallocated extra char
|
||
|
byte[] byteBuffer = null;
|
||
|
if (iBytes < 0x100)
|
||
|
byteBuffer = new byte[] { unchecked((byte)iBytes) };
|
||
|
else
|
||
|
byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) };
|
||
|
charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Shouldn't have anything in fallback buffer for GetChars
|
||
|
Contract.Assert(decoder == null || !decoder.m_throwOnOverflow ||
|
||
|
!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
|
||
|
"[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at end");
|
||
|
|
||
|
// Return our count
|
||
|
return charCount;
|
||
|
}
|
||
|
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal override unsafe int GetChars(byte* bytes, int byteCount,
|
||
|
char* chars, int charCount, DecoderNLS baseDecoder)
|
||
|
{
|
||
|
// Just need to ASSERT, this is called by something else internal that checked parameters already
|
||
|
Contract.Assert(bytes != null, "[DBCSCodePageEncoding.GetChars]bytes is null");
|
||
|
Contract.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetChars]byteCount is negative");
|
||
|
Contract.Assert(chars != null, "[DBCSCodePageEncoding.GetChars]chars is null");
|
||
|
Contract.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetChars]charCount is negative");
|
||
|
|
||
|
CheckMemorySection();
|
||
|
|
||
|
// Fix our decoder
|
||
|
DBCSDecoder decoder = (DBCSDecoder)baseDecoder;
|
||
|
|
||
|
// We'll need to know where the end is
|
||
|
byte* byteStart = bytes;
|
||
|
byte* byteEnd = bytes + byteCount;
|
||
|
char* charStart = chars;
|
||
|
char* charEnd = chars + charCount;
|
||
|
bool bUsedDecoder = false;
|
||
|
|
||
|
// Get our fallback
|
||
|
DecoderFallbackBuffer fallbackBuffer = null;
|
||
|
|
||
|
// Shouldn't have anything in fallback buffer for GetChars
|
||
|
Contract.Assert(decoder == null || !decoder.m_throwOnOverflow ||
|
||
|
!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
|
||
|
"[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");
|
||
|
|
||
|
// If we have a left over byte, use it
|
||
|
if (decoder != null && decoder.bLeftOver > 0)
|
||
|
{
|
||
|
// We have a left over byte?
|
||
|
if (byteCount == 0)
|
||
|
{
|
||
|
// No input though
|
||
|
if (!decoder.MustFlush)
|
||
|
{
|
||
|
// Don't have to flush
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// Well, we're flushing, so use '?' or fallback
|
||
|
// fallback leftover byte
|
||
|
Contract.Assert(fallbackBuffer == null,
|
||
|
"[DBCSCodePageEncoding.GetChars]Expected empty fallback");
|
||
|
fallbackBuffer = decoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(bytes, charEnd);
|
||
|
|
||
|
// If no room its hopeless, this was 1st fallback
|
||
|
byte[] byteBuffer = new byte[] { unchecked((byte)decoder.bLeftOver) };
|
||
|
if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
|
||
|
ThrowCharsOverflow(decoder, true);
|
||
|
|
||
|
decoder.bLeftOver = 0;
|
||
|
|
||
|
// Done, return it
|
||
|
return (int)(chars-charStart);
|
||
|
}
|
||
|
|
||
|
// Get our full info
|
||
|
int iBytes = decoder.bLeftOver << 8;
|
||
|
iBytes |= (*bytes);
|
||
|
bytes++;
|
||
|
|
||
|
// Look up our bytes
|
||
|
char cDecoder = mapBytesToUnicode[iBytes];
|
||
|
if (cDecoder == UNKNOWN_CHAR_FLAG && iBytes != 0)
|
||
|
{
|
||
|
Contract.Assert(fallbackBuffer == null,
|
||
|
"[DBCSCodePageEncoding.GetChars]Expected empty fallback for two bytes");
|
||
|
fallbackBuffer = decoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
|
||
|
|
||
|
byte[] byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) };
|
||
|
if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
|
||
|
ThrowCharsOverflow(decoder, true);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Do we have output room?, hopeless if not, this is first char
|
||
|
if (chars >= charEnd)
|
||
|
ThrowCharsOverflow(decoder, true);
|
||
|
|
||
|
*(chars++) = cDecoder;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Loop, paying attention to our fallbacks.
|
||
|
while (bytes < byteEnd)
|
||
|
{
|
||
|
// Faster if don't use *bytes++;
|
||
|
int iBytes = *bytes;
|
||
|
bytes++;
|
||
|
char c = mapBytesToUnicode[iBytes];
|
||
|
|
||
|
// See if it was a double byte character
|
||
|
if (c == LEAD_BYTE_CHAR)
|
||
|
{
|
||
|
// Its a lead byte
|
||
|
if (bytes < byteEnd)
|
||
|
{
|
||
|
// Have another to use, so use it
|
||
|
iBytes <<= 8;
|
||
|
iBytes |= *bytes;
|
||
|
bytes++;
|
||
|
c = mapBytesToUnicode[iBytes];
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// No input left
|
||
|
if (decoder == null || decoder.MustFlush)
|
||
|
{
|
||
|
// have to flush anyway, set to unknown so we use fallback in a 'sec
|
||
|
c = UNKNOWN_CHAR_FLAG;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Stick it in decoder
|
||
|
bUsedDecoder = true;
|
||
|
decoder.bLeftOver = (byte)iBytes;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// See if it was unknown
|
||
|
if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
|
||
|
{
|
||
|
if (fallbackBuffer == null)
|
||
|
{
|
||
|
if (decoder == null)
|
||
|
fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
|
||
|
else
|
||
|
fallbackBuffer = decoder.FallbackBuffer;
|
||
|
fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
|
||
|
}
|
||
|
|
||
|
// Do fallback
|
||
|
byte[] byteBuffer = null;
|
||
|
if (iBytes < 0x100)
|
||
|
byteBuffer = new byte[] { unchecked((byte)iBytes) };
|
||
|
else
|
||
|
byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) };
|
||
|
if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
|
||
|
{
|
||
|
// May or may not throw, but we didn't get these byte(s)
|
||
|
Contract.Assert(bytes >= byteStart + byteBuffer.Length,
|
||
|
"[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for fallback");
|
||
|
bytes-=byteBuffer.Length; // didn't use these byte(s)
|
||
|
fallbackBuffer.InternalReset(); // Didn't fall this back
|
||
|
ThrowCharsOverflow(decoder, bytes == byteStart); // throw?
|
||
|
break; // don't throw, but stop loop
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Do we have buffer room?
|
||
|
if (chars >= charEnd)
|
||
|
{
|
||
|
// May or may not throw, but we didn't get these byte(s)
|
||
|
Contract.Assert(bytes > byteStart,
|
||
|
"[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for lead byte");
|
||
|
bytes--; // unused byte
|
||
|
if (iBytes >= 0x100)
|
||
|
{
|
||
|
Contract.Assert(bytes > byteStart,
|
||
|
"[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for trail byte");
|
||
|
bytes--; // 2nd unused byte
|
||
|
}
|
||
|
ThrowCharsOverflow(decoder, bytes == byteStart); // throw?
|
||
|
break; // don't throw, but stop loop
|
||
|
}
|
||
|
|
||
|
*(chars++) = c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// We already stuck it in encoder if necessary, but we have to clear cases where nothing new got into decoder
|
||
|
if (decoder != null)
|
||
|
{
|
||
|
// Clear it in case of MustFlush
|
||
|
if (bUsedDecoder == false)
|
||
|
{
|
||
|
decoder.bLeftOver = 0;
|
||
|
}
|
||
|
|
||
|
// Remember our count
|
||
|
decoder.m_bytesUsed = (int)(bytes - byteStart);
|
||
|
}
|
||
|
|
||
|
// Shouldn't have anything in fallback buffer for GetChars
|
||
|
Contract.Assert(decoder == null || !decoder.m_throwOnOverflow ||
|
||
|
!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
|
||
|
"[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at end");
|
||
|
|
||
|
// Return length of our output
|
||
|
return (int)(chars - charStart);
|
||
|
}
|
||
|
|
||
|
public override int GetMaxByteCount(int charCount)
|
||
|
{
|
||
|
if (charCount < 0)
|
||
|
throw new ArgumentOutOfRangeException("charCount",
|
||
|
Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
|
||
|
Contract.EndContractBlock();
|
||
|
|
||
|
// Characters would be # of characters + 1 in case high surrogate is ? * max fallback
|
||
|
long byteCount = (long)charCount + 1;
|
||
|
|
||
|
if (EncoderFallback.MaxCharCount > 1)
|
||
|
byteCount *= EncoderFallback.MaxCharCount;
|
||
|
|
||
|
// 2 to 1 is worst case. Already considered surrogate fallback
|
||
|
byteCount *= 2;
|
||
|
|
||
|
if (byteCount > 0x7fffffff)
|
||
|
throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
|
||
|
|
||
|
return (int)byteCount;
|
||
|
}
|
||
|
|
||
|
public override int GetMaxCharCount(int byteCount)
|
||
|
{
|
||
|
if (byteCount < 0)
|
||
|
throw new ArgumentOutOfRangeException("byteCount",
|
||
|
Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
|
||
|
Contract.EndContractBlock();
|
||
|
|
||
|
// DBCS is pretty much the same, but could have hanging high byte making extra ? and fallback for unknown
|
||
|
long charCount = ((long)byteCount + 1);
|
||
|
|
||
|
// 1 to 1 for most characters. Only surrogates with fallbacks have less, unknown fallbacks could be longer.
|
||
|
if (DecoderFallback.MaxCharCount > 1)
|
||
|
charCount *= DecoderFallback.MaxCharCount;
|
||
|
|
||
|
if (charCount > 0x7fffffff)
|
||
|
throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
|
||
|
|
||
|
return (int)charCount;
|
||
|
}
|
||
|
|
||
|
public override Decoder GetDecoder()
|
||
|
{
|
||
|
return new DBCSDecoder(this);
|
||
|
}
|
||
|
|
||
|
[Serializable]
|
||
|
internal class DBCSDecoder : DecoderNLS
|
||
|
{
|
||
|
// Need a place for the last left over byte
|
||
|
internal byte bLeftOver = 0;
|
||
|
|
||
|
public DBCSDecoder(DBCSCodePageEncoding encoding) : base(encoding)
|
||
|
{
|
||
|
// Base calls reset
|
||
|
}
|
||
|
|
||
|
public override void Reset()
|
||
|
{
|
||
|
this.bLeftOver = 0;
|
||
|
if (m_fallbackBuffer != null)
|
||
|
m_fallbackBuffer.Reset();
|
||
|
}
|
||
|
|
||
|
// Anything left in our decoder?
|
||
|
internal override bool HasState
|
||
|
{
|
||
|
get
|
||
|
{
|
||
|
return (this.bLeftOver != 0);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
#endif // FEATURE_CODEPAGES_FILE
|
||
|
|