356 lines
14 KiB
C#
356 lines
14 KiB
C#
|
// ==++==
|
||
|
//
|
||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||
|
//
|
||
|
// ==--==
|
||
|
#if FEATURE_CODEPAGES_FILE
|
||
|
namespace System.Text
|
||
|
{
|
||
|
using System;
|
||
|
using System.Diagnostics.Contracts;
|
||
|
using System.Globalization;
|
||
|
using System.Runtime.InteropServices;
|
||
|
using System.Security;
|
||
|
using System.Collections;
|
||
|
using System.Runtime.CompilerServices;
|
||
|
using System.Runtime.Serialization;
|
||
|
using System.Runtime.Versioning;
|
||
|
using System.Security.Permissions;
|
||
|
using Microsoft.Win32.SafeHandles;
|
||
|
|
||
|
// Our input file data structures look like:
|
||
|
//
|
||
|
// Header Structure Looks Like:
|
||
|
// struct NLSPlusHeader
|
||
|
// {
|
||
|
// WORD[16] filename; // 32 bytes
|
||
|
// WORD[4] version; // 8 bytes = 40 // I.e: 3, 2, 0, 0
|
||
|
// WORD count; // 2 bytes = 42 // Number of code page index's that'll follow
|
||
|
// }
|
||
|
//
|
||
|
// Each code page section looks like:
|
||
|
// struct NLSCodePageIndex
|
||
|
// {
|
||
|
// WORD[16] codePageName; // 32 bytes
|
||
|
// WORD codePage; // +2 bytes = 34
|
||
|
// WORD byteCount; // +2 bytes = 36
|
||
|
// DWORD offset; // +4 bytes = 40 // Bytes from beginning of FILE.
|
||
|
// }
|
||
|
//
|
||
|
// Each code page then has its own header
|
||
|
// struct NLSCodePage
|
||
|
// {
|
||
|
// WORD[16] codePageName; // 32 bytes
|
||
|
// WORD[4] version; // 8 bytes = 40 // I.e: 3.2.0.0
|
||
|
// WORD codePage; // 2 bytes = 42
|
||
|
// WORD byteCount; // 2 bytes = 44 // 1 or 2 byte code page (SBCS or DBCS)
|
||
|
// WORD unicodeReplace; // 2 bytes = 46 // default replacement unicode character
|
||
|
// WORD byteReplace; // 2 bytes = 48 // default replacement byte(s)
|
||
|
// BYTE[] data; // data section
|
||
|
// }
|
||
|
|
||
|
[Serializable]
|
||
|
internal abstract class BaseCodePageEncoding : EncodingNLS, ISerializable
|
||
|
{
|
||
|
// Static & Const stuff
|
||
|
internal const String CODE_PAGE_DATA_FILE_NAME = "codepages.nlp";
|
||
|
[NonSerialized]
|
||
|
protected int dataTableCodePage;
|
||
|
|
||
|
// Variables to help us allocate/mark our memory section correctly
|
||
|
[NonSerialized]
|
||
|
protected bool bFlagDataTable = true;
|
||
|
[NonSerialized]
|
||
|
protected int iExtraBytes = 0;
|
||
|
|
||
|
// Our private unicode to bytes best fit array and visa versa.
|
||
|
[NonSerialized]
|
||
|
protected char[] arrayUnicodeBestFit = null;
|
||
|
[NonSerialized]
|
||
|
protected char[] arrayBytesBestFit = null;
|
||
|
|
||
|
// This is used to help ISCII, EUCJP and ISO2022 figure out they're MlangEncodings
|
||
|
[NonSerialized]
|
||
|
protected bool m_bUseMlangTypeForSerialization = false;
|
||
|
|
||
|
[System.Security.SecuritySafeCritical] // static constructors should be safe to call
|
||
|
static BaseCodePageEncoding()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// This is the header for the native data table that we load from CODE_PAGE_DATA_FILE_NAME.
|
||
|
//
|
||
|
// Explicit layout is used here since a syntax like char[16] can not be used in sequential layout.
|
||
|
[StructLayout(LayoutKind.Explicit)]
|
||
|
internal unsafe struct CodePageDataFileHeader
|
||
|
{
|
||
|
[FieldOffset(0)]
|
||
|
internal char TableName; // WORD[16]
|
||
|
[FieldOffset(0x20)]
|
||
|
internal ushort Version; // WORD[4]
|
||
|
[FieldOffset(0x28)]
|
||
|
internal short CodePageCount; // WORD
|
||
|
[FieldOffset(0x2A)]
|
||
|
internal short unused1; // Add a unused WORD so that CodePages is aligned with DWORD boundary.
|
||
|
// Otherwise, 64-bit version will fail.
|
||
|
[FieldOffset(0x2C)]
|
||
|
internal CodePageIndex CodePages; // Start of code page index
|
||
|
}
|
||
|
|
||
|
[StructLayout(LayoutKind.Explicit, Pack=2)]
|
||
|
internal unsafe struct CodePageIndex
|
||
|
{
|
||
|
[FieldOffset(0)]
|
||
|
internal char CodePageName; // WORD[16]
|
||
|
[FieldOffset(0x20)]
|
||
|
internal short CodePage; // WORD
|
||
|
[FieldOffset(0x22)]
|
||
|
internal short ByteCount; // WORD
|
||
|
[FieldOffset(0x24)]
|
||
|
internal int Offset; // DWORD
|
||
|
}
|
||
|
|
||
|
[StructLayout(LayoutKind.Explicit)]
|
||
|
internal unsafe struct CodePageHeader
|
||
|
{
|
||
|
[FieldOffset(0)]
|
||
|
internal char CodePageName; // WORD[16]
|
||
|
[FieldOffset(0x20)]
|
||
|
internal ushort VersionMajor; // WORD
|
||
|
[FieldOffset(0x22)]
|
||
|
internal ushort VersionMinor; // WORD
|
||
|
[FieldOffset(0x24)]
|
||
|
internal ushort VersionRevision;// WORD
|
||
|
[FieldOffset(0x26)]
|
||
|
internal ushort VersionBuild; // WORD
|
||
|
[FieldOffset(0x28)]
|
||
|
internal short CodePage; // WORD
|
||
|
[FieldOffset(0x2a)]
|
||
|
internal short ByteCount; // WORD // 1 or 2 byte code page (SBCS or DBCS)
|
||
|
[FieldOffset(0x2c)]
|
||
|
internal char UnicodeReplace; // WORD // default replacement unicode character
|
||
|
[FieldOffset(0x2e)]
|
||
|
internal ushort ByteReplace; // WORD // default replacement bytes
|
||
|
[FieldOffset(0x30)]
|
||
|
internal short FirstDataWord; // WORD[]
|
||
|
}
|
||
|
|
||
|
// Initialize our global stuff
|
||
|
[SecurityCritical]
|
||
|
unsafe static CodePageDataFileHeader* m_pCodePageFileHeader =
|
||
|
(CodePageDataFileHeader*)GlobalizationAssembly.GetGlobalizationResourceBytePtr(
|
||
|
typeof(CharUnicodeInfo).Assembly, CODE_PAGE_DATA_FILE_NAME);
|
||
|
|
||
|
// Real variables
|
||
|
[NonSerialized]
|
||
|
[SecurityCritical]
|
||
|
unsafe protected CodePageHeader* pCodePage = null;
|
||
|
|
||
|
// Safe handle wrapper around section map view
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
[NonSerialized]
|
||
|
protected SafeViewOfFileHandle safeMemorySectionHandle = null;
|
||
|
|
||
|
// Safe handle wrapper around mapped file handle
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
[NonSerialized]
|
||
|
protected SafeFileMappingHandle safeFileMappingHandle = null;
|
||
|
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal BaseCodePageEncoding(int codepage) : this(codepage, codepage)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal BaseCodePageEncoding(int codepage, int dataCodePage) :
|
||
|
base(codepage == 0? Microsoft.Win32.Win32Native.GetACP(): codepage)
|
||
|
{
|
||
|
// Remember number of code page that we'll be using the table for.
|
||
|
dataTableCodePage = dataCodePage;
|
||
|
LoadCodePageTables();
|
||
|
}
|
||
|
|
||
|
// Constructor called by serialization.
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal BaseCodePageEncoding(SerializationInfo info, StreamingContext context) : base(0)
|
||
|
{
|
||
|
// We cannot ever call this, we've proxied ourselved to CodePageEncoding
|
||
|
throw new ArgumentNullException("this");
|
||
|
}
|
||
|
|
||
|
// ISerializable implementation
|
||
|
#if FEATURE_SERIALIZATION
|
||
|
[System.Security.SecurityCritical] // auto-generated_required
|
||
|
void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
|
||
|
{
|
||
|
// Make sure to get teh base stuff too This throws if info is null
|
||
|
SerializeEncoding(info, context);
|
||
|
Contract.Assert(info!=null, "[BaseCodePageEncoding.GetObjectData] Expected null info to throw");
|
||
|
|
||
|
// Just need Everett maxCharSize (BaseCodePageEncoding) or m_maxByteSize (MLangBaseCodePageEncoding)
|
||
|
info.AddValue(m_bUseMlangTypeForSerialization ? "m_maxByteSize" : "maxCharSize",
|
||
|
this.IsSingleByte ? 1 : 2);
|
||
|
|
||
|
// Use this class or MLangBaseCodePageEncoding as our deserializer.
|
||
|
info.SetType(m_bUseMlangTypeForSerialization ? typeof(MLangCodePageEncoding) :
|
||
|
typeof(CodePageEncoding));
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
// We need to load tables for our code page
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
private unsafe void LoadCodePageTables()
|
||
|
{
|
||
|
CodePageHeader* pCodePage = FindCodePage(dataTableCodePage);
|
||
|
|
||
|
// Make sure we have one
|
||
|
if (pCodePage == null)
|
||
|
{
|
||
|
// Didn't have one
|
||
|
throw new NotSupportedException(
|
||
|
Environment.GetResourceString("NotSupported_NoCodepageData", CodePage));
|
||
|
}
|
||
|
|
||
|
// Remember our code page
|
||
|
this.pCodePage = pCodePage;
|
||
|
|
||
|
// We had it, so load it
|
||
|
LoadManagedCodePage();
|
||
|
}
|
||
|
|
||
|
// Look up the code page pointer
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
private static unsafe CodePageHeader* FindCodePage(int codePage)
|
||
|
{
|
||
|
// We'll have to loop through all of the m_pCodePageIndex[] items to find our code page, this isn't
|
||
|
// binary or anything so its not monsterously fast.
|
||
|
for (int i = 0; i < m_pCodePageFileHeader->CodePageCount; i++)
|
||
|
{
|
||
|
CodePageIndex* pCodePageIndex = (&(m_pCodePageFileHeader->CodePages)) + i;
|
||
|
|
||
|
if (pCodePageIndex->CodePage == codePage)
|
||
|
{
|
||
|
// Found it!
|
||
|
CodePageHeader* pCodePage =
|
||
|
(CodePageHeader*)((byte*)m_pCodePageFileHeader + pCodePageIndex->Offset);
|
||
|
return pCodePage;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Couldn't find it
|
||
|
return null;
|
||
|
}
|
||
|
|
||
|
// Get our code page byte count
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal static unsafe int GetCodePageByteSize(int codePage)
|
||
|
{
|
||
|
// Get our code page info
|
||
|
CodePageHeader* pCodePage = FindCodePage(codePage);
|
||
|
|
||
|
// If null return 0
|
||
|
if (pCodePage == null)
|
||
|
return 0;
|
||
|
|
||
|
Contract.Assert(pCodePage->ByteCount == 1 || pCodePage->ByteCount == 2,
|
||
|
"[BaseCodePageEncoding] Code page (" + codePage + ") has invalid byte size (" + pCodePage->ByteCount + ") in table");
|
||
|
// Return what it says for byte count
|
||
|
return pCodePage->ByteCount;
|
||
|
}
|
||
|
|
||
|
// We have a managed code page entry, so load our tables
|
||
|
[System.Security.SecurityCritical]
|
||
|
protected abstract unsafe void LoadManagedCodePage();
|
||
|
|
||
|
// Allocate memory to load our code page
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
[ResourceExposure(ResourceScope.None)]
|
||
|
[ResourceConsumption(ResourceScope.Machine, ResourceScope.Machine)]
|
||
|
protected unsafe byte* GetSharedMemory(int iSize)
|
||
|
{
|
||
|
// Build our name
|
||
|
String strName = GetMemorySectionName();
|
||
|
|
||
|
IntPtr mappedFileHandle;
|
||
|
|
||
|
// This gets shared memory for our map. If its can't, it gives us clean memory.
|
||
|
Byte *pMemorySection = EncodingTable.nativeCreateOpenFileMapping(strName, iSize, out mappedFileHandle);
|
||
|
Contract.Assert(pMemorySection != null,
|
||
|
"[BaseCodePageEncoding.GetSharedMemory] Expected non-null memory section to be opened");
|
||
|
|
||
|
// If that failed, we have to die.
|
||
|
if (pMemorySection == null)
|
||
|
throw new OutOfMemoryException(
|
||
|
Environment.GetResourceString("Arg_OutOfMemoryException"));
|
||
|
|
||
|
// if we have null file handle. this means memory was allocated after
|
||
|
// failing to open the mapped file.
|
||
|
|
||
|
if (mappedFileHandle != IntPtr.Zero)
|
||
|
{
|
||
|
safeMemorySectionHandle = new SafeViewOfFileHandle((IntPtr) pMemorySection, true);
|
||
|
safeFileMappingHandle = new SafeFileMappingHandle(mappedFileHandle, true);
|
||
|
}
|
||
|
|
||
|
return pMemorySection;
|
||
|
}
|
||
|
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
protected unsafe virtual String GetMemorySectionName()
|
||
|
{
|
||
|
int iUseCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;
|
||
|
|
||
|
String strName = String.Format(CultureInfo.InvariantCulture, "NLS_CodePage_{0}_{1}_{2}_{3}_{4}",
|
||
|
iUseCodePage, this.pCodePage->VersionMajor, this.pCodePage->VersionMinor,
|
||
|
this.pCodePage->VersionRevision, this.pCodePage->VersionBuild);
|
||
|
|
||
|
return strName;
|
||
|
}
|
||
|
|
||
|
[System.Security.SecurityCritical]
|
||
|
protected abstract unsafe void ReadBestFitTable();
|
||
|
|
||
|
[System.Security.SecuritySafeCritical] //
|
||
|
internal override char[] GetBestFitUnicodeToBytesData()
|
||
|
{
|
||
|
// Read in our best fit table if necessary
|
||
|
if (arrayUnicodeBestFit == null) ReadBestFitTable();
|
||
|
|
||
|
Contract.Assert(arrayUnicodeBestFit != null,
|
||
|
"[BaseCodePageEncoding.GetBestFitUnicodeToBytesData]Expected non-null arrayUnicodeBestFit");
|
||
|
|
||
|
// Normally we don't have any best fit data.
|
||
|
return arrayUnicodeBestFit;
|
||
|
}
|
||
|
|
||
|
[System.Security.SecuritySafeCritical] //
|
||
|
internal override char[] GetBestFitBytesToUnicodeData()
|
||
|
{
|
||
|
// Read in our best fit table if necessary
|
||
|
if (arrayBytesBestFit == null) ReadBestFitTable();
|
||
|
|
||
|
Contract.Assert(arrayBytesBestFit != null,
|
||
|
"[BaseCodePageEncoding.GetBestFitBytesToUnicodeData]Expected non-null arrayBytesBestFit");
|
||
|
|
||
|
// Normally we don't have any best fit data.
|
||
|
return arrayBytesBestFit;
|
||
|
}
|
||
|
|
||
|
// During the AppDomain shutdown the Encoding class may already finalized and the memory section
|
||
|
// is invalid. so we detect that by validating the memory section handle then re-initialize the memory
|
||
|
// section by calling LoadManagedCodePage() method and eventually the mapped file handle and
|
||
|
// the memory section pointer will get finalized one more time.
|
||
|
[System.Security.SecurityCritical] // auto-generated
|
||
|
internal unsafe void CheckMemorySection()
|
||
|
{
|
||
|
if (safeMemorySectionHandle != null && safeMemorySectionHandle.DangerousGetHandle() == IntPtr.Zero)
|
||
|
{
|
||
|
LoadManagedCodePage();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#endif // FEATURE_CODEPAGES_FILE
|