98 lines
4.1 KiB
C#
98 lines
4.1 KiB
C#
|
//------------------------------------------------------------------------------
|
||
|
// <copyright file="CodePageUtils.cs" company="Microsoft">
|
||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||
|
// </copyright>
|
||
|
//------------------------------------------------------------------------------
|
||
|
|
||
|
namespace System.Web.Util {
|
||
|
using System.Collections;
|
||
|
|
||
|
//
|
||
|
// Utility class to help with determining if a given code page
|
||
|
// is ASCII compatible (preserves 0-127 Unicode characters as is)
|
||
|
//
|
||
|
|
||
|
internal static class CodePageUtils {

    /*  The following array of ASCII compatible code pages
        is generated by running this code on a machine that
        has _many_ codepages installed.

        NOTE: the generator prints the declaration as "private const int[]",
        which C# does not accept for arrays; the field below is declared
        "private static readonly int[]" instead. Adjust the pasted output
        accordingly when regenerating the table.

        using System;
        using System.Collections;
        using System.Text;
        public class qq {
            public static void Main(string[] args) {
                ArrayList list = new ArrayList();
                byte[] bb = new byte[128]; for (int i = 0; i < 128; i++) bb[i] = (byte)i;
                String asciiString = Encoding.ASCII.GetString(bb);
                for (int i = 1; i < 100000; i++) {
                    try {
                        Encoding e = Encoding.GetEncoding(i);
                        byte[] xx = e.GetBytes(asciiString);
                        if (xx.Length == 128) {
                            bool good = true;
                            for (int j = 0; j < 128; j++) { if (bb[j] != xx[j]) { good = false; break; } }
                            if (good) list.Add(i);
                        }
                    }
                    catch {}
                }
                int n = list.Count;
                Console.Write("private const int[] _asciiCompatCodePages = new int[" + n + "] {\r\n ");
                for (int i = 0; i < n; i++) {
                    Console.Write("{0,5}", list[i]);
                    if (i < n-1) Console.Write(", ");
                    if (((i+1) % 10) == 0) Console.Write("\r\n ");
                }
                Console.Write("\r\n};\r\n");
            }
        }

    */

    // Code pages whose encoding of the characters 0-127 is byte-identical to ASCII.
    // The values MUST stay sorted in ascending order: IsAsciiCompatibleCodePage
    // performs a binary search over this table.
    private static readonly int[] _asciiCompatCodePages = new int[79] {
          437,   708,   720,   737,   775,   850,   852,   855,   857,   858,
          860,   861,   862,   863,   864,   865,   866,   869,   874,   932,
          936,   949,   950,  1250,  1251,  1252,  1253,  1254,  1255,  1256,
         1257,  1258,  1361, 10000, 10001, 10002, 10003, 10004, 10005, 10006,
        10007, 10008, 10010, 10017, 10029, 10079, 10081, 10082, 20000, 20001,
        20002, 20003, 20004, 20005, 20127, 20866, 20932, 20936, 20949, 21866,
        28591, 28592, 28593, 28594, 28595, 28596, 28597, 28598, 28599, 28605,
        38598, 50220, 50221, 50222, 50225, 50227, 51932, 51949, 65001
    };

    //
    // Returns true when 'codepage' is listed in _asciiCompatCodePages,
    // false otherwise (including for negative or unknown identifiers).
    //
    internal /*public*/ static bool IsAsciiCompatibleCodePage(int codepage) {
        // Alternatives to binary search considered:
        //   Hashtable:  static initialization increases startup time; lookup perf is relative
        //   Byte array: would consume ~8K, but lookups are constant time
        // With 79 entries, binary search needs at most 7 probes into the array.
        int lo = 0;
        // Derive the upper bound from the table so it cannot drift out of sync
        // with the data when the table is regenerated.
        int hi = _asciiCompatCodePages.Length - 1;

        while (lo <= hi) {
            int mid = lo + ((hi - lo) >> 1);
            int candidate = _asciiCompatCodePages[mid];

            // Compare directly instead of testing the sign of (candidate - codepage):
            // the subtraction overflows for very negative 'codepage' values, which
            // only worked through unchecked wraparound and would throw in a
            // checked build.
            if (candidate == codepage) {
                return true;    // mid is the index of the item
            }

            if (candidate < codepage) {
                lo = mid + 1;
            }
            else {
                hi = mid - 1;
            }
        }

        // Not found. At this point lo is the index of the item immediately after;
        // some implementations return ~lo as a "not found" indicator that carries
        // the insertion point, but callers here only need a yes/no answer.
        return false;
    }

    // Code page 65001 (also the last entry of the table above). The identifier
    // spelling "UT8" is kept unchanged because code outside this class may
    // reference the constant by this name.
    internal const int CodePageUT8 = 65001;
}
|
||
|
|
||
|
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
|