2014-08-13 10:39:27 +01:00
|
|
|
/*
 * CP932.cs - Japanese (Shift-JIS) code page.
 *
 * Copyright (c) 2002  Southern Storm Software, Pty Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

//
// Copyright (C) 2005-2006 Novell, Inc.
//
|
|
|
|
|
|
|
|
namespace I18N.CJK
|
|
|
|
{
|
|
|
|
|
|
|
|
using System;
|
|
|
|
using System.Text;
|
|
|
|
using I18N.Common;
|
|
|
|
|
|
|
|
#if DISABLE_UNSAFE
|
|
|
|
using MonoEncoder = I18N.Common.MonoSafeEncoder;
|
|
|
|
using MonoEncoding = I18N.Common.MonoSafeEncoding;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
[Serializable]
|
|
|
|
public class CP932 : MonoEncoding
|
|
|
|
{
|
|
|
|
// Code page number that Windows assigns to Shift-JIS.
private const int SHIFTJIS_CODE_PAGE = 932;

// Default constructor: hands the Shift-JIS code page number
// to the base class constructor.
public CP932()
	: base(SHIFTJIS_CODE_PAGE)
{
}
|
|
|
|
|
|
|
|
#if !DISABLE_UNSAFE
|
|
|
|
// Count the bytes needed to encode "count" characters starting at
// "chars".  Every character contributes one byte; a character that
// is encoded as a double-byte code contributes a second one.
public unsafe override int GetByteCountImpl (char* chars, int count)
{
#if __PNET__
	byte *cjkTable = JISConvert.Convert.cjkToJis;
	byte *extraTable = JISConvert.Convert.extraToJis;
#else
	byte[] cjkTable = JISConvert.Convert.cjkToJis;
	byte[] extraTable = JISConvert.Convert.extraToJis;
#endif
	int total = 0;
	for(int i = 0; i < count; ++i)
	{
		int c = chars[i];
		bool wide = false;

		if(c < 0x0080)
		{
			// Single byte: the character maps to itself.
		}
		else if(c < 0x0100)
		{
			// A handful of Latin 1 characters have
			// double-byte code points.
			switch(c)
			{
				case 0x00A2: case 0x00A3: case 0x00A7:
				case 0x00A8: case 0x00AC: case 0x00B0:
				case 0x00B1: case 0x00B4: case 0x00B6:
				case 0x00D7: case 0x00F7:
					wide = true;
					break;
			}
		}
		else if(c >= 0x0391 && c <= 0x0451)
		{
			// Greek subset: always counted as two bytes.
			wide = true;
		}
		else if(c >= 0x2010 && c <= 0x9FA5)
		{
			// Bulk of the CJK set: two table bytes per character,
			// low byte first.  Values of 0x0100 and up are double-byte.
			int slot = (c - 0x2010) * 2;
			int jis = ((int)(cjkTable[slot])) |
					  (((int)(cjkTable[slot + 1])) << 8);
			wide = (jis >= 0x0100);
		}
		else if(c >= 0xE000 && c <= 0xE757)
		{
			// PrivateUse: always two bytes.
			wide = true;
		}
		else if(c >= 0xFF01 && c <= 0xFFEF)
		{
			// Extra characters, including half-width katakana.
			int slot = (c - 0xFF01) * 2;
			int jis = ((int)(extraTable[slot])) |
					  (((int)(extraTable[slot + 1])) << 8);
			wide = (jis >= 0x0100);
		}

		total += wide ? 2 : 1;
	}

	// Hand the total back to the caller.
	return total;
}
|
|
|
|
|
|
|
|
// Get the bytes that result from encoding a character buffer.
//
// Encodes "charCount" characters starting at "chars" into "bytes",
// which has room for "byteCount" bytes, and returns the number of
// bytes written.  Throws ArgumentException ("bytes") when the output
// buffer is too small.  Characters that have no Shift-JIS mapping
// are handed to HandleFallback.
public unsafe override int GetBytesImpl (
	char* chars, int charCount, byte* bytes, int byteCount)
{
	EncoderFallbackBuffer buffer = null;

	// Convert the characters into their byte form.
	int posn = 0;
	int end = charCount;
	int byteLength = byteCount;
	int ch, value;
#if __PNET__
	byte *cjkToJis = JISConvert.Convert.cjkToJis;
	byte *greekToJis = JISConvert.Convert.greekToJis;
	byte *extraToJis = JISConvert.Convert.extraToJis;
#else
	byte[] cjkToJis = JISConvert.Convert.cjkToJis;
	byte[] greekToJis = JISConvert.Convert.greekToJis;
	byte[] extraToJis = JISConvert.Convert.extraToJis;
#endif
	for (int i = 0; i < end; i++, charCount--)
	{
		ch = chars[i];
		if(posn >= byteLength)
		{
			throw new ArgumentException
				(Strings.GetString("Arg_InsufficientSpace"),
				 "bytes");
		}
		if(ch < 0x0080)
		{
			// Character maps to itself.
			bytes[posn++] = (byte)ch;
			continue;
		}
		else if(ch < 0x0100)
		{
			// Check for special Latin 1 characters that
			// can be mapped to double-byte code points.
			// All of them share the lead byte 0x81.
			int trail;
			switch(ch)
			{
				case 0x00A2: trail = 0x91; break;
				case 0x00A3: trail = 0x92; break;
				case 0x00A7: trail = 0x98; break;
				case 0x00A8: trail = 0x4E; break;
				case 0x00AC: trail = 0xCA; break;
				case 0x00B0: trail = 0x8B; break;
				case 0x00B1: trail = 0x7D; break;
				case 0x00B4: trail = 0x4C; break;
				case 0x00B6: trail = 0xF7; break;
				case 0x00D7: trail = 0x7E; break;
				case 0x00F7: trail = 0x80; break;
				default:     trail = 0;    break;
			}
			if(trail != 0)
			{
				if((posn + 1) >= byteLength)
				{
					throw new ArgumentException
						(Strings.GetString
							("Arg_InsufficientSpace"), "bytes");
				}
				bytes[posn++] = (byte)0x81;
				bytes[posn++] = (byte)trail;
			}
			else if(ch == 0x00A5)
			{
				// Yen sign.
				bytes[posn++] = (byte)0x5C;
			}
			else
			{
				// Pass the index of the character being encoded
				// (i), not the start of the buffer.
				// NOTE(review): HandleFallback is defined outside
				// this file; it is assumed to read the character
				// at the index it is given - confirm.
				HandleFallback (ref buffer,
					chars, ref i, ref charCount,
					bytes, ref posn, ref byteCount, null);
			}
			continue;
		}
		else if(ch >= 0x0391 && ch <= 0x0451)
		{
			// Greek subset characters.
			value = (ch - 0x0391) * 2;
			value = ((int)(greekToJis[value])) |
					(((int)(greekToJis[value + 1])) << 8);
		}
		else if(ch >= 0x2010 && ch <= 0x9FA5)
		{
			// This range contains the bulk of the CJK set.
			value = (ch - 0x2010) * 2;
			value = ((int)(cjkToJis[value])) |
					(((int)(cjkToJis[value + 1])) << 8);
		}
		else if(ch >= 0xE000 && ch <= 0xE757)
		{
			// PrivateUse: 0xBC code points per lead byte,
			// starting at 0xF040; trail byte 0x7F is skipped.
			int diff = ch - 0xE000;
			value = ((int) (diff / 0xBC) << 8)
				+ (diff % 0xBC)
				+ 0xF040;
			if (value % 0x100 >= 0x7F)
				value++;
		}
		else if(ch >= 0xFF01 && ch <= 0xFF60)
		{
			value = (ch - 0xFF01) * 2;
			value = ((int)(extraToJis[value])) |
					(((int)(extraToJis[value + 1])) << 8);
		}
		else if(ch >= 0xFF61 && ch <= 0xFF9F)
		{
			// Half-width katakana: single bytes 0xA1..0xDF.
			// The range stops at U+FF9F because U+FFA0 would
			// come out as 0xE0, which the decoder treats as
			// the lead byte of a double-byte sequence.
			value = ch - 0xFF60 + 0xA0;
		}
		else
		{
			// Invalid character.
			value = 0;
		}
		if(value == 0)
		{
			HandleFallback (ref buffer,
				chars, ref i, ref charCount,
				bytes, ref posn, ref byteCount, null);
		}
		else if(value < 0x0100)
		{
			bytes[posn++] = (byte)value;
		}
		else if((posn + 1) >= byteLength)
		{
			throw new ArgumentException
				(Strings.GetString("Arg_InsufficientSpace"),
				 "bytes");
		}
		else if(value < 0x8000)
		{
			// JIS X 0208 character.
			value -= 0x0100;
			ch = (value / 0xBC);
			value = (value % 0xBC) + 0x40;
			if(value >= 0x7F)
			{
				++value;
			}
			if(ch < (0x9F - 0x80))
			{
				bytes[posn++] = (byte)(ch + 0x81);
			}
			else
			{
				bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
			}
			bytes[posn++] = (byte)value;
		}
		else if (value >= 0xF040 && value <= 0xF9FC)
		{
			// PrivateUse
			bytes[posn++] = (byte) (value / 0x100);
			bytes[posn++] = (byte) (value % 0x100);
		}
		else
		{
			// JIS X 0212 character, which Shift-JIS doesn't
			// support, but we've already allocated two slots.
			bytes[posn++] = (byte)'?';
			bytes[posn++] = (byte)'?';
		}
	}

	// Return the final length to the caller.
	return posn;
}
|
|
|
|
#else
|
|
|
|
// Count the bytes needed to encode "count" characters of "chars",
// beginning at "index".  Every character contributes one byte; a
// character that is encoded as a double-byte code contributes a
// second one.
public override int GetByteCount(char[] chars, int index, int count)
{
	byte[] cjkTable = JISConvert.Convert.cjkToJis;
	byte[] extraTable = JISConvert.Convert.extraToJis;

	int total = 0;
	for (; count > 0; --count)
	{
		int c = chars[index++];
		bool wide = false;

		if (c < 0x0080)
		{
			// Single byte: the character maps to itself.
		}
		else if (c < 0x0100)
		{
			// A handful of Latin 1 characters have
			// double-byte code points.
			switch (c)
			{
				case 0x00A2: case 0x00A3: case 0x00A7:
				case 0x00A8: case 0x00AC: case 0x00B0:
				case 0x00B1: case 0x00B4: case 0x00B6:
				case 0x00D7: case 0x00F7:
					wide = true;
					break;
			}
		}
		else if (c >= 0x0391 && c <= 0x0451)
		{
			// Greek subset: always counted as two bytes.
			wide = true;
		}
		else if (c >= 0x2010 && c <= 0x9FA5)
		{
			// Bulk of the CJK set: two table bytes per character,
			// low byte first.  Values of 0x0100 and up are double-byte.
			int slot = (c - 0x2010) * 2;
			int jis = ((int)(cjkTable[slot])) |
					  (((int)(cjkTable[slot + 1])) << 8);
			wide = (jis >= 0x0100);
		}
		else if (c >= 0xE000 && c <= 0xE757)
		{
			// PrivateUse: always two bytes.
			wide = true;
		}
		else if (c >= 0xFF01 && c <= 0xFFEF)
		{
			// Extra characters, including half-width katakana.
			int slot = (c - 0xFF01) * 2;
			int jis = ((int)(extraTable[slot])) |
					  (((int)(extraTable[slot + 1])) << 8);
			wide = (jis >= 0x0100);
		}

		total += wide ? 2 : 1;
	}

	// Hand the total back to the caller.
	return total;
}
|
|
|
|
|
|
|
|
// Get the bytes that result from encoding a character buffer.
//
// Encodes "charCount" characters of "chars", beginning at "charIndex",
// into "bytes" beginning at "byteIndex", and returns the number of
// bytes written.  Throws ArgumentException ("bytes") when the output
// array is too small.  Characters that have no Shift-JIS mapping
// are handed to HandleFallback.
public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
{
	int byteCount = bytes.Length;
	EncoderFallbackBuffer buffer = null;

	// Convert the characters into their byte form.
	int posn = byteIndex;
	int end = charIndex + charCount;
	int byteLength = byteCount;
	int value;
	byte[] cjkToJis = JISConvert.Convert.cjkToJis;
	byte[] greekToJis = JISConvert.Convert.greekToJis;
	byte[] extraToJis = JISConvert.Convert.extraToJis;

	for (int i = charIndex; i < end; i++, charCount--)
	{
		int ch = chars[i];

		if (posn >= byteLength)
		{
			throw new ArgumentException
				(Strings.GetString("Arg_InsufficientSpace"),
				 "bytes");
		}
		if (ch < 0x0080)
		{
			// Character maps to itself.
			bytes[posn++] = (byte)ch;
			continue;
		}
		else if (ch < 0x0100)
		{
			// Check for special Latin 1 characters that
			// can be mapped to double-byte code points.
			// All of them share the lead byte 0x81.
			int trail;
			switch (ch)
			{
				case 0x00A2: trail = 0x91; break;
				case 0x00A3: trail = 0x92; break;
				case 0x00A7: trail = 0x98; break;
				case 0x00A8: trail = 0x4E; break;
				case 0x00AC: trail = 0xCA; break;
				case 0x00B0: trail = 0x8B; break;
				case 0x00B1: trail = 0x7D; break;
				case 0x00B4: trail = 0x4C; break;
				case 0x00B6: trail = 0xF7; break;
				case 0x00D7: trail = 0x7E; break;
				case 0x00F7: trail = 0x80; break;
				default:     trail = 0;    break;
			}
			if (trail != 0)
			{
				if ((posn + 1) >= byteLength)
				{
					throw new ArgumentException
						(Strings.GetString
							("Arg_InsufficientSpace"), "bytes");
				}
				bytes[posn++] = (byte)0x81;
				bytes[posn++] = (byte)trail;
			}
			else if (ch == 0x00A5)
			{
				// Yen sign.
				bytes[posn++] = (byte)0x5C;
			}
			else
			{
				// The write cursor is posn; byteIndex is the start
				// of the output and is still needed, unmodified,
				// to compute the return value.
				// NOTE(review): HandleFallback is defined outside
				// this file; it is assumed to write at, and
				// advance, the byte index it is given - confirm.
				HandleFallback (ref buffer, chars, ref i, ref charCount,
					bytes, ref posn, ref byteCount, null);
			}
			continue;
		}
		else if (ch >= 0x0391 && ch <= 0x0451)
		{
			// Greek subset characters.
			value = (ch - 0x0391) * 2;
			value = ((int)(greekToJis[value])) |
					(((int)(greekToJis[value + 1])) << 8);
		}
		else if (ch >= 0x2010 && ch <= 0x9FA5)
		{
			// This range contains the bulk of the CJK set.
			value = (ch - 0x2010) * 2;
			value = ((int)(cjkToJis[value])) |
					(((int)(cjkToJis[value + 1])) << 8);
		}
		else if (ch >= 0xE000 && ch <= 0xE757)
		{
			// PrivateUse: 0xBC code points per lead byte,
			// starting at 0xF040; trail byte 0x7F is skipped.
			int diff = ch - 0xE000;
			value = ((int)(diff / 0xBC) << 8)
				+ (diff % 0xBC)
				+ 0xF040;
			if (value % 0x100 >= 0x7F)
				value++;
		}
		else if (ch >= 0xFF01 && ch <= 0xFF60)
		{
			value = (ch - 0xFF01) * 2;
			value = ((int)(extraToJis[value])) |
					(((int)(extraToJis[value + 1])) << 8);
		}
		else if (ch >= 0xFF61 && ch <= 0xFF9F)
		{
			// Half-width katakana: single bytes 0xA1..0xDF.
			// The range stops at U+FF9F because U+FFA0 would
			// come out as 0xE0, which the decoder treats as
			// the lead byte of a double-byte sequence.
			value = ch - 0xFF60 + 0xA0;
		}
		else
		{
			// Invalid character.
			value = 0;
		}
		if (value == 0)
		{
			// Report the character currently being encoded (i),
			// not the first character of the input range.
			HandleFallback (ref buffer, chars, ref i, ref charCount,
				bytes, ref posn, ref byteCount, null);
		}
		else if (value < 0x0100)
		{
			bytes[posn++] = (byte)value;
		}
		else if ((posn + 1) >= byteLength)
		{
			throw new ArgumentException
				(Strings.GetString("Arg_InsufficientSpace"),
				 "bytes");
		}
		else if (value < 0x8000)
		{
			// JIS X 0208 character.
			value -= 0x0100;
			ch = (value / 0xBC);
			value = (value % 0xBC) + 0x40;
			if (value >= 0x7F)
			{
				++value;
			}
			if (ch < (0x9F - 0x80))
			{
				bytes[posn++] = (byte)(ch + 0x81);
			}
			else
			{
				bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
			}
			bytes[posn++] = (byte)value;
		}
		else if (value >= 0xF040 && value <= 0xF9FC)
		{
			// PrivateUse
			bytes[posn++] = (byte)(value / 0x100);
			bytes[posn++] = (byte)(value % 0x100);
		}
		else
		{
			// JIS X 0212 character, which Shift-JIS doesn't
			// support, but we've already allocated two slots.
			bytes[posn++] = (byte)'?';
			bytes[posn++] = (byte)'?';
		}
	}

	// Return the final length to the caller.
	return posn - byteIndex;
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Count the characters produced by decoding "count" bytes of "bytes"
// beginning at "index".  A fresh decoder is created for each call
// and is asked to refresh (last argument true).
public override int GetCharCount (byte [] bytes, int index, int count)
{
	CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
	return decoder.GetCharCount (bytes, index, count, true);
}
|
|
|
|
|
|
|
|
// Decode "byteCount" bytes of "bytes", beginning at "byteIndex", into
// "chars" beginning at "charIndex", and return the number of
// characters written.  A fresh decoder is created for each call
// and is asked to refresh (last argument true).
public override int GetChars (
	byte [] bytes, int byteIndex, int byteCount,
	char [] chars, int charIndex)
{
	CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
	return decoder.GetChars (
		bytes, byteIndex, byteCount, chars, charIndex, true);
}
|
|
|
|
|
|
|
|
// Upper bound on the bytes needed to encode "charCount" characters:
// two bytes per character.  A negative count is rejected with
// ArgumentOutOfRangeException.
public override int GetMaxByteCount(int charCount)
{
	if(charCount >= 0)
	{
		return charCount * 2;
	}
	throw new ArgumentOutOfRangeException
		("charCount",
		 Strings.GetString("ArgRange_NonNegative"));
}
|
|
|
|
|
|
|
|
// Upper bound on the characters produced by decoding "byteCount"
// bytes: one character per byte.  A negative count is rejected
// with ArgumentOutOfRangeException.
public override int GetMaxCharCount(int byteCount)
{
	if(byteCount >= 0)
	{
		return byteCount;
	}
	throw new ArgumentOutOfRangeException
		("byteCount",
		 Strings.GetString("ArgRange_NonNegative"));
}
|
|
|
|
|
|
|
|
// Create a new stateful decoder, which remembers a pending lead
// byte between calls.
public override Decoder GetDecoder()
{
	Decoder decoder = new CP932Decoder(JISConvert.Convert);
	return decoder;
}
|
|
|
|
|
|
|
|
#if !ECMA_COMPAT
|
|
|
|
|
|
|
|
// Name of this encoding for use in mail message bodies.
public override String BodyName
{
	get
	{
		return "iso-2022-jp";
	}
}
|
|
|
|
|
|
|
|
// Human-readable description of this encoding.
public override String EncodingName
{
	get
	{
		return "Japanese (Shift-JIS)";
	}
}
|
|
|
|
|
|
|
|
// Name of this encoding for use in mail agent headers.
public override String HeaderName
{
	get
	{
		return "iso-2022-jp";
	}
}
|
|
|
|
|
|
|
|
// Whether a Web browser can display this encoding.
public override bool IsBrowserDisplay
{
	get
	{
		return true;
	}
}
|
|
|
|
|
|
|
|
// Whether a Web browser can save content in this encoding.
public override bool IsBrowserSave
{
	get
	{
		return true;
	}
}
|
|
|
|
|
|
|
|
// Whether a mail/news agent can display this encoding.
public override bool IsMailNewsDisplay
{
	get
	{
		return true;
	}
}
|
|
|
|
|
|
|
|
// Whether a mail/news agent can save content in this encoding.
public override bool IsMailNewsSave
{
	get
	{
		return true;
	}
}
|
|
|
|
|
|
|
|
// IANA-preferred Web name of this encoding.
public override String WebName
{
	get
	{
		return "shift_jis";
	}
}
|
|
|
|
|
|
|
|
// Windows code page number that this object represents.
public override int WindowsCodePage
{
	get
	{
		return SHIFTJIS_CODE_PAGE;
	}
}
|
2015-04-07 09:35:12 +01:00
|
|
|
|
|
|
|
// FIXME: There is no obvious reason for this override, yet without it
// System.XML regresses at Encoder.Convert() in
// MonoTests.System.Xml.XmlWriterSettingsTests.EncodingTest.
public override Encoder GetEncoder ()
{
	Encoder encoder = new MonoEncodingDefaultEncoder (this);
	return encoder;
}
|
2014-08-13 10:39:27 +01:00
|
|
|
|
|
|
|
}; // class CP932
|
|
|
|
|
|
|
|
#endif // !ECMA_COMPAT
|
|
|
|
|
|
|
|
// Decoder that handles a rolling Shift-JIS state.
//
// A lead byte seen at the end of one call is remembered and combined
// with the first byte of the next call.  GetCharCount and GetChars
// keep separate pending-byte fields, so counting a chunk does not
// disturb the state used for converting it.
sealed class CP932Decoder : DbcsEncoding.DbcsDecoder
{
	private new JISConvert convert;

	// Pending lead byte (0 if none) for GetCharCount and GetChars.
	private int last_byte_count;
	private int last_byte_chars;

	// Constructor.
	public CP932Decoder(JISConvert convert)
		: base (null)
	{
		this.convert = convert;
	}

	// True if "byteval" starts a double-byte sequence: 0x81..0x9F and
	// 0xE0..0xEF select the JIS X 0208 table, 0xF0..0xF9 the PrivateUse
	// block that CP932.GetBytes emits for U+E000..U+E757.
	// NOTE(review): lead bytes 0xFA..0xFC are still reported as
	// invalid, as before - confirm whether the table covers them.
	private static bool IsLeadByte (int byteval)
	{
		return (byteval >= 0x81 && byteval <= 0x9F) ||
			   (byteval >= 0xE0 && byteval <= 0xF9);
	}

	// Override inherited methods.

	// Count characters without flushing a pending lead byte.
	public override int GetCharCount (
		byte [] bytes, int index, int count)
	{
		return GetCharCount (bytes, index, count, false);
	}

	// Count the characters that GetChars would write for the same
	// bytes.  When "refresh" is true a dangling lead byte is counted
	// as one character and the pending state is cleared; otherwise
	// the lead byte is remembered for the next call.
	public
	override
	int GetCharCount (byte [] bytes, int index, int count, bool refresh)
	{
		CheckRange (bytes, index, count);

		// Determine the total length of the converted string.
		int length = 0;
		int byteval;
		int last = last_byte_count;
		while(count > 0)
		{
			byteval = bytes[index++];
			--count;
			if(last == 0 && IsLeadByte (byteval))
			{
				// First byte in a double-byte sequence.  The
				// character is counted when its second byte
				// arrives, which is when GetChars writes it.
				last = byteval;
			}
			else
			{
				// Single-byte character, invalid byte, or the
				// second byte of a sequence: one character each.
				last = 0;
				++length;
			}
		}
		if (refresh) {
			if (last != 0)
				length++;
			last_byte_count = 0;
		}
		else
			last_byte_count = last;

		// Return the total length.
		return length;
	}

	// Decode characters without flushing a pending lead byte.
	public override int GetChars (
		byte [] bytes, int byteIndex, int byteCount,
		char [] chars, int charIndex)
	{
		return GetChars (bytes, byteIndex, byteCount,
				 chars, charIndex, false);
	}

	// Decode "byteCount" bytes of "bytes", beginning at "byteIndex",
	// into "chars" beginning at "charIndex", and return the number of
	// characters written.  Undecodable input becomes '?'.  When
	// "refresh" is true a dangling lead byte is written as U+30FB and
	// the pending state is cleared; otherwise it is remembered for
	// the next call.  Throws ArgumentException ("chars") when the
	// output array is too small.
	public
	override
	int GetChars (
		byte [] bytes, int byteIndex, int byteCount,
		char [] chars, int charIndex, bool refresh)
	{
		CheckRange (bytes, byteIndex, byteCount,
				chars, charIndex);

		// Decode the bytes in the buffer.
		int posn = charIndex;
		int charLength = chars.Length;
		int byteval, value;
		int last = last_byte_chars;
#if __PNET__
		byte *table = convert.jisx0208ToUnicode;
#else
		byte[] table = convert.jisx0208ToUnicode;
#endif
		while(byteCount > 0)
		{
			byteval = bytes[byteIndex++];
			--byteCount;
			if(last == 0 && IsLeadByte (byteval))
			{
				// First byte in a double-byte sequence;
				// nothing is written yet.
				last = byteval;
				continue;
			}

			// Every other byte writes exactly one character.
			if(posn >= charLength)
			{
				throw new ArgumentException
					(Strings.GetString
						("Arg_InsufficientSpace"), "chars");
			}
			if(last == 0)
			{
				if(byteval < 0x80)
				{
					// Ordinary ASCII/Latin1 character.
					chars[posn++] = (char)byteval;
				}
				else if(byteval >= 0xA1 && byteval <= 0xDF)
				{
					// Half-width katakana character.
					chars[posn++] = (char)(byteval - 0xA1 + 0xFF61);
				}
				else
				{
					// Invalid first byte.
					chars[posn++] = '?';
				}
				continue;
			}

			// Second byte in a double-byte sequence.
			int lead = last;
			last = 0;

			// Position within the lead byte's row of 0xBC
			// cells; trail byte 0x7F is not used.
			int cell;
			if(byteval >= 0x40 && byteval <= 0x7E)
			{
				cell = byteval - 0x40;
			}
			else if(byteval >= 0x80 && byteval <= 0xFC)
			{
				cell = byteval - 0x80 + 0x3F;
			}
			else
			{
				// Invalid second byte.
				chars[posn++] = '?';
				continue;
			}

			if(lead >= 0xF0)
			{
				// PrivateUse: the inverse of the arithmetic in
				// CP932.GetBytes, no table lookup involved.
				chars[posn++] =
					(char)(0xE000 + (lead - 0xF0) * 0xBC + cell);
				continue;
			}

			if(lead <= 0x9F)
			{
				value = (lead - 0x81) * 0xBC;
			}
			else
			{
				value = (lead - 0xE0 + (0xA0 - 0x81)) * 0xBC;
			}

			// Two table bytes per cell, low byte first; zero
			// means the cell has no Unicode mapping.
			value = (value + cell) * 2;
			value = ((int)(table[value])) |
					(((int)(table[value + 1])) << 8);
			if(value != 0)
			{
				chars[posn++] = (char)value;
			}
			else
			{
				chars[posn++] = '?';
			}
		}
		if (refresh) {
			if (last != 0)
			{
				if(posn >= charLength)
				{
					throw new ArgumentException
						(Strings.GetString
							("Arg_InsufficientSpace"), "chars");
				}
				chars[posn++] = '\u30FB';
			}
			last_byte_chars = 0;
		}
		else
			last_byte_chars = last;

		// Return the final length to the caller.
		return posn - charIndex;
	}

} // class CP932Decoder
|
|
|
|
|
|
|
|
// The same encoding as CP932 under the class name ENCshift_jis;
// it adds no members of its own.
[Serializable]
public class ENCshift_jis : CP932
{
	public ENCshift_jis()
		: base()
	{
	}

}; // class ENCshift_jis
|
|
|
|
|
|
|
|
}; // namespace I18N.CJK
|