f3e3aab35a
Former-commit-id: 9c2cb47f45fa221e661ab616387c9cda183f283d
728 lines
19 KiB
C#
728 lines
19 KiB
C#
/*
|
|
* CP51932.cs - Japanese EUC-JP code page.
|
|
*
|
|
* It is based on CP932.cs from Portable.NET
|
|
*
|
|
* Author:
|
|
* Atsushi Enomoto <atsushi@ximian.com>
|
|
*
|
|
* Below are original (CP932.cs) copyright lines
|
|
*
|
|
* (C)2004 Novell Inc.
|
|
*
|
|
* Copyright (c) 2002 Southern Storm Software, Pty Ltd
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
/*
	There appears to be no source for jis.table. It seems to have been
	generated from text files on the Unicode home page, such as
	ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT
	However, that mapping is non-normative, and in Japan it is considered
	to contain many problems.

	FIXME: Some characters such as 0xFF0B (wide "plus") are missing in
	that table.
*/
|
|
|
|
/*
|
|
0x00-0x1F, 0x7F : control characters
|
|
0x20-0x7E : ASCII
|
|
0xA1A1-0xFEFE : Kanji (precisely, both bytes contain only A1-FE)
|
|
0x8EA1-0x8EDF : half-width Katakana
|
|
0x8FA1A1-0x8FFEFE : Complemental Kanji
|
|
|
|
*/
|
|
|
|
namespace I18N.CJK
|
|
{
|
|
|
|
using System;
|
|
using System.Text;
|
|
using I18N.Common;
|
|
|
|
#if DISABLE_UNSAFE
|
|
using MonoEncoder = I18N.Common.MonoSafeEncoder;
|
|
using MonoEncoding = I18N.Common.MonoSafeEncoding;
|
|
#endif
|
|
|
|
[Serializable]
public class CP51932 : MonoEncoding
{
	// Japanese EUC-JP encoding (code page 51932).
	//
	// Every conversion method below creates a fresh CP51932Encoder or
	// CP51932Decoder and calls it with the flush/refresh flag set to true,
	// so no conversion state is carried from one call to the next.

	// Magic number used by Windows for the EUC-JP code page.
	private const int EUC_JP_CODE_PAGE = 51932;

	// Constructor. 932 is passed to the base class as its second argument
	// (presumably the associated Windows code page - confirm against
	// MonoEncoding).
	public CP51932 () : base (EUC_JP_CODE_PAGE, 932)
	{
	}

#if !DISABLE_UNSAFE
	// Get the number of bytes needed to encode a character buffer.
	public unsafe override int GetByteCountImpl (char* chars, int count)
	{
		return new CP51932Encoder (this).GetByteCountImpl (chars, count, true);
	}

	// Get the bytes that result from encoding a character buffer.
	public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
	{
		return new CP51932Encoder (this).GetBytesImpl (chars, charCount, bytes, byteCount, true);
	}
#else
	// Get the number of bytes needed to encode a character buffer.
	public override int GetByteCount (char [] chars, int index, int length)
	{
		return new CP51932Encoder (this).GetByteCount (chars, index, length, true);
	}

	// Get the bytes that result from encoding a character buffer.
	public override int GetBytes (char [] chars, int charIndex, int charCount, byte [] bytes, int byteIndex)
	{
		return new CP51932Encoder (this).GetBytes (chars, charIndex, charCount, bytes, byteIndex, true);
	}
#endif

	// Get the number of characters needed to decode a byte buffer.
	public override int GetCharCount (byte [] bytes, int index, int count)
	{
		return new CP51932Decoder ().GetCharCount (bytes, index, count, true);
	}

	// Get the characters that result from decoding a byte buffer.
	public override int GetChars (
		byte [] bytes, int byteIndex, int byteCount,
		char [] chars, int charIndex)
	{
		return new CP51932Decoder ().GetChars (
			bytes, byteIndex, byteCount, chars, charIndex, true);
	}

	// Get the maximum number of bytes needed to encode a
	// specified number of characters.
	// Throws ArgumentOutOfRangeException when charCount is negative.
	public override int GetMaxByteCount (int charCount)
	{
		if (charCount < 0) {
			throw new ArgumentOutOfRangeException
				("charCount",
				 Strings.GetString ("ArgRange_NonNegative"));
		}
		return charCount * 3;
	}

	// Get the maximum number of characters needed to decode a
	// specified number of bytes.
	// Throws ArgumentOutOfRangeException when byteCount is negative.
	public override int GetMaxCharCount (int byteCount)
	{
		if (byteCount < 0) {
			throw new ArgumentOutOfRangeException
				("byteCount",
				 Strings.GetString ("ArgRange_NonNegative"));
		}
		return byteCount;
	}

	// Get a stateful encoder for this encoding.
	public override Encoder GetEncoder ()
	{
		return new CP51932Encoder (this);
	}

	// Get a stateful decoder for this encoding.
	public override Decoder GetDecoder ()
	{
		return new CP51932Decoder ();
	}

#if !ECMA_COMPAT

	// Get the mail body name for this encoding.
	public override String BodyName {
		get { return "euc-jp"; }
	}

	// Get the human-readable name for this encoding.
	public override String EncodingName {
		get { return "Japanese (EUC)"; }
	}

	// Get the mail agent header name for this encoding.
	public override String HeaderName {
		get { return "euc-jp"; }
	}

	// Determine if this encoding can be displayed in a Web browser.
	public override bool IsBrowserDisplay {
		get { return true; }
	}

	// Determine if this encoding can be saved from a Web browser.
	public override bool IsBrowserSave {
		get { return true; }
	}

	// Determine if this encoding can be displayed in a mail/news agent.
	public override bool IsMailNewsDisplay {
		get { return true; }
	}

	// Determine if this encoding can be saved from a mail/news agent.
	public override bool IsMailNewsSave {
		get { return true; }
	}

	// Get the IANA-preferred Web name for this encoding.
	public override String WebName {
		get { return "euc-jp"; }
	}

#endif // !ECMA_COMPAT

	// The closing brace is deliberately outside the ECMA_COMPAT region so
	// that the class is terminated whether or not ECMA_COMPAT is defined.
} // CP51932
|
|
|
|
// Encoder that turns UTF-16 code units into EUC-JP (code page 51932) bytes.
//
// Output forms produced by the byte-writing methods:
//   1 byte  : U+0000..U+007F copied unchanged, or a table entry below 0x0100.
//   2 bytes : a table entry v >= 0x0100, written as
//             (v - 0x0100) / 0x5E + 0xA1 followed by (v - 0x0100) % 0x5E + 0xA1.
//   2 bytes : half-width katakana U+FF61..U+FF9F, written as 0x8E followed
//             by 0xA1..0xDF.
// A character whose lookup yields 0 is handed to HandleFallback.
//
// The refresh/flush arguments are accepted for signature compatibility but
// are not read by any method of this class.
public class CP51932Encoder : MonoEncoder
{
	// Bounds of the half-width katakana range that has an SS2 (0x8E) form.
	// The previous bounds were 0xFF60..0xFFA0; U+FFA0 then produced
	// 0x8E 0xE0, which CP51932Decoder rejects (it accepts 0xA1..0xDF only).
	private const int HalfWidthKanaFirst = 0xFF61;
	private const int HalfWidthKanaLast = 0xFF9F;

	public CP51932Encoder (MonoEncoding encoding)
		: base (encoding)
	{
	}

	// Reads the 16-bit little-endian entry stored in the given slot of a
	// conversion table (each slot occupies two bytes).
	private static int ReadEntry (byte [] table, int slot)
	{
		int offset = slot * 2;
		return ((int) table [offset]) | (((int) table [offset + 1]) << 8);
	}

	// Returns the number of bytes the count methods report for one character.
	private static int GetEncodedSize (int ch, byte [] cjkToJis, byte [] extraToJis)
	{
		if (ch < 0x0080) {
			// Character maps to itself.
			return 1;
		}
		if (ch < 0x0100) {
			// Special Latin 1 characters that can be mapped to double-byte
			// code points. NOTE(review): the byte-writing methods have no
			// mapping for 0x0080..0x00FF and send these to HandleFallback;
			// the count is kept exactly as it was.
			switch (ch) {
			case 0x00A2: case 0x00A3: case 0x00A7: case 0x00A8:
			case 0x00AC: case 0x00B0: case 0x00B1: case 0x00B4:
			case 0x00B6: case 0x00D7: case 0x00F7:
				return 2;
			default:
				return 1;
			}
		}
		if (ch >= 0x0391 && ch <= 0x0451) {
			// Greek subset characters: always counted as two bytes.
			return 2;
		}
		if (ch >= 0x2010 && ch <= 0x9FA5) {
			// This range contains the bulk of the CJK set.
			return ReadEntry (cjkToJis, ch - 0x2010) >= 0x0100 ? 2 : 1;
		}
		if (ch >= 0xFF01 && ch < HalfWidthKanaFirst) {
			// Extra (full-width) characters. U+FF60 is looked up here so
			// that counting agrees with the byte-writing methods.
			return ReadEntry (extraToJis, ch - 0xFF01) >= 0x0100 ? 2 : 1;
		}
		if (ch >= HalfWidthKanaFirst && ch <= HalfWidthKanaLast) {
			// Half-width kana: SS2 prefix plus one byte.
			return 2;
		}
		return 1;
	}

	// Maps one non-ASCII character to the intermediate value used by the
	// byte-writing methods: 0 when there is no mapping, a table entry, or
	// 0x8EA1..0x8EDF for half-width katakana. Callers handle ch < 0x0080
	// themselves, because U+0000 must be copied rather than treated as
	// "no mapping".
	private static int ToJisValue (int ch, byte [] cjkToJis, byte [] greekToJis, byte [] extraToJis)
	{
		if (ch >= 0x0391 && ch <= 0x0451) {
			// Greek subset characters.
			return ReadEntry (greekToJis, ch - 0x0391);
		}
		if (ch >= 0x2010 && ch <= 0x9FA5) {
			// This range contains the bulk of the CJK set.
			return ReadEntry (cjkToJis, ch - 0x2010);
		}
		if (ch >= 0xFF01 && ch < HalfWidthKanaFirst) {
			// Extra (full-width) characters, up to and including U+FF60.
			return ReadEntry (extraToJis, ch - 0xFF01);
		}
		if (ch >= HalfWidthKanaFirst && ch <= HalfWidthKanaLast) {
			// U+FF61 -> 0x8EA1 ... U+FF9F -> 0x8EDF.
			return ch - 0xFF60 + 0x8EA0;
		}
		// Invalid character.
		return 0;
	}

#if !DISABLE_UNSAFE
	// Get the number of bytes needed to encode a character buffer.
	public unsafe override int GetByteCountImpl (
		char* chars, int count, bool refresh)
	{
		// Fetch the tables once; they are reused for every character.
		byte [] cjkToJis = JISConvert.Convert.cjkToJis;
		byte [] extraToJis = JISConvert.Convert.extraToJis;

		int length = 0;
		for (int i = 0; i < count; i++) {
			length += GetEncodedSize (chars [i], cjkToJis, extraToJis);
		}
		return length;
	}

	// Get the bytes that result from encoding a character buffer.
	// Throws ArgumentException when the output buffer is too small.
	public unsafe override int GetBytesImpl (
		char* chars, int charCount, byte* bytes, int byteCount, bool refresh)
	{
		byte [] cjkToJis = JISConvert.Convert.cjkToJis;
		byte [] greekToJis = JISConvert.Convert.greekToJis;
		byte [] extraToJis = JISConvert.Convert.extraToJis;

		int posn = 0;
		int byteLength = byteCount;
		int end = charCount;

		// i and charCount are passed by reference to HandleFallback, which
		// may adjust them, so both are advanced in the loop header.
		for (int i = 0; i < end; i++, charCount--) {
			int ch = chars [i];
			if (posn >= byteLength) {
				throw new ArgumentException (Strings.GetString ("Arg_InsufficientSpace"), "bytes");
			}

			if (ch < 0x0080) {
				// Character maps to itself.
				bytes [posn++] = (byte) ch;
				continue;
			}

			int value = ToJisValue (ch, cjkToJis, greekToJis, extraToJis);
			if (value == 0) {
				// NOTE(review): byteCount still holds the caller's total
				// size here, not the space left after posn - confirm what
				// HandleFallback expects.
				HandleFallback (
					chars, ref i, ref charCount,
					bytes, ref posn, ref byteCount, null);
			} else if (value < 0x0100) {
				bytes [posn++] = (byte) value;
			} else if ((posn + 1) >= byteLength) {
				throw new ArgumentException (Strings.GetString ("Arg_InsufficientSpace"), "bytes");
			} else if (value < 0x8000) {
				// general 2byte glyph/kanji
				value -= 0x0100;
				bytes [posn++] = (byte) (value / 0x5E + 0xA1);
				bytes [posn++] = (byte) (value % 0x5E + 0xA1);
			} else {
				// half-width kana
				bytes [posn++] = 0x8E;
				bytes [posn++] = (byte) (value - 0x8E00);
			}
		}

		// Return the final length to the caller.
		return posn;
	}
#else
	// Get the number of bytes needed to encode a character buffer.
	public override int GetByteCount (char [] chars, int index, int count, bool flush)
	{
		// Fetch the tables once; they are reused for every character.
		byte [] cjkToJis = JISConvert.Convert.cjkToJis;
		byte [] extraToJis = JISConvert.Convert.extraToJis;

		int length = 0;
		for (; count > 0; --count) {
			length += GetEncodedSize (chars [index++], cjkToJis, extraToJis);
		}
		return length;
	}

	// Get the bytes that result from encoding a character buffer.
	// Throws ArgumentException when the output buffer is too small.
	public override int GetBytes (char [] chars, int charIndex, int charCount, byte [] bytes, int byteIndex, bool flush)
	{
		byte [] cjkToJis = JISConvert.Convert.cjkToJis;
		byte [] greekToJis = JISConvert.Convert.greekToJis;
		byte [] extraToJis = JISConvert.Convert.extraToJis;

		int posn = byteIndex;
		int byteLength = bytes.Length;
		// NOTE(review): this is the whole array length rather than the
		// space after byteIndex; it is only handed to HandleFallback.
		// Kept as in the original - confirm what HandleFallback expects.
		int byteCount = bytes.Length;
		int end = charIndex + charCount;

		// i and charCount are passed by reference to HandleFallback, which
		// may adjust them, so both are advanced in the loop header.
		for (int i = charIndex; i < end; i++, charCount--) {
			int ch = chars [i];
			if (posn >= byteLength) {
				throw new ArgumentException (Strings.GetString ("Arg_InsufficientSpace"), "bytes");
			}

			if (ch < 0x0080) {
				// Character maps to itself.
				bytes [posn++] = (byte) ch;
				continue;
			}

			int value = ToJisValue (ch, cjkToJis, greekToJis, extraToJis);
			if (value == 0) {
				HandleFallback (chars, ref i, ref charCount,
					bytes, ref posn, ref byteCount, null);
			} else if (value < 0x0100) {
				bytes [posn++] = (byte) value;
			} else if ((posn + 1) >= byteLength) {
				throw new ArgumentException (Strings.GetString ("Arg_InsufficientSpace"), "bytes");
			} else if (value < 0x8000) {
				// general 2byte glyph/kanji
				value -= 0x0100;
				bytes [posn++] = (byte) (value / 0x5E + 0xA1);
				bytes [posn++] = (byte) (value % 0x5E + 0xA1);
			} else {
				// half-width kana
				bytes [posn++] = 0x8E;
				bytes [posn++] = (byte) (value - 0x8E00);
			}
		}

		// Return the final length to the caller.
		return posn - byteIndex;
	}
#endif
} // CP51932Encoder
|
|
|
|
// Stateful decoder that turns EUC-JP (code page 51932) bytes into UTF-16.
//
// Byte handling:
//   0x00..0x7F              : copied unchanged.
//   0x8E + 0xA1..0xDF       : half-width katakana U+FF61..U+FF9F.
//   0xA1..0xFE + 0xA1..0xFE : looked up in the JIS X 0208 table.
//   0x8F + ...              : three-byte form; JIS X 0212 is not supported,
//                             so the two bytes after 0x8F are decoded through
//                             the two-byte path above.
// Anything that cannot be decoded yields U+30FB.
internal class CP51932Decoder : DbcsEncoding.DbcsDecoder
{
	// Character written for every byte sequence that cannot be decoded.
	private const char ReplacementChar = '\u30FB';

	public CP51932Decoder ()
		: base (null)
	{
	}

	// Pending lead byte carried from one call to the next: 0 when idle,
	// 0x8E after SS2, 0x8F after SS3, otherwise the first byte
	// (0xA1..0xFE) of a two-byte sequence. GetCharCount and GetChars keep
	// separate copies so that counting does not disturb decoding.
	int last_count, last_bytes;

	// Get the number of characters needed to decode a byte buffer.
	public override int GetCharCount (byte [] bytes, int index, int count)
	{
		return GetCharCount (bytes, index, count, false);
	}

	// Get the number of characters needed to decode a byte buffer.
	// When refresh is true a dangling lead byte is counted as one
	// replacement character and the counting state is cleared.
	public override
	int GetCharCount (byte [] bytes, int index, int count, bool refresh)
	{
		CheckRange (bytes, index, count);
		return Decode (bytes, index, count, null, 0, true, refresh, ref last_count);
	}

	// Decode a byte buffer into characters.
	public override int GetChars (byte [] bytes, int byteIndex,
				      int byteCount, char [] chars,
				      int charIndex)
	{
		return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
	}

	// Decode a byte buffer into characters and return how many were
	// written. When refresh is true a dangling lead byte is written as one
	// replacement character and the decoding state is cleared.
	// Throws ArgumentException when chars is too small.
	public override
	int GetChars (byte [] bytes, int byteIndex,
		      int byteCount, char [] chars,
		      int charIndex, bool refresh)
	{
		CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
		return Decode (bytes, byteIndex, byteCount, chars, charIndex, false, refresh, ref last_bytes);
	}

	// True for bytes that may appear in a two-byte sequence (0xA1..0xFE).
	private static bool IsDoubleByteRange (int byteval)
	{
		return byteval >= 0xA1 && byteval <= 0xFE;
	}

	// Reads the 16-bit little-endian entry stored in the given slot of a
	// conversion table (each slot occupies two bytes).
	private static int ReadEntry (byte [] table, int slot)
	{
		int offset = slot * 2;
		return ((int) table [offset]) | (((int) table [offset + 1]) << 8);
	}

	// Shared state machine behind GetCharCount and GetChars.
	// With countOnly set, chars is not touched and only the number of
	// characters is computed. state is read on entry and written once on
	// normal exit; it is left unchanged when an exception is thrown.
	private static int Decode (byte [] bytes, int byteIndex, int byteCount,
				   char [] chars, int charIndex,
				   bool countOnly, bool flush, ref int state)
	{
		byte [] table0208 = JISConvert.Convert.jisx0208ToUnicode;
		byte [] table0212 = JISConvert.Convert.jisx0212ToUnicode;

		int charLength = countOnly ? 0 : chars.Length;
		int posn = charIndex;
		int last = state;

		while (byteCount > 0) {
			int byteval = bytes [byteIndex++];
			--byteCount;

			// Code unit to emit for this byte, or -1 when the byte only
			// changes the pending state.
			int result;

			if (last == 0) {
				if (byteval <= 0x7F) {
					// Ordinary ASCII/control character.
					result = byteval;
				} else if (byteval == 0x8E || byteval == 0x8F || IsDoubleByteRange (byteval)) {
					// SS2, SS3, or the first byte of a double-byte
					// sequence: remember it and wait for more input.
					last = byteval;
					result = -1;
				} else {
					// Invalid first byte.
					result = ReplacementChar;
				}
			} else if (last == 0x8E) {
				// SS2 (one-time double-byte sequence).
				last = 0;
				if (byteval >= 0xA1 && byteval <= 0xDF) {
					// 0xA1..0xDF -> U+FF61..U+FF9F.
					result = 0xFF00 | (byteval - 0x40);
				} else {
					// Invalid second byte.
					result = ReplacementChar;
				}
			} else if (last == 0x8F) {
				// SS3: 3-byte character.
				// FIXME: JIS X 0212 is not supported; the next two bytes
				// are decoded through the two-byte path.
				if (IsDoubleByteRange (byteval)) {
					last = byteval;
					result = -1;
				} else {
					// The byte becomes the lead of the two-byte path, so
					// it must be validated here: an out-of-range value
					// would later produce a negative table index.
					last = 0;
					result = ReplacementChar;
				}
			} else {
				// Second byte in a double-byte sequence.
				int lead = last;
				last = 0;
				if (!IsDoubleByteRange (byteval)) {
					// Invalid second byte.
					result = ReplacementChar;
				} else {
					int value = ReadEntry (table0208,
						(lead - 0xA1) * 0x5E + (byteval - 0xA1));
					if (value == 0) {
						// NOTE(review): the original code indexed
						// table0212 with the already-zero lookup
						// result, i.e. it always read slot 0. That is
						// preserved here; it looks unintended, but
						// JIS X 0212 would need the SS3 form anyway.
						value = ReadEntry (table0212, 0);
					}
					result = value != 0 ? value : ReplacementChar;
				}
			}

			if (result >= 0) {
				if (!countOnly) {
					if (posn >= charLength) {
						throw Insufficient ();
					}
					chars [posn] = (char) result;
				}
				posn++;
			}
		}

		if (flush && last != 0) {
			// seems like .NET 2.0 adds \u30FB for insufficient
			// byte sequence (for Japanese \u30FB makes sense).
			if (!countOnly) {
				if (posn >= charLength) {
					throw Insufficient ();
				}
				chars [posn] = ReplacementChar;
			}
			posn++;
		}

		// A flushing call always ends with no pending byte; previously a
		// flush that emitted the replacement left the old state in place.
		state = flush ? 0 : last;

		// Return the final length to the caller.
		return posn - charIndex;
	}

	// Builds the exception thrown when the output buffer is too small.
	private static Exception Insufficient ()
	{
		return new ArgumentException
			(Strings.GetString
				("Arg_InsufficientSpace"), "chars");
	}
} // class CP51932Decoder
|
|
|
|
// Subclass of CP51932 that adds no members of its own; it only exposes the
// same encoding under the type name ENCeuc_jp.
[Serializable]
public class ENCeuc_jp : CP51932
{
	// Parameterless constructor; the base class constructor does all the work.
	public ENCeuc_jp ()
	{
	}
} // class ENCeuc_jp
|
|
|
|
}; // namespace I18N.CJK
|