mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
8c296bbcd4
This patch was generated by a script. Here's the source of the script for future reference: function convert() { echo "Converting $1 to $2..." find . ! -wholename "*nsprpub*" \ ! -wholename "*security/nss*" \ ! -wholename "*/.hg*" \ ! -wholename "obj-ff-dbg*" \ ! -name nsXPCOMCID.h \ ! -name prtypes.h \ -type f \ \( -iname "*.cpp" \ -o -iname "*.h" \ -o -iname "*.c" \ -o -iname "*.cc" \ -o -iname "*.idl" \ -o -iname "*.ipdl" \ -o -iname "*.ipdlh" \ -o -iname "*.mm" \) | \ xargs -n 1 sed -i -e "s/\b$1\b/$2/g" } convert PRInt8 int8_t convert PRUint8 uint8_t convert PRInt16 int16_t convert PRUint16 uint16_t convert PRInt32 int32_t convert PRUint32 uint32_t convert PRInt64 int64_t convert PRUint64 uint64_t convert PRIntn int convert PRUintn unsigned convert PRSize size_t convert PROffset32 int32_t convert PROffset64 int64_t convert PRPtrdiff ptrdiff_t convert PRFloat64 double
328 lines
9.7 KiB
C++
328 lines
9.7 KiB
C++
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
/**
 * A character set converter from GBK to Unicode.
 *
 * @created 07/Sept/1999
 * @author Yueheng Xu, Yueheng.Xu@intel.com
 */
|
|
|
|
#include "nsGBKToUnicode.h"
|
|
#include "nsUCvCnDll.h"
|
|
#include "gbku.h"
|
|
|
|
|
|
//------------------------------------------------------------
// nsGBKUnique2BytesToUnicode
//------------------------------------------------------------
|
|
class nsGBKUnique2BytesToUnicode : public nsTableDecoderSupport
|
|
{
|
|
public:
|
|
nsGBKUnique2BytesToUnicode();
|
|
virtual ~nsGBKUnique2BytesToUnicode()
|
|
{ }
|
|
protected:
|
|
};
|
|
|
|
// Mapping table for the GBK-unique two-byte decoder; its entries are pulled
// in from gbkuniq2b.ut at compile time.
static const uint16_t g_utGBKUnique2Bytes[] =
{
#include "gbkuniq2b.ut"
};
|
|
// Configures the base table decoder for the two-byte scan class and hands it
// the GBK-unique mapping table.  No shift table is supplied (nullptr).
nsGBKUnique2BytesToUnicode::nsGBKUnique2BytesToUnicode()
  : nsTableDecoderSupport(u2BytesCharset,
                          nullptr,
                          (uMappingTable*) &g_utGBKUnique2Bytes,
                          1)
{}
|
|
|
|
//------------------------------------------------------------
// nsGB18030Unique2BytesToUnicode
//------------------------------------------------------------
|
|
class nsGB18030Unique2BytesToUnicode : public nsTableDecoderSupport
|
|
{
|
|
public:
|
|
nsGB18030Unique2BytesToUnicode();
|
|
virtual ~nsGB18030Unique2BytesToUnicode()
|
|
{ }
|
|
protected:
|
|
};
|
|
|
|
// Mapping table for the GB18030-unique two-byte decoder; its entries are
// pulled in from gb18030uniq2b.ut at compile time.
static const uint16_t g_utGB18030Unique2Bytes[] =
{
#include "gb18030uniq2b.ut"
};
|
|
// Configures the base table decoder for the two-byte scan class and hands it
// the GB18030-unique two-byte mapping table.  No shift table is supplied
// (nullptr).
nsGB18030Unique2BytesToUnicode::nsGB18030Unique2BytesToUnicode()
  : nsTableDecoderSupport(u2BytesCharset,
                          nullptr,
                          (uMappingTable*) &g_utGB18030Unique2Bytes,
                          1)
{}
|
|
|
|
//------------------------------------------------------------
// nsGB18030Unique4BytesToUnicode
//------------------------------------------------------------
|
|
class nsGB18030Unique4BytesToUnicode : public nsTableDecoderSupport
|
|
{
|
|
public:
|
|
nsGB18030Unique4BytesToUnicode();
|
|
virtual ~nsGB18030Unique4BytesToUnicode()
|
|
{ }
|
|
protected:
|
|
};
|
|
|
|
// Mapping table for the four-byte GB18030 decoder; its entries are pulled in
// from gb180304bytes.ut at compile time.
static const uint16_t g_utGB18030Unique4Bytes[] =
{
#include "gb180304bytes.ut"
};
|
|
// Configures the base table decoder for the four-byte GB18030 scan class and
// hands it the four-byte mapping table.  No shift table is supplied
// (nullptr).
nsGB18030Unique4BytesToUnicode::nsGB18030Unique4BytesToUnicode()
  : nsTableDecoderSupport(u4BytesGB18030Charset,
                          nullptr,
                          (uMappingTable*) &g_utGB18030Unique4Bytes,
                          1)
{}
|
|
|
|
|
|
//----------------------------------------------------------------------
// Class nsGBKToUnicode [implementation]

//----------------------------------------------------------------------
// Subclassing of nsTablesDecoderSupport class [implementation]
|
|
|
|
// Byte-range predicates used while scanning GBK / GB18030 input.  Each one is
// a thin wrapper around UINT8_IN_RANGE(low, byte, high).

// Lead byte of any multi-byte sequence: 0x81..0xFE.
#define LEGAL_GBK_MULTIBYTE_FIRST_BYTE(c) (UINT8_IN_RANGE(0x81, (c), 0xFE))

// Lead byte (0x90..0xFE) of a four-byte sequence that is decoded to a
// surrogate pair instead of going through the four-byte table decoder.
#define FIRST_BYTE_IS_SURROGATE(c) (UINT8_IN_RANGE(0x90, (c), 0xFE))

// Trail byte of a two-byte sequence: 0x40..0x7E or 0x80..0xFE.
#define LEGAL_GBK_2BYTE_SECOND_BYTE(c) \
  (UINT8_IN_RANGE(0x40, (c), 0x7E) || UINT8_IN_RANGE(0x80, (c), 0xFE))

// Second, third and fourth bytes of a four-byte sequence:
// [0x30-0x39] [0x81-0xFE] [0x30-0x39].
#define LEGAL_GBK_4BYTE_SECOND_BYTE(c) (UINT8_IN_RANGE(0x30, (c), 0x39))
#define LEGAL_GBK_4BYTE_THIRD_BYTE(c) (UINT8_IN_RANGE(0x81, (c), 0xFE))
#define LEGAL_GBK_4BYTE_FORTH_BYTE(c) (UINT8_IN_RANGE(0x30, (c), 0x39))
|
|
|
|
/**
 * Decodes a run of GBK / GB18030 bytes into PRUnichar code units.
 *
 * Input that cannot be mapped is replaced with UCS2_NO_MAPPING; after an
 * invalid multi-byte lead only that lead byte is consumed, so decoding
 * resynchronizes on the byte that follows it.
 *
 * @param aSrc         bytes to decode
 * @param aSrcLength   in: number of bytes available at aSrc;
 *                     out: number of bytes consumed
 * @param aDest        output buffer
 * @param aDestLength  in: capacity of aDest, in PRUnichars;
 *                     out: number of PRUnichars written
 * @return NS_OK when every input byte was consumed,
 *         NS_OK_UDEC_MOREOUTPUT when the output buffer ran out first,
 *         NS_OK_UDEC_MOREINPUT when the input stops before a multi-byte
 *         sequence is complete.
 */
NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
                                            int32_t* aSrcLength,
                                            PRUnichar* aDest,
                                            int32_t* aDestLength)
{
  const int32_t srcAvailable = *aSrcLength;
  int32_t written = 0;
  nsresult rv = NS_OK;

  // Nothing has been consumed yet; updated after every decoded character.
  *aSrcLength = 0;

  for (int32_t pos = 0; pos < srcAvailable; pos++) {
    if (written >= *aDestLength) {
      rv = NS_OK_UDEC_MOREOUTPUT;
      break;
    }

    if (!LEGAL_GBK_MULTIBYTE_FIRST_BYTE(*aSrc)) {
      // Single-byte input: ASCII, the GBK euro byte, or garbage.
      if (IS_ASCII(*aSrc)) {
        *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
      } else if (IS_GBK_EURO(*aSrc)) {
        *aDest = UCS2_EURO;
      } else {
        *aDest = UCS2_NO_MAPPING;
      }
      aSrc++;
    } else {
      // Lead byte in [0x81,0xFE]: the next byte decides what this is.
      if (pos + 1 >= srcAvailable) {
        rv = NS_OK_UDEC_MOREINPUT;
        break;
      }

      if (LEGAL_GBK_2BYTE_SECOND_BYTE(aSrc[1])) {
        // Two-byte code: try the common mapping first, then fall back to
        // the charset-specific two-byte extension decoder.
        *aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]);
        if (UCS2_NO_MAPPING == *aDest) {
          if (!TryExtensionDecoder(aSrc, aDest)) {
            *aDest = UCS2_NO_MAPPING;
          }
        }
        aSrc += 2;
        pos++;
      } else if (LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1])) {
        // Looks like a four-byte GB18030 sequence:
        // [0x81-0xfe][0x30-0x39][0x81-0xfe][0x30-0x39]
        if (pos + 3 >= srcAvailable) {
          rv = NS_OK_UDEC_MOREINPUT;
          break;
        }

        if (!(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) &&
              LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))) {
          // Bad third/fourth byte: drop the lead byte only and resume on
          // the second byte, which is known to be in 0x30-0x39.
          *aDest = UCS2_NO_MAPPING;
          aSrc++;
        } else {
          if (FIRST_BYTE_IS_SURROGATE(aSrc[0])) {
            // Supplementary mapping: needs two output slots.
            if ((written + 1) >= *aDestLength) {
              if (*aDestLength >= 2) {
                // Room will exist once the caller drains the buffer.
                rv = NS_OK_UDEC_MOREOUTPUT;
                break;
              }
              // The buffer can never hold a pair; emit a replacement.
              NS_ERROR("insufficient space in output buffer");
              *aDest = UCS2_NO_MAPPING;
            } else if (DecodeToSurrogate(aSrc, aDest)) {
              // Two PRUnichars were written; account for the first here,
              // the second is counted at the bottom of the loop.
              written++;
              aDest++;
            } else {
              *aDest = UCS2_NO_MAPPING;
            }
          } else if (!Try4BytesDecoder(aSrc, aDest)) {
            // Delegated four-byte decoder had no mapping.
            *aDest = UCS2_NO_MAPPING;
          }
          aSrc += 4;
          pos += 3;
        }
      } else if ((uint8_t) aSrc[0] == (uint8_t)0xA0) {
        // Stand-alone 0xA0 (no valid second byte): accepted as-is,
        // a la Netscape 4.x.
        *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
        aSrc++;
      } else {
        // Invalid GBK code point (second byte should be 0x40 or higher).
        *aDest = UCS2_NO_MAPPING;
        aSrc++;
      }
    }

    written++;
    aDest++;
    *aSrcLength = pos + 1;
  }

  *aDestLength = written;
  return rv;
}
|
|
|
|
|
|
void nsGBKToUnicode::CreateExtensionDecoder()
|
|
{
|
|
mExtensionDecoder = new nsGBKUnique2BytesToUnicode();
|
|
}
|
|
void nsGBKToUnicode::Create4BytesDecoder()
|
|
{
|
|
m4BytesDecoder = nullptr;
|
|
}
|
|
void nsGB18030ToUnicode::CreateExtensionDecoder()
|
|
{
|
|
mExtensionDecoder = new nsGB18030Unique2BytesToUnicode();
|
|
}
|
|
void nsGB18030ToUnicode::Create4BytesDecoder()
|
|
{
|
|
m4BytesDecoder = new nsGB18030Unique4BytesToUnicode();
|
|
}
|
|
bool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut)
|
|
{
|
|
NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]), "illegal first byte");
|
|
NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]), "illegal second byte");
|
|
NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]), "illegal third byte");
|
|
NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]), "illegal forth byte");
|
|
if(! FIRST_BYTE_IS_SURROGATE(aSrc[0]))
|
|
return false;
|
|
if(! LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]))
|
|
return false;
|
|
if(! LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]))
|
|
return false;
|
|
if(! LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))
|
|
return false;
|
|
|
|
uint8_t a1 = (uint8_t) aSrc[0];
|
|
uint8_t a2 = (uint8_t) aSrc[1];
|
|
uint8_t a3 = (uint8_t) aSrc[2];
|
|
uint8_t a4 = (uint8_t) aSrc[3];
|
|
a1 -= (uint8_t)0x90;
|
|
a2 -= (uint8_t)0x30;
|
|
a3 -= (uint8_t)0x81;
|
|
a4 -= (uint8_t)0x30;
|
|
uint32_t idx = (((a1 * 10 + a2 ) * 126 + a3) * 10) + a4;
|
|
// idx == ucs4Codepoint - 0x10000
|
|
if (idx > 0x000FFFFF)
|
|
return false;
|
|
|
|
*aOut++ = 0xD800 | (idx >> 10);
|
|
*aOut = 0xDC00 | (0x000003FF & idx);
|
|
|
|
return true;
|
|
}
|
|
bool nsGBKToUnicode::TryExtensionDecoder(const char* aSrc, PRUnichar* aOut)
|
|
{
|
|
if(!mExtensionDecoder)
|
|
CreateExtensionDecoder();
|
|
NS_ASSERTION(mExtensionDecoder, "cannot creqte 2 bytes unique converter");
|
|
if(mExtensionDecoder)
|
|
{
|
|
nsresult res = mExtensionDecoder->Reset();
|
|
NS_ASSERTION(NS_SUCCEEDED(res), "2 bytes unique conversoin reset failed");
|
|
int32_t len = 2;
|
|
int32_t dstlen = 1;
|
|
res = mExtensionDecoder->Convert(aSrc,&len, aOut, &dstlen);
|
|
NS_ASSERTION(NS_FAILED(res) || ((len==2) && (dstlen == 1)),
|
|
"some strange conversion result");
|
|
// if we failed, we then just use the 0xfffd
|
|
// therefore, we ignore the res here.
|
|
if(NS_SUCCEEDED(res))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
// This class never produces surrogate pairs: the call always reports
// failure and leaves aOut untouched.
bool nsGBKToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut) {
  return false;
}
|
|
bool nsGBKToUnicode::Try4BytesDecoder(const char* aSrc, PRUnichar* aOut)
|
|
{
|
|
if(!m4BytesDecoder)
|
|
Create4BytesDecoder();
|
|
if(m4BytesDecoder)
|
|
{
|
|
nsresult res = m4BytesDecoder->Reset();
|
|
NS_ASSERTION(NS_SUCCEEDED(res), "4 bytes unique conversoin reset failed");
|
|
int32_t len = 4;
|
|
int32_t dstlen = 1;
|
|
res = m4BytesDecoder->Convert(aSrc,&len, aOut, &dstlen);
|
|
NS_ASSERTION(NS_FAILED(res) || ((len==4) && (dstlen == 1)),
|
|
"some strange conversion result");
|
|
// if we failed, we then just use the 0xfffd
|
|
// therefore, we ignore the res here.
|
|
if(NS_SUCCEEDED(res))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|