mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
897 lines
29 KiB
C++
897 lines
29 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim:expandtab:shiftwidth=2:tabstop=2:
|
|
*/
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is Mozilla Communicator client code.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Netscape Communications Corp.
|
|
* Portions created by the Initial Developer are Copyright (C) 2003
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Jungshik Shin <jshin@mailaps.org>
|
|
* Frank Tang <ftang@netscape.com>
|
|
* Jin-Hwan Cho <chofchof@ktug.or.kr>
|
|
* Won-Kyu Park <wkpark@chem.skku.ac.kr>
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
/*
|
|
* - Purposes:
|
|
* 1. Enable rendering over 1.5 million Hangul syllables with
|
|
* UnBatang and other fonts made available by UN KoaungHi
|
|
* and PARK Won-kyu.
|
|
*/
|
|
#include "nsUCvKODll.h"
|
|
#include "nsUnicodeToJamoTTF.h"
|
|
#include "prmem.h"
|
|
#include "nsXPIDLString.h"
|
|
#include "prtypes.h"
|
|
#include "nscore.h"
|
|
#include "nsISupportsUtils.h"
|
|
#include "nsCOMPtr.h"
|
|
#include "nsIUnicodeDecoder.h"
|
|
#include "nsServiceManagerUtils.h"
|
|
#include "nsICharsetConverterManager.h"
|
|
#include <string.h>
|
|
|
|
typedef struct {
|
|
PRUint8 seq[3];
|
|
PRUint8 liga;
|
|
} JamoNormMap;
|
|
|
|
// cluster maps
|
|
#include "jamoclusters.h"
|
|
|
|
// Constants for Hangul Jamo/syllable handling taken from Unicode 3.0
|
|
// section 3.11
|
|
|
|
#define LBASE  0x1100  // first leading consonant (L)
#define VBASE  0x1161  // first vowel (V) used in precomposed syllables
#define TBASE  0x11A7  // T index 0 ("no trailing consonant"); not itself a T
#define TSTART 0x11A8  // first trailing consonant (T)
#define SBASE  0xAC00  // first precomposed syllable

#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28      // includes the "no trailing consonant" slot at index 0
#define SCOUNT (LCOUNT * VCOUNT * TCOUNT)
#define SEND   (SBASE + SCOUNT - 1)  // last precomposed syllable


#define LFILL 0x115F
#define VFILL 0x1160

// Jamo classes over the whole U+1100 block.
#define IS_LC(wc) (LBASE <= (wc) && (wc) <  VFILL)
#define IS_VO(wc) (VFILL <= (wc) && (wc) <  TSTART)
#define IS_TC(wc) (TSTART <= (wc) && (wc) <= 0x11FF)
#define IS_JAMO(wc) (IS_LC(wc) || IS_VO(wc) || IS_TC(wc))

// Jamos used in modern precomposed syllables
#define IS_SYL_LC(wc) (LBASE <= (wc) && (wc) <  LBASE + LCOUNT)
#define IS_SYL_VO(wc) (VBASE <= (wc) && (wc) <  VBASE + VCOUNT)
// Valid T indices are 1 .. TCOUNT - 1, so the upper bound must be exclusive.
// Accepting TBASE + TCOUNT would make SYL_FROM_LVT() spill over into the
// next LV syllable.
#define IS_SYL_TC(wc) (TBASE <  (wc) && (wc) <  TBASE + TCOUNT)

// Modern precomposed syllables.
#define IS_SYL(wc) (SBASE <= (wc) && (wc) <= SEND)
#define IS_SYL_WO_TC(wc) (((wc) - SBASE) % TCOUNT == 0)
#define IS_SYL_WITH_TC(wc) (((wc) - SBASE) % TCOUNT)

// Compose precomposed syllables out of L, V, and T.
// Pass TBASE as 't' for a syllable without a trailing consonant.
#define SYL_FROM_LVT(l,v,t) (SBASE + \
                             (((l) - LBASE) * VCOUNT + (v) - VBASE) * TCOUNT + \
                             (t) - TBASE)

// Hangul tone marks
#define HTONE1 0x302E
#define HTONE2 0x302F

#define IS_TONE(wc) ((wc) == HTONE1 || (wc) == HTONE2)

// Below are constants for rendering with UnBatang-like fonts.

#define LC_TMPPOS 0xF000 // temp. block for leading consonants
#define VO_TMPPOS 0xF100 // temp. block for vowels
#define TC_TMPPOS 0xF200 // temp. block for trailing consonants
#define LC_OFFSET (LC_TMPPOS-LBASE)
#define VO_OFFSET (VO_TMPPOS-VFILL)
#define TC_OFFSET (TC_TMPPOS-TSTART)

// Jamo class of *temporary* code points in PUA for UnBatang-like fonts.
#define IS_LC_EXT(wc) ( ((wc) & 0xFF00) == LC_TMPPOS )
#define IS_VO_EXT(wc) ( ((wc) & 0xFF00) == VO_TMPPOS )
#define IS_TC_EXT(wc) ( ((wc) & 0xFF00) == TC_TMPPOS )

// Glyph code point bases for L,V, and T in UnBatang-like fonts
#define UP_LBASE 0xE000 // 0xE000 = Lfill, 0xE006 = Kiyeok
#define UP_VBASE 0xE300 // 0xE300 = Vfill, 0xE302 = Ah
#define UP_TBASE 0xE404 // 0xE400 = Tfill, 0xE404 = Kiyeok
|
|
|
|
static nsresult JamoNormalize (const PRUnichar* aInSeq,
|
|
PRUnichar** aOutSeq, PRInt32* aLength);
|
|
static void JamosToExtJamos (PRUnichar* aInSeq, PRInt32* aLength);
|
|
static const JamoNormMap* JamoClusterSearch(JamoNormMap aKey,
|
|
const JamoNormMap* aClusters,
|
|
PRInt16 aClustersSize);
|
|
|
|
static PRInt32 JamoNormMapComp (const JamoNormMap& p1,
|
|
const JamoNormMap& p2);
|
|
static PRInt16 JamoSrchReplace (const JamoNormMap* aCluster,
|
|
PRUint16 aSize, PRUnichar *aIn,
|
|
PRInt32* aLength, PRUint16 aOffset);
|
|
static nsresult ScanDecomposeSyllable (PRUnichar *aIn, PRInt32* aLength,
|
|
const PRInt32 aMaxLen);
|
|
|
|
//----------------------------------------------------------------------
|
|
// Class nsUnicodeToJamoTTF [implementation]
|
|
|
|
NS_IMPL_ISUPPORTS1(nsUnicodeToJamoTTF, nsIUnicodeEncoder)

/**
 * Stores the error-handling settings in mErrBehavior, mErrEncoder and
 * mErrChar.
 *
 * @param aBehavior  error behavior selector
 * @param aEncoder   callback encoder; must be non-null when aBehavior is
 *                   kOnError_CallBack
 * @param aChar      character remembered in mErrChar
 * @return NS_ERROR_NULL_POINTER if a callback is requested without an
 *         encoder, NS_OK otherwise
 */
NS_IMETHODIMP
nsUnicodeToJamoTTF::SetOutputErrorBehavior(PRInt32 aBehavior,
                                           nsIUnicharEncoder *aEncoder,
                                           PRUnichar aChar)
{
  const PRBool callbackRequested = (aBehavior == kOnError_CallBack);
  if (callbackRequested && !aEncoder)
    return NS_ERROR_NULL_POINTER;

  mErrBehavior = aBehavior;
  mErrEncoder = aEncoder;
  mErrChar = aChar;

  return NS_OK;
}
|
|
|
|
// constructor and destructor
|
|
|
|
// mJamos must be null before Reset() runs, because Reset() inspects it to
// decide whether a heap buffer has to be released.
nsUnicodeToJamoTTF::nsUnicodeToJamoTTF()
  : mJamos(nsnull)
{
  Reset();
}
|
|
|
|
// Releases the jamo buffer only when it was moved to the heap by Convert();
// the built-in mJamosStatic storage is never freed.
nsUnicodeToJamoTTF::~nsUnicodeToJamoTTF()
{
  const PRBool ownsHeapBuffer = mJamos && mJamos != mJamosStatic;
  if (ownsHeapBuffer)
  {
    PR_Free(mJamos);
  }
}
|
|
|
|
// Character classes used to index gIsBoundary. The enumerator order must
// match the row/column order of that table.
enum KoCharClass
{
  KO_CHAR_CLASS_LC = 0,    // leading consonant
  KO_CHAR_CLASS_VO,        // vowel
  KO_CHAR_CLASS_TC,        // trailing consonant
  KO_CHAR_CLASS_SYL1,      // modern precomposed syllable w/o TC (LV type syl.)
  KO_CHAR_CLASS_SYL2,      // modern precomposed syllable with TC (LVT type syl.)
  KO_CHAR_CLASS_TONE,      // Tone marks
  KO_CHAR_CLASS_NOHANGUL,  // Non-Hangul characters.
  KO_CHAR_CLASS_NUM        // number of classes (table dimension)
};
|
|
|
|
// Maps a UTF-16 code unit to its KoCharClass. The argument is evaluated
// several times, so it must be free of side effects.
#define CHAR_CLASS(ch)                                                   \
  (IS_LC(ch)   ? KO_CHAR_CLASS_LC   :                                    \
   IS_VO(ch)   ? KO_CHAR_CLASS_VO   :                                    \
   IS_TC(ch)   ? KO_CHAR_CLASS_TC   :                                    \
   IS_SYL(ch)  ? (IS_SYL_WITH_TC(ch) ? KO_CHAR_CLASS_SYL2                \
                                     : KO_CHAR_CLASS_SYL1) :             \
   IS_TONE(ch) ? KO_CHAR_CLASS_TONE :                                    \
                 KO_CHAR_CLASS_NOHANGUL)
|
|
|
|
|
|
// Grapheme boundary checker : See UTR #29 and Unicode 3.2 section 3.11
|
|
const static PRBool gIsBoundary[KO_CHAR_CLASS_NUM][KO_CHAR_CLASS_NUM] =
|
|
{// L V T S1 S2 M X
|
|
{ 0, 0, 1, 0, 0, 0, 1 }, // L
|
|
{ 1, 0, 0, 1, 1, 0, 1 }, // V
|
|
{ 1, 1, 0, 1, 1, 0, 1 }, // T
|
|
{ 1, 0, 0, 1, 1, 0, 1 }, // S1
|
|
{ 1, 1, 0, 1, 1, 0, 1 }, // S2
|
|
{ 1, 1, 1, 1, 1, 0, 1 }, // M
|
|
{ 1, 1, 1, 1, 1, 0, 1 } // X
|
|
};
|
|
|
|
|
|
/**
 * Converts UTF-16 text to the 2-byte big-endian glyph code points of the
 * jamo font encoding.
 *
 * Characters are accumulated in mJamos until gIsBoundary reports a cluster
 * boundary; each completed cluster is handed to composeHangul(), which
 * appends its output to aDest at mByteOff.
 *
 * @param aSrc         input code units
 * @param aSrcLength   in: number of code units in aSrc (not modified here)
 * @param aDest        output buffer; its capacity is NOT checked here, so the
 *                     caller has to size it (see GetMaxLength())
 * @param aDestLength  out: number of bytes written to aDest
 * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY when the cluster buffer cannot be
 *         grown (the converter state stays valid in that case)
 */
NS_IMETHODIMP
nsUnicodeToJamoTTF::Convert(const PRUnichar * aSrc,
                            PRInt32 * aSrcLength, char * aDest,
                            PRInt32 * aDestLength)
{
  nsresult rv = NS_OK;
  mByteOff = 0;

  // This should never happen, but it happens under MS Windows, somehow...
  if (mJamoCount > mJamosMaxLength)
  {
    NS_WARNING("mJamoCount > mJamoMaxLength on entering Convert()");
    Reset();
  }

  for (PRInt32 charOff = 0; charOff < *aSrcLength; charOff++)
  {
    PRUnichar ch = aSrc[charOff];

    // Syllable boundary check. Ref. : Unicode 3.2 section 3.11
    if (mJamoCount != 0 &&
        gIsBoundary[CHAR_CLASS(mJamos[mJamoCount - 1])][CHAR_CLASS(ch)])
    {
      composeHangul(aDest);
      mJamoCount = 0;
    }
    // Ignore tone marks other than the first in a sequence of tone marks.
    else if (mJamoCount != 0 && IS_TONE(mJamos[mJamoCount - 1]) && IS_TONE(ch))
    {
      // NOTE(review): this decrement removes the buffered (first) tone mark
      // before composing, which looks at odds with the comment above, and
      // makes composeHangul() see an empty cluster when the tone mark was
      // alone. Behavior kept as is -- confirm the intent.
      --mJamoCount;
      composeHangul(aDest);
      mJamoCount = 0;

      // skip over tone marks from the second on in a series.
      while (IS_TONE(ch) && ++charOff < *aSrcLength)
        ch = aSrc[charOff];

      if (!IS_TONE(ch))
      {
        // The buffer always has room for at least one element here.
        mJamos[mJamoCount++] = ch;
        continue;
      }
      else
        break;  // input ended inside the run of tone marks
    }

    if (mJamoCount == mJamosMaxLength)
    {
      // Grow the cluster buffer by one element. The new pointer and capacity
      // are committed only after the allocation succeeded, so a failure
      // neither leaks the old heap block nor leaves mJamos null with an
      // already-incremented capacity.
      PRUnichar *grown;
      if (mJamos == mJamosStatic)
      {
        grown = (PRUnichar *) PR_Malloc(sizeof(PRUnichar) *
                                        (mJamosMaxLength + 1));
        if (grown)
          memcpy(grown, mJamosStatic, sizeof(PRUnichar) * mJamoCount);
      }
      else
      {
        grown = (PRUnichar *) PR_Realloc(mJamos,
                                         sizeof(PRUnichar) *
                                         (mJamosMaxLength + 1));
      }

      if (!grown)
        return NS_ERROR_OUT_OF_MEMORY;

      mJamos = grown;
      mJamosMaxLength++;
    }

    mJamos[mJamoCount++] = ch;
  }

  // Flush whatever is still buffered; nothing is carried over to the next
  // call.
  if (mJamoCount != 0)
    composeHangul(aDest);
  mJamoCount = 0;
  *aDestLength = mByteOff;

  return rv;
}
|
|
|
|
/**
 * Flushes a cluster that is still buffered in mJamos, if any, into aDest and
 * clears the buffering state.
 *
 * @param aDest        output buffer (capacity is not checked here)
 * @param aDestLength  out: number of bytes written to aDest
 * @return always NS_OK
 */
NS_IMETHODIMP
nsUnicodeToJamoTTF::Finish(char* aDest, PRInt32* aDestLength)
{
  mByteOff = 0;

  const PRBool hasPendingCluster = (mJamoCount != 0);
  if (hasPendingCluster)
  {
    composeHangul(aDest);
  }
  *aDestLength = mByteOff;

  // Leave the converter with an empty cluster buffer and output offset.
  mJamoCount = 0;
  mByteOff = 0;
  return NS_OK;
}
|
|
|
|
//================================================================
|
|
/**
 * Returns the converter to its initial state: any heap-allocated cluster
 * buffer is freed, mJamos points at the zeroed built-in storage again, and
 * the cluster count and output offset are cleared.
 *
 * @return always NS_OK
 */
NS_IMETHODIMP
nsUnicodeToJamoTTF::Reset()
{
  const PRBool ownsHeapBuffer = mJamos && mJamos != mJamosStatic;
  if (ownsHeapBuffer)
  {
    PR_Free(mJamos);
  }

  // Fall back to the built-in storage and wipe it.
  memset(mJamosStatic, 0, sizeof(mJamosStatic));
  mJamos = mJamosStatic;
  mJamosMaxLength = sizeof(mJamosStatic) / sizeof(PRUnichar);

  mJamoCount = 0;
  mByteOff = 0;

  return NS_OK;
}
|
|
|
|
/**
 * Reports an output size estimate of six bytes per input code unit: a
 * precomposed Hangul syllable can be decomposed into 3 jamos, each of which
 * takes 2 bytes.
 *
 * NOTE(review): the fallback path of composeHangul() can emit two 2-byte
 * code points for a single jamo; confirm that six bytes per input code unit
 * is a sufficient bound for every cluster.
 *
 * @param aSrc         unused
 * @param aSrcLength   number of input code units
 * @param aDestLength  out: aSrcLength * 6
 * @return always NS_OK
 */
NS_IMETHODIMP
nsUnicodeToJamoTTF::GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength,
                                 PRInt32 * aDestLength)
{
  const PRInt32 kBytesPerCodeUnit = 6;  // 3 jamos x 2 bytes

  *aDestLength = aSrcLength * kBytesPerCodeUnit;
  return NS_OK;
}
|
|
|
|
/**
|
|
* Copied from mslvt.otp by Jin-Hwan Cho <chofchof@ktug.or.kr>.
|
|
* Extended by Jungshik Shin <jshin@mailaps.org> to support
|
|
* additional Jamo clusters not encoded in U+1100 Jamo block
|
|
* as precomposed Jamo clusters.
|
|
* Corrected by Won-Kyu Park <wkpark@chem.skku.ac.kr>.
|
|
* See http://www.ktug.or.kr for its use in Lambda and swindow/SFontTTF.cpp at
|
|
* http://www.yudit.org for its use in Yudit.
|
|
* A patch with the same set of tables was submitted for
|
|
* inclusion in Pango (http://www.pango.org).
|
|
*/
|
|
|
|
/**
|
|
* Mapping from LC code points to glyph indices in UnPark fonts.
|
|
* UnPark fonts have the same glyph arrangement as Ogulim font, but
|
|
* they have them in BMP PUA (beginning at U+E000) to be proper Unicode
|
|
* fonts unlike Ogulim font with Jamo glyphs in CJK ideograph code points.
|
|
* Glyph indices for 90 LCs encoded in U+1100 block are followed by 6 reserved
|
|
* code points and glyph indices for 34 additional consonant clusters
|
|
* (not assigned code points of their own) for which separate glyphs exist in
|
|
* UnPark fonts.
|
|
* The first element is for Kiyeok and UP_LBASE is set to Lfill glyph(0xe000)
|
|
* so that the first element is '1' to map it to glyph for Kiyeok at 0xe006.
|
|
* (there are six glyphs for each LC in UnPark fonts.)
|
|
*/
|
|
const static PRUint8 gUnParkLcGlyphMap[130] = {
|
|
1, 2, 4, 12, 14, 20, 36, 42, 46, 62, 70, 85,100,102,108,113,
|
|
114,116,120, 5, 6, 7, 8, 13, 23, 26, 34, 35, 39, 41, 43, 44,
|
|
45, 47, 48, 49, 50, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 65,
|
|
66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
|
|
84, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99,101,104,105,
|
|
106,107,109,110,111,112,117,119,122,123, 0, 0, 0, 0, 0, 0,
|
|
3, 9, 10, 11, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29,
|
|
30, 31, 32, 33, 37, 38, 40, 53, 56, 59, 71, 88, 98,103,115,118,
|
|
121, 124
|
|
};
|
|
|
|
/**
|
|
* Mapping from vowel code points to glyph indices in UnPark/Oxxx font.
|
|
* Glyphs for 28 additional vowel clusters (not given separate
|
|
* code points in U+1100 block) are available in O*ttf fonts.
|
|
* Total count: 95 = 1(Vfill) + 66 (in U+1100 block) + 28 (extra.)
|
|
*/
|
|
const static PRUint8 gUnParkVoGlyphMap[95] = {
|
|
0, 1, 5, 6, 10, 11, 15, 16, 20, 21, 22, 23, 33, 34, 43, 46,
|
|
48, 52, 54, 64, 71, 73, 2, 3, 7, 8, 12, 13, 14, 18, 19, 26,
|
|
27, 29, 30, 32, 37, 38, 40, 41, 42, 44, 45, 47, 50, 51, 55, 57,
|
|
58, 59, 60, 62, 63, 69, 70, 72, 74, 75, 80, 83, 85, 87, 88, 90,
|
|
92, 93, 94, 4, 9, 17, 24, 25, 28, 31, 35, 36, 39, 49, 53, 56,
|
|
61, 65, 66, 67, 68, 76, 77, 78, 79, 81, 82, 84, 86, 89, 91
|
|
};
|
|
|
|
/**
|
|
* Mapping from TC code points to glyph indices in UnPark/Oxxx font.
|
|
* glyphs for 59 additional trailing consonant clusters (not given separate
|
|
* code points in U+1100 blocks) are available in O*ttf fonts.
|
|
* Total count: 141 = 82 (in U+1100 block) + 59 (extra.)
|
|
* The first element is Kiyeok and UP_TBASE is set to 0x5204 (Kiyeok).
|
|
*/
|
|
const static PRUint8 gUnParkTcGlyphMap[141] = {
|
|
0, 1, 5, 10, 17, 20, 21, 32, 33, 42, 46, 52, 57, 58, 59, 63,
|
|
78, 84, 91, 98,109,123,127,128,129,130,135, 3, 6, 11, 13, 15,
|
|
16, 19, 22, 25, 35, 37, 38, 39, 40, 43, 44, 48, 50, 51, 53, 54,
|
|
56, 60, 64, 67, 69, 71, 72, 73, 75, 76, 77, 80, 88, 89, 90, 92,
|
|
93, 94, 96,106,110,111,114,115,117,119,120,131,134,136,137,138,
|
|
139,140, 2, 4, 7, 8, 9, 12, 14, 18, 23, 24, 26, 27, 28, 29,
|
|
30, 31, 34, 36, 41, 45, 47, 49, 55, 61, 62, 65, 66, 68, 70, 74,
|
|
79, 81, 82, 83, 85, 86, 87, 95, 97, 99,100,101,102,103,104,105,
|
|
107,108,112,113,116,118,121,122,124,125,126,132,133
|
|
};
|
|
|
|
/* Which of six glyphs to use for choseong(L) depends on
|
|
the following vowel and whether or not jongseong(T) is present
|
|
in a syllable. Note that The first(0th) element is for Vfill.
|
|
|
|
shape Number of choseong(L) w.r.t. jungseong(V) without jongseong(T)
|
|
|
|
95 = 1(Vfill) + 66 + 28 (extra)
|
|
*/
|
|
|
|
const static PRUint8 gUnParkVo2LcMap[95] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 2, 2, 1,
|
|
1, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1,
|
|
1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1, 1, 1, 0, 2, 1,
|
|
2, 1, 2, 1, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
|
|
2, 1, 1, 1, 2, 1, 0, 0, 0, 1, 1, 1, 0, 2, 2
|
|
};
|
|
|
|
/* shape Number of choseong(L) w.r.t. jungseong(V) with jongseong(T) */
|
|
|
|
const static PRUint8 gUnParkVo2LcMap2[95] = {
|
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 4, 4, 4, 5, 5, 4,
|
|
4, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
4, 4, 5, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 4, 4,
|
|
4, 4, 4, 5, 4, 5, 5, 4, 3, 3, 4, 4, 4, 3, 5, 4,
|
|
5, 4, 5, 4, 4, 3, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4,
|
|
5, 4, 4, 4, 5, 4, 3, 3, 3, 4, 4, 4, 3, 5, 5
|
|
};
|
|
|
|
/* shape Number of jongseong(T) w.r.t. jungseong(V)
|
|
Which of four glyphs to use for jongseong(T) depends on
|
|
the preceding vowel. */
|
|
|
|
const static PRUint8 gUnParkVo2TcMap[95] = {
|
|
3, 0, 2, 0, 2, 1, 2, 1, 2, 3, 0, 2, 1, 3, 3, 1,
|
|
2, 1, 3, 3, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
|
2, 2, 3, 3, 0, 2, 1, 3, 1, 0, 2, 1, 2, 3, 0, 1,
|
|
2, 1, 2, 3, 1, 3, 3, 1, 2, 2, 1, 1, 1, 1, 3, 1,
|
|
3, 1, 3, 0, 1, 0, 0, 0, 2, 3, 0, 2, 1, 1, 2, 2,
|
|
3, 0, 0, 0, 3, 0, 2, 2, 2, 1, 0, 1, 2, 1, 1
|
|
};
|
|
|
|
/**
 * Renders the cluster buffered in mJamos[0 .. mJamoCount) and appends the
 * result to aResult at mByteOff as 2-byte big-endian code points.
 *
 * Order of attempts:
 *   1. a trailing tone mark is written first;
 *   2. a lone precomposed syllable or a non-Hangul character is copied
 *      through unchanged;
 *   3. the normalized cluster is written as a precomposed syllable when
 *      RenderAsPrecompSyllable() can compose it;
 *   4. an L V or L V T cluster is mapped to UnPark glyph code points;
 *   5. anything else is written jamo by jamo (fallback).
 *
 * The capacity of aResult is not checked.
 *
 * @param aResult  output buffer
 * @return NS_ERROR_UNEXPECTED for an empty cluster, NS_ERROR_NULL_POINTER
 *         for a null aResult, a JamoNormalize() failure code, or NS_OK
 */
NS_IMETHODIMP
nsUnicodeToJamoTTF::composeHangul(char* aResult)
{
  nsresult rv = NS_OK;
  PRInt32 length = mJamoCount;

  if (length == 0)
  {
    NS_WARNING("composeHangul() : zero length string comes in ! \n");
    return NS_ERROR_UNEXPECTED;
  }

  if (!aResult)
    return NS_ERROR_NULL_POINTER;

  // Put Hangul tone mark first as it should be to the left of
  // the character it follows.
  // XXX : What should we do when a tone mark come by itself?
  const PRUnichar lastChar = mJamos[length - 1];
  if (IS_TONE(lastChar))
  {
    aResult[mByteOff++] = PRUint8(lastChar >> 8);
    aResult[mByteOff++] = PRUint8(lastChar & 0xff);
    --length;
    if (length == 0)
      return rv;
  }

  const PRUnichar firstChar = mJamos[0];

  // no more processing is necessary for precomposed modern Hangul syllables.
  if (length == 1 && IS_SYL(firstChar))
  {
    aResult[mByteOff++] = PRUint8(firstChar >> 8);
    aResult[mByteOff++] = PRUint8(firstChar & 0xff);
    return rv;
  }

  // Non-Hangul characters pass through unchanged.
  if (CHAR_CLASS(firstChar) == KO_CHAR_CLASS_NOHANGUL)
  {
    NS_ASSERTION(length == 1, "A non-Hangul should come by itself !!\n");
    aResult[mByteOff++] = PRUint8(firstChar >> 8);
    aResult[mByteOff++] = PRUint8(firstChar & 0xff);
    return rv;
  }

  // 'buffer' takes ownership of the sequence allocated by JamoNormalize().
  nsXPIDLString buffer;
  rv = JamoNormalize(mJamos, getter_Copies(buffer), &length);

  // safe to cast away const.
  PRUnichar* text = buffer.BeginWriting();
  NS_ENSURE_SUCCESS(rv, rv);

  // When the cluster composes to a modern syllable, it is written here and
  // 'length' drops to zero.
  text += RenderAsPrecompSyllable(text, &length, aResult);
  if (length == 0)
    return rv;

  // convert to extended Jamo sequence
  JamosToExtJamos(text, &length);

  // Is the cluster in LV or LVT form after the conversion?
  const PRBool isLvOrLvt =
    (length == 2 || length == 3) &&
    IS_LC_EXT(text[0]) && IS_VO_EXT(text[1]) &&
    (length != 3 || IS_TC_EXT(text[2]));

  if (isLvOrLvt)
  {
    // Each extended code point is <class block base> + <table index>, so the
    // glyph-table indices can be read off directly.
    const PRInt32 lcIdx = text[0] - LC_TMPPOS;
    const PRInt32 voIdx = text[1] - VO_TMPPOS;

    if (length == 3)
    {
      const PRInt32 tcIdx = text[2] - TC_TMPPOS;
      text[0] = gUnParkLcGlyphMap[lcIdx] * 6 +
                gUnParkVo2LcMap2[voIdx] + UP_LBASE;
      text[1] = gUnParkVoGlyphMap[voIdx] * 2 + UP_VBASE + 1;
      text[2] = gUnParkTcGlyphMap[tcIdx] * 4 +
                gUnParkVo2TcMap[voIdx] + UP_TBASE;
    }
    else
    {
      text[0] = gUnParkLcGlyphMap[lcIdx] * 6 +
                gUnParkVo2LcMap[voIdx] + UP_LBASE;
      text[1] = gUnParkVoGlyphMap[voIdx] * 2 + UP_VBASE;
    }

    // Xft doesn't like blank glyphs at code points other than listed in
    // the blank glyph list. Replace Lfill glyph code points of UnPark
    // fonts with standard LFILL code point (U+115F).
    if (UP_LBASE <= text[0] && text[0] < UP_LBASE + 6)
      text[0] = LFILL;

    // The same is true of glyph code points corresponding to VFILL
    // in UnBatang-like fonts. VFILL is not only blank but also non-advancing
    // so that we can just skip it.
    if (UP_VBASE <= text[1] && text[1] < UP_VBASE + 2)
    {
      --length;
      if (length == 2)
        text[1] = text[2];
    }

    for (PRInt32 k = 0; k < length; k++)
    {
      aResult[mByteOff++] = PRUint8(text[k] >> 8);
      aResult[mByteOff++] = PRUint8(text[k] & 0xff);
    }

    return rv;
  }

  /* Fallback: the jamo sequence is not convertible to a jamo cluster, so
   * just enumerate stand-alone jamos. Prepend V and T with Lf.
   *
   * XXX: It might be better to search for a sub-sequence (not just at the
   * beginning of a cluster but also in the middle or at the end.)
   * that can be rendered as precomposed and render it as such and enumerate
   * jamos in the rest. This approach is useful when a simple Xkb-based input
   * is used.
   */
  for (PRInt32 i = 0; i < length; i++)
  {
    const PRUnichar ext = text[i];
    const PRBool isLfill = (ext - LC_OFFSET == LFILL);
    const PRBool isVfill = (ext - VO_OFFSET == VFILL);

    /* skip Lfill and Vfill if they're not the sole char. in a cluster */
    if (length > 1 && (isLfill || isVfill))
      continue;

    PRUnichar wc = 0;   // first code point to write
    PRUnichar wc2 = 0;  // optional second code point (0 = none)

    if (IS_LC_EXT(ext))
    {
      wc = gUnParkLcGlyphMap[ext - LC_OFFSET - LBASE] * 6 + UP_LBASE;
    }
    else
    {
      /* insert Lfill glyph to advance cursor pos. for V and T */
      // NOTE(review): the value written is LBASE (0x1100), not LFILL
      // (0x115F) as the comment above suggests -- confirm which is intended.
      wc = LBASE;

      /* don't have to draw Vfill. Drawing Lfill is sufficient. */
      if (!isVfill)
      {
        if (IS_VO_EXT(ext))
          wc2 = gUnParkVoGlyphMap[ext - VO_OFFSET - VFILL] * 2 + UP_VBASE;
        else
          wc2 = gUnParkTcGlyphMap[ext - TC_OFFSET - TSTART] * 4 +
                UP_TBASE + 3;
      }
    }

    aResult[mByteOff++] = PRUint8(wc >> 8);
    aResult[mByteOff++] = PRUint8(wc & 0xff);

    if (wc2)
    {
      aResult[mByteOff++] = PRUint8(wc2 >> 8);
      aResult[mByteOff++] = PRUint8(wc2 & 0xff);
    }
  }

  return rv;
}
|
|
|
|
int
|
|
nsUnicodeToJamoTTF::RenderAsPrecompSyllable (PRUnichar* aSrc,
|
|
PRInt32* aSrcLength, char* aResult)
|
|
{
|
|
|
|
int composed = 0;
|
|
|
|
if (*aSrcLength == 3 && IS_SYL_LC(aSrc[0]) && IS_SYL_VO(aSrc[1]) &&
|
|
IS_SYL_TC(aSrc[2]))
|
|
composed = 3;
|
|
else if (*aSrcLength == 2 && IS_SYL_LC(aSrc[0]) && IS_SYL_VO(aSrc[1]))
|
|
composed = 2;
|
|
else
|
|
composed = 0;
|
|
|
|
if (composed)
|
|
{
|
|
PRUnichar wc;
|
|
if (composed == 3)
|
|
wc = SYL_FROM_LVT(aSrc[0], aSrc[1], aSrc[2]);
|
|
else
|
|
wc = SYL_FROM_LVT(aSrc[0], aSrc[1], TBASE);
|
|
aResult[mByteOff++] = PRUint8(wc >> 8);
|
|
aResult[mByteOff++] = PRUint8(wc & 0xff);
|
|
}
|
|
|
|
*aSrcLength -= composed;
|
|
|
|
return composed;
|
|
}
|
|
|
|
/* static */
|
|
PRInt32 JamoNormMapComp (const JamoNormMap& p1, const JamoNormMap& p2)
|
|
{
|
|
if (p1.seq[0] != p2.seq[0])
|
|
return p1.seq[0] - p2.seq[0];
|
|
if (p1.seq[1] != p2.seq[1])
|
|
return p1.seq[1] - p2.seq[1];
|
|
return p1.seq[2] - p2.seq[2];
|
|
}
|
|
|
|
/* static */
|
|
const JamoNormMap* JamoClusterSearch (JamoNormMap aKey,
|
|
const JamoNormMap* aClusters,
|
|
PRInt16 aClustersSize)
|
|
{
|
|
|
|
if (aClustersSize <= 0 || !aClusters)
|
|
{
|
|
NS_WARNING("aClustersSize <= 0 || !aClusters");
|
|
return nsnull;
|
|
}
|
|
|
|
if (aClustersSize < 9)
|
|
{
|
|
PRInt16 i;
|
|
for (i = 0; i < aClustersSize; i++)
|
|
if (JamoNormMapComp (aKey, aClusters[i]) == 0)
|
|
return aClusters + i;
|
|
return nsnull;
|
|
}
|
|
|
|
PRUint16 l = 0, u = aClustersSize - 1;
|
|
PRUint16 h = (l + u) / 2;
|
|
|
|
if (JamoNormMapComp (aKey, aClusters[h]) < 0)
|
|
return JamoClusterSearch(aKey, &(aClusters[l]), h - l);
|
|
else if (JamoNormMapComp (aKey, aClusters[h]) > 0)
|
|
return JamoClusterSearch(aKey, &(aClusters[h + 1]), u - h);
|
|
else
|
|
return aClusters + h;
|
|
|
|
}
|
|
|
|
|
|
/*
|
|
* look up cluster array for all possible matching Jamo sequences
|
|
* in 'aIn' and replace all matching substrings with match->liga in place.
|
|
* returns the difference in aLength between before and after the replacement.
|
|
* XXX : 1. Do we need caching here?
|
|
**/
|
|
|
|
/* static */
|
|
PRInt16 JamoSrchReplace (const JamoNormMap* aClusters,
|
|
PRUint16 aClustersSize, PRUnichar* aIn,
|
|
PRInt32* aLength, PRUint16 aOffset)
|
|
{
|
|
PRInt32 origLen = *aLength;
|
|
|
|
// non-zero third element => clusternLen = 3. otherwise, it's 2.
|
|
PRUint16 clusterLen = aClusters[0].seq[2] ? 3 : 2;
|
|
|
|
PRInt32 start = 0, end;
|
|
|
|
// identify the substring of aIn with values in [aOffset, aOffset + 0x100).
|
|
while (start < origLen && (aIn[start] & 0xff00) != aOffset)
|
|
++start;
|
|
for (end=start; end < origLen && (aIn[end] & 0xff00) == aOffset; ++end);
|
|
|
|
// now process the substring aIn[start] .. aIn[end]
|
|
// we don't need a separate range check here because the one in
|
|
// for-loop is sufficient.
|
|
for (PRInt32 i = start; i <= end - clusterLen; i++)
|
|
{
|
|
const JamoNormMap *match;
|
|
JamoNormMap key;
|
|
|
|
// cluster array is made up of PRUint8's to save memory
|
|
// and we have to subtract aOffset from the input before looking it up.
|
|
key.seq[0] = aIn[i] - aOffset;
|
|
key.seq[1] = aIn[i + 1] - aOffset;
|
|
key.seq[2] = clusterLen == 3 ? (aIn[i + 2] - aOffset) : 0;
|
|
|
|
match = JamoClusterSearch (key, aClusters, aClustersSize);
|
|
|
|
if (match)
|
|
{
|
|
aIn[i] = match->liga + aOffset; // add back aOffset.
|
|
|
|
// move up the 'tail'
|
|
for (PRInt32 j = i + clusterLen ; j < *aLength; j++)
|
|
aIn[j - clusterLen + 1] = aIn[j];
|
|
|
|
end -= (clusterLen - 1);
|
|
*aLength -= (clusterLen - 1);
|
|
}
|
|
}
|
|
|
|
return *aLength - origLen;
|
|
}
|
|
|
|
/* static */
|
|
nsresult ScanDecomposeSyllable(PRUnichar* aIn, PRInt32 *aLength,
|
|
const PRInt32 maxLength)
|
|
{
|
|
nsresult rv = NS_OK;
|
|
|
|
if (!aIn || *aLength < 1 || maxLength < *aLength + 2)
|
|
return NS_ERROR_INVALID_ARG;
|
|
|
|
PRInt32 i = 0;
|
|
while (i < *aLength && !IS_SYL(aIn[i]))
|
|
i++;
|
|
|
|
// Convert a precomposed syllable to an LV or LVT sequence.
|
|
if (i < *aLength && IS_SYL(aIn[i]))
|
|
{
|
|
PRUint16 j = IS_SYL_WITH_TC(aIn[i]) ? 1 : 0;
|
|
aIn[i] -= SBASE;
|
|
memmove(aIn + i + 2 + j, aIn + i + 1, *aLength - i - 1);
|
|
if (j)
|
|
aIn[i + 2] = aIn[i] % TCOUNT + TBASE;
|
|
aIn[i + 1] = (aIn[i] / TCOUNT) % VCOUNT + VBASE;
|
|
aIn[i] = aIn[i] / (TCOUNT * VCOUNT) + LBASE;
|
|
*aLength += 1 + j;
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
/*
|
|
* 1. Normalize (regularize) a jamo sequence to the regular
|
|
* syllable form defined in Unicode 3.2 section 3.11 to the extent
|
|
* that it's useful in rendering by render_func's().
|
|
*
|
|
* 2. Replace a compatibly decomposed Jamo sequence (unicode 2.0
|
|
* definition) with a 'precomposed' Jamo cluster (with codepoint
|
|
* of its own in U+1100 block). For instance, a seq.
|
|
* of U+1100, U+1100 is replaced by U+1101. It actually
|
|
* more than Unicode 2.0 decomposition map suggests.
|
|
* For a Jamo cluster made up of three basic Jamos
|
|
* (e.g. U+1133 : Sios, Piup, Kiyeok), not only
|
|
* a sequence of Sios(U+1109), Piup(U+1107) and
|
|
* Kiyeok(U+1100) but also two more sequences,
|
|
* {U+1132(Sios-Pieup), U+1100(Kiyeok) and {Sios(U+1109),
|
|
* U+111E(Piup-Kiyeok)} are mapped to U+1133.
|
|
*
|
|
* 3. the result is returned in a newly malloced
|
|
* PRUnichar*. Callers have to delete it, which
|
|
* is taken care of by using nsXPIDLString in caller.
|
|
*/
|
|
|
|
/* static */
|
|
nsresult JamoNormalize(const PRUnichar* aInSeq, PRUnichar** aOutSeq,
|
|
PRInt32* aLength)
|
|
{
|
|
if (!aInSeq || !aOutSeq || *aLength <= 0)
|
|
return NS_ERROR_INVALID_ARG;
|
|
|
|
// 4 more slots : 2 for Lf and Vf, 2 for decomposing a modern precomposed
|
|
// syllable into a Jamo sequence of LVT?.
|
|
*aOutSeq = new PRUnichar[*aLength + 4];
|
|
if (!*aOutSeq)
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
memcpy(*aOutSeq, aInSeq, *aLength * sizeof(PRUnichar));
|
|
|
|
nsresult rv = ScanDecomposeSyllable(*aOutSeq, aLength, *aLength + 4);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
// LV or LVT : no need to search for and replace jamo sequences
|
|
if ((*aLength == 2 && IS_LC((*aOutSeq)[0]) && IS_VO((*aOutSeq)[1])) ||
|
|
(*aLength == 3 && IS_LC((*aOutSeq)[0]) && IS_VO((*aOutSeq)[1]) &&
|
|
IS_TC((*aOutSeq)[2])))
|
|
return NS_OK;
|
|
|
|
// remove Lf in LfL sequence that may occur in an interim cluster during
|
|
// a simple Xkb-based input.
|
|
if ((*aOutSeq)[0] == LFILL && *aLength > 1 && IS_LC((*aOutSeq)[1]))
|
|
{
|
|
memmove (*aOutSeq, *aOutSeq + 1, (*aLength - 1) * sizeof(PRUnichar));
|
|
(*aLength)--;
|
|
}
|
|
|
|
if (*aLength > 1)
|
|
{
|
|
JamoSrchReplace (gJamoClustersGroup1,
|
|
sizeof(gJamoClustersGroup1) / sizeof(gJamoClustersGroup1[0]),
|
|
*aOutSeq, aLength, LBASE);
|
|
JamoSrchReplace (gJamoClustersGroup234,
|
|
sizeof(gJamoClustersGroup234) / sizeof(gJamoClustersGroup234[0]),
|
|
*aOutSeq, aLength, LBASE);
|
|
}
|
|
|
|
// prepend a leading V with Lf
|
|
if (IS_VO((*aOutSeq)[0]))
|
|
{
|
|
memmove(*aOutSeq + 1, *aOutSeq, *aLength * sizeof(PRUnichar));
|
|
(*aOutSeq)[0] = LFILL;
|
|
(*aLength)++;
|
|
}
|
|
/* prepend a leading T with LfVf */
|
|
else if (IS_TC((*aOutSeq)[0]))
|
|
{
|
|
memmove (*aOutSeq + 2, *aOutSeq, *aLength * sizeof(PRUnichar));
|
|
(*aOutSeq)[0] = LFILL;
|
|
(*aOutSeq)[1] = VFILL;
|
|
*aLength += 2;
|
|
}
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/* JamosToExtJamos() :
|
|
* 1. shift jamo sequences to three disjoint code blocks in
|
|
* PUA (0xF000 for LC, 0xF1000 for VO, 0xF200 for TC).
|
|
* 2. replace a jamo sequence with a precomposed extended
|
|
* cluster jamo code point in PUA
|
|
* 3. this replacement is done 'in place'
|
|
*/
|
|
|
|
/* static */
|
|
void JamosToExtJamos (PRUnichar* aInSeq, PRInt32* aLength)
|
|
{
|
|
// translate jamo code points to temporary code points in PUA
|
|
for (PRInt32 i = 0; i < *aLength; i++)
|
|
{
|
|
if (IS_LC(aInSeq[i]))
|
|
aInSeq[i] += LC_OFFSET;
|
|
else if (IS_VO(aInSeq[i]))
|
|
aInSeq[i] += VO_OFFSET;
|
|
else if (IS_TC(aInSeq[i]))
|
|
aInSeq[i] += TC_OFFSET;
|
|
}
|
|
|
|
// LV or LVT : no need to search for and replace jamo sequences
|
|
if ((*aLength == 2 && IS_LC_EXT(aInSeq[0]) && IS_VO_EXT(aInSeq[1])) ||
|
|
(*aLength == 3 && IS_LC_EXT(aInSeq[0]) && IS_VO_EXT(aInSeq[1]) &&
|
|
IS_TC_EXT(aInSeq[2])))
|
|
return;
|
|
|
|
// replace a sequence of Jamos with the corresponding precomposed
|
|
// Jamo cluster in PUA
|
|
|
|
JamoSrchReplace (gExtLcClustersGroup1,
|
|
sizeof (gExtLcClustersGroup1) / sizeof (gExtLcClustersGroup1[0]),
|
|
aInSeq, aLength, LC_TMPPOS);
|
|
JamoSrchReplace (gExtLcClustersGroup2,
|
|
sizeof (gExtLcClustersGroup2) / sizeof (gExtLcClustersGroup2[0]),
|
|
aInSeq, aLength, LC_TMPPOS);
|
|
JamoSrchReplace (gExtVoClustersGroup1,
|
|
sizeof (gExtVoClustersGroup1) / sizeof (gExtVoClustersGroup1[0]),
|
|
aInSeq, aLength, VO_TMPPOS);
|
|
JamoSrchReplace (gExtVoClustersGroup2,
|
|
sizeof (gExtVoClustersGroup2) / sizeof (gExtVoClustersGroup2[0]),
|
|
aInSeq, aLength, VO_TMPPOS);
|
|
JamoSrchReplace (gExtTcClustersGroup1,
|
|
sizeof (gExtTcClustersGroup1) / sizeof (gExtTcClustersGroup1[0]),
|
|
aInSeq, aLength, TC_TMPPOS);
|
|
JamoSrchReplace (gExtTcClustersGroup2,
|
|
sizeof (gExtTcClustersGroup2) / sizeof (gExtTcClustersGroup2[0]),
|
|
aInSeq, aLength, TC_TMPPOS);
|
|
return;
|
|
}
|
|
|