bug 553981 - handle Hangul Jamo sequences and other special cases when marking clusters. r=karlt a=roc

--HG--
extra : transplant_source : M%C8%2B%D6%B7%CBG%8E%EEE7%AC%97%01%A0%E4%DF%FB%F3%9B
This commit is contained in:
Jonathan Kew 2010-12-06 13:22:24 +00:00
parent dbfee46277
commit 2f1ff76149
7 changed files with 191 additions and 38 deletions

View File

@ -52,6 +52,7 @@
# - Scripts.txt
# - EastAsianWidth.txt
# - BidiMirroring.txt
# - HangulSyllableType.txt
# though this may change if we find a need for additional properties.
#
# The Unicode data files should be together in a single directory.
@ -96,6 +97,7 @@ my @category;
my @combining;
my @eaw;
my @mirror;
my @hangul;
for (my $i = 0; $i < 0x110000; ++$i) {
$script[$i] = $scriptCode{"UNKNOWN"};
$category[$i] = $catCode{"UNASSIGNED"};
@ -162,15 +164,10 @@ while (<FH>) {
close FH;
# read Scripts.txt
my %scriptAliases = (
'MEETEI_MAYEK' => 'MEITEI_MAYEK'
);
open FH, "< $ARGV[1]/Scripts.txt" or die "can't open UCD file Scripts.txt\n";
while (<FH>) {
if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+([^ ]+)/) {
my $script = uc($3);
$script = $scriptAliases{$script} if exists $scriptAliases{$script};
warn "unknown script $script" unless exists $scriptCode{$script};
$script = $scriptCode{$script};
my $start = hex "0x$1";
@ -226,6 +223,30 @@ while (<FH>) {
}
close FH;
# Read HangulSyllableType.txt and record, for every code point it lists, a
# small bit mask describing its Hangul syllable type in @hangul.
#
# The mask values are chosen so that the composite types are the bitwise OR
# of their parts (LV == L|V, LVT == L|V|T); they mirror the HSType enum in
# gfxUnicodeProperties.h, which consumes the generated table.
my %hangulType = (
    'L'   => 0x01,
    'V'   => 0x02,
    'T'   => 0x04,
    'LV'  => 0x03,
    'LVT' => 0x07
);
my $hangulPath = "$ARGV[1]/HangulSyllableType.txt";
open my $hangulFH, '<', $hangulPath
    or die "can't open UCD file HangulSyllableType.txt: $!\n";
while (my $line = <$hangulFH>) {
    # Everything after '#' is commentary in the UCD data files.
    $line =~ s/#.*//;

    # A data line is "XXXX ; TYPE" or "XXXX..YYYY ; TYPE".  The type is
    # captured with \w+ so that a trailing newline (on a line that carries
    # no comment) never becomes part of the type name.
    next unless $line =~ m/^\s*([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?\s*;\s*(\w+)/;
    my ($first, $last, $typeName) = ($1, $2, uc($3));

    # Skip types we have no mask for, rather than storing undef in @hangul.
    unless (exists $hangulType{$typeName}) {
        warn "unknown Hangul syllable type '$typeName'";
        next;
    }

    my $mask  = $hangulType{$typeName};
    my $start = hex $first;
    my $end   = (defined $last) ? hex $last : $start;
    for (my $i = $start; $i <= $end; ++$i) {
        $hangul[$i] = $mask;
    }
}
close $hangulFH;
my $timestamp = gmtime();
print <<__END;
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
@ -314,6 +335,13 @@ sub sprintCatEAW
&genTables("CatEAW", "struct {\n unsigned int mEAW:3;\n unsigned int mCategory:5;\n}",
9, 7, \&sprintCatEAW, 1);
# Formatter callback passed to genTables for the "Hangul" table.
#
# Takes a Unicode scalar value and returns the corresponding @hangul entry
# formatted as a decimal number followed by a comma.  Code points that have
# no entry in @hangul are emitted as 0 explicitly, instead of relying on
# sprintf's implicit undef-to-0 coercion (which warns when warnings are on).
sub sprintHangulType
{
    my ($usv) = @_;
    my $type = $hangul[$usv];
    $type = 0 unless defined $type;
    return sprintf("%d,", $type);
}
&genTables("Hangul", "PRUint8", 10, 6, \&sprintHangulType, 0);
sub genTables
{
my ($prefix, $type, $indexBits, $charBits, $func, $smp) = @_;

View File

@ -2354,6 +2354,10 @@ gfxFontGroup::InitTextRun(gfxContext *aContext,
InitTextRun(aContext, aTextRun, aString, aLength,
runStart, runLimit, runScript);
}
// Is this actually necessary? Without it, gfxTextRun::CopyGlyphDataFrom may assert
// "Glyphruns not coalesced", but does that matter?
aTextRun->SortGlyphRuns();
}
void
@ -2415,10 +2419,6 @@ gfxFontGroup::InitTextRun(gfxContext *aContext,
// ligatures" with the wrong font.
aTextRun->SanitizeGlyphRuns();
// Is this actually necessary? Without it, gfxTextRun::CopyGlyphDataFrom may assert
// "Glyphruns not coalesced", but does that matter?
aTextRun->SortGlyphRuns();
#ifdef DUMP_TEXT_RUNS
nsCAutoString lang;
style->language->ToUTF8String(lang);
@ -3798,9 +3798,24 @@ gfxTextRun::AddGlyphRun(gfxFont *aFont, PRUint32 aUTF16Offset, PRBool aForceNewR
NS_ASSERTION(lastGlyphRun->mCharacterOffset <= aUTF16Offset,
"Glyph runs out of order (and run not forced)");
// Don't append a run if the font is already the one we want
if (lastGlyphRun->mFont == aFont)
return NS_OK;
// If the offset has not changed, avoid leaving a zero-length run
// by overwriting the last entry instead of appending...
if (lastGlyphRun->mCharacterOffset == aUTF16Offset) {
// ...except that if the run before the last entry had the same
// font as the new one wants, merge with it instead of creating
// adjacent runs with the same font
if (numGlyphRuns > 1 &&
mGlyphRuns[numGlyphRuns - 2].mFont == aFont)
{
mGlyphRuns.TruncateLength(numGlyphRuns - 1);
return NS_OK;
}
lastGlyphRun->mFont = aFont;
return NS_OK;
}

View File

@ -60,6 +60,8 @@
#include "gfxTextRunCache.h"
#include "gfxTextRunWordCache.h"
#include "gfxUserFontSet.h"
#include "gfxUnicodeProperties.h"
#include "harfbuzz/hb-unicode.h"
#include "nsUnicodeRange.h"
#include "nsServiceManagerUtils.h"
@ -1113,21 +1115,6 @@ static void MigratePrefs()
// default SetupClusterBoundaries, based on Unicode properties;
// platform subclasses may override if they wish
// Cached pointer to the Unicode character category service.  It is filled
// in on first use by GetGenCategory() and stays null if the lookup fails.
static nsIUGenCategory* gGenCategory = nsnull;

// Return the cached category service, fetching it via CallGetService on the
// first call (and again on later calls if earlier attempts failed).
// Returns null, after reporting an error, when the service is unavailable.
static nsIUGenCategory*
GetGenCategory()
{
    if (gGenCategory) {
        return gGenCategory;
    }

    if (NS_FAILED(CallGetService(NS_UNICHARCATEGORY_CONTRACTID, &gGenCategory))) {
        NS_ERROR("Failed to get the Unicode character category service!");
        gGenCategory = nsnull;
    }
    return gGenCategory;
}
void
gfxPlatform::SetupClusterBoundaries(gfxTextRun *aTextRun, const PRUnichar *aString)
{
@ -1136,33 +1123,110 @@ gfxPlatform::SetupClusterBoundaries(gfxTextRun *aTextRun, const PRUnichar *aStri
// XXX is this true in all languages???
// behdad: don't think so. Czech for example IIRC has a
// 'ch' grapheme.
// jfkthame: but that's not expected to behave as a grapheme cluster
// for selection/editing/etc.
return;
}
nsIUGenCategory* gc = GetGenCategory();
if (!gc) {
NS_WARNING("No Unicode category service: cannot determine clusters");
return;
}
gfxTextRun::CompressedGlyph extendCluster;
extendCluster.SetComplex(PR_FALSE, PR_TRUE, 0);
PRUint32 i, length = aTextRun->GetLength();
gfxUnicodeProperties::HSType hangulState = gfxUnicodeProperties::HST_NONE;
for (i = 0; i < length; ++i) {
PRBool surrogatePair = PR_FALSE;
PRUint32 ch = aString[i];
if (NS_IS_HIGH_SURROGATE(ch) &&
i < length - 1 && NS_IS_LOW_SURROGATE(aString[i+1])) {
i < length - 1 && NS_IS_LOW_SURROGATE(aString[i+1]))
{
ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
surrogatePair = PR_TRUE;
}
if (i > 0 && gc->Get(ch) == nsIUGenCategory::kMark) {
gfxTextRun::CompressedGlyph g;
aTextRun->SetGlyphs(i, g.SetComplex(PR_FALSE, PR_TRUE, 0), nsnull);
PRUint8 category = gfxUnicodeProperties::GetGeneralCategory(ch);
gfxUnicodeProperties::HSType hangulType = gfxUnicodeProperties::HST_NONE;
// combining marks extend the cluster
if ((category >= HB_CATEGORY_COMBINING_MARK &&
category <= HB_CATEGORY_NON_SPACING_MARK) ||
(ch >= 0x200c && ch <= 0x200d) || // ZWJ, ZWNJ
(ch >= 0xff9e && ch <= 0xff9f)) // katakana sound marks
{
if (i > 0) {
aTextRun->SetGlyphs(i, extendCluster, nsnull);
}
} else if (category == HB_CATEGORY_OTHER_LETTER) {
// handle special cases in Letter_Other category
#if 0
// Currently disabled. This would follow the UAX#29 specification
// for extended grapheme clusters, but this is not favored by
// Thai users, at least for editing behavior.
// See discussion of equivalent Pango issue in bug 474068 and
// upstream at https://bugzilla.gnome.org/show_bug.cgi?id=576156.
if ((ch & ~0xff) == 0x0e00) {
// specific Thai & Lao (U+0Exx) chars that extend the cluster
if ( ch == 0x0e30 ||
(ch >= 0x0e32 && ch <= 0x0e33) ||
ch == 0x0e45 ||
ch == 0x0eb0 ||
(ch >= 0x0eb2 && ch <= 0x0eb3))
{
if (i > 0) {
aTextRun->SetGlyphs(i, extendCluster, nsnull);
}
}
else if ((ch >= 0x0e40 && ch <= 0x0e44) ||
(ch >= 0x0ec0 && ch <= 0x0ec4))
{
// characters that are prepended to the following cluster
if (i < length - 1) {
aTextRun->SetGlyphs(i+1, extendCluster, nsnull);
}
}
} else
#endif
if ((ch & ~0xff) == 0x1100 ||
(ch >= 0xa960 && ch <= 0xa97f) ||
(ch >= 0xac00 && ch <= 0xd7ff))
{
// no break within Hangul syllables
hangulType = gfxUnicodeProperties::GetHangulSyllableType(ch);
switch (hangulType) {
case gfxUnicodeProperties::HST_L:
case gfxUnicodeProperties::HST_LV:
case gfxUnicodeProperties::HST_LVT:
if (hangulState == gfxUnicodeProperties::HST_L) {
aTextRun->SetGlyphs(i, extendCluster, nsnull);
}
break;
case gfxUnicodeProperties::HST_V:
if ( (hangulState != gfxUnicodeProperties::HST_NONE) &&
!(hangulState & gfxUnicodeProperties::HST_T))
{
aTextRun->SetGlyphs(i, extendCluster, nsnull);
}
break;
case gfxUnicodeProperties::HST_T:
if (hangulState & (gfxUnicodeProperties::HST_V |
gfxUnicodeProperties::HST_T))
{
aTextRun->SetGlyphs(i, extendCluster, nsnull);
}
break;
default:
break;
}
}
}
if (surrogatePair) {
++i;
gfxTextRun::CompressedGlyph g;
aTextRun->SetGlyphs(i, g.SetComplex(PR_FALSE, PR_TRUE, 0), nsnull);
aTextRun->SetGlyphs(i, extendCluster, nsnull);
}
hangulState = hangulType;
}
}

View File

@ -145,6 +145,17 @@ gfxUnicodeProperties::GetScriptCode(PRUint32 aCh)
[aCh & ((1 << kScriptCharBits) - 1)];
}
// Look up the Hangul syllable type of aCh in the generated two-level table:
// sHangulPages selects a row of sHangulValues from the high bits of the code
// point, and the low kHangulCharBits bits select the entry within that row.
// Code points at or above UNICODE_BMP_LIMIT always yield HST_NONE.
gfxUnicodeProperties::HSType
gfxUnicodeProperties::GetHangulSyllableType(PRUint32 aCh)
{
    // all Hangul chars are in plane 0, so the table only covers that plane
    if (aCh >= UNICODE_BMP_LIMIT) {
        return HST_NONE;
    }

    const PRUint32 pageIndex = aCh >> kHangulCharBits;
    const PRUint32 charIndex = aCh & ((1 << kHangulCharBits) - 1);
    return HSType(sHangulValues[sHangulPages[0][pageIndex]][charIndex]);
}
// TODO: replace this with a properties file or similar;
// expect this to evolve as harfbuzz shaping support matures.
//

View File

@ -53,6 +53,17 @@ public:
static PRInt32 GetScriptCode(PRUint32 aCh);
// Hangul syllable types, encoded as bit flags: L, V and T each occupy one
// bit, and the composite types are the OR of their components, so callers
// can test for a component with a simple mask.
enum HSType {
    HST_NONE = 0x00,
    HST_L    = 0x01,
    HST_V    = 0x02,
    HST_T    = 0x04,
    HST_LV   = HST_L | HST_V,           // 0x03
    HST_LVT  = HST_L | HST_V | HST_T    // 0x07
};
static HSType GetHangulSyllableType(PRUint32 aCh);
static PRInt32 ScriptShapingLevel(PRInt32 aScriptCode);
};

View File

@ -40,7 +40,7 @@
* ***** END LICENSE BLOCK ***** */
/*
* Created on Sun Oct 31 13:39:09 2010.
* Created on Mon Nov 29 21:49:02 2010.
*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/
@ -656,6 +656,30 @@ static const struct {
{{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,3},{0,2},{0,2}}
};
#define kHangulIndexBits 10
#define kHangulCharBits 6
static const PRUint8 sHangulPages[1][1024] = {
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,12,6,7,8,9,10,11,13,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
};
static const PRUint8 sHangulValues[15][64] = {
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2},
{2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4},
{4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4},
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0},
{3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7},
{7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7},
{7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7},
{7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7},
{7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7},
{7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7},
{7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7},
{7,7,7,7,7,7,7,7,3,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2},
{2,2,2,2,2,2,2,0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,0}
};
/*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/

View File

@ -35,8 +35,8 @@ random-if(d2d) == dynamic-2.html dynamic-2-ref.html
!= 229764-1.html 229764-ref.html
== 229764-2.html 229764-ref.html
== 329069-1.html 329069-1-ref.html
fails-if(!gtk2Widget) == 329069-2.html 329069-2-ref.html # bug 553981
fails-if(!gtk2Widget) == 329069-3.html 329069-3-ref.html # bug 614468
fails-if(winWidget) == 329069-2.html 329069-2-ref.html # bug 553981
fails-if(winWidget&&!d2d) == 329069-3.html 329069-3-ref.html # bug 615445
== 342120-1.xhtml 342120-1-ref.xhtml
== 379799-1.html 379799-1-ref.html
== 399941-1.html 399941-1-ref.html