mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
bug 763703 - optimize Unicode property lookup and gfxScriptItemizer::Next. r=smontagu
This commit is contained in:
parent
d0c563d64a
commit
368eaccf10
@ -48,13 +48,10 @@
|
||||
*/
|
||||
|
||||
#include "gfxScriptItemizer.h"
|
||||
#include "gfxFontUtils.h" // for the FindHighestBit function
|
||||
#include "nsUnicodeProperties.h"
|
||||
|
||||
#include "nsCharTraits.h"
|
||||
|
||||
#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
|
||||
|
||||
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
|
||||
#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
|
||||
#define INC(sp,count) (MOD((sp) + (count)))
|
||||
@ -66,61 +63,14 @@
|
||||
#define TOP() (parenStack[parenSP])
|
||||
#define SYNC_FIXUP() (fixupCount = 0)
|
||||
|
||||
|
||||
static const PRUint16 pairedChars[] = {
|
||||
0x0028, 0x0029, /* ascii paired punctuation */
|
||||
0x003c, 0x003e,
|
||||
0x005b, 0x005d,
|
||||
0x007b, 0x007d,
|
||||
0x00ab, 0x00bb, /* guillemets */
|
||||
0x2018, 0x2019, /* general punctuation */
|
||||
0x201c, 0x201d,
|
||||
0x2039, 0x203a,
|
||||
0x207d, 0x207e, /* superscripts and subscripts */
|
||||
0x208d, 0x208e,
|
||||
0x275b, 0x275c, /* dingbat quotes and brackets */
|
||||
0x275d, 0x275e,
|
||||
0x2768, 0x2769,
|
||||
0x276a, 0x276b,
|
||||
0x276c, 0x276d,
|
||||
0x276e, 0x276f,
|
||||
0x2770, 0x2771,
|
||||
0x2772, 0x2773,
|
||||
0x2774, 0x2775,
|
||||
/* omitted: lots of potentially-paired math symbols */
|
||||
0x2e22, 0x2e23, /* supplemental punctuation */
|
||||
0x2e24, 0x2e25,
|
||||
0x2e26, 0x2e27,
|
||||
0x2e28, 0x2e29,
|
||||
0x3008, 0x3009, /* chinese paired punctuation */
|
||||
0x300a, 0x300b,
|
||||
0x300c, 0x300d,
|
||||
0x300e, 0x300f,
|
||||
0x3010, 0x3011,
|
||||
0x3014, 0x3015,
|
||||
0x3016, 0x3017,
|
||||
0x3018, 0x3019,
|
||||
0x301a, 0x301b,
|
||||
0xfe59, 0xfe5a, /* small form variants */
|
||||
0xfe5b, 0xfe5c,
|
||||
0xfe5d, 0xfe5e,
|
||||
0xfe64, 0xfe65,
|
||||
0xff08, 0xff09, /* half-width and full-width forms */
|
||||
0xff1c, 0xff1e,
|
||||
0xff3b, 0xff3d,
|
||||
0xff5b, 0xff5d,
|
||||
0xff5f, 0xff60,
|
||||
0xff62, 0xff63
|
||||
};
|
||||
|
||||
void
|
||||
gfxScriptItemizer::push(PRInt32 pairIndex, PRInt32 scriptCode)
|
||||
gfxScriptItemizer::push(PRUint32 endPairChar, PRInt32 scriptCode)
|
||||
{
|
||||
pushCount = LIMIT_INC(pushCount);
|
||||
fixupCount = LIMIT_INC(fixupCount);
|
||||
|
||||
parenSP = INC1(parenSP);
|
||||
parenStack[parenSP].pairIndex = pairIndex;
|
||||
parenStack[parenSP].endPairChar = endPairChar;
|
||||
parenStack[parenSP].scriptCode = scriptCode;
|
||||
}
|
||||
|
||||
@ -157,43 +107,23 @@ gfxScriptItemizer::fixup(PRInt32 scriptCode)
|
||||
}
|
||||
}
|
||||
|
||||
static PRInt32
|
||||
getPairIndex(PRUint32 ch)
|
||||
{
|
||||
PRInt32 pairedCharCount = ARRAY_SIZE(pairedChars);
|
||||
PRInt32 pairedCharPower = mozilla::FindHighestBit(pairedCharCount);
|
||||
PRInt32 pairedCharExtra = pairedCharCount - pairedCharPower;
|
||||
|
||||
PRInt32 probe = pairedCharPower;
|
||||
PRInt32 pairIndex = 0;
|
||||
|
||||
if (ch >= pairedChars[pairedCharExtra]) {
|
||||
pairIndex = pairedCharExtra;
|
||||
}
|
||||
|
||||
while (probe > 1) {
|
||||
probe >>= 1;
|
||||
|
||||
if (ch >= pairedChars[pairIndex + probe]) {
|
||||
pairIndex += probe;
|
||||
}
|
||||
}
|
||||
|
||||
if (pairedChars[pairIndex] != ch) {
|
||||
pairIndex = -1;
|
||||
}
|
||||
|
||||
return pairIndex;
|
||||
}
|
||||
|
||||
static bool
|
||||
sameScript(PRInt32 runScript, PRInt32 currCharScript)
|
||||
static inline bool
|
||||
SameScript(PRInt32 runScript, PRInt32 currCharScript)
|
||||
{
|
||||
return runScript <= MOZ_SCRIPT_INHERITED ||
|
||||
currCharScript <= MOZ_SCRIPT_INHERITED ||
|
||||
currCharScript == runScript;
|
||||
}
|
||||
|
||||
// Return whether the char has a mirrored-pair counterpart.
|
||||
// NOTE that this depends on the implementation of nsCharProps records in
|
||||
// nsUnicodeProperties, and may need to be updated if those structures change
|
||||
static inline bool
|
||||
HasMirroredChar(PRUint32 aCh)
|
||||
{
|
||||
return GetCharProps1(aCh).mMirrorOffsetIndex != 0;
|
||||
}
|
||||
|
||||
gfxScriptItemizer::gfxScriptItemizer(const PRUnichar *src, PRUint32 length)
|
||||
: textPtr(src), textLength(length)
|
||||
{
|
||||
@ -224,63 +154,64 @@ gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit,
|
||||
for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
|
||||
PRUint32 ch;
|
||||
PRInt32 sc;
|
||||
PRInt32 pairIndex;
|
||||
PRUint32 startOfChar = scriptLimit;
|
||||
|
||||
ch = textPtr[scriptLimit];
|
||||
|
||||
/*
|
||||
* MODIFICATION for Gecko - clear the paired-character stack
|
||||
* when we see a space character, because we cannot trust
|
||||
* context outside the current "word" when doing textrun
|
||||
* construction
|
||||
*/
|
||||
if (ch == 0x20) {
|
||||
while (STACK_IS_NOT_EMPTY()) {
|
||||
pop();
|
||||
}
|
||||
sc = MOZ_SCRIPT_COMMON;
|
||||
pairIndex = -1;
|
||||
} else {
|
||||
/* decode UTF-16 (may be surrogate pair) */
|
||||
if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
|
||||
PRUint32 low = textPtr[scriptLimit + 1];
|
||||
if (NS_IS_LOW_SURROGATE(low)) {
|
||||
ch = SURROGATE_TO_UCS4(ch, low);
|
||||
scriptLimit += 1;
|
||||
}
|
||||
/* decode UTF-16 (may be surrogate pair) */
|
||||
if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
|
||||
PRUint32 low = textPtr[scriptLimit + 1];
|
||||
if (NS_IS_LOW_SURROGATE(low)) {
|
||||
ch = SURROGATE_TO_UCS4(ch, low);
|
||||
scriptLimit += 1;
|
||||
}
|
||||
}
|
||||
|
||||
sc = mozilla::unicode::GetScriptCode(ch);
|
||||
// Get the nsCharProps2 record for the current character,
|
||||
// so we can read the script and (if needed) the gen category
|
||||
// without needing to do two multi-level lookups.
|
||||
// NOTE that this means we're relying on an implementation detail
|
||||
// of the nsUnicodeProperties tables, and might have to revise this
|
||||
// if the nsCharProps records used there are modified in future.
|
||||
const nsCharProps2& charProps = GetCharProps2(ch);
|
||||
|
||||
pairIndex = getPairIndex(ch);
|
||||
// Initialize gc to UNASSIGNED; we'll only set it to the true GC
|
||||
// if the character has script=COMMON, otherwise we don't care.
|
||||
PRUint8 gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
||||
|
||||
sc = charProps.mScriptCode;
|
||||
if (sc == MOZ_SCRIPT_COMMON) {
|
||||
/*
|
||||
* Paired character handling:
|
||||
*
|
||||
* if it's an open character, push it onto the stack.
|
||||
* if it's a close character, find the matching open on the
|
||||
* stack, and use that script code. Any non-matching open
|
||||
* characters above it on the stack will be poped.
|
||||
* characters above it on the stack will be popped.
|
||||
*
|
||||
* We only do this if the script is COMMON; for chars with
|
||||
* specific script assignments, we just use them as-is.
|
||||
*/
|
||||
if (pairIndex >= 0) {
|
||||
if ((pairIndex & 1) == 0) {
|
||||
push(pairIndex, scriptCode);
|
||||
} else {
|
||||
PRInt32 pi = pairIndex & ~1;
|
||||
gc = charProps.mCategory;
|
||||
if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
|
||||
PRUint32 endPairChar = mozilla::unicode::GetMirroredChar(ch);
|
||||
if (endPairChar != ch) {
|
||||
push(endPairChar, scriptCode);
|
||||
}
|
||||
} else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
|
||||
HasMirroredChar(ch))
|
||||
{
|
||||
while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
|
||||
pop();
|
||||
}
|
||||
|
||||
while (STACK_IS_NOT_EMPTY() && TOP().pairIndex != pi) {
|
||||
pop();
|
||||
}
|
||||
|
||||
if (STACK_IS_NOT_EMPTY()) {
|
||||
sc = TOP().scriptCode;
|
||||
}
|
||||
if (STACK_IS_NOT_EMPTY()) {
|
||||
sc = TOP().scriptCode;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sameScript(scriptCode, sc)) {
|
||||
if (SameScript(scriptCode, sc)) {
|
||||
if (scriptCode <= MOZ_SCRIPT_INHERITED &&
|
||||
sc > MOZ_SCRIPT_INHERITED)
|
||||
{
|
||||
@ -292,7 +223,8 @@ gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit,
|
||||
* if this character is a close paired character,
|
||||
* pop the matching open character from the stack
|
||||
*/
|
||||
if (pairIndex >= 0 && (pairIndex & 1) != 0) {
|
||||
if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
|
||||
HasMirroredChar(ch)) {
|
||||
pop();
|
||||
}
|
||||
} else {
|
||||
|
@ -77,13 +77,13 @@ protected:
|
||||
fixupCount = 0;
|
||||
}
|
||||
|
||||
void push(PRInt32 pairIndex, PRInt32 scriptCode);
|
||||
void push(PRUint32 endPairChar, PRInt32 scriptCode);
|
||||
void pop();
|
||||
void fixup(PRInt32 scriptCode);
|
||||
|
||||
struct ParenStackEntry {
|
||||
PRInt32 pairIndex;
|
||||
PRInt32 scriptCode;
|
||||
PRUint32 endPairChar;
|
||||
PRInt32 scriptCode;
|
||||
};
|
||||
|
||||
const PRUnichar *textPtr;
|
||||
|
@ -622,6 +622,7 @@ $versionInfo
|
||||
|
||||
#ifndef NS_UNICODE_SCRIPT_CODES
|
||||
#define NS_UNICODE_SCRIPT_CODES
|
||||
|
||||
__END
|
||||
|
||||
print DATA_TABLES "static const PRUint32 sScriptCodeToTag[] = {\n";
|
||||
@ -640,12 +641,14 @@ for (my $i = 0; $i < scalar @offsets; ++$i) {
|
||||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
|
||||
print HEADER "#pragma pack(1)\n\n";
|
||||
|
||||
sub sprintCharProps1
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("{%d,%d,%d}, ", $mirror[$usv], $hangul[$usv], $combining[$usv]);
|
||||
}
|
||||
&genTables("CharProp1", "struct nsCharProps1 {\n unsigned char mMirrorOffsetIndex:5;\n unsigned char mHangulType:3;\n unsigned char mCombiningClass:8;\n};",
|
||||
&genTables("CharProp1", "struct nsCharProps1 {\n unsigned char mMirrorOffsetIndex:5;\n unsigned char mHangulType:3;\n unsigned char mCombiningClass:8;\n};",
|
||||
"nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1);
|
||||
|
||||
sub sprintCharProps2
|
||||
@ -658,6 +661,8 @@ sub sprintCharProps2
|
||||
&genTables("CharProp2", "struct nsCharProps2 {\n unsigned char mScriptCode:8;\n unsigned char mEAW:3;\n unsigned char mCategory:5;\n unsigned char mBidiCategory:5;\n unsigned char mXidmod:4;\n signed char mNumericValue:5;\n unsigned char mHanVariant:2;\n};",
|
||||
"nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1);
|
||||
|
||||
print HEADER "#pragma pack()\n\n";
|
||||
|
||||
sub sprintHanVariants
|
||||
{
|
||||
my $baseUsv = shift;
|
||||
|
@ -16,7 +16,7 @@
|
||||
#define UNICODE_LIMIT 0x110000
|
||||
|
||||
|
||||
nsCharProps1
|
||||
const nsCharProps1&
|
||||
GetCharProps1(PRUint32 aCh)
|
||||
{
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
@ -30,13 +30,15 @@ GetCharProps1(PRUint32 aCh)
|
||||
}
|
||||
|
||||
// Default values for unassigned
|
||||
nsCharProps1 undefined = {0, // Index to mirrored char offsets
|
||||
0, // Hangul Syllable type
|
||||
0}; // Combining class
|
||||
static const nsCharProps1 undefined = {
|
||||
0, // Index to mirrored char offsets
|
||||
0, // Hangul Syllable type
|
||||
0 // Combining class
|
||||
};
|
||||
return undefined;
|
||||
}
|
||||
|
||||
nsCharProps2
|
||||
const nsCharProps2&
|
||||
GetCharProps2(PRUint32 aCh)
|
||||
{
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
@ -51,13 +53,14 @@ GetCharProps2(PRUint32 aCh)
|
||||
|
||||
NS_NOTREACHED("Getting CharProps for codepoint outside Unicode range");
|
||||
// Default values for unassigned
|
||||
nsCharProps2 undefined = {
|
||||
static const nsCharProps2 undefined = {
|
||||
MOZ_SCRIPT_UNKNOWN, // Script code
|
||||
0, // East Asian Width
|
||||
HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, // General Category
|
||||
eCharType_LeftToRight, // Bidi Category
|
||||
mozilla::unicode::XIDMOD_NOT_CHARS, // Xidmod
|
||||
-1 // Numeric Value
|
||||
-1, // Numeric Value
|
||||
mozilla::unicode::HVT_NotHan // Han variant
|
||||
};
|
||||
return undefined;
|
||||
}
|
||||
|
@ -11,8 +11,8 @@
|
||||
#include "nsIUGenCategory.h"
|
||||
#include "nsUnicodeScriptCodes.h"
|
||||
|
||||
nsCharProps1 GetCharProps1(PRUint32 aCh);
|
||||
nsCharProps2 GetCharProps2(PRUint32 aCh);
|
||||
const nsCharProps1& GetCharProps1(PRUint32 aCh);
|
||||
const nsCharProps2& GetCharProps2(PRUint32 aCh);
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
|
@ -1,11 +1,17 @@
|
||||
|
||||
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/*
|
||||
* Created on Mon Apr 23 20:03:29 2012 from UCD data files with version info:
|
||||
* Derived from the Unicode Character Database by genUnicodePropertyData.pl
|
||||
*
|
||||
* For Unicode terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
*/
|
||||
|
||||
/*
|
||||
* Created on Mon Jun 11 21:04:54 2012 from UCD data files with version info:
|
||||
*
|
||||
|
||||
# Date: 2012-01-26, 22:03:00 GMT [KW]
|
||||
|
@ -1,11 +1,17 @@
|
||||
|
||||
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/*
|
||||
* Created on Mon Apr 23 20:03:29 2012 from UCD data files with version info:
|
||||
* Derived from the Unicode Character Database by genUnicodePropertyData.pl
|
||||
*
|
||||
* For Unicode terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
*/
|
||||
|
||||
/*
|
||||
* Created on Mon Jun 11 21:04:54 2012 from UCD data files with version info:
|
||||
*
|
||||
|
||||
# Date: 2012-01-26, 22:03:00 GMT [KW]
|
||||
@ -48,8 +54,11 @@ for the Unicode Character Database (UCD) for Unicode 6.1.0.
|
||||
|
||||
#ifndef NS_UNICODE_SCRIPT_CODES
|
||||
#define NS_UNICODE_SCRIPT_CODES
|
||||
|
||||
#pragma pack(1)
|
||||
|
||||
struct nsCharProps1 {
|
||||
unsigned char mMirrorOffsetIndex:5;
|
||||
unsigned char mMirrorOffsetIndex:5;
|
||||
unsigned char mHangulType:3;
|
||||
unsigned char mCombiningClass:8;
|
||||
};
|
||||
@ -64,6 +73,8 @@ struct nsCharProps2 {
|
||||
unsigned char mHanVariant:2;
|
||||
};
|
||||
|
||||
#pragma pack()
|
||||
|
||||
enum {
|
||||
MOZ_SCRIPT_COMMON = 0,
|
||||
MOZ_SCRIPT_INHERITED = 1,
|
||||
|
Loading…
Reference in New Issue
Block a user