mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Add new bidi control characters defined in Unicode 6.3. Bug 922530, r=jfkthame
This commit is contained in:
parent
b570e91848
commit
74c3067fe1
@ -2155,7 +2155,7 @@ function losslessDecodeURI(aURI) {
|
||||
// except ZWNJ (U+200C) and ZWJ (U+200D) (bug 582186).
|
||||
// This includes all bidirectional formatting characters.
|
||||
// (RFC 3987 sections 3.2 and 4.1 paragraph 6)
|
||||
value = value.replace(/[\u00ad\u034f\u115f-\u1160\u17b4-\u17b5\u180b-\u180d\u200b\u200e-\u200f\u202a-\u202e\u2060-\u206f\u3164\ufe00-\ufe0f\ufeff\uffa0\ufff0-\ufff8]|\ud834[\udd73-\udd7a]|[\udb40-\udb43][\udc00-\udfff]/g,
|
||||
value = value.replace(/[\u00ad\u034f\u061c\u115f-\u1160\u17b4-\u17b5\u180b-\u180d\u200b\u200e-\u200f\u202a-\u202e\u2060-\u206f\u3164\ufe00-\ufe0f\ufeff\uffa0\ufff0-\ufff8]|\ud834[\udd73-\udd7a]|[\udb40-\udb43][\udc00-\udfff]/g,
|
||||
encodeURIComponent);
|
||||
return value;
|
||||
}
|
||||
|
@ -423,7 +423,7 @@ nsTextFragment::UpdateBidiFlag(const PRUnichar* aBuffer, uint32_t aLength)
|
||||
PRUnichar ch2 = *cp++;
|
||||
utf32Char = SURROGATE_TO_UCS4(ch1, ch2);
|
||||
}
|
||||
if (UTF32_CHAR_IS_BIDI(utf32Char) || IS_BIDI_CONTROL_CHAR(utf32Char)) {
|
||||
if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) {
|
||||
mState.mIsBidi = true;
|
||||
break;
|
||||
}
|
||||
|
@ -4177,9 +4177,9 @@ gfxFontGroup::IsInvalidChar(PRUnichar ch)
|
||||
if (ch <= 0x9f) {
|
||||
return true;
|
||||
}
|
||||
return ((ch & 0xFF00) == 0x2000 /* Unicode control character */ &&
|
||||
(ch == 0x200B/*ZWSP*/ || ch == 0x2028/*LSEP*/ || ch == 0x2029/*PSEP*/ ||
|
||||
IS_BIDI_CONTROL_CHAR(ch)));
|
||||
return (((ch & 0xFF00) == 0x2000 /* Unicode control character */ &&
|
||||
(ch == 0x200B/*ZWSP*/ || ch == 0x2028/*LSEP*/ || ch == 0x2029/*PSEP*/)) ||
|
||||
IsBidiControl(ch));
|
||||
}
|
||||
|
||||
bool
|
||||
@ -5343,11 +5343,14 @@ gfxShapedText::SetGlyphs(uint32_t aIndex, CompressedGlyph aGlyph,
|
||||
|
||||
#define ZWNJ 0x200C
|
||||
#define ZWJ 0x200D
|
||||
// U+061C ARABIC LETTER MARK is expected to be added to XIDMOD_DEFAULT_IGNORABLE
|
||||
// in a future Unicode update. Add it manually for now
|
||||
#define ALM 0x061C
|
||||
static inline bool
|
||||
IsDefaultIgnorable(uint32_t aChar)
|
||||
{
|
||||
return GetIdentifierModification(aChar) == XIDMOD_DEFAULT_IGNORABLE ||
|
||||
aChar == ZWNJ || aChar == ZWJ;
|
||||
aChar == ZWNJ || aChar == ZWJ || aChar == ALM;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -82,17 +82,6 @@ nsresult HandleNumbers(PRUnichar* aBuffer, uint32_t aSize, uint32_t aNumFlag)
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
#define LRM_CHAR 0x200e
|
||||
#define LRE_CHAR 0x202a
|
||||
#define RLO_CHAR 0x202e
|
||||
bool IsBidiControl(uint32_t aChar)
|
||||
{
|
||||
// This method is used when stripping Bidi control characters for
|
||||
// display, so it will return TRUE for LRM, RLM, LRE, RLE, PDF, LRO and RLO
|
||||
return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
|
||||
((aChar)&0xfffffe)==LRM_CHAR);
|
||||
}
|
||||
|
||||
bool HasRTLChars(const nsAString& aString)
|
||||
{
|
||||
// This is used to determine whether to enable bidi if a string has
|
||||
|
@ -82,16 +82,28 @@ typedef enum nsCharType nsCharType;
|
||||
|
||||
/**
|
||||
* Given a UTF-32 codepoint,
|
||||
* return true if the codepoint is a Bidi control character (LRE, RLE, PDF, LRO, RLO, LRM, RLM)
|
||||
* return false, otherwise
|
||||
* return true if the codepoint is a Bidi control character (LRM, RLM, ALM;
|
||||
* LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
|
||||
* Return false, otherwise
|
||||
*/
|
||||
bool IsBidiControl(uint32_t aChar);
|
||||
// Code points delimiting the bidi control characters recognised below.
#define LRM_CHAR 0x200e  // LEFT-TO-RIGHT MARK (RLM is LRM_CHAR + 1)
#define LRE_CHAR 0x202a  // first of LRE, RLE, PDF, LRO, RLO
#define RLO_CHAR 0x202e  // last of LRE, RLE, PDF, LRO, RLO
#define LRI_CHAR 0x2066  // first of LRI, RLI, FSI, PDI
#define PDI_CHAR 0x2069  // last of LRI, RLI, FSI, PDI
#define ALM_CHAR 0x061C  // ARABIC LETTER MARK

/**
 * Return true if aChar is a bidi control character:
 *   LRM, RLM, ALM; LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI.
 * Return false for every other value, including values outside the
 * Unicode code point range.
 */
inline bool IsBidiControl(uint32_t aChar) {
  return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
          (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
          (aChar == ALM_CHAR) ||
          // LRM (U+200E) and RLM (U+200F) differ only in bit 0, so clear
          // exactly that bit before comparing. Masking with a narrower
          // constant would also drop the high bits and let values such as
          // 0x0100200E match.
          (aChar & ~uint32_t(1)) == LRM_CHAR);
}
|
||||
|
||||
/**
|
||||
* Given an nsString.
|
||||
* @return true if the string contains right-to-left characters
|
||||
*/
|
||||
bool HasRTLChars(const nsAString& aString);
|
||||
bool HasRTLChars(const nsAString& aString);
|
||||
|
||||
// --------------------------------------------------
|
||||
// IBMBIDI
|
||||
@ -202,8 +214,6 @@ typedef enum nsCharType nsCharType;
|
||||
(c) >= 0x08a0 ) )
|
||||
#define IS_ARABIC_ALPHABETIC(c) (IS_ARABIC_CHAR(c) && \
|
||||
!(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c)))
|
||||
#define IS_BIDI_CONTROL_CHAR(c) (((0x202a <= (c)) && ((c) <= 0x202e)) \
|
||||
|| ((c) == 0x200e) || ((c) == 0x200f))
|
||||
|
||||
/**
|
||||
* The codepoint ranges in the following macros are based on the blocks
|
||||
|
@ -21,11 +21,7 @@ static bool IsDiscardable(PRUnichar ch, uint32_t* aFlags)
|
||||
*aFlags |= nsTextFrameUtils::TEXT_HAS_SHY;
|
||||
return true;
|
||||
}
|
||||
if ((ch & 0xFF00) != 0x2000) {
|
||||
// Not a Bidi control character
|
||||
return false;
|
||||
}
|
||||
return IS_BIDI_CONTROL_CHAR(ch);
|
||||
return IsBidiControl(ch);
|
||||
}
|
||||
|
||||
static bool IsDiscardable(uint8_t ch, uint32_t* aFlags)
|
||||
|
@ -17,11 +17,6 @@ struct nsStyleText;
|
||||
#define CH_SHY 173
|
||||
#define CH_CJKSP 12288 // U+3000 IDEOGRAPHIC SPACE (CJK Full-Width Space)
|
||||
|
||||
#define CH_LRM 8206 //<!ENTITY lrm CDATA "&#8206;" -- left-to-right mark, U+200E NEW RFC 2070 -->
|
||||
#define CH_RLM 8207 //<!ENTITY rlm CDATA "&#8207;" -- right-to-left mark, U+200F NEW RFC 2070 -->
|
||||
#define CH_LRE 8234 //<!CDATA "&#8234;" -- left-to-right embedding, U+202A -->
|
||||
#define CH_RLO 8238 //<!CDATA "&#8238;" -- right-to-left override, U+202E -->
|
||||
|
||||
class nsTextFrameUtils {
|
||||
public:
|
||||
// These constants are used as textrun flags for textframe textruns.
|
||||
|
13
layout/reftests/bidi/922530-1-ref.html
Normal file
13
layout/reftests/bidi/922530-1-ref.html
Normal file
@ -0,0 +1,13 @@
|
||||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>ALM character</title>
|
||||
<style type="text/css">
|
||||
p { font-size: 2em; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<p>a2+1</p>
|
||||
</body>
|
||||
</html>
|
25
layout/reftests/bidi/922530-1.html
Normal file
25
layout/reftests/bidi/922530-1.html
Normal file
@ -0,0 +1,25 @@
|
||||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>ALM character</title>
|
||||
<style type="text/css">
|
||||
p { font-size: 2em; }
|
||||
</style>
|
||||
<!--
|
||||
Test that U+061C functions as an invisible Arabic character.
|
||||
|
||||
In the test text it is followed by two European numbers with a
|
||||
European separator between them. By rules W2, W3 and W6 of the
|
||||
Bidi algorithm (http://unicode.org/reports/tr9/#W2) the
|
||||
European numbers change to Arabic numbers, the ARABIC LETTER
|
||||
MARK changes to R and the separator changes to Other Neutral,
|
||||
and then by rule N1 the Arabic numbers make the separator
|
||||
change to R, so the final visual order is "a2+1؜", but
|
||||
the ARABIC LETTER MARK (U+061C) itself should not appear.
|
||||
-->
|
||||
</head>
|
||||
<body>
|
||||
<p>a؜1+2</p>
|
||||
</body>
|
||||
</html>
|
@ -138,3 +138,4 @@ skip-if(B2G) == 726420-1.html 726420-1-ref.html
|
||||
== 746987-4.html 746987-4-ref.html
|
||||
== 779003-1.html 779003-1-ref.html
|
||||
== 779003-1-dynamic.html 779003-1-ref.html
|
||||
== 922530-1.html 922530-1-ref.html
|
||||
|
@ -12,6 +12,9 @@ const testcases = [
|
||||
// non-XID character
|
||||
["I♥NY", "xn--iny-zx5a", false, false, false],
|
||||
|
||||
// new non-XID character in Unicode 6.3
|
||||
["حلا\u061cل", "xn--bgbvr6gc", false, false, false],
|
||||
|
||||
// U+30FB KATAKANA MIDDLE DOT is excluded from non-XID characters (bug 857490)
|
||||
["乾燥肌・石けん", "xn--08j4gylj12hz80b0uhfup", false, true, true],
|
||||
|
||||
|
@ -1127,11 +1127,18 @@ nsExternalAppHandler::nsExternalAppHandler(nsIMIMEInfo * aMIMEInfo,
|
||||
|
||||
// Remove unsafe bidi characters which might have spoofing implications (bug 511521).
|
||||
const PRUnichar unsafeBidiCharacters[] = {
|
||||
PRUnichar(0x061c), // Arabic Letter Mark
|
||||
PRUnichar(0x200e), // Left-to-Right Mark
|
||||
PRUnichar(0x200f), // Right-to-Left Mark
|
||||
PRUnichar(0x202a), // Left-to-Right Embedding
|
||||
PRUnichar(0x202b), // Right-to-Left Embedding
|
||||
PRUnichar(0x202c), // Pop Directional Formatting
|
||||
PRUnichar(0x202d), // Left-to-Right Override
|
||||
PRUnichar(0x202e) // Right-to-Left Override
|
||||
PRUnichar(0x202e), // Right-to-Left Override
|
||||
PRUnichar(0x2066), // Left-to-Right Isolate
|
||||
PRUnichar(0x2067), // Right-to-Left Isolate
|
||||
PRUnichar(0x2068), // First Strong Isolate
|
||||
PRUnichar(0x2069) // Pop Directional Isolate
|
||||
};
|
||||
for (uint32_t i = 0; i < ArrayLength(unsafeBidiCharacters); ++i) {
|
||||
mSuggestedFileName.ReplaceChar(unsafeBidiCharacters[i], '_');
|
||||
|
Loading…
Reference in New Issue
Block a user