mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 1026438 part 8 - Make irregexp GetCaseIndependentLetters work with Latin1 strings. r=bhackett
This commit is contained in:
parent
2d3dff50ad
commit
64f87ae0f9
@ -192,11 +192,14 @@ GetCaseIndependentLetters(jschar character,
|
||||
bool ascii_subject,
|
||||
jschar *letters)
|
||||
{
|
||||
JS_ASSERT(!ascii_subject);
|
||||
|
||||
jschar lower = unicode::ToLowerCase(character);
|
||||
jschar upper = unicode::ToUpperCase(character);
|
||||
|
||||
// The standard requires that non-ASCII characters cannot have ASCII
|
||||
// character codes in their equivalence class.
|
||||
if (ascii_subject && character > kMaxOneByteCharCode)
|
||||
return 0;
|
||||
|
||||
letters[0] = character;
|
||||
|
||||
if (lower != character) {
|
||||
@ -214,6 +217,23 @@ GetCaseIndependentLetters(jschar character,
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Maps a character that lies outside the Latin-1 range to the Latin-1
// character it is case-equivalent to. Returns 0 when the character has no
// Latin-1 equivalent. The argument must be greater than kMaxOneByteCharCode.
static jschar
ConvertNonLatin1ToLatin1(jschar c)
{
    JS_ASSERT(c > kMaxOneByteCharCode);

    // U+039C and U+03BC are equivalent to U+00B5 in Unicode.
    if (c == 0x39c || c == 0x3bc)
        return 0xb5;

    // U+0178 is the uppercase form of the Latin-1 character U+00FF, but it
    // lies outside Latin-1 itself.
    if (c == 0x178)
        return 0xff;

    // No Latin-1 equivalent exists for this character.
    return 0;
}
|
||||
|
||||
void
|
||||
CharacterRange::AddCaseEquivalents(bool is_ascii, CharacterRangeVector *ranges)
|
||||
{
|
||||
@ -670,17 +690,14 @@ TextNode::FilterASCII(int depth, bool ignore_case)
|
||||
|
||||
// Here, we need to check for characters whose upper and lower cases
|
||||
// are outside the Latin-1 range.
|
||||
jschar chars[kEcma262UnCanonicalizeMaxWidth];
|
||||
size_t length = GetCaseIndependentLetters(c, true, chars);
|
||||
JS_ASSERT(length <= 1);
|
||||
|
||||
if (length == 0) {
|
||||
jschar converted = ConvertNonLatin1ToLatin1(c);
|
||||
if (converted == 0) {
|
||||
// Character is outside Latin-1 completely
|
||||
return set_replacement(nullptr);
|
||||
}
|
||||
|
||||
// Convert quark to Latin-1 in place.
|
||||
quarks[j] = chars[0];
|
||||
quarks[j] = converted;
|
||||
}
|
||||
} else {
|
||||
JS_ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
|
||||
|
@ -31,3 +31,8 @@ assertEq(toLatin1("1abcdefghijklm4").search(re), 1);
|
||||
assertEq("\u12001abcdefghijklm0".search(re), 2);
|
||||
assertEq(toLatin1("1abcdefghijklm8").search(re), -1);
|
||||
assertEq("\u12001abcdefghijklm8".search(re), -1);
|
||||
|
||||
// If the input is Latin1, case-independent matches should work
|
||||
// correctly for characters outside Latin1 with Latin1 equivalents.
|
||||
var s = toLatin1("foobar\xff5baz");
|
||||
assertEq(s.search(/bar\u0178\d/i), 3);
|
||||
|
Loading…
Reference in New Issue
Block a user