Bug 1026438 part 8 - Make irregexp GetCaseIndependentLetters work with Latin1 strings. r=bhackett

This commit is contained in:
Jan de Mooij 2014-06-21 19:56:32 +02:00
parent 2d3dff50ad
commit 64f87ae0f9
2 changed files with 30 additions and 8 deletions

View File

@ -192,11 +192,14 @@ GetCaseIndependentLetters(jschar character,
bool ascii_subject,
jschar *letters)
{
JS_ASSERT(!ascii_subject);
jschar lower = unicode::ToLowerCase(character);
jschar upper = unicode::ToUpperCase(character);
// The standard requires that non-ASCII characters cannot have ASCII
// character codes in their equivalence class.
if (ascii_subject && character > kMaxOneByteCharCode)
return 0;
letters[0] = character;
if (lower != character) {
@ -214,6 +217,23 @@ GetCaseIndependentLetters(jschar character,
return 1;
}
// Maps a character that lies above the one-byte (Latin-1) range to the
// Latin-1 character it is case-equivalent to. Returns 0 when the character
// has no Latin-1 counterpart. The caller must only pass characters greater
// than kMaxOneByteCharCode (asserted below).
static jschar
ConvertNonLatin1ToLatin1(jschar c)
{
    JS_ASSERT(c > kMaxOneByteCharCode);

    // 0x39c and 0x3bc are equivalent characters in Unicode; both are
    // represented in Latin-1 by 0xb5.
    if (c == 0x39c || c == 0x3bc)
        return 0xb5;

    // 0x178 is the uppercase form of the Latin-1 character 0xff, but is
    // itself outside of Latin-1.
    if (c == 0x178)
        return 0xff;

    // No Latin-1 equivalent exists.
    return 0;
}
void
CharacterRange::AddCaseEquivalents(bool is_ascii, CharacterRangeVector *ranges)
{
@ -670,17 +690,14 @@ TextNode::FilterASCII(int depth, bool ignore_case)
// Here, we need to check for characters whose upper and lower cases
// are outside the Latin-1 range.
jschar chars[kEcma262UnCanonicalizeMaxWidth];
size_t length = GetCaseIndependentLetters(c, true, chars);
JS_ASSERT(length <= 1);
if (length == 0) {
jschar converted = ConvertNonLatin1ToLatin1(c);
if (converted == 0) {
// Character is outside Latin-1 completely
return set_replacement(nullptr);
}
// Convert quark to Latin-1 in place.
quarks[j] = chars[0];
quarks[j] = converted;
}
} else {
JS_ASSERT(elm.text_type() == TextElement::CHAR_CLASS);

View File

@ -31,3 +31,8 @@ assertEq(toLatin1("1abcdefghijklm4").search(re), 1);
assertEq("\u12001abcdefghijklm0".search(re), 2);
assertEq(toLatin1("1abcdefghijklm8").search(re), -1);
assertEq("\u12001abcdefghijklm8".search(re), -1);
// If the input is Latin1, case-independent matches should work
// correctly for characters outside Latin1 with Latin1 equivalents.
// Here the pattern uses \u0178, which is outside Latin1 and is the
// uppercase of \xff. With the /i flag it must match the \xff in the
// subject, so the match is expected to start at index 3 (the "bar"
// that follows "foo").
var s = toLatin1("foobar\xff5baz");
assertEq(s.search(/bar\u0178\d/i), 3);