Bug 888673 - Update Yarr yet again. r=sstangl

--HG--
extra : rebase_source : 129ba2817818f5c666b9d1bd4402a76999bff895
This commit is contained in:
Till Schneidereit 2013-07-01 23:29:19 +02:00
parent bbcd8a7090
commit 7eff9354c4
7 changed files with 43 additions and 49 deletions

View File

@ -24,12 +24,11 @@ printStatus (summary);
var spaces = [ "\u0009", "\u000b", "\u000c", "\u0020", "\u00a0", "\u1680", var spaces = [ "\u0009", "\u000b", "\u000c", "\u0020", "\u00a0", "\u1680",
"\u180e", "\u2000", "\u2001", "\u2002", "\u2003", "\u2004", "\u180e", "\u2000", "\u2001", "\u2002", "\u2003", "\u2004",
"\u2005", "\u2006", "\u2007", "\u2008", "\u2009", "\u200a", "\u2005", "\u2006", "\u2007", "\u2008", "\u2009", "\u200a",
"\u202f", "\u205f", "\u3000" ]; "\u202f", "\u205f", "\u3000", "\ufeff" ];
var line_terminators = [ "\u2028", "\u2029", "\u000a", "\u000d" ]; var line_terminators = [ "\u2028", "\u2029", "\u000a", "\u000d" ];
var space_chars = [].concat(spaces, line_terminators); var space_chars = [].concat(spaces, line_terminators);
var non_space_chars = [ "\u200b", "\u200c", "\u200d", var non_space_chars = [ "\u200b", "\u200c", "\u200d" ];
"\ufeff" ];
var chars = [].concat(space_chars, non_space_chars); var chars = [].concat(space_chars, non_space_chars);
var is_space = [].concat(space_chars.map(function(ch) { return true; }), var is_space = [].concat(space_chars.map(function(ch) { return true; }),

View File

@ -1307,7 +1307,7 @@ static const char _spacesData[65536] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -2632,16 +2632,14 @@ static const char _wordcharData[65536] = {
CharacterClass* digitsCreate() CharacterClass* digitsCreate()
{ {
// FIXME: bug 574459 -- no NULL check CharacterClass* characterClass = js_new<CharacterClass>();
CharacterClass* characterClass = js_new<CharacterClass>((CharacterClassTable*)NULL);
characterClass->m_ranges.append(CharacterRange(0x30, 0x39)); characterClass->m_ranges.append(CharacterRange(0x30, 0x39));
return characterClass; return characterClass;
} }
CharacterClass* nondigitsCreate() CharacterClass* nondigitsCreate()
{ {
// FIXME: bug 574459 -- no NULL check CharacterClass* characterClass = js_new<CharacterClass>();
CharacterClass* characterClass = js_new<CharacterClass>((CharacterClassTable*)NULL);
characterClass->m_ranges.append(CharacterRange(0x00, 0x2f)); characterClass->m_ranges.append(CharacterRange(0x00, 0x2f));
characterClass->m_ranges.append(CharacterRange(0x3a, 0x7f)); characterClass->m_ranges.append(CharacterRange(0x3a, 0x7f));
characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0xffff)); characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0xffff));
@ -2650,8 +2648,7 @@ CharacterClass* nondigitsCreate()
CharacterClass* newlineCreate() CharacterClass* newlineCreate()
{ {
// FIXME: bug 574459 -- no NULL check CharacterClass* characterClass = js_new<CharacterClass>();
CharacterClass* characterClass = js_new<CharacterClass>((CharacterClassTable*)NULL);
characterClass->m_matches.append(0x0a); characterClass->m_matches.append(0x0a);
characterClass->m_matches.append(0x0d); characterClass->m_matches.append(0x0d);
characterClass->m_matchesUnicode.append(0x2028); characterClass->m_matchesUnicode.append(0x2028);
@ -2661,8 +2658,7 @@ CharacterClass* newlineCreate()
CharacterClass* spacesCreate() CharacterClass* spacesCreate()
{ {
// FIXME: bug 574459 -- no NULL check CharacterClass* characterClass = js_new<CharacterClass>(_spacesData, false);
CharacterClass* characterClass = js_new<CharacterClass>(CharacterClassTable::create(_spacesData, false));
characterClass->m_ranges.append(CharacterRange(0x09, 0x0d)); characterClass->m_ranges.append(CharacterRange(0x09, 0x0d));
characterClass->m_matches.append(0x20); characterClass->m_matches.append(0x20);
characterClass->m_matchesUnicode.append(0x00a0); characterClass->m_matchesUnicode.append(0x00a0);
@ -2674,13 +2670,13 @@ CharacterClass* spacesCreate()
characterClass->m_matchesUnicode.append(0x202f); characterClass->m_matchesUnicode.append(0x202f);
characterClass->m_matchesUnicode.append(0x205f); characterClass->m_matchesUnicode.append(0x205f);
characterClass->m_matchesUnicode.append(0x3000); characterClass->m_matchesUnicode.append(0x3000);
characterClass->m_matchesUnicode.append(0xfeff);
return characterClass; return characterClass;
} }
CharacterClass* nonspacesCreate() CharacterClass* nonspacesCreate()
{ {
// FIXME: bug 574459 -- no NULL check CharacterClass* characterClass = js_new<CharacterClass>(_spacesData, true);
CharacterClass* characterClass = js_new<CharacterClass>(CharacterClassTable::create(_spacesData, true));
characterClass->m_ranges.append(CharacterRange(0x00, 0x08)); characterClass->m_ranges.append(CharacterRange(0x00, 0x08));
characterClass->m_ranges.append(CharacterRange(0x0e, 0x1f)); characterClass->m_ranges.append(CharacterRange(0x0e, 0x1f));
characterClass->m_ranges.append(CharacterRange(0x21, 0x7f)); characterClass->m_ranges.append(CharacterRange(0x21, 0x7f));
@ -2692,14 +2688,14 @@ CharacterClass* nonspacesCreate()
characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e)); characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e)); characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff)); characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff)); characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xfefe));
characterClass->m_rangesUnicode.append(CharacterRange(0xff00, 0xffff));
return characterClass; return characterClass;
} }
CharacterClass* nonwordcharCreate() CharacterClass* nonwordcharCreate()
{ {
// FIXME: bug 574459 -- no NULL check CharacterClass* characterClass = js_new<CharacterClass>(_wordcharData, true);
CharacterClass* characterClass = js_new<CharacterClass>(CharacterClassTable::create(_wordcharData, true));
characterClass->m_ranges.append(CharacterRange(0x00, 0x2f)); characterClass->m_ranges.append(CharacterRange(0x00, 0x2f));
characterClass->m_ranges.append(CharacterRange(0x3a, 0x40)); characterClass->m_ranges.append(CharacterRange(0x3a, 0x40));
characterClass->m_ranges.append(CharacterRange(0x5b, 0x5e)); characterClass->m_ranges.append(CharacterRange(0x5b, 0x5e));
@ -2711,8 +2707,7 @@ CharacterClass* nonwordcharCreate()
CharacterClass* wordcharCreate() CharacterClass* wordcharCreate()
{ {
// FIXME: bug 574459 -- no NULL check CharacterClass* characterClass = js_new<CharacterClass>(_wordcharData, false);
CharacterClass* characterClass = js_new<CharacterClass>(CharacterClassTable::create(_wordcharData, false));
characterClass->m_ranges.append(CharacterRange(0x30, 0x39)); characterClass->m_ranges.append(CharacterRange(0x30, 0x39));
characterClass->m_ranges.append(CharacterRange(0x41, 0x5a)); characterClass->m_ranges.append(CharacterRange(0x41, 0x5a));
characterClass->m_matches.append(0x5f); characterClass->m_matches.append(0x5f);

View File

@ -1663,10 +1663,10 @@ public:
#ifndef NDEBUG #ifndef NDEBUG
void dumpDisjunction(ByteDisjunction* disjunction) void dumpDisjunction(ByteDisjunction* disjunction)
{ {
dataLog("ByteDisjunction(%p):\n\t", (void *)disjunction); dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction);
for (unsigned i = 0; i < disjunction->terms.size(); ++i) for (unsigned i = 0; i < disjunction->terms.size(); ++i)
dataLog("{ %d } ", disjunction->terms[i].type); dataLogF("{ %d } ", disjunction->terms[i].type);
dataLog("\n"); dataLogF("\n");
} }
#endif #endif

View File

@ -189,8 +189,8 @@ class YarrGenerator : private MacroAssembler {
void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
{ {
if (charClass->m_table) { if (charClass->m_table) {
ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table)); ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
return; return;
} }
Jump unicodeFail; Jump unicodeFail;
@ -766,7 +766,11 @@ class YarrGenerator : private MacroAssembler {
const RegisterID character = regT0; const RegisterID character = regT0;
int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
unsigned ignoreCaseMask = 0; unsigned ignoreCaseMask = 0;
#if CPU(BIG_ENDIAN)
int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
int allCharacters = ch; int allCharacters = ch;
#endif
int numberCharacters; int numberCharacters;
int startTermPosition = term->inputPosition; int startTermPosition = term->inputPosition;
@ -775,7 +779,11 @@ class YarrGenerator : private MacroAssembler {
ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
#if CPU(BIG_ENDIAN)
ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
ignoreCaseMask |= 32; ignoreCaseMask |= 32;
#endif
for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) { for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
PatternTerm* nextTerm = nextOp->m_term; PatternTerm* nextTerm = nextOp->m_term;
@ -788,7 +796,11 @@ class YarrGenerator : private MacroAssembler {
nextOp->m_isDeadCode = true; nextOp->m_isDeadCode = true;
#if CPU(BIG_ENDIAN)
int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
#else
int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters; int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
#endif
UChar currentCharacter = nextTerm->patternCharacter; UChar currentCharacter = nextTerm->patternCharacter;

View File

@ -183,7 +183,7 @@ public:
CharacterClass* charClass() CharacterClass* charClass()
{ {
CharacterClass* characterClass = js_new<CharacterClass>(PassRefPtr<CharacterClassTable>(0)); CharacterClass* characterClass = js_new<CharacterClass>();
characterClass->m_matches.swap(m_matches); characterClass->m_matches.swap(m_matches);
characterClass->m_ranges.swap(m_ranges); characterClass->m_ranges.swap(m_ranges);

View File

@ -1,7 +1,7 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99: * vim: set ts=8 sts=4 et sw=4 tw=99:
* *
* Copyright (C) 2009 Apple Inc. All rights reserved. * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -87,40 +87,28 @@ struct CharacterRange {
} }
}; };
struct CharacterClassTable : RefCounted<CharacterClassTable> {
const char* m_table;
bool m_inverted;
static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
{
return adoptRef(js_new<CharacterClassTable>(table, inverted));
}
CharacterClassTable(const char* table, bool inverted)
: m_table(table)
, m_inverted(inverted)
{
}
};
struct CharacterClass { struct CharacterClass {
WTF_MAKE_FAST_ALLOCATED; WTF_MAKE_FAST_ALLOCATED;
public: public:
// All CharacterClass instances have to have the full set of matches and ranges, // All CharacterClass instances have to have the full set of matches and ranges,
// they may have an optional table for faster lookups (which must match the // they may have an optional m_table for faster lookups (which must match the
// specified matches and ranges) // specified matches and ranges)
CharacterClass(PassRefPtr<CharacterClassTable> table) CharacterClass()
: m_table(table) : m_table(0)
{ {
} }
~CharacterClass() CharacterClass(const char* table, bool inverted)
: m_table(table)
, m_tableInverted(inverted)
{ {
js_delete(m_table.get());
} }
Vector<UChar> m_matches; Vector<UChar> m_matches;
Vector<CharacterRange> m_ranges; Vector<CharacterRange> m_ranges;
Vector<UChar> m_matchesUnicode; Vector<UChar> m_matchesUnicode;
Vector<CharacterRange> m_rangesUnicode; Vector<CharacterRange> m_rangesUnicode;
RefPtr<CharacterClassTable> m_table;
const char* m_table;
bool m_tableInverted;
}; };
enum QuantifierType { enum QuantifierType {
@ -327,7 +315,7 @@ public:
, m_hasFixedSize(false) , m_hasFixedSize(false)
{ {
} }
~PatternDisjunction() ~PatternDisjunction()
{ {
deleteAllValues(m_alternatives); deleteAllValues(m_alternatives);

View File

@ -248,7 +248,7 @@ deleteAllValues(Vector<T, N> &v) {
} }
static inline void static inline void
dataLog(const char *fmt, ...) dataLogF(const char *fmt, ...)
{ {
va_list ap; va_list ap;
va_start(ap, fmt); va_start(ap, fmt);