Bug 888673 - Update Yarr yet again. r=sstangl

--HG--
extra : rebase_source : 129ba2817818f5c666b9d1bd4402a76999bff895
This commit is contained in:
Till Schneidereit 2013-07-01 23:29:19 +02:00
parent bbcd8a7090
commit 7eff9354c4
7 changed files with 43 additions and 49 deletions

View File

@ -24,12 +24,11 @@ printStatus (summary);
// Character fixtures for the whitespace test: `spaces` and `line_terminators`
// are concatenated into `space_chars`, and `chars` is `space_chars` followed
// by `non_space_chars`.
// NOTE(review): this listing appears to interleave the removed and added lines
// of a diff hunk, so some statements show up twice. The visible difference
// between the two versions is that "\ufeff" is listed in `spaces` in one and in
// `non_space_chars` in the other.
var spaces = [ "\u0009", "\u000b", "\u000c", "\u0020", "\u00a0", "\u1680",
"\u180e", "\u2000", "\u2001", "\u2002", "\u2003", "\u2004",
"\u2005", "\u2006", "\u2007", "\u2008", "\u2009", "\u200a",
"\u202f", "\u205f", "\u3000" ];
"\u202f", "\u205f", "\u3000", "\ufeff" ];
var line_terminators = [ "\u2028", "\u2029", "\u000a", "\u000d" ];
var space_chars = [].concat(spaces, line_terminators);
var non_space_chars = [ "\u200b", "\u200c", "\u200d",
"\ufeff" ];
var non_space_chars = [ "\u200b", "\u200c", "\u200d" ];
var chars = [].concat(space_chars, non_space_chars);
var is_space = [].concat(space_chars.map(function(ch) { return true; }),

View File

@ -1307,7 +1307,7 @@ static const char _spacesData[65536] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -2632,16 +2632,14 @@ static const char _wordcharData[65536] = {
// Allocates a CharacterClass with js_new, appends the single range 0x30-0x39
// (ASCII '0'..'9') to m_ranges, and returns the raw pointer.
// The js_new result is dereferenced without a null check (see the FIXME).
// NOTE(review): the two `characterClass` declarations below look like the
// removed and added versions of one statement from a diff hunk; one passes a
// NULL CharacterClassTable*, the other passes no constructor arguments.
CharacterClass* digitsCreate()
{
// FIXME: bug 574459 -- no NULL check
CharacterClass* characterClass = js_new<CharacterClass>((CharacterClassTable*)NULL);
CharacterClass* characterClass = js_new<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x30, 0x39));
return characterClass;
}
CharacterClass* nondigitsCreate()
{
// FIXME: bug 574459 -- no NULL check
CharacterClass* characterClass = js_new<CharacterClass>((CharacterClassTable*)NULL);
CharacterClass* characterClass = js_new<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x00, 0x2f));
characterClass->m_ranges.append(CharacterRange(0x3a, 0x7f));
characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0xffff));
@ -2650,8 +2648,7 @@ CharacterClass* nondigitsCreate()
CharacterClass* newlineCreate()
{
// FIXME: bug 574459 -- no NULL check
CharacterClass* characterClass = js_new<CharacterClass>((CharacterClassTable*)NULL);
CharacterClass* characterClass = js_new<CharacterClass>();
characterClass->m_matches.append(0x0a);
characterClass->m_matches.append(0x0d);
characterClass->m_matchesUnicode.append(0x2028);
@ -2661,8 +2658,7 @@ CharacterClass* newlineCreate()
CharacterClass* spacesCreate()
{
// FIXME: bug 574459 -- no NULL check
CharacterClass* characterClass = js_new<CharacterClass>(CharacterClassTable::create(_spacesData, false));
CharacterClass* characterClass = js_new<CharacterClass>(_spacesData, false);
characterClass->m_ranges.append(CharacterRange(0x09, 0x0d));
characterClass->m_matches.append(0x20);
characterClass->m_matchesUnicode.append(0x00a0);
@ -2674,13 +2670,13 @@ CharacterClass* spacesCreate()
characterClass->m_matchesUnicode.append(0x202f);
characterClass->m_matchesUnicode.append(0x205f);
characterClass->m_matchesUnicode.append(0x3000);
characterClass->m_matchesUnicode.append(0xfeff);
return characterClass;
}
CharacterClass* nonspacesCreate()
{
// FIXME: bug 574459 -- no NULL check
CharacterClass* characterClass = js_new<CharacterClass>(CharacterClassTable::create(_spacesData, true));
CharacterClass* characterClass = js_new<CharacterClass>(_spacesData, true);
characterClass->m_ranges.append(CharacterRange(0x00, 0x08));
characterClass->m_ranges.append(CharacterRange(0x0e, 0x1f));
characterClass->m_ranges.append(CharacterRange(0x21, 0x7f));
@ -2692,14 +2688,14 @@ CharacterClass* nonspacesCreate()
characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xfefe));
characterClass->m_rangesUnicode.append(CharacterRange(0xff00, 0xffff));
return characterClass;
}
CharacterClass* nonwordcharCreate()
{
// FIXME: bug 574459 -- no NULL check
CharacterClass* characterClass = js_new<CharacterClass>(CharacterClassTable::create(_wordcharData, true));
CharacterClass* characterClass = js_new<CharacterClass>(_wordcharData, true);
characterClass->m_ranges.append(CharacterRange(0x00, 0x2f));
characterClass->m_ranges.append(CharacterRange(0x3a, 0x40));
characterClass->m_ranges.append(CharacterRange(0x5b, 0x5e));
@ -2711,8 +2707,7 @@ CharacterClass* nonwordcharCreate()
CharacterClass* wordcharCreate()
{
// FIXME: bug 574459 -- no NULL check
CharacterClass* characterClass = js_new<CharacterClass>(CharacterClassTable::create(_wordcharData, false));
CharacterClass* characterClass = js_new<CharacterClass>(_wordcharData, false);
characterClass->m_ranges.append(CharacterRange(0x30, 0x39));
characterClass->m_ranges.append(CharacterRange(0x41, 0x5a));
characterClass->m_matches.append(0x5f);

View File

@ -1663,10 +1663,10 @@ public:
#ifndef NDEBUG
// Debug-only helper (the surrounding lines wrap it in #ifndef NDEBUG): logs
// the address of `disjunction`, then the `type` field of every entry in
// disjunction->terms, then a newline.
// NOTE(review): each logging call appears twice, once as dataLog and once as
// dataLogF, which looks like the removed and added sides of a diff hunk
// interleaved in one listing.
void dumpDisjunction(ByteDisjunction* disjunction)
{
dataLog("ByteDisjunction(%p):\n\t", (void *)disjunction);
dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction);
// The loop has no braces, so only the first statement after it is its body.
for (unsigned i = 0; i < disjunction->terms.size(); ++i)
dataLog("{ %d } ", disjunction->terms[i].type);
dataLog("\n");
dataLogF("{ %d } ", disjunction->terms[i].type);
dataLogF("\n");
}
#endif

View File

@ -189,8 +189,8 @@ class YarrGenerator : private MacroAssembler {
void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
{
if (charClass->m_table) {
ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
return;
}
Jump unicodeFail;
@ -766,7 +766,11 @@ class YarrGenerator : private MacroAssembler {
const RegisterID character = regT0;
int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
unsigned ignoreCaseMask = 0;
#if CPU(BIG_ENDIAN)
int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
int allCharacters = ch;
#endif
int numberCharacters;
int startTermPosition = term->inputPosition;
@ -775,7 +779,11 @@ class YarrGenerator : private MacroAssembler {
ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
#if CPU(BIG_ENDIAN)
ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
ignoreCaseMask |= 32;
#endif
for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
PatternTerm* nextTerm = nextOp->m_term;
@ -788,7 +796,11 @@ class YarrGenerator : private MacroAssembler {
nextOp->m_isDeadCode = true;
#if CPU(BIG_ENDIAN)
int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
#else
int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
#endif
UChar currentCharacter = nextTerm->patternCharacter;

View File

@ -183,7 +183,7 @@ public:
CharacterClass* charClass()
{
CharacterClass* characterClass = js_new<CharacterClass>(PassRefPtr<CharacterClassTable>(0));
CharacterClass* characterClass = js_new<CharacterClass>();
characterClass->m_matches.swap(m_matches);
characterClass->m_ranges.swap(m_ranges);

View File

@ -1,7 +1,7 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
*
* Copyright (C) 2009 Apple Inc. All rights reserved.
* Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
*
* Redistribution and use in source and binary forms, with or without
@ -87,40 +87,28 @@ struct CharacterRange {
}
};
// Ref-counted holder that pairs a lookup-table pointer (m_table) with a flag
// saying whether the table is to be read inverted (m_inverted).
// NOTE(review): presumably this struct is the one being removed by the change
// shown here, since the CharacterClass listing that follows has m_table and
// m_tableInverted members of its own -- confirm against the actual diff.
struct CharacterClassTable : RefCounted<CharacterClassTable> {
const char* m_table;
bool m_inverted;
// Factory: allocates a table object with js_new and hands it to adoptRef.
static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
{
return adoptRef(js_new<CharacterClassTable>(table, inverted));
}
// Stores both arguments as given; the table pointer is not copied.
CharacterClassTable(const char* table, bool inverted)
: m_table(table)
, m_inverted(inverted)
{
}
};
// A set of characters described by explicit matches and ranges (with separate
// vectors for the Unicode portion), plus an optional lookup table.
// NOTE(review): this listing appears to interleave the removed and added lines
// of a diff hunk, so constructor headers, initializer lists, and the m_table
// member each show up in two forms (PassRefPtr/RefPtr based and raw
// `const char*` based).
struct CharacterClass {
WTF_MAKE_FAST_ALLOCATED;
public:
// All CharacterClass instances have to have the full set of matches and ranges,
// they may have an optional table for faster lookups (which must match the
// they may have an optional m_table for faster lookups (which must match the
// specified matches and ranges)
CharacterClass(PassRefPtr<CharacterClassTable> table)
: m_table(table)
// NOTE(review): the no-argument constructor initializes only m_table (to 0);
// m_tableInverted is left unset. The one reader visible in this listing checks
// m_table before reading m_tableInverted -- confirm no other reader exists.
CharacterClass()
: m_table(0)
{
}
~CharacterClass()
// Table-backed form: stores the table pointer and the inverted flag as given.
CharacterClass(const char* table, bool inverted)
: m_table(table)
, m_tableInverted(inverted)
{
js_delete(m_table.get());
}
Vector<UChar> m_matches;
Vector<CharacterRange> m_ranges;
Vector<UChar> m_matchesUnicode;
Vector<CharacterRange> m_rangesUnicode;
RefPtr<CharacterClassTable> m_table;
const char* m_table;
bool m_tableInverted;
};
enum QuantifierType {
@ -327,7 +315,7 @@ public:
, m_hasFixedSize(false)
{
}
~PatternDisjunction()
{
deleteAllValues(m_alternatives);

View File

@ -248,7 +248,7 @@ deleteAllValues(Vector<T, N> &v) {
}
static inline void
dataLog(const char *fmt, ...)
dataLogF(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);