mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 684559: bring back PCRE for JavaScript regexps on systems without YARR JIT because it's faster than YARR's interpreter. r=dmandelin
This commit is contained in:
parent
25316d6093
commit
1270c81714
@ -391,22 +391,20 @@ CPPSRCS += checks.cc \
|
||||
# END include sources for V8 dtoa
|
||||
#############################################
|
||||
|
||||
# For architectures without YARR JIT, PCRE is faster than the YARR
|
||||
# interpreter (bug 684559).
|
||||
|
||||
ifeq (,$(filter arm% sparc %86 x86_64,$(TARGET_CPU)))
|
||||
|
||||
VPATH += $(srcdir)/assembler \
|
||||
$(srcdir)/assembler/wtf \
|
||||
$(srcdir)/yarr\
|
||||
VPATH += $(srcdir)/yarr/pcre \
|
||||
$(NULL)
|
||||
|
||||
CPPSRCS += \
|
||||
Assertions.cpp \
|
||||
OSAllocatorOS2.cpp \
|
||||
OSAllocatorPosix.cpp \
|
||||
OSAllocatorWin.cpp \
|
||||
PageBlock.cpp \
|
||||
YarrInterpreter.cpp \
|
||||
YarrPattern.cpp \
|
||||
YarrSyntaxChecker.cpp \
|
||||
pcre_compile.cpp \
|
||||
pcre_exec.cpp \
|
||||
pcre_tables.cpp \
|
||||
pcre_xclass.cpp \
|
||||
pcre_ucp_searchfuncs.cpp \
|
||||
$(NULL)
|
||||
else
|
||||
|
||||
|
@ -194,6 +194,40 @@ js_ObjectIsRegExp(JSObject *obj)
|
||||
* js::RegExp
|
||||
*/
|
||||
|
||||
#if !ENABLE_YARR_JIT
|
||||
void
|
||||
RegExp::reportPCREError(JSContext *cx, int error)
|
||||
{
|
||||
#define REPORT(msg_) \
|
||||
JS_ReportErrorFlagsAndNumberUC(cx, JSREPORT_ERROR, js_GetErrorMessage, NULL, msg_); \
|
||||
return
|
||||
switch (error) {
|
||||
case -2: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
|
||||
case 0: JS_NOT_REACHED("Precondition violation: an error must have occurred.");
|
||||
case 1: REPORT(JSMSG_TRAILING_SLASH);
|
||||
case 2: REPORT(JSMSG_TRAILING_SLASH);
|
||||
case 3: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
|
||||
case 4: REPORT(JSMSG_BAD_QUANTIFIER);
|
||||
case 5: REPORT(JSMSG_BAD_QUANTIFIER);
|
||||
case 6: REPORT(JSMSG_BAD_CLASS_RANGE);
|
||||
case 7: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
|
||||
case 8: REPORT(JSMSG_BAD_CLASS_RANGE);
|
||||
case 9: REPORT(JSMSG_BAD_QUANTIFIER);
|
||||
case 10: REPORT(JSMSG_UNMATCHED_RIGHT_PAREN);
|
||||
case 11: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
|
||||
case 12: REPORT(JSMSG_UNMATCHED_RIGHT_PAREN);
|
||||
case 13: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
|
||||
case 14: REPORT(JSMSG_MISSING_PAREN);
|
||||
case 15: REPORT(JSMSG_BAD_BACKREF);
|
||||
case 16: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
|
||||
case 17: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
|
||||
default:
|
||||
JS_NOT_REACHED("Precondition violation: unknown PCRE error code.");
|
||||
}
|
||||
#undef REPORT
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
RegExp::reportYarrError(JSContext *cx, TokenStream *ts, JSC::Yarr::ErrorCode error)
|
||||
{
|
||||
|
@ -55,6 +55,8 @@
|
||||
#include "yarr/Yarr.h"
|
||||
#if ENABLE_YARR_JIT
|
||||
#include "yarr/YarrJIT.h"
|
||||
#else
|
||||
#include "yarr/pcre/pcre.h"
|
||||
#endif
|
||||
|
||||
namespace js {
|
||||
@ -98,8 +100,10 @@ class RegExp
|
||||
#if ENABLE_YARR_JIT
|
||||
/* native code is valid only if codeBlock.isFallBack() == false */
|
||||
JSC::Yarr::YarrCodeBlock codeBlock;
|
||||
#endif
|
||||
JSC::Yarr::BytecodePattern *byteCode;
|
||||
#else
|
||||
JSRegExp *compiled;
|
||||
#endif
|
||||
JSLinearString *source;
|
||||
size_t refCount;
|
||||
unsigned parenCount; /* Must be |unsigned| to interface with YARR. */
|
||||
@ -115,8 +119,11 @@ class RegExp
|
||||
:
|
||||
#if ENABLE_YARR_JIT
|
||||
codeBlock(),
|
||||
byteCode(NULL),
|
||||
#else
|
||||
compiled(NULL),
|
||||
#endif
|
||||
byteCode(NULL), source(source), refCount(1), parenCount(0), flags(flags)
|
||||
source(source), refCount(1), parenCount(0), flags(flags)
|
||||
#ifdef DEBUG
|
||||
, compartment(compartment)
|
||||
#endif
|
||||
@ -127,14 +134,20 @@ class RegExp
|
||||
~RegExp() {
|
||||
#if ENABLE_YARR_JIT
|
||||
codeBlock.release();
|
||||
#endif
|
||||
if (byteCode)
|
||||
Foreground::delete_<JSC::Yarr::BytecodePattern>(byteCode);
|
||||
#else
|
||||
if (compiled)
|
||||
jsRegExpFree(compiled);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool compileHelper(JSContext *cx, JSLinearString &pattern, TokenStream *ts);
|
||||
bool compile(JSContext *cx, TokenStream *ts);
|
||||
static const uint32 allFlags = JSREG_FOLD | JSREG_GLOB | JSREG_MULTILINE | JSREG_STICKY;
|
||||
#if !ENABLE_YARR_JIT
|
||||
void reportPCREError(JSContext *cx, int error);
|
||||
#endif
|
||||
void reportYarrError(JSContext *cx, TokenStream *ts, JSC::Yarr::ErrorCode error);
|
||||
static inline bool initArena(JSContext *cx);
|
||||
static inline void checkMatchPairs(JSString *input, int *buf, size_t matchItemCount);
|
||||
@ -370,13 +383,20 @@ RegExp::executeInternal(JSContext *cx, RegExpStatics *res, JSString *inputstr,
|
||||
else
|
||||
result = JSC::Yarr::interpret(byteCode, chars, *lastIndex - inputOffset, len, buf);
|
||||
#else
|
||||
result = JSC::Yarr::interpret(byteCode, chars, *lastIndex - inputOffset, len, buf);
|
||||
result = jsRegExpExecute(cx, compiled, chars, len, *lastIndex - inputOffset, buf, bufCount);
|
||||
#endif
|
||||
if (result == -1) {
|
||||
*rval = NullValue();
|
||||
return true;
|
||||
}
|
||||
|
||||
#if !ENABLE_YARR_JIT
|
||||
if (result < 0) {
|
||||
reportPCREError(cx, result);
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Adjust buf for the inputOffset. Use of sticky is rare and the matchItemCount is small, so
|
||||
* just do another pass.
|
||||
@ -472,6 +492,7 @@ EnableYarrJIT(JSContext *cx)
|
||||
inline bool
|
||||
RegExp::compileHelper(JSContext *cx, JSLinearString &pattern, TokenStream *ts)
|
||||
{
|
||||
#if ENABLE_YARR_JIT
|
||||
JSC::Yarr::ErrorCode yarrError;
|
||||
JSC::Yarr::YarrPattern yarrPattern(pattern, ignoreCase(), multiline(), &yarrError);
|
||||
if (yarrError) {
|
||||
@ -480,7 +501,7 @@ RegExp::compileHelper(JSContext *cx, JSLinearString &pattern, TokenStream *ts)
|
||||
}
|
||||
parenCount = yarrPattern.m_numSubpatterns;
|
||||
|
||||
#if ENABLE_YARR_JIT && defined(JS_METHODJIT)
|
||||
#if defined(JS_METHODJIT)
|
||||
if (EnableYarrJIT(cx) && !yarrPattern.m_containsBackreferences) {
|
||||
bool ok = cx->compartment->ensureJaegerCompartmentExists(cx);
|
||||
if (!ok)
|
||||
@ -492,12 +513,21 @@ RegExp::compileHelper(JSContext *cx, JSLinearString &pattern, TokenStream *ts)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ENABLE_YARR_JIT
|
||||
codeBlock.setFallBack(true);
|
||||
#endif
|
||||
byteCode = JSC::Yarr::byteCompile(yarrPattern, cx->compartment->regExpAllocator).get();
|
||||
|
||||
return true;
|
||||
#else
|
||||
int error = 0;
|
||||
compiled = jsRegExpCompile(pattern.chars(), pattern.length(),
|
||||
ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase,
|
||||
multiline() ? JSRegExpMultiline : JSRegExpSingleLine,
|
||||
&parenCount, &error);
|
||||
if (error) {
|
||||
reportPCREError(cx, error);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline bool
|
||||
|
12
js/src/yarr/pcre/AUTHORS
Normal file
12
js/src/yarr/pcre/AUTHORS
Normal file
@ -0,0 +1,12 @@
|
||||
Originally written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge. All rights reserved.
|
||||
|
||||
Adapted for JavaScriptCore and WebKit by Apple Inc.
|
||||
|
||||
Copyright (c) 2005, 2006, 2007 Apple Inc. All rights reserved.
|
35
js/src/yarr/pcre/COPYING
Normal file
35
js/src/yarr/pcre/COPYING
Normal file
@ -0,0 +1,35 @@
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the name of Apple
|
||||
Inc. nor the names of their contributors may be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
96
js/src/yarr/pcre/chartables.c
Normal file
96
js/src/yarr/pcre/chartables.c
Normal file
@ -0,0 +1,96 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file is automatically written by the dftables auxiliary
|
||||
program. If you edit it by hand, you might like to edit the Makefile to
|
||||
prevent its ever being regenerated.
|
||||
|
||||
This file contains the default tables for characters with codes less than
|
||||
128 (ASCII characters). These tables are used when no external tables are
|
||||
passed to PCRE. */
|
||||
|
||||
const unsigned char jsc_pcre_default_tables[480] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
|
||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
|
||||
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
|
||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
||||
0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
|
||||
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
||||
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
||||
0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
|
||||
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
||||
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
||||
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
|
||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
|
||||
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
|
||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
||||
0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
|
||||
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
||||
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
||||
0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
|
||||
0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
|
||||
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
|
||||
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
|
||||
0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
|
||||
|
||||
/* This table contains bit maps for various character classes.
|
||||
Each map is 32 bytes long and the bits run from the least
|
||||
significant end of each byte. The classes are: space, digit, word. */
|
||||
|
||||
0x00, 0x3E, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
|
||||
0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x08 hexadecimal digit
|
||||
0x10 alphanumeric or '_'
|
||||
*/
|
||||
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0- 7 */
|
||||
0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, /* 8- 15 */
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16- 23 */
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24- 31 */
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* - ' */
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ( - / */
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, /* 0 - 7 */
|
||||
0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 - ? */
|
||||
0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x10, /* @ - G */
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* H - O */
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* P - W */
|
||||
0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x10, /* X - _ */
|
||||
0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x10, /* ` - g */
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* h - o */
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* p - w */
|
||||
0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00}; /* x -127 */
|
||||
|
||||
|
||||
/* End of chartables.c */
|
273
js/src/yarr/pcre/dftables
Normal file
273
js/src/yarr/pcre/dftables
Normal file
@ -0,0 +1,273 @@
|
||||
#!/usr/bin/perl -w
|
||||
#
|
||||
# This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
# started out as a copy of PCRE, many of the features of PCRE have been
|
||||
# removed. This library now supports only the regular expression features
|
||||
# required by the JavaScript language specification, and has only the functions
|
||||
# needed by JavaScriptCore and the rest of WebKit.
|
||||
#
|
||||
# Originally written by Philip Hazel
|
||||
# Copyright (c) 1997-2006 University of Cambridge
|
||||
# Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
#
|
||||
# -----------------------------------------------------------------------------
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the name of the University of Cambridge nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# This is a freestanding support program to generate a file containing
|
||||
# character tables. The tables are built according to the default C
|
||||
# locale.
|
||||
|
||||
use strict;
|
||||
|
||||
use File::Basename;
|
||||
use File::Spec;
|
||||
use File::Temp qw(tempfile);
|
||||
use Getopt::Long;
|
||||
|
||||
sub readHeaderValues();
|
||||
|
||||
my %pcre_internal;
|
||||
|
||||
# Command-line handling: require at least one argument, accept an optional
# --preprocessor=program (defaulting to "cpp"), take the first remaining
# argument as the output file, then load the constants from pcre_internal.h.
if (scalar(@ARGV) < 1) {
|
||||
print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
|
||||
exit 1;
|
||||
}
|
||||
|
||||
my $outputFile;
|
||||
my $preprocessor;
|
||||
# GetOptions removes the options it recognises from @ARGV.
GetOptions('preprocessor=s' => \$preprocessor);
|
||||
if (not $preprocessor) {
|
||||
$preprocessor = "cpp";
|
||||
}
|
||||
|
||||
# If only an option was given, nothing is left in @ARGV for the output file.
$outputFile = $ARGV[0];
|
||||
die('Must specify output file.') unless defined($outputFile);
|
||||
|
||||
# Fills %pcre_internal; every table section below reads from it.
readHeaderValues();
|
||||
|
||||
# Open the output file, write the file banner and the opening of the
# jsc_pcre_default_tables array (sized by tables_length), then emit the
# lower-casing table: one entry per code 0..127, eight entries per row.
open(OUT, ">", $outputFile) or die "$!";
|
||||
binmode(OUT);
|
||||
|
||||
printf(OUT
|
||||
"/*************************************************\n" .
|
||||
"* Perl-Compatible Regular Expressions *\n" .
|
||||
"*************************************************/\n\n" .
|
||||
"/* This file is automatically written by the dftables auxiliary \n" .
|
||||
"program. If you edit it by hand, you might like to edit the Makefile to \n" .
|
||||
"prevent its ever being regenerated.\n\n");
|
||||
printf(OUT
|
||||
"This file contains the default tables for characters with codes less than\n" .
|
||||
"128 (ASCII characters). These tables are used when no external tables are\n" .
|
||||
"passed to PCRE. */\n\n" .
|
||||
"const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
|
||||
"/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length});
|
||||
|
||||
# The lower-casing table is written first, so its offset in the header must be 0.
if ($pcre_internal{lcc_offset} != 0) {
|
||||
die "lcc_offset != 0";
|
||||
}
|
||||
|
||||
printf(OUT " ");
|
||||
for (my $i = 0; $i < 128; $i++) {
|
||||
# Start a new row before every eighth entry except the first.
if (($i & 7) == 0 && $i != 0) {
|
||||
printf(OUT "\n ");
|
||||
}
|
||||
printf(OUT "0x%02X", ord(lc(chr($i))));
|
||||
if ($i != 127) {
|
||||
printf(OUT ", ");
|
||||
}
|
||||
}
|
||||
printf(OUT ",\n\n");
|
||||
|
||||
# Emit the case-flipping table: for each code 0..127, lower-case letters map
# to their upper-case form and every other code maps to its lc() form.
printf(OUT "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
# This table follows the 128-entry lower-casing table, hence offset 128.
if ($pcre_internal{fcc_offset} != 128) {
|
||||
die "fcc_offset != 128";
|
||||
}
|
||||
|
||||
printf(OUT " ");
|
||||
for (my $i = 0; $i < 128; $i++) {
|
||||
# Start a new row before every eighth entry except the first.
if (($i & 7) == 0 && $i != 0) {
|
||||
printf(OUT "\n ");
|
||||
}
|
||||
my $c = chr($i);
|
||||
printf(OUT "0x%02X", $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c)));
|
||||
if ($i != 127) {
|
||||
printf(OUT ", ");
|
||||
}
|
||||
}
|
||||
printf(OUT ",\n\n");
|
||||
|
||||
# Build and emit the character-class bitmaps (space, digit, word). Bit
# ($i & 7) of byte (class offset + $i / 8) is set when code $i belongs to
# the class.
printf(OUT
|
||||
"/* This table contains bit maps for various character classes.\n" .
|
||||
"Each map is 32 bytes long and the bits run from the least\n" .
|
||||
"significant end of each byte. The classes are: space, digit, word. */\n\n");
|
||||
|
||||
# The bitmaps follow the 128-entry case-flipping table.
if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) {
|
||||
die "cbits_offset != fcc_offset + 128";
|
||||
}
|
||||
|
||||
my @cbit_table = (0) x $pcre_internal{cbit_length};
|
||||
# Digit class: '0'..'9'. "$i / 8" is not integer division; Perl truncates
# the fractional array subscript to an integer.
for (my $i = ord('0'); $i <= ord('9'); $i++) {
|
||||
$cbit_table[$pcre_internal{cbit_digit} + $i / 8] |= 1 << ($i & 7);
|
||||
}
|
||||
# Word class: '_' is added explicitly; alphanumerics are added in the loop below.
$cbit_table[$pcre_internal{cbit_word} + ord('_') / 8] |= 1 << (ord('_') & 7);
|
||||
for (my $i = 0; $i < 128; $i++) {
|
||||
my $c = chr($i);
|
||||
if ($c =~ /[[:alnum:]]/) {
|
||||
$cbit_table[$pcre_internal{cbit_word} + $i / 8] |= 1 << ($i & 7);
|
||||
}
|
||||
if ($c =~ /[[:space:]]/) {
|
||||
$cbit_table[$pcre_internal{cbit_space} + $i / 8] |= 1 << ($i & 7);
|
||||
}
|
||||
}
|
||||
|
||||
printf(OUT " ");
|
||||
for (my $i = 0; $i < $pcre_internal{cbit_length}; $i++) {
|
||||
# Eight bytes per row, with an extra blank line after every 32 bytes.
if (($i & 7) == 0 && $i != 0) {
|
||||
if (($i & 31) == 0) {
|
||||
printf(OUT "\n");
|
||||
}
|
||||
printf(OUT "\n ");
|
||||
}
|
||||
printf(OUT "0x%02X", $cbit_table[$i]);
|
||||
if ($i != $pcre_internal{cbit_length} - 1) {
|
||||
printf(OUT ", ");
|
||||
}
|
||||
}
|
||||
printf(OUT ",\n\n");
|
||||
|
||||
# Emit the per-character type table: each of the 128 entries is the sum of
# the ctype_space / ctype_xdigit / ctype_word flag values that apply to that
# code. Every row of eight entries ends with a comment naming the range of
# codes it covers, and the last entry closes the array.
printf(OUT
|
||||
"/* This table identifies various classes of character by individual bits:\n" .
|
||||
" 0x%02x white space character\n" .
|
||||
" 0x%02x hexadecimal digit\n" .
|
||||
" 0x%02x alphanumeric or '_'\n*/\n\n",
|
||||
$pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word});
|
||||
|
||||
# The type table follows the class bitmaps.
if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) {
|
||||
die "ctypes_offset != cbits_offset + cbit_length";
|
||||
}
|
||||
|
||||
printf(OUT " ");
|
||||
for (my $i = 0; $i < 128; $i++) {
|
||||
my $x = 0;
|
||||
my $c = chr($i);
|
||||
if ($c =~ /[[:space:]]/) {
|
||||
$x += $pcre_internal{ctype_space};
|
||||
}
|
||||
if ($c =~ /[[:xdigit:]]/) {
|
||||
$x += $pcre_internal{ctype_xdigit};
|
||||
}
|
||||
if ($c =~ /[[:alnum:]_]/) {
|
||||
$x += $pcre_internal{ctype_word};
|
||||
}
|
||||
printf(OUT "0x%02X", $x);
|
||||
if ($i != 127) {
|
||||
printf(OUT ", ");
|
||||
} else {
|
||||
# Last entry: close the array initializer.
printf(OUT "};");
|
||||
}
|
||||
# End of a row: append a comment with the first and last code of the row,
# printed as the character when printable and as a number otherwise.
if (($i & 7) == 7) {
|
||||
printf(OUT " /* ");
|
||||
my $d = chr($i - 7);
|
||||
if ($d =~ /[[:print:]]/) {
|
||||
printf(OUT " %c -", $i - 7);
|
||||
} else {
|
||||
printf(OUT "%3d-", $i - 7);
|
||||
}
|
||||
if ($c =~ m/[[:print:]]/) {
|
||||
printf(OUT " %c ", $i);
|
||||
} else {
|
||||
printf(OUT "%3d", $i);
|
||||
}
|
||||
printf(OUT " */\n");
|
||||
if ($i != 127) {
|
||||
printf(OUT " ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Final consistency check on the total table length, then write the trailer,
# close the output file and exit successfully.
if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
|
||||
die "tables_length != ctypes_offset + 128";
|
||||
}
|
||||
|
||||
printf(OUT "\n\n/* End of chartables.c */\n");
|
||||
|
||||
close(OUT);
|
||||
|
||||
exit 0;
|
||||
|
||||
# Populate %pcre_internal with the table offsets and flag values #defined in
# pcre_internal.h, which is looked up next to this script.
#
# The header is copied into a temporary file behind "#define DFTABLES",
# followed by one Perl assignment per constant name. Running that file through
# $preprocessor expands the names to their values, and the preprocessor output
# is then eval'ed as Perl.
#
# The temporary file is removed on every exit path, including failures.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    # Slurp mode: each read below returns the whole stream at once.
    local $/ = undef;

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    # Read the header before creating the temp file, so a missing header
    # cannot leave a stray file behind.
    open(my $header, "<", $headerPath) or die "$!";
    my $headerText = <$header>;
    close($header);

    my ($fh, $tempFile) = tempfile(
        basename($0) . "-XXXXXXXX",
        DIR => File::Spec->tmpdir(),
        SUFFIX => ".in",
        UNLINK => 0,
    );

    print $fh "#define DFTABLES\n\n";
    print $fh $headerText;
    print $fh "\n\n";

    # One assignment per constant; the preprocessor replaces the bare name on
    # the right-hand side with its value.
    for my $v (@variables) {
        print $fh "\$pcre_internal{\"$v\"} = $v;\n";
    }

    close($fh);

    my $cpp;
    unless (open($cpp, "$preprocessor \"$tempFile\" |")) {
        my $reason = "$!";
        unlink $tempFile;
        die $reason;
    }
    my $content = <$cpp>;
    close($cpp);

    # The preprocessor is done with the file; remove it before the eval so a
    # failure there does not leak it.
    unlink $tempFile;

    eval $content;
    die "$@" if $@;
}
|
76
js/src/yarr/pcre/pcre.h
Normal file
76
js/src/yarr/pcre/pcre.h
Normal file
@ -0,0 +1,76 @@
|
||||
/* This is the public header file for JavaScriptCore's variant of the PCRE
|
||||
library. While this library started out as a copy of PCRE, many of the
|
||||
features of PCRE have been removed. This library now supports only the
|
||||
regular expression features required by the JavaScript language
|
||||
specification, and has only the functions needed by JavaScriptCore and the
|
||||
rest of WebKit.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
// FIXME: This file needs to be renamed to JSRegExp.h; it's no longer PCRE.
|
||||
|
||||
#ifndef JSRegExp_h
|
||||
#define JSRegExp_h
|
||||
|
||||
// Last remnants from the JSWTFBridge.
|
||||
#include "assembler/wtf/Platform.h"
|
||||
#include "jsstr.h"
|
||||
#include "jsprvtd.h"
|
||||
#include "jstl.h"
|
||||
|
||||
typedef jschar UChar;
|
||||
typedef JSLinearString UString;
|
||||
|
||||
struct JSRegExp;
|
||||
struct JSContext;
|
||||
|
||||
enum JSRegExpIgnoreCaseOption { JSRegExpDoNotIgnoreCase, JSRegExpIgnoreCase };
|
||||
enum JSRegExpMultilineOption { JSRegExpSingleLine, JSRegExpMultiline };
|
||||
|
||||
/* jsRegExpExecute error codes: negative return values of jsRegExpExecute.
   NOTE(review): the meanings are taken from the constant names only
   (no match / a limit was hit / internal failure) -- confirm in pcre_exec.cpp. */
|
||||
const int JSRegExpErrorNoMatch = -1;
|
||||
const int JSRegExpErrorHitLimit = -2;
|
||||
const int JSRegExpErrorInternal = -4;
|
||||
|
||||
/* Compiles the pattern given as patternLength UChars starting at pattern.
   The two option enums select case-insensitive and multiline matching.
   numSubpatterns and error are out-parameters.
   NOTE(review): presumably *error is 0 on success and *numSubpatterns
   receives the number of capturing groups; the result is presumably released
   with jsRegExpFree -- confirm against pcre_compile.cpp. */
JSRegExp* jsRegExpCompile(
|
||||
const UChar* pattern, int patternLength,
|
||||
JSRegExpIgnoreCaseOption, JSRegExpMultilineOption,
|
||||
unsigned* numSubpatterns, int *error);
|
||||
|
||||
/* Runs a compiled expression against subjectLength UChars of subject,
   beginning at startOffset. Negative return values are the JSRegExpError*
   codes declared above.
   NOTE(review): a non-negative return presumably indicates a match, with
   match offsets written to offsetsVector (capacity offsetsVectorLength) --
   confirm against pcre_exec.cpp. */
int jsRegExpExecute(JSContext *, const JSRegExp*,
|
||||
const UChar* subject, int subjectLength, int startOffset,
|
||||
int* offsetsVector, int offsetsVectorLength);
|
||||
|
||||
/* NOTE(review): presumably releases a JSRegExp obtained from jsRegExpCompile;
   whether a null pointer is accepted is not visible here -- confirm. */
void jsRegExpFree(JSRegExp*);
|
||||
|
||||
#endif
|
12
js/src/yarr/pcre/pcre.pri
Normal file
12
js/src/yarr/pcre/pcre.pri
Normal file
@ -0,0 +1,12 @@
|
||||
# Perl Compatible Regular Expressions - Qt4 build info
|
||||
VPATH += $$PWD
|
||||
INCLUDEPATH += $$PWD $$OUTPUT_DIR/JavaScriptCore/tmp
|
||||
DEPENDPATH += $$PWD
|
||||
|
||||
SOURCES += \
|
||||
pcre_compile.cpp \
|
||||
pcre_exec.cpp \
|
||||
pcre_tables.cpp \
|
||||
pcre_ucp_searchfuncs.cpp \
|
||||
pcre_xclass.cpp
|
||||
|
2702
js/src/yarr/pcre/pcre_compile.cpp
Normal file
2702
js/src/yarr/pcre/pcre_compile.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2193
js/src/yarr/pcre/pcre_exec.cpp
Normal file
2193
js/src/yarr/pcre/pcre_exec.cpp
Normal file
File diff suppressed because it is too large
Load Diff
434
js/src/yarr/pcre/pcre_internal.h
Normal file
434
js/src/yarr/pcre/pcre_internal.h
Normal file
@ -0,0 +1,434 @@
|
||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This header contains definitions that are shared between the different
|
||||
modules, but which are not relevant to the exported API. This includes some
|
||||
functions whose names all begin with "_pcre_". */
|
||||
|
||||
#ifndef PCRE_INTERNAL_H
|
||||
#define PCRE_INTERNAL_H
|
||||
|
||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
||||
|
||||
#define ctype_space 0x01
|
||||
#define ctype_xdigit 0x08
|
||||
#define ctype_word 0x10 /* alphameric or '_' */
|
||||
|
||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
||||
of bits for a class map. Some classes are built by combining these tables. */
|
||||
|
||||
#define cbit_space 0 /* \s */
|
||||
#define cbit_digit 32 /* \d */
|
||||
#define cbit_word 64 /* \w */
|
||||
#define cbit_length 96 /* Length of the cbits table */
|
||||
|
||||
/* Offsets of the various tables from the base tables pointer, and
|
||||
total length. */
|
||||
|
||||
#define lcc_offset 0
|
||||
#define fcc_offset 128
|
||||
#define cbits_offset 256
|
||||
#define ctypes_offset (cbits_offset + cbit_length)
|
||||
#define tables_length (ctypes_offset + 128)
|
||||
|
||||
#ifndef DFTABLES
|
||||
|
||||
#include "pcre.h"
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links as
|
||||
offsets within the compiled regex. The default is 2, which allows for compiled
|
||||
patterns up to 64K long. */
|
||||
|
||||
#define LINK_SIZE 3
|
||||
|
||||
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
|
||||
inline, and there are *still* stupid compilers about that don't like indented
|
||||
pre-processor statements, or at least there were when I first wrote this. After
|
||||
all, it had only been about 10 years then... */
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DPRINTF(p) /*printf p; fflush(stdout);*/
|
||||
#else
|
||||
#define DPRINTF(p) /*nothing*/
|
||||
#endif
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
|
||||
in big-endian order) by default. These are used, for example, to link from the
|
||||
start of a subpattern to its alternatives and its end. The use of 2 bytes per
|
||||
offset limits the size of the compiled regex to around 64K, which is big enough
|
||||
for almost everybody. However, I received a request for an even bigger limit.
|
||||
For this reason, and also to make the code easier to maintain, the storing and
|
||||
loading of offsets from the byte string is now handled by the functions that are
|
||||
defined here. */
|
||||
|
||||
/* PCRE uses some other 2-byte quantities that do not change when the size of
|
||||
offsets changes. These are used for repeat counts and for other things such as
|
||||
capturing parenthesis numbers in back references. */
|
||||
|
||||
static inline void put2ByteValue(unsigned char* opcodePtr, int value)
|
||||
{
|
||||
JS_ASSERT(value >= 0 && value <= 0xFFFF);
|
||||
opcodePtr[0] = value >> 8;
|
||||
opcodePtr[1] = value;
|
||||
}
|
||||
|
||||
static inline void put3ByteValue(unsigned char* opcodePtr, int value)
|
||||
{
|
||||
JS_ASSERT(value >= 0 && value <= 0xFFFFFF);
|
||||
opcodePtr[0] = value >> 16;
|
||||
opcodePtr[1] = value >> 8;
|
||||
opcodePtr[2] = value;
|
||||
}
|
||||
|
||||
/* Read the big-endian 16-bit quantity stored at opcodePtr. */
static inline int get2ByteValue(const unsigned char* opcodePtr)
{
    const int highByte = opcodePtr[0];
    const int lowByte = opcodePtr[1];
    return (highByte << 8) | lowByte;
}
|
||||
|
||||
/* Read the big-endian 24-bit quantity stored at opcodePtr. */
static inline int get3ByteValue(const unsigned char* opcodePtr)
{
    const int highByte = opcodePtr[0];
    const int middleByte = opcodePtr[1];
    const int lowByte = opcodePtr[2];
    return (highByte << 16) | (middleByte << 8) | lowByte;
}
|
||||
|
||||
static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
|
||||
{
|
||||
put2ByteValue(opcodePtr, value);
|
||||
opcodePtr += 2;
|
||||
}
|
||||
|
||||
static inline void put3ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
|
||||
{
|
||||
put3ByteValue(opcodePtr, value);
|
||||
opcodePtr += 3;
|
||||
}
|
||||
|
||||
/* Store a link offset of LINK_SIZE bytes at opcodePtr. A zero value is
   permitted here; only link sizes of 2 and 3 bytes are supported. */
static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value)
{
#if LINK_SIZE == 2
    put2ByteValue(opcodePtr, value);
#elif LINK_SIZE == 3
    put3ByteValue(opcodePtr, value);
#else
#   error LINK_SIZE not supported.
#endif
}
|
||||
|
||||
/* Load a link offset of LINK_SIZE bytes from opcodePtr. The result may be
   zero; only link sizes of 2 and 3 bytes are supported. */
static inline int getLinkValueAllowZero(const unsigned char* opcodePtr)
{
#if LINK_SIZE == 2
    return get2ByteValue(opcodePtr);
#elif LINK_SIZE == 3
    return get3ByteValue(opcodePtr);
#else
#   error LINK_SIZE not supported.
#endif
}
|
||||
|
||||
/* Upper bound on the compiled pattern size, in bytes. The expansion is
   parenthesized so the macro behaves as a single value inside larger
   expressions (e.g. x / MAX_PATTERN_SIZE or x % MAX_PATTERN_SIZE). */
#define MAX_PATTERN_SIZE (4096 * 1024) // Derived by empirical testing of compile time in PCRE and WREC.
|
||||
JS_STATIC_ASSERT(MAX_PATTERN_SIZE < (1 << (8 * LINK_SIZE)));
|
||||
|
||||
static inline void putLinkValue(unsigned char* opcodePtr, int value)
|
||||
{
|
||||
JS_ASSERT(value);
|
||||
putLinkValueAllowZero(opcodePtr, value);
|
||||
}
|
||||
|
||||
static inline int getLinkValue(const unsigned char* opcodePtr)
|
||||
{
|
||||
int value = getLinkValueAllowZero(opcodePtr);
|
||||
JS_ASSERT(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr, int value)
|
||||
{
|
||||
putLinkValue(opcodePtr, value);
|
||||
opcodePtr += LINK_SIZE;
|
||||
}
|
||||
|
||||
static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, int value)
|
||||
{
|
||||
putLinkValueAllowZero(opcodePtr, value);
|
||||
opcodePtr += LINK_SIZE;
|
||||
}
|
||||
|
||||
// FIXME: These are really more of a "compiled regexp state" than "regexp options"
|
||||
enum RegExpOptions {
|
||||
UseFirstByteOptimizationOption = 0x40000000, /* firstByte is set */
|
||||
UseRequiredByteOptimizationOption = 0x20000000, /* reqByte is set */
|
||||
UseMultiLineFirstByteOptimizationOption = 0x10000000, /* start after \n for multiline */
|
||||
IsAnchoredOption = 0x02000000, /* can't use partial with this regex */
|
||||
IgnoreCaseOption = 0x00000001,
|
||||
MatchAcrossMultipleLinesOption = 0x00000002
|
||||
};
|
||||
|
||||
/* Flags added to firstByte or reqByte; a "non-literal" item is either a
|
||||
variable-length repeat, or anything other than literal characters. */
|
||||
|
||||
#define REQ_IGNORE_CASE 0x0100 /* indicates should ignore case */
|
||||
#define REQ_VARY 0x0200 /* reqByte followed non-literal item */
|
||||
|
||||
/* Miscellaneous definitions */
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain UTF-8 characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns
|
||||
their negation. Also, they must appear in the same order as in the opcode
|
||||
definitions below, up to ESC_w. The final one must be
|
||||
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
||||
tests in the code for an escape > ESC_b and <= ESC_w to
|
||||
detect the types that may be repeated. These are the types that consume
|
||||
characters. If any new escapes are put in between that don't consume a
|
||||
character, that code will have to change. */
|
||||
|
||||
enum { ESC_B = 1, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_REF };
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
Note that whenever this list is updated, the two macro definitions that follow
|
||||
must also be updated to match. */
|
||||
|
||||
#define FOR_EACH_OPCODE(macro) \
|
||||
macro(END) \
|
||||
\
|
||||
, macro(NOT_WORD_BOUNDARY) \
|
||||
, macro(WORD_BOUNDARY) \
|
||||
, macro(NOT_DIGIT) \
|
||||
, macro(DIGIT) \
|
||||
, macro(NOT_WHITESPACE) \
|
||||
, macro(WHITESPACE) \
|
||||
, macro(NOT_WORDCHAR) \
|
||||
, macro(WORDCHAR) \
|
||||
\
|
||||
, macro(NOT_NEWLINE) \
|
||||
\
|
||||
, macro(CIRC) \
|
||||
, macro(DOLL) \
|
||||
, macro(BOL) \
|
||||
, macro(EOL) \
|
||||
, macro(CHAR) \
|
||||
, macro(CHAR_IGNORING_CASE) \
|
||||
, macro(ASCII_CHAR) \
|
||||
, macro(ASCII_LETTER_IGNORING_CASE) \
|
||||
, macro(NOT) \
|
||||
\
|
||||
, macro(STAR) \
|
||||
, macro(MINSTAR) \
|
||||
, macro(PLUS) \
|
||||
, macro(MINPLUS) \
|
||||
, macro(QUERY) \
|
||||
, macro(MINQUERY) \
|
||||
, macro(UPTO) \
|
||||
, macro(MINUPTO) \
|
||||
, macro(EXACT) \
|
||||
\
|
||||
, macro(NOTSTAR) \
|
||||
, macro(NOTMINSTAR) \
|
||||
, macro(NOTPLUS) \
|
||||
, macro(NOTMINPLUS) \
|
||||
, macro(NOTQUERY) \
|
||||
, macro(NOTMINQUERY) \
|
||||
, macro(NOTUPTO) \
|
||||
, macro(NOTMINUPTO) \
|
||||
, macro(NOTEXACT) \
|
||||
\
|
||||
, macro(TYPESTAR) \
|
||||
, macro(TYPEMINSTAR) \
|
||||
, macro(TYPEPLUS) \
|
||||
, macro(TYPEMINPLUS) \
|
||||
, macro(TYPEQUERY) \
|
||||
, macro(TYPEMINQUERY) \
|
||||
, macro(TYPEUPTO) \
|
||||
, macro(TYPEMINUPTO) \
|
||||
, macro(TYPEEXACT) \
|
||||
\
|
||||
, macro(CRSTAR) \
|
||||
, macro(CRMINSTAR) \
|
||||
, macro(CRPLUS) \
|
||||
, macro(CRMINPLUS) \
|
||||
, macro(CRQUERY) \
|
||||
, macro(CRMINQUERY) \
|
||||
, macro(CRRANGE) \
|
||||
, macro(CRMINRANGE) \
|
||||
\
|
||||
, macro(CLASS) \
|
||||
, macro(NCLASS) \
|
||||
, macro(XCLASS) \
|
||||
\
|
||||
, macro(REF) \
|
||||
\
|
||||
, macro(ALT) \
|
||||
, macro(KET) \
|
||||
, macro(KETRMAX) \
|
||||
, macro(KETRMIN) \
|
||||
\
|
||||
, macro(ASSERT) \
|
||||
, macro(ASSERT_NOT) \
|
||||
\
|
||||
, macro(BRAZERO) \
|
||||
, macro(BRAMINZERO) \
|
||||
, macro(BRANUMBER) \
|
||||
, macro(BRA)
|
||||
|
||||
#define OPCODE_ENUM_VALUE(opcode) OP_##opcode
|
||||
enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) };
|
||||
|
||||
/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
|
||||
study.c that all opcodes are less than 128 in value. This makes handling UTF-8
|
||||
character sequences easier. */
|
||||
|
||||
/* The highest extraction number before we have to start using additional
|
||||
bytes. (Originally PCRE didn't have support for extraction counts higher than
|
||||
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
||||
i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
|
||||
opcodes. */
|
||||
|
||||
/* FIXME: Note that OP_BRA + 100 is > 128, so the two comments above
|
||||
are in conflict! */
|
||||
|
||||
#define EXTRACT_BASIC_MAX 100
|
||||
|
||||
/* The code vector runs on as long as necessary after the end. */
|
||||
|
||||
struct JSRegExp {
|
||||
unsigned options;
|
||||
|
||||
unsigned short topBracket;
|
||||
unsigned short topBackref;
|
||||
|
||||
unsigned short firstByte;
|
||||
unsigned short reqByte;
|
||||
};
|
||||
|
||||
/* Internal shared data tables. These are tables that are used by more than one
|
||||
of the exported public functions. They have to be "external" in the C sense,
|
||||
but are not part of the PCRE public API. The data for these tables is in the
|
||||
pcre_tables.cpp module. */
|
||||
|
||||
#define jsc_pcre_utf8_table1_size 6
|
||||
|
||||
extern const int jsc_pcre_utf8_table1[6];
|
||||
extern const int jsc_pcre_utf8_table2[6];
|
||||
extern const int jsc_pcre_utf8_table3[6];
|
||||
extern const unsigned char jsc_pcre_utf8_table4[0x40];
|
||||
|
||||
extern const unsigned char jsc_pcre_default_tables[tables_length];
|
||||
|
||||
static inline unsigned char toLowerCase(unsigned char c)
|
||||
{
|
||||
static const unsigned char* lowerCaseChars = jsc_pcre_default_tables + lcc_offset;
|
||||
return lowerCaseChars[c];
|
||||
}
|
||||
|
||||
static inline unsigned char flipCase(unsigned char c)
|
||||
{
|
||||
static const unsigned char* flippedCaseChars = jsc_pcre_default_tables + fcc_offset;
|
||||
return flippedCaseChars[c];
|
||||
}
|
||||
|
||||
static inline unsigned char classBitmapForChar(unsigned char c)
|
||||
{
|
||||
static const unsigned char* charClassBitmaps = jsc_pcre_default_tables + cbits_offset;
|
||||
return charClassBitmaps[c];
|
||||
}
|
||||
|
||||
static inline unsigned char charTypeForChar(unsigned char c)
|
||||
{
|
||||
const unsigned char* charTypeMap = jsc_pcre_default_tables + ctypes_offset;
|
||||
return charTypeMap[c];
|
||||
}
|
||||
|
||||
/* True when c is below 128 and its character-type entry has ctype_word set. */
static inline bool isWordChar(UChar c)
{
    if (c >= 128)
        return false;
    return (charTypeForChar(c) & ctype_word) != 0;
}
|
||||
|
||||
/* True when c is U+00A0, or is below 128 with ctype_space set in its
   character-type entry. */
static inline bool isSpaceChar(UChar c)
{
    if (c == 0x00A0)
        return true;
    if (c >= 128)
        return false;
    return (charTypeForChar(c) & ctype_space) != 0;
}
|
||||
|
||||
/* True for the four line terminators recognised here: U+000A, U+000D,
   U+2028 and U+2029. */
static inline bool isNewline(UChar nl)
{
    switch (nl) {
    case 0xA:
    case 0xD:
    case 0x2028:
    case 0x2029:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
static inline bool isBracketStartOpcode(unsigned char opcode)
|
||||
{
|
||||
if (opcode >= OP_BRA)
|
||||
return true;
|
||||
switch (opcode) {
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr)
|
||||
{
|
||||
JS_ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT);
|
||||
do
|
||||
opcodePtr += getLinkValue(opcodePtr + 1);
|
||||
while (*opcodePtr == OP_ALT);
|
||||
}
|
||||
|
||||
/* Internal shared functions. These are functions that are used in more
|
||||
than one of the source files. They have to have external linkage,
|
||||
but are not part of the public API and so not exported from the library. */
|
||||
|
||||
extern int jsc_pcre_ucp_othercase(unsigned);
|
||||
extern bool jsc_pcre_xclass(int, const unsigned char*);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* End of pcre_internal.h */
|
71
js/src/yarr/pcre/pcre_tables.cpp
Normal file
71
js/src/yarr/pcre/pcre_tables.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
const int jsc_pcre_utf8_table1[6] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
/* These are the indicator bits and the mask for the data bits to set in the
|
||||
first byte of a character, indexed by the number of additional bytes. */
|
||||
|
||||
const int jsc_pcre_utf8_table2[6] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
const int jsc_pcre_utf8_table3[6] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
/* Table of the number of extra characters, indexed by the first character
|
||||
masked with 0x3f. The highest number for a valid UTF-8 character is in fact
|
||||
0x3d. */
|
||||
|
||||
const unsigned char jsc_pcre_utf8_table4[0x40] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
#include "chartables.c"
|
98
js/src/yarr/pcre/pcre_ucp_searchfuncs.cpp
Normal file
98
js/src/yarr/pcre/pcre_ucp_searchfuncs.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains code for searching the table of Unicode character
|
||||
properties. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#include "ucpinternal.h" /* Internal table details */
|
||||
#include "ucptable.cpp" /* The table itself */
|
||||
|
||||
/*************************************************
|
||||
* Search table and return other case *
|
||||
*************************************************/
|
||||
|
||||
/* If the given character is a letter, and there is another case for the
|
||||
letter, return the other case. Otherwise, return -1.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
|
||||
Returns: the other case or -1 if none
|
||||
*/
|
||||
|
||||
int jsc_pcre_ucp_othercase(unsigned c)
|
||||
{
|
||||
int bot = 0;
|
||||
int top = sizeof(ucp_table) / sizeof(cnode);
|
||||
int mid;
|
||||
|
||||
/* The table is searched using a binary chop. You might think that using
|
||||
intermediate variables to hold some of the common expressions would speed
|
||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
||||
makes things a lot slower. */
|
||||
|
||||
for (;;) {
|
||||
if (top <= bot)
|
||||
return -1;
|
||||
mid = (bot + top) >> 1;
|
||||
if (c == (ucp_table[mid].f0 & f0_charmask))
|
||||
break;
|
||||
if (c < (ucp_table[mid].f0 & f0_charmask))
|
||||
top = mid;
|
||||
else {
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) && (c <= (ucp_table[mid].f0 & f0_charmask) + (ucp_table[mid].f1 & f1_rangemask)))
|
||||
break;
|
||||
bot = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Found an entry in the table. Return -1 for a range entry. Otherwise return
|
||||
the other case if there is one, else -1. */
|
||||
|
||||
if (ucp_table[mid].f0 & f0_rangeflag)
|
||||
return -1;
|
||||
|
||||
int offset = ucp_table[mid].f1 & f1_casemask;
|
||||
if (offset & f1_caseneg)
|
||||
offset |= f1_caseneg;
|
||||
return !offset ? -1 : c + offset;
|
||||
}
|
114
js/src/yarr/pcre/pcre_xclass.cpp
Normal file
114
js/src/yarr/pcre/pcre_xclass.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class (one that contains characters whose values are > 255). */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain values > 255.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag byte of the XCLASS data
|
||||
|
||||
Returns: true if character matches, else false
|
||||
*/
|
||||
|
||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-8 mode. */
|
||||
|
||||
static inline void getUTF8CharAndAdvancePointer(int& c, const unsigned char*& subjectPtr)
|
||||
{
|
||||
c = *subjectPtr++;
|
||||
if ((c & 0xc0) == 0xc0) {
|
||||
int gcaa = jsc_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
int gcss = 6 * gcaa;
|
||||
c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
|
||||
while (gcaa-- > 0) {
|
||||
gcss -= 6;
|
||||
c |= (*subjectPtr++ & 0x3f) << gcss;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool jsc_pcre_xclass(int c, const unsigned char* data)
|
||||
{
|
||||
bool negated = (*data & XCL_NOT);
|
||||
|
||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||
not, we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256) {
|
||||
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0)
|
||||
data += 32;
|
||||
|
||||
int t;
|
||||
while ((t = *data++) != XCL_END) {
|
||||
if (t == XCL_SINGLE) {
|
||||
int x;
|
||||
getUTF8CharAndAdvancePointer(x, data);
|
||||
if (c == x)
|
||||
return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE) {
|
||||
int x, y;
|
||||
getUTF8CharAndAdvancePointer(x, data);
|
||||
getUTF8CharAndAdvancePointer(y, data);
|
||||
if (c >= x && c <= y)
|
||||
return !negated;
|
||||
}
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
126
js/src/yarr/pcre/ucpinternal.h
Normal file
126
js/src/yarr/pcre/ucpinternal.h
Normal file
@ -0,0 +1,126 @@
|
||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
||||
words that form a data item in the table. */
|
||||
|
||||
typedef struct cnode {
|
||||
unsigned f0;
|
||||
unsigned f1;
|
||||
} cnode;
|
||||
|
||||
/* Things for the f0 field */
|
||||
|
||||
/* Bit layout of the f0 word. The range flag is the single bit 0x00800000;
   the 0x00600000 bits are spare and the low 21 bits hold the code point, so
   the flag mask must not overlap f0_charmask. */
#define f0_scriptmask 0xff000000 /* Mask for script field */
#define f0_scriptshift 24 /* Shift for script value */
#define f0_rangeflag 0x00800000 /* Flag for a range item */
#define f0_charmask 0x001fffff /* Mask for code point value */
|
||||
|
||||
/* Things for the f1 field */
|
||||
|
||||
#define f1_typemask 0xfc000000 /* Mask for the 6-bit character type number (top bits of f1) */
|
||||
#define f1_typeshift 26 /* Bit position of the type field within f1 */
|
||||
#define f1_rangemask 0x0000ffff /* Mask for the unsigned offset to the top of a range (range entries) */
|
||||
#define f1_casemask 0x0000ffff /* Mask for the signed "other case" offset (single-character entries) */
|
||||
#define f1_caseneg 0xffff8000 /* Bits for negation. NOTE(review): presumably used to sign-extend a negative 16-bit case offset -- confirm against the lookup code */
|
||||
|
||||
/* The data consists of a vector of structures of type cnode. The two unsigned
|
||||
32-bit integers are used as follows:
|
||||
|
||||
(f0) (1) The most significant byte holds the script number. The numbers are
|
||||
defined by the enum in ucp.h.
|
||||
|
||||
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
||||
It is not set if this entry defines a single character.
|
||||
|
||||
(3) The 0x00600000 bits are spare.
|
||||
|
||||
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
||||
ever be greater than 0x0010ffff, so this should be OK for ever.
|
||||
|
||||
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
||||
defined by an enum in ucp.h.
|
||||
|
||||
(2) The 0x03ff0000 bits are spare.
|
||||
|
||||
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
||||
range if this entry defines a range, OR the *signed* offset to the
|
||||
character's "other case" partner if this entry defines a single
|
||||
character. There is no partner if the value is zero.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
||||
-------------------------------------------------------------------------------
|
||||
              | | |                              | |
              | | |-> spare                      | |-> spare
              | |                                |
              | |-> spare                        |-> spare
              |
              |-> range flag
|
||||
|
||||
The upper/lower casing information is set only for characters that come in
|
||||
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
||||
|
||||
When searching the data, proceed as follows:
|
||||
|
||||
(1) Set up for a binary chop search.
|
||||
|
||||
(2) If the top is not greater than the bottom, the character is not in the
|
||||
table. Its type must therefore be "Cn" ("Undefined").
|
||||
|
||||
(3) Find the middle vector element.
|
||||
|
||||
(4) Extract the code point and compare. If equal, we are done.
|
||||
|
||||
(5) If the test character is smaller, set the top to the current point, and
|
||||
goto (2).
|
||||
|
||||
(6) If the current entry defines a range, compute the last character by adding
|
||||
the offset, and see if the test character is within the range. If it is,
|
||||
we are done.
|
||||
|
||||
(7) Otherwise, set the bottom to one element past the current point and goto
|
||||
(2).
|
||||
*/
|
||||
|
||||
/* End of ucpinternal.h */
|
2968
js/src/yarr/pcre/ucptable.cpp
Normal file
2968
js/src/yarr/pcre/ucptable.cpp
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user