Bug 684559: bring back PCRE for JavaScript regexps on systems without YARR JIT because it's faster than YARR's interpreter. r=dmandelin

2024-09-13 09:24:08 -07:00 · 2011-09-07 17:05:59 -07:00 · 2011-09-07 17:05:59 -07:00 · 1270c81714
commit 1270c81714
parent 25316d6093
17 changed files with 9291 additions and 19 deletions
--- a/js/src/Makefile.in
+++ b/js/src/Makefile.in
@ -391,22 +391,20 @@ CPPSRCS += 	checks.cc \
 # END enclude sources for V8 dtoa
 #############################################

+# For architectures without YARR JIT, PCRE is faster than the YARR
+# interpreter (bug 684559).
+
 ifeq (,$(filter arm% sparc %86 x86_64,$(TARGET_CPU)))

-VPATH +=	$(srcdir)/assembler \
-		$(srcdir)/assembler/wtf \
-		$(srcdir)/yarr\
+VPATH +=        $(srcdir)/yarr/pcre \
 		$(NULL)

 CPPSRCS += \
-		Assertions.cpp \
-		OSAllocatorOS2.cpp \
-		OSAllocatorPosix.cpp \
-		OSAllocatorWin.cpp \
-		PageBlock.cpp \
-		YarrInterpreter.cpp \
-		YarrPattern.cpp \
-		YarrSyntaxChecker.cpp \
+                pcre_compile.cpp \
+                pcre_exec.cpp \
+                pcre_tables.cpp \
+                pcre_xclass.cpp \
+                pcre_ucp_searchfuncs.cpp \
 		$(NULL)
 else

--- a/js/src/jsregexp.cpp
+++ b/js/src/jsregexp.cpp
@ -194,6 +194,40 @@ js_ObjectIsRegExp(JSObject *obj)
 * js::RegExp
 */

+#if !ENABLE_YARR_JIT
+/*
+ * Report the JS error that corresponds to a PCRE failure code.
+ *
+ * |error| is either the non-zero code that jsRegExpCompile stored through its
+ * |error| out-param, or a negative jsRegExpExecute result other than
+ * JSRegExpErrorNoMatch. The callers in jsregexpinlines.h return false right
+ * after calling this, so every path must leave an error reported: codes that
+ * are not recognized assert in debug builds and are reported as
+ * JSMSG_REGEXP_TOO_COMPLEX otherwise.
+ */
+void
+RegExp::reportPCREError(JSContext *cx, int error)
+{
+    /* No whitespace may follow the line-continuation backslashes below. */
+#define REPORT(msg_) \
+    JS_ReportErrorFlagsAndNumberUC(cx, JSREPORT_ERROR, js_GetErrorMessage, NULL, msg_); \
+    return
+    switch (error) {
+      /* jsRegExpExecute failures (see yarr/pcre/pcre.h). */
+      case JSRegExpErrorInternal: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+      case JSRegExpErrorHitLimit: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+
+      /* Zero means "no error"; do not fall through into the compile errors. */
+      case 0:
+        JS_NOT_REACHED("Precondition violation: an error must have occurred.");
+        break;
+
+      /* jsRegExpCompile error numbers. */
+      case 1: REPORT(JSMSG_TRAILING_SLASH);
+      case 2: REPORT(JSMSG_TRAILING_SLASH);
+      case 3: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+      case 4: REPORT(JSMSG_BAD_QUANTIFIER);
+      case 5: REPORT(JSMSG_BAD_QUANTIFIER);
+      case 6: REPORT(JSMSG_BAD_CLASS_RANGE);
+      case 7: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+      case 8: REPORT(JSMSG_BAD_CLASS_RANGE);
+      case 9: REPORT(JSMSG_BAD_QUANTIFIER);
+      case 10: REPORT(JSMSG_UNMATCHED_RIGHT_PAREN);
+      case 11: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+      case 12: REPORT(JSMSG_UNMATCHED_RIGHT_PAREN);
+      case 13: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+      case 14: REPORT(JSMSG_MISSING_PAREN);
+      case 15: REPORT(JSMSG_BAD_BACKREF);
+      case 16: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+      case 17: REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+      default:
+        JS_NOT_REACHED("Precondition violation: unknown PCRE error code.");
+        break;
+    }
+
+    /* Only reached when the assertions above are compiled out. */
+    REPORT(JSMSG_REGEXP_TOO_COMPLEX);
+#undef REPORT
+}
+#endif
+
 void
 RegExp::reportYarrError(JSContext *cx, TokenStream *ts, JSC::Yarr::ErrorCode error)
 {
--- a/js/src/jsregexpinlines.h
+++ b/js/src/jsregexpinlines.h
@ -55,6 +55,8 @@
 #include "yarr/Yarr.h"
 #if ENABLE_YARR_JIT
 #include "yarr/YarrJIT.h"
+#else
+#include "yarr/pcre/pcre.h"
 #endif

 namespace js {
@ -98,8 +100,10 @@ class RegExp
 #if ENABLE_YARR_JIT
    /* native code is valid only if codeBlock.isFallBack() == false */
    JSC::Yarr::YarrCodeBlock    codeBlock;
-#endif
    JSC::Yarr::BytecodePattern  *byteCode;
+#else
+    JSRegExp                    *compiled;
+#endif
    JSLinearString              *source;
    size_t                      refCount;
    unsigned                    parenCount; /* Must be |unsigned| to interface with YARR. */
@ -115,8 +119,11 @@ class RegExp
      :
 #if ENABLE_YARR_JIT
        codeBlock(),
+        byteCode(NULL),
+#else
+        compiled(NULL),
 #endif
-        byteCode(NULL), source(source), refCount(1), parenCount(0), flags(flags)
+        source(source), refCount(1), parenCount(0), flags(flags)
 #ifdef DEBUG
        , compartment(compartment)
 #endif
@ -127,14 +134,20 @@ class RegExp
    ~RegExp() {
 #if ENABLE_YARR_JIT
        codeBlock.release();
-#endif
        if (byteCode)
            Foreground::delete_<JSC::Yarr::BytecodePattern>(byteCode);
+#else
+        if (compiled)
+            jsRegExpFree(compiled);
+#endif
    }

    bool compileHelper(JSContext *cx, JSLinearString &pattern, TokenStream *ts);
    bool compile(JSContext *cx, TokenStream *ts);
    static const uint32 allFlags = JSREG_FOLD | JSREG_GLOB | JSREG_MULTILINE | JSREG_STICKY;
+#if !ENABLE_YARR_JIT
+    void reportPCREError(JSContext *cx, int error);
+#endif
    void reportYarrError(JSContext *cx, TokenStream *ts, JSC::Yarr::ErrorCode error);
    static inline bool initArena(JSContext *cx);
    static inline void checkMatchPairs(JSString *input, int *buf, size_t matchItemCount);
@ -370,13 +383,20 @@ RegExp::executeInternal(JSContext *cx, RegExpStatics *res, JSString *inputstr,
    else
        result = JSC::Yarr::interpret(byteCode, chars, *lastIndex - inputOffset, len, buf);
 #else
-    result = JSC::Yarr::interpret(byteCode, chars, *lastIndex - inputOffset, len, buf);
+    result = jsRegExpExecute(cx, compiled, chars, len, *lastIndex - inputOffset, buf, bufCount);
 #endif
    if (result == -1) {
        *rval = NullValue();
        return true;
    }

+#if !ENABLE_YARR_JIT
+    if (result < 0) {
+        reportPCREError(cx, result);
+        return false;
+    }
+#endif
+
    /* 
     * Adjust buf for the inputOffset. Use of sticky is rare and the matchItemCount is small, so
     * just do another pass.
@ -472,6 +492,7 @@ EnableYarrJIT(JSContext *cx)
 inline bool
 RegExp::compileHelper(JSContext *cx, JSLinearString &pattern, TokenStream *ts)
 {
+#if ENABLE_YARR_JIT
    JSC::Yarr::ErrorCode yarrError;
    JSC::Yarr::YarrPattern yarrPattern(pattern, ignoreCase(), multiline(), &yarrError);
    if (yarrError) {
@ -480,7 +501,7 @@ RegExp::compileHelper(JSContext *cx, JSLinearString &pattern, TokenStream *ts)
    }
    parenCount = yarrPattern.m_numSubpatterns;

-#if ENABLE_YARR_JIT && defined(JS_METHODJIT)
+#if defined(JS_METHODJIT)
    if (EnableYarrJIT(cx) && !yarrPattern.m_containsBackreferences) {
        bool ok = cx->compartment->ensureJaegerCompartmentExists(cx);
        if (!ok)
@ -492,12 +513,21 @@ RegExp::compileHelper(JSContext *cx, JSLinearString &pattern, TokenStream *ts)
    }
 #endif

-#if ENABLE_YARR_JIT
    codeBlock.setFallBack(true);
-#endif
    byteCode = JSC::Yarr::byteCompile(yarrPattern, cx->compartment->regExpAllocator).get();
-
    return true;
+#else
+    int error = 0;
+    compiled = jsRegExpCompile(pattern.chars(), pattern.length(),
+        ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase,
+        multiline() ? JSRegExpMultiline : JSRegExpSingleLine,
+        &parenCount, &error);
+    if (error) {
+        reportPCREError(cx, error);
+        return false;
+    }
+    return true;
+#endif
 }

 inline bool
--- a/js/src/yarr/pcre/AUTHORS
+++ b/js/src/yarr/pcre/AUTHORS
@ -0,0 +1,12 @@
+Originally written by:  Philip Hazel
+Email local part:       ph10
+Email domain:           cam.ac.uk
+
+University of Cambridge Computing Service,
+Cambridge, England. Phone: +44 1223 334714.
+
+Copyright (c) 1997-2005 University of Cambridge. All rights reserved.
+
+Adapted for JavaScriptCore and WebKit by Apple Inc.
+
+Copyright (c) 2005, 2006, 2007 Apple Inc. All rights reserved.
--- a/js/src/yarr/pcre/COPYING
+++ b/js/src/yarr/pcre/COPYING
@ -0,0 +1,35 @@
+PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+This is JavaScriptCore's variant of the PCRE library. While this library
+started out as a copy of PCRE, many of the features of PCRE have been
+removed.
+
+Copyright (c) 1997-2005 University of Cambridge. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the name of Apple
+      Inc. nor the names of their contributors may be used to endorse or
+      promote products derived from this software without specific prior
+      written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
--- a/js/src/yarr/pcre/chartables.c
+++ b/js/src/yarr/pcre/chartables.c
@ -0,0 +1,96 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* This file is automatically written by the dftables auxiliary 
+program. If you edit it by hand, you might like to edit the Makefile to 
+prevent its ever being regenerated.
+
+This file contains the default tables for characters with codes less than
+128 (ASCII characters). These tables are used when no external tables are
+passed to PCRE. */
+
+const unsigned char jsc_pcre_default_tables[480] = {
+
+/* This table is a lower casing table. */
+
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 
+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 
+  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 
+  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 
+  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 
+  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 
+  0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+  0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 
+  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+  0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+
+/* This table is a case flipping table. */
+
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 
+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 
+  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 
+  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 
+  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 
+  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 
+  0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
+  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+  0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 
+  0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 
+  0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 
+  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 
+  0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+
+/* This table contains bit maps for various character classes.
+Each map is 32 bytes long and the bits run from the least
+significant end of each byte. The classes are: space, digit, word. */
+
+  0x00, 0x3E, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, 
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, 
+  0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, 
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+/* This table identifies various classes of character by individual bits:
+  0x01   white space character
+  0x08   hexadecimal digit
+  0x10   alphanumeric or '_'
+*/
+
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /*   0-  7 */
+  0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,  /*   8- 15 */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /*  16- 23 */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /*  24- 31 */
+  0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /*    - '  */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /*  ( - /  */
+  0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  /*  0 - 7  */
+  0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /*  8 - ?  */
+  0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x10,  /*  @ - G  */
+  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  /*  H - O  */
+  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  /*  P - W  */
+  0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x10,  /*  X - _  */
+  0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x10,  /*  ` - g  */
+  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  /*  h - o  */
+  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  /*  p - w  */
+  0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00}; /*  x -127 */
+
+
+/* End of chartables.c */
--- a/js/src/yarr/pcre/dftables
+++ b/js/src/yarr/pcre/dftables
@ -0,0 +1,273 @@
+#!/usr/bin/perl -w
+#
+# This is JavaScriptCore's variant of the PCRE library. While this library
+# started out as a copy of PCRE, many of the features of PCRE have been
+# removed. This library now supports only the regular expression features
+# required by the JavaScript language specification, and has only the functions
+# needed by JavaScriptCore and the rest of WebKit.
+# 
+#                  Originally written by Philip Hazel
+#            Copyright (c) 1997-2006 University of Cambridge
+#  Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc.  All rights reserved.
+# 
+# -----------------------------------------------------------------------------
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+# 
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+# 
+#     * Neither the name of the University of Cambridge nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+
+# This is a freestanding support program to generate a file containing
+# character tables. The tables are built according to the default C
+# locale.
+
+use strict;
+
+use File::Basename;
+use File::Spec;
+use File::Temp qw(tempfile);
+use Getopt::Long;
+
+# Forward declaration: the sub is defined at the end of this file but is called
+# from the top-level code below.
+sub readHeaderValues();
+
+# Table-layout constants (offsets, lengths and bit values), filled in by
+# readHeaderValues().
+my %pcre_internal;
+
+if (scalar(@ARGV) < 1) {
+    print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
+    exit 1;
+}
+
+my $outputFile;
+my $preprocessor;
+GetOptions('preprocessor=s' => \$preprocessor);
+# Fall back to "cpp" when no --preprocessor option was given.
+if (not $preprocessor) {
+    $preprocessor = "cpp";
+}
+
+# The output file is read from $ARGV[0] after option parsing; this check catches
+# an invocation that supplied only options.
+$outputFile = $ARGV[0];
+die('Must specify output file.') unless defined($outputFile);
+
+# Must run before any table is emitted: everything below reads %pcre_internal.
+readHeaderValues();
+
+# Open the output file; binmode() turns off any platform line-ending
+# translation on the handle.
+open(OUT, ">", $outputFile) or die "$!";
+binmode(OUT);
+
+# Banner comment for the generated file.
+printf(OUT
+    "/*************************************************\n" .
+    "*      Perl-Compatible Regular Expressions       *\n" .
+    "*************************************************/\n\n" .
+    "/* This file is automatically written by the dftables auxiliary \n" .
+    "program. If you edit it by hand, you might like to edit the Makefile to \n" .
+    "prevent its ever being regenerated.\n\n");
+# Rest of the banner, then the array declaration; %d is the total table length.
+printf(OUT
+    "This file contains the default tables for characters with codes less than\n" .
+    "128 (ASCII characters). These tables are used when no external tables are\n" .
+    "passed to PCRE. */\n\n" .
+    "const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
+    "/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length});
+
+# The lower-casing table is emitted first, so the header must place it at
+# offset 0.
+if ($pcre_internal{lcc_offset} != 0) {
+    die "lcc_offset != 0";
+}
+
+# 128 entries, eight per output row: entry $i is the code of lc(chr($i)).
+printf(OUT "  ");
+for (my $i = 0; $i < 128; $i++) {
+    if (($i & 7) == 0 && $i != 0) {
+        printf(OUT "\n  ");
+    }
+    printf(OUT "0x%02X", ord(lc(chr($i))));
+    if ($i != 127) {
+        printf(OUT ", ");
+    }
+}
+# The separator after the last entry is written here because another table follows.
+printf(OUT ",\n\n");
+
+printf(OUT "/* This table is a case flipping table. */\n\n");
+
+# The case-flipping table directly follows the 128-entry lower-casing table.
+if ($pcre_internal{fcc_offset} != 128) {
+  die "fcc_offset != 128";
+}
+
+# 128 entries, eight per output row: lower-case characters map to their
+# upper-case code, every other character maps to the code of lc() of itself.
+printf(OUT "  ");
+for (my $i = 0; $i < 128; $i++) {
+    if (($i & 7) == 0 && $i != 0) {
+        printf(OUT "\n  ");
+    }
+    my $c = chr($i);
+    printf(OUT "0x%02X", $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c)));
+    if ($i != 127) {
+        printf(OUT ", ");
+    }
+}
+printf(OUT ",\n\n");
+
+printf(OUT
+    "/* This table contains bit maps for various character classes.\n" .
+    "Each map is 32 bytes long and the bits run from the least\n" .
+    "significant end of each byte. The classes are: space, digit, word. */\n\n");
+
+# The bitmaps directly follow the 128-entry case-flipping table.
+if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) {
+    die "cbits_offset != fcc_offset + 128";
+}
+
+# Build all bitmaps in one zero-filled array. For character $i in a class whose
+# map starts at cbit_<class>, the byte is cbit_<class> + $i / 8 (Perl truncates
+# the fractional subscript) and the bit within that byte is $i & 7.
+my @cbit_table = (0) x $pcre_internal{cbit_length};
+for (my $i = ord('0'); $i <= ord('9'); $i++) {
+    $cbit_table[$pcre_internal{cbit_digit} + $i / 8] |= 1 << ($i & 7);
+}
+# '_' is a word character but is not matched by [[:alnum:]] below.
+$cbit_table[$pcre_internal{cbit_word} + ord('_') / 8] |= 1 << (ord('_') & 7);
+for (my $i = 0; $i < 128; $i++) {
+    my $c = chr($i);
+    if ($c =~ /[[:alnum:]]/) {
+        $cbit_table[$pcre_internal{cbit_word} + $i / 8] |= 1 << ($i & 7);
+    }
+    if ($c =~ /[[:space:]]/) {
+        $cbit_table[$pcre_internal{cbit_space} + $i / 8] |= 1 << ($i & 7);
+    }
+}
+
+# Emit eight bytes per row, with an extra blank line after every 32 bytes.
+printf(OUT "  ");
+for (my $i = 0; $i < $pcre_internal{cbit_length}; $i++) {
+    if (($i & 7) == 0 && $i != 0) {
+        if (($i & 31) == 0) {
+            printf(OUT "\n");
+        }
+        printf(OUT "\n  ");
+    }
+    printf(OUT "0x%02X", $cbit_table[$i]);
+    if ($i != $pcre_internal{cbit_length} - 1) {
+        printf(OUT ", ");
+    }
+}
+printf(OUT ",\n\n");
+
+# Comment block listing the flag value of each character class.
+printf(OUT
+    "/* This table identifies various classes of character by individual bits:\n" .
+    "  0x%02x   white space character\n" .
+    "  0x%02x   hexadecimal digit\n" .
+    "  0x%02x   alphanumeric or '_'\n*/\n\n",
+    $pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word});
+
+# The ctype table directly follows the class bitmaps.
+if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) {
+    die "ctypes_offset != cbits_offset + cbit_length";
+}
+
+# 128 entries, eight per output row. Each entry is the sum of the flags of the
+# classes the character belongs to.
+printf(OUT "  ");
+for (my $i = 0; $i < 128; $i++) {
+    my $x = 0;
+    my $c = chr($i);
+    if ($c =~ /[[:space:]]/) {
+        $x += $pcre_internal{ctype_space};
+    }
+    if ($c =~ /[[:xdigit:]]/) {
+        $x += $pcre_internal{ctype_xdigit};
+    }
+    if ($c =~ /[[:alnum:]_]/) {
+        $x += $pcre_internal{ctype_word};
+    }
+    printf(OUT "0x%02X", $x);
+    # The last entry closes the array initializer instead of adding a comma.
+    if ($i != 127) {
+        printf(OUT ", ");
+    } else {
+        printf(OUT "};");
+    }
+    # End of a row: append a comment naming the first and last character of the
+    # row, printed literally when printable and as a decimal code otherwise.
+    if (($i & 7) == 7) {
+        printf(OUT " /* ");
+        my $d = chr($i - 7);
+        if ($d =~ /[[:print:]]/) {
+            printf(OUT " %c -", $i - 7);
+        } else {
+            printf(OUT "%3d-", $i - 7);
+        }
+        if ($c =~ m/[[:print:]]/) {
+            printf(OUT " %c ", $i);
+        } else {
+            printf(OUT "%3d", $i);
+        }
+        printf(OUT " */\n");
+        if ($i != 127) {
+            printf(OUT "  ");
+        }
+    }
+}
+
+# Final layout check: the 128-entry ctype table must be the last one in the array.
+if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
+    die "tables_length != ctypes_offset + 128";
+}
+
+printf(OUT "\n\n/* End of chartables.c */\n");
+
+close(OUT);
+
+# Explicit exit so execution never continues into the sub definition below.
+exit 0;
+
+# Fill %pcre_internal with the table-layout constants from pcre_internal.h.
+#
+# The header text is copied into a temporary file after a "#define DFTABLES"
+# line and followed by one '$pcre_internal{"name"} = name;' line per constant.
+# That file is run through $preprocessor and the output is evaluated as Perl.
+#
+# Dies if the header cannot be read, the preprocessor cannot be started, the
+# output does not evaluate, or a constant is still undefined afterwards. The
+# temporary file is removed before any of those failures is raised.
+sub readHeaderValues()
+{
+    my @variables = qw(
+        cbit_digit
+        cbit_length
+        cbit_space
+        cbit_word
+        cbits_offset
+        ctype_space
+        ctype_word
+        ctype_xdigit
+        ctypes_offset
+        fcc_offset
+        lcc_offset
+        tables_length
+    );
+
+    # Slurp mode: each <HANDLE> read below returns the whole stream at once.
+    local $/ = undef;
+
+    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");
+
+    # Read the header first, so a missing header fails before a temporary file
+    # exists.
+    open(HEADER, "<", $headerPath) or die "$!";
+    my $header = <HEADER>;
+    close(HEADER);
+
+    my ($fh, $tempFile) = tempfile(
+        basename($0) . "-XXXXXXXX",
+        DIR => File::Spec->tmpdir(),
+        SUFFIX => ".in",
+        UNLINK => 0,
+    );
+
+    print $fh "#define DFTABLES\n\n";
+    print $fh $header;
+    print $fh "\n\n";
+
+    for my $v (@variables) {
+        print $fh "\$pcre_internal{\"$v\"} = $v;\n";
+    }
+
+    close($fh);
+
+    if (!open(CPP, "$preprocessor \"$tempFile\" |")) {
+        my $message = "$!";
+        unlink $tempFile;
+        die $message;
+    }
+    my $content = <CPP>;
+    close(CPP);
+
+    # The preprocessed text is in memory now; remove the temporary file before
+    # evaluating it so that a failure below cannot leave the file behind.
+    unlink $tempFile;
+
+    $content = "" unless defined($content);
+    eval $content;
+    die "$@" if $@;
+
+    # Empty or truncated preprocessor output evaluates cleanly but sets nothing;
+    # report that here rather than through a later, unrelated offset check.
+    my @missing = grep { !defined($pcre_internal{$_}) } @variables;
+    if (@missing) {
+        die "preprocessor \"$preprocessor\" produced no value for: @missing";
+    }
+}
--- a/js/src/yarr/pcre/pcre.h
+++ b/js/src/yarr/pcre/pcre.h
@ -0,0 +1,76 @@
+/* This is the public header file for JavaScriptCore's variant of the PCRE
+library. While this library started out as a copy of PCRE, many of the
+features of PCRE have been removed. This library now supports only the
+regular expression features required by the JavaScript language
+specification, and has only the functions needed by JavaScriptCore and the
+rest of WebKit.
+
+           Copyright (c) 1997-2005 University of Cambridge
+    Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+// FIXME: This file needs to be renamed to JSRegExp.h; it's no longer PCRE.
+
+#ifndef JSRegExp_h
+#define JSRegExp_h
+
+// Last remnants from the JSWTFBridge.
+#include "assembler/wtf/Platform.h"
+#include "jsstr.h"
+#include "jsprvtd.h"
+#include "jstl.h"
+
+typedef jschar UChar;
+typedef JSLinearString UString;
+
+struct JSRegExp;
+struct JSContext;
+
+enum JSRegExpIgnoreCaseOption { JSRegExpDoNotIgnoreCase, JSRegExpIgnoreCase };
+enum JSRegExpMultilineOption { JSRegExpSingleLine, JSRegExpMultiline };
+
+/*
+ * jsRegExpExecute error codes. js::RegExp::executeInternal turns
+ * JSRegExpErrorNoMatch into a null result and passes every other negative
+ * result to RegExp::reportPCREError.
+ */
+const int JSRegExpErrorNoMatch = -1;
+const int JSRegExpErrorHitLimit = -2; /* Reported to script as JSMSG_REGEXP_TOO_COMPLEX. */
+const int JSRegExpErrorInternal = -4; /* NOTE(review): presumably an engine-internal failure; confirm in pcre_exec.cpp. */
+
+/*
+ * Compile the |patternLength| characters at |pattern| with the given
+ * case-folding and multiline options. js::RegExp::compileHelper passes the
+ * address of its paren count as |numSubpatterns| and treats a non-zero |*error|
+ * as failure; it initializes |*error| to 0 before the call. The result is
+ * released with jsRegExpFree.
+ * NOTE(review): presumably returns NULL when |*error| is set; confirm in
+ * pcre_compile.cpp.
+ */
+JSRegExp* jsRegExpCompile(
+const UChar* pattern, int patternLength,
+    JSRegExpIgnoreCaseOption, JSRegExpMultilineOption,
+    unsigned* numSubpatterns, int *error);
+
+/*
+ * Run a compiled expression against the |subjectLength| characters at
+ * |subject|, starting at |startOffset|. A negative return value is one of the
+ * error codes above.
+ * NOTE(review): the meaning of a non-negative result and the layout written to
+ * |offsetsVector| are not visible from this header; see pcre_exec.cpp.
+ */
+int jsRegExpExecute(JSContext *, const JSRegExp*,
+    const UChar* subject, int subjectLength, int startOffset,
+    int* offsetsVector, int offsetsVectorLength);
+
+/* Release an expression returned by jsRegExpCompile. */
+void jsRegExpFree(JSRegExp*);
+
+#endif
--- a/js/src/yarr/pcre/pcre.pri
+++ b/js/src/yarr/pcre/pcre.pri
@ -0,0 +1,12 @@
+# Perl Compatible Regular Expressions - Qt4 build info
+VPATH += $$PWD
+INCLUDEPATH += $$PWD $$OUTPUT_DIR/JavaScriptCore/tmp
+DEPENDPATH += $$PWD
+
+SOURCES += \
+    pcre_compile.cpp \
+    pcre_exec.cpp \
+    pcre_tables.cpp \
+    pcre_ucp_searchfuncs.cpp \
+    pcre_xclass.cpp
+
--- a/js/src/yarr/pcre/pcre_compile.cpp
+++ b/js/src/yarr/pcre/pcre_compile.cpp
--- a/js/src/yarr/pcre/pcre_exec.cpp
+++ b/js/src/yarr/pcre/pcre_exec.cpp
--- a/js/src/yarr/pcre/pcre_internal.h
+++ b/js/src/yarr/pcre/pcre_internal.h
@ -0,0 +1,434 @@
+/* This is JavaScriptCore's variant of the PCRE library. While this library
+started out as a copy of PCRE, many of the features of PCRE have been
+removed. This library now supports only the regular expression features
+required by the JavaScript language specification, and has only the functions
+needed by JavaScriptCore and the rest of WebKit.
+
+                 Originally written by Philip Hazel
+           Copyright (c) 1997-2006 University of Cambridge
+    Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This header contains definitions that are shared between the different
+modules, but which are not relevant to the exported API. This includes some
+functions whose names all begin with "_pcre_". */
+
+#ifndef PCRE_INTERNAL_H
+#define PCRE_INTERNAL_H
+
+/* Bit definitions for entries in the pcre_ctypes table. */
+
+#define ctype_space   0x01
+#define ctype_xdigit  0x08
+#define ctype_word    0x10   /* alphanumeric or '_' */
+
+/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
+of bits for a class map. Some classes are built by combining these tables.
+The three bitmaps are laid out 32 bytes apart. */
+
+#define cbit_space     0      /* \s */
+#define cbit_digit    32      /* \d */
+#define cbit_word     64      /* \w */
+#define cbit_length   96      /* Length of the cbits table */
+
+/* Offsets of the various tables from the base tables pointer, and
+total length. With the values below, ctypes_offset evaluates to 352 and
+tables_length to 480. */
+
+#define lcc_offset      0     /* read by toLowerCase() */
+#define fcc_offset    128     /* read by flipCase() */
+#define cbits_offset  256     /* read by classBitmapForChar() */
+#define ctypes_offset (cbits_offset + cbit_length)  /* read by charTypeForChar() */
+#define tables_length (ctypes_offset + 128)
+
+#ifndef DFTABLES
+
+#include "pcre.h"
+
+/* The value of LINK_SIZE determines the number of bytes used to store links as
+offsets within the compiled regex. The default is 2, which allows for compiled
+patterns up to 64K long. This copy sets it to 3, so a link can hold values up
+to 0xFFFFFF (see put3ByteValue). Only 2 and 3 are handled by the link helpers
+below; any other value hits their #error. */
+
+#define LINK_SIZE   3
+
+/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
+inline, and there are *still* stupid compilers about that don't like indented
+pre-processor statements, or at least there were when I first wrote this. After
+all, it had only been about 10 years then... */
+
+#ifdef DEBUG
+/* The printf is commented out, so DPRINTF expands to nothing in DEBUG builds
+   as well; restore the body to get trace output. */
+#define DPRINTF(p) /*printf p; fflush(stdout);*/
+#else
+#define DPRINTF(p) /*nothing*/
+#endif
+
+/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
+in big-endian order) by default. These are used, for example, to link from the
+start of a subpattern to its alternatives and its end. The use of 2 bytes per
+offset limits the size of the compiled regex to around 64K, which is big enough
+for almost everybody. However, I received a request for an even bigger limit.
+For this reason, and also to make the code easier to maintain, the storing and
+loading of offsets from the byte string is now handled by the functions that are
+defined here. */
+
+/* PCRE uses some other 2-byte quantities that do not change when the size of
+offsets changes. These are used for repeat counts and for other things such as
+capturing parenthesis numbers in back references. */
+
+/* Store |value| at opcodePtr[0..1] as a 16-bit big-endian quantity (high byte
+   first). JS_ASSERT checks that the value fits in 16 bits. */
+static inline void put2ByteValue(unsigned char* opcodePtr, int value)
+{
+    JS_ASSERT(0 <= value && value <= 0xFFFF);
+    const unsigned char highByte = static_cast<unsigned char>(value >> 8);
+    const unsigned char lowByte = static_cast<unsigned char>(value);
+    opcodePtr[0] = highByte;
+    opcodePtr[1] = lowByte;
+}
+
+/* Store |value| at opcodePtr[0..2] as a 24-bit big-endian quantity (most
+   significant byte first). JS_ASSERT checks that the value fits in 24 bits. */
+static inline void put3ByteValue(unsigned char* opcodePtr, int value)
+{
+    JS_ASSERT(0 <= value && value <= 0xFFFFFF);
+    const unsigned char topByte = static_cast<unsigned char>(value >> 16);
+    const unsigned char middleByte = static_cast<unsigned char>(value >> 8);
+    const unsigned char bottomByte = static_cast<unsigned char>(value);
+    opcodePtr[0] = topByte;
+    opcodePtr[1] = middleByte;
+    opcodePtr[2] = bottomByte;
+}
+
+/* Read the 16-bit big-endian quantity stored at opcodePtr[0..1]. */
+static inline int get2ByteValue(const unsigned char* opcodePtr)
+{
+    const int highByte = opcodePtr[0];
+    const int lowByte = opcodePtr[1];
+    return (highByte << 8) | lowByte;
+}
+
+/* Read the 24-bit big-endian quantity stored at opcodePtr[0..2]. */
+static inline int get3ByteValue(const unsigned char* opcodePtr)
+{
+    const int topByte = opcodePtr[0];
+    const int middleByte = opcodePtr[1];
+    const int bottomByte = opcodePtr[2];
+    return (topByte << 16) | (middleByte << 8) | bottomByte;
+}
+
+/* Write a 2-byte value at |opcodePtr| and move the caller's pointer past it. */
+static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
+{
+    unsigned char* const destination = opcodePtr;
+    opcodePtr = destination + 2;
+    put2ByteValue(destination, value);
+}
+
+/* Write a 3-byte value at |opcodePtr| and move the caller's pointer past it. */
+static inline void put3ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
+{
+    unsigned char* const destination = opcodePtr;
+    opcodePtr = destination + 3;
+    put3ByteValue(destination, value);
+}
+
+/* Write |value| into the LINK_SIZE-byte link field at |opcodePtr|. Unlike
+   putLinkValue(), a zero link is accepted here. */
+static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value)
+{
+#if LINK_SIZE == 2
+    put2ByteValue(opcodePtr, value);
+#elif LINK_SIZE == 3
+    put3ByteValue(opcodePtr, value);
+#else
+#   error LINK_SIZE not supported.
+#endif
+}
+
+/* Read the LINK_SIZE-byte link field at |opcodePtr|. Unlike getLinkValue(),
+   a zero link is accepted here. */
+static inline int getLinkValueAllowZero(const unsigned char* opcodePtr)
+{
+#if LINK_SIZE == 2
+    return get2ByteValue(opcodePtr);
+#elif LINK_SIZE == 3
+    return get3ByteValue(opcodePtr);
+#else
+#   error LINK_SIZE not supported.
+#endif
+}
+
+/* Upper bound on pattern size. The expansion is parenthesized so the macro
+   behaves as a single value inside larger expressions (e.g. x / MAX_PATTERN_SIZE). */
+#define MAX_PATTERN_SIZE (4096 * 1024) // Derived by empirical testing of compile time in PCRE and WREC.
+/* Compile-time check that MAX_PATTERN_SIZE is below the largest value a
+   LINK_SIZE-byte link field can represent. */
+JS_STATIC_ASSERT(MAX_PATTERN_SIZE < (1 << (8 * LINK_SIZE)));
+
+/* Write a link that must be non-zero (checked with JS_ASSERT) into the
+   LINK_SIZE-byte field at |opcodePtr|. */
+static inline void putLinkValue(unsigned char* opcodePtr, int value)
+{
+    JS_ASSERT(value != 0);
+    putLinkValueAllowZero(opcodePtr, value);
+}
+
+/* Read the LINK_SIZE-byte link at |opcodePtr|; JS_ASSERT checks that it is
+   non-zero. */
+static inline int getLinkValue(const unsigned char* opcodePtr)
+{
+    const int link = getLinkValueAllowZero(opcodePtr);
+    JS_ASSERT(link != 0);
+    return link;
+}
+
+/* Write a non-zero link at |opcodePtr| and move the caller's pointer past the
+   LINK_SIZE-byte field. */
+static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr, int value)
+{
+    unsigned char* const linkField = opcodePtr;
+    opcodePtr = linkField + LINK_SIZE;
+    putLinkValue(linkField, value);
+}
+
+/* Write a link (zero allowed) at |opcodePtr| and move the caller's pointer
+   past the LINK_SIZE-byte field. */
+static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, int value)
+{
+    unsigned char* const linkField = opcodePtr;
+    opcodePtr = linkField + LINK_SIZE;
+    putLinkValueAllowZero(linkField, value);
+}
+
+// FIXME: These are really more of a "compiled regexp state" than "regexp options"
+// Bit flags, listed from the lowest bit upwards.
+enum RegExpOptions {
+    IgnoreCaseOption                        = 0x00000001,
+    MatchAcrossMultipleLinesOption          = 0x00000002,
+    IsAnchoredOption                        = 0x02000000,  /* can't use partial with this regex */
+    UseMultiLineFirstByteOptimizationOption = 0x10000000,  /* start after \n for multiline */
+    UseRequiredByteOptimizationOption       = 0x20000000,  /* reqByte is set */
+    UseFirstByteOptimizationOption          = 0x40000000   /* firstByte is set */
+};
+
+/* Flags added to firstByte or reqByte; a "non-literal" item is either a
+variable-length repeat, or anything other than literal characters. Both flags
+sit above the low 8 bits of the value they are added to. */
+
+#define REQ_IGNORE_CASE 0x0100    /* indicates should ignore case */
+#define REQ_VARY     0x0200    /* reqByte followed non-literal item */
+
+/* Miscellaneous definitions */
+
+/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
+contain UTF-8 characters with values greater than 255. The first two are bits
+of the class's leading flag byte; the remaining three are the item-type bytes
+of the list that jsc_pcre_xclass() walks until it reads XCL_END. */
+
+#define XCL_NOT    0x01    /* Flag: this is a negative class */
+#define XCL_MAP    0x02    /* Flag: a 32-byte map is present */
+
+#define XCL_END       0    /* Marks end of individual items */
+#define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
+#define XCL_RANGE     2    /* A range (two multibyte chars) follows */
+
+/* These are escaped items that aren't just an encoding of a particular data
+value such as \n. They must have non-zero values, as check_escape() returns
+their negation. Also, they must appear in the same order as in the opcode
+definitions below, up to ESC_w. The final one must be
+ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
+tests in the code for an escape > ESC_b and <= ESC_w to
+detect the types that may be repeated. These are the types that consume
+characters. If any new escapes are put in between that don't consume a
+character, that code will have to change. */
+
+enum { ESC_B = 1, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_REF };
+
+/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
+that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
+OP_EOD must correspond in order to the list of escapes immediately above.
+Note that whenever this list is updated, the two macro definitions that follow
+must also be updated to match.
+
+NOTE(review): there is no OP_EOD entry in the list below. As written, the
+opcodes that line up with the escapes are OP_NOT_WORD_BOUNDARY (ESC_B)
+through OP_WORDCHAR (ESC_w). Only one dependent macro (OPCODE_ENUM_VALUE) is
+visible in this header; confirm whether another user of the list exists. */
+
+#define FOR_EACH_OPCODE(macro) \
+    macro(END) \
+    \
+    , macro(NOT_WORD_BOUNDARY) \
+    , macro(WORD_BOUNDARY) \
+    , macro(NOT_DIGIT) \
+    , macro(DIGIT) \
+    , macro(NOT_WHITESPACE) \
+    , macro(WHITESPACE) \
+    , macro(NOT_WORDCHAR) \
+    , macro(WORDCHAR) \
+    \
+    , macro(NOT_NEWLINE) \
+    \
+    , macro(CIRC) \
+    , macro(DOLL) \
+    , macro(BOL) \
+    , macro(EOL) \
+    , macro(CHAR) \
+    , macro(CHAR_IGNORING_CASE) \
+    , macro(ASCII_CHAR) \
+    , macro(ASCII_LETTER_IGNORING_CASE) \
+    , macro(NOT) \
+    \
+    , macro(STAR) \
+    , macro(MINSTAR) \
+    , macro(PLUS) \
+    , macro(MINPLUS) \
+    , macro(QUERY) \
+    , macro(MINQUERY) \
+    , macro(UPTO) \
+    , macro(MINUPTO) \
+    , macro(EXACT) \
+    \
+    , macro(NOTSTAR) \
+    , macro(NOTMINSTAR) \
+    , macro(NOTPLUS) \
+    , macro(NOTMINPLUS) \
+    , macro(NOTQUERY) \
+    , macro(NOTMINQUERY) \
+    , macro(NOTUPTO) \
+    , macro(NOTMINUPTO) \
+    , macro(NOTEXACT) \
+    \
+    , macro(TYPESTAR) \
+    , macro(TYPEMINSTAR) \
+    , macro(TYPEPLUS) \
+    , macro(TYPEMINPLUS) \
+    , macro(TYPEQUERY) \
+    , macro(TYPEMINQUERY) \
+    , macro(TYPEUPTO) \
+    , macro(TYPEMINUPTO) \
+    , macro(TYPEEXACT) \
+    \
+    , macro(CRSTAR) \
+    , macro(CRMINSTAR) \
+    , macro(CRPLUS) \
+    , macro(CRMINPLUS) \
+    , macro(CRQUERY) \
+    , macro(CRMINQUERY) \
+    , macro(CRRANGE) \
+    , macro(CRMINRANGE) \
+    \
+    , macro(CLASS) \
+    , macro(NCLASS) \
+    , macro(XCLASS) \
+    \
+    , macro(REF) \
+    \
+    , macro(ALT) \
+    , macro(KET) \
+    , macro(KETRMAX) \
+    , macro(KETRMIN) \
+    \
+    , macro(ASSERT) \
+    , macro(ASSERT_NOT) \
+    \
+    , macro(BRAZERO) \
+    , macro(BRAMINZERO) \
+    , macro(BRANUMBER) \
+    , macro(BRA)
+
+/* Expand the list into enumerators OP_END, OP_NOT_WORD_BOUNDARY, ... OP_BRA,
+   numbered from 0 in list order. */
+#define OPCODE_ENUM_VALUE(opcode) OP_##opcode
+enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) };
+
+/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
+study.c that all opcodes are less than 128 in value. This makes handling UTF-8
+character sequences easier. */
+
+/* The highest extraction number before we have to start using additional
+bytes. (Originally PCRE didn't have support for extraction counts higher than
+this number.) The value is limited by the number of opcodes left after OP_BRA,
+i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
+opcodes. */
+
+/* FIXME: Note that OP_BRA + 100 is > 128, so the two comments above
+are in conflict! */
+
+#define EXTRACT_BASIC_MAX  100
+
+/* Fixed-size header of a compiled pattern.
+   The code vector runs on as long as necessary after the end, so the field
+   layout must not be changed casually. */
+
+struct JSRegExp {
+    /* NOTE(review): presumably a combination of RegExpOptions bits -- confirm. */
+    unsigned options;
+
+    /* NOTE(review): presumably the highest bracket number and the highest
+       back-reference number in the pattern -- confirm in pcre_compile.cpp. */
+    unsigned short topBracket;
+    unsigned short topBackref;
+    
+    /* The RegExpOptions comments tie these to UseFirstByteOptimizationOption
+       ("firstByte is set") and UseRequiredByteOptimizationOption ("reqByte
+       is set"); REQ_IGNORE_CASE / REQ_VARY may be added to either value. */
+    unsigned short firstByte;
+    unsigned short reqByte;
+};
+
+/* Internal shared data tables. These are tables that are used by more than one
+ of the exported public functions. They have to be "external" in the C sense,
+ but are not part of the PCRE public API. The jsc_pcre_utf8_table* arrays are
+ defined in pcre_tables.cpp.
+ NOTE(review): jsc_pcre_default_tables is presumably supplied by the
+ chartables.c file that pcre_tables.cpp includes -- confirm. */
+
+#define jsc_pcre_utf8_table1_size 6
+
+extern const int    jsc_pcre_utf8_table1[6];
+extern const int    jsc_pcre_utf8_table2[6];
+extern const int    jsc_pcre_utf8_table3[6];
+extern const unsigned char jsc_pcre_utf8_table4[0x40];
+
+/* Sliced by the *_offset macros into the lcc, fcc, cbits and ctypes tables. */
+extern const unsigned char jsc_pcre_default_tables[tables_length];
+
+/* Return the entry for |c| from the table at lcc_offset in the default tables.
+   NOTE(review): that table spans 128 bytes (up to fcc_offset), so callers
+   presumably pass c < 128 -- confirm. */
+static inline unsigned char toLowerCase(unsigned char c)
+{
+    return jsc_pcre_default_tables[lcc_offset + c];
+}
+
+/* Return the entry for |c| from the table at fcc_offset in the default tables.
+   NOTE(review): that table spans 128 bytes (up to cbits_offset), so callers
+   presumably pass c < 128 -- confirm. */
+static inline unsigned char flipCase(unsigned char c)
+{
+    return jsc_pcre_default_tables[fcc_offset + c];
+}
+
+/* Return byte |c| of the class bitmaps stored at cbits_offset in the default
+   tables. */
+static inline unsigned char classBitmapForChar(unsigned char c)
+{
+    return jsc_pcre_default_tables[cbits_offset + c];
+}
+
+/* Return the ctype_* bits for |c| from the table at ctypes_offset. The table
+   holds 128 entries (tables_length is ctypes_offset + 128); isWordChar() and
+   isSpaceChar() check c < 128 before calling this. */
+static inline unsigned char charTypeForChar(unsigned char c)
+{
+    return jsc_pcre_default_tables[ctypes_offset + c];
+}
+
+/* True when |c| is below 128 and its ctype entry has the ctype_word bit. */
+static inline bool isWordChar(UChar c)
+{
+    if (c >= 128)
+        return false;
+    return (charTypeForChar(c) & ctype_word) != 0;
+}
+
+/* True for U+00A0, and for any |c| below 128 whose ctype entry has the
+   ctype_space bit. */
+static inline bool isSpaceChar(UChar c)
+{
+    if (c == 0x00A0)
+        return true;
+    if (c >= 128)
+        return false;
+    return (charTypeForChar(c) & ctype_space) != 0;
+}
+
+/* True when |nl| is one of U+000A, U+000D, U+2028 or U+2029. */
+static inline bool isNewline(UChar nl)
+{
+    switch (nl) {
+        case 0x000A:
+        case 0x000D:
+        case 0x2028:
+        case 0x2029:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/* True when |opcode| opens a bracket: any value from OP_BRA upwards (the
+   brackets that extract substrings), or one of the two assertion opcodes. */
+static inline bool isBracketStartOpcode(unsigned char opcode)
+{
+    const bool opensExtractingBracket = opcode >= OP_BRA;
+    const bool opensAssertion = opcode == OP_ASSERT || opcode == OP_ASSERT_NOT;
+    return opensExtractingBracket || opensAssertion;
+}
+
+/* Starting on a bracket-start opcode or an OP_ALT, follow the link stored
+   after each opcode until the opcode reached is no longer OP_ALT. At least
+   one link is always followed. */
+static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr)
+{
+    JS_ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT);
+    for (;;) {
+        opcodePtr += getLinkValue(opcodePtr + 1);
+        if (*opcodePtr != OP_ALT)
+            break;
+    }
+}
+
+/* Internal shared functions. These are functions that are used in more
+than one of the source files. They have to have external linkage, but
+are not part of the public API and so not exported from the library. */
+
+/* Defined in pcre_ucp_searchfuncs.cpp: other case of a character, or -1. */
+extern int jsc_pcre_ucp_othercase(unsigned);
+/* Defined in pcre_xclass.cpp: match a character against OP_XCLASS data. */
+extern bool jsc_pcre_xclass(int, const unsigned char*);
+
+#endif
+
+#endif
+
+/* End of pcre_internal.h */
--- a/js/src/yarr/pcre/pcre_tables.cpp
+++ b/js/src/yarr/pcre/pcre_tables.cpp
@ -0,0 +1,71 @@
+/* This is JavaScriptCore's variant of the PCRE library. While this library
+started out as a copy of PCRE, many of the features of PCRE have been
+removed. This library now supports only the regular expression features
+required by the JavaScript language specification, and has only the functions
+needed by JavaScriptCore and the rest of WebKit.
+
+                 Originally written by Philip Hazel
+           Copyright (c) 1997-2006 University of Cambridge
+    Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains some fixed tables that are used by more than one of the
+PCRE code modules. */
+
+#include "pcre_internal.h"
+
+/*************************************************
+*           Tables for UTF-8 support             *
+*************************************************/
+
+/* Breakpoints for the different UTF-8 lengths: entry i is the largest value
+   listed for a character of i + 1 bytes. */
+
+const int jsc_pcre_utf8_table1[6] = {
+    0x0000007f,
+    0x000007ff,
+    0x0000ffff,
+    0x001fffff,
+    0x03ffffff,
+    0x7fffffff
+};
+
+/* Both tables are indexed by the number of additional bytes in a character:
+   table2 holds the indicator bits to set in the first byte, table3 the mask
+   that selects the data bits of the first byte. */
+
+const int jsc_pcre_utf8_table2[6] = {
+    0x00, 0xc0, 0xe0,
+    0xf0, 0xf8, 0xfc
+};
+const int jsc_pcre_utf8_table3[6] = {
+    0xff, 0x1f, 0x0f,
+    0x07, 0x03, 0x01
+};
+
+/* Number of extra characters that follow a lead byte, indexed by the lead
+   byte masked with 0x3f. The highest index for a valid UTF-8 character is in
+   fact 0x3d. */
+
+const unsigned char jsc_pcre_utf8_table4[0x40] = {
+    1, 1, 1, 1, 1, 1, 1, 1,    /* 0x00 - 0x07 */
+    1, 1, 1, 1, 1, 1, 1, 1,    /* 0x08 - 0x0f */
+    1, 1, 1, 1, 1, 1, 1, 1,    /* 0x10 - 0x17 */
+    1, 1, 1, 1, 1, 1, 1, 1,    /* 0x18 - 0x1f */
+    2, 2, 2, 2, 2, 2, 2, 2,    /* 0x20 - 0x27 */
+    2, 2, 2, 2, 2, 2, 2, 2,    /* 0x28 - 0x2f */
+    3, 3, 3, 3, 3, 3, 3, 3,    /* 0x30 - 0x37 */
+    4, 4, 4, 4, 5, 5, 5, 5     /* 0x38 - 0x3f */
+};
+
+#include "chartables.c"
--- a/js/src/yarr/pcre/pcre_ucp_searchfuncs.cpp
+++ b/js/src/yarr/pcre/pcre_ucp_searchfuncs.cpp
@ -0,0 +1,98 @@
+/* This is JavaScriptCore's variant of the PCRE library. While this library
+started out as a copy of PCRE, many of the features of PCRE have been
+removed. This library now supports only the regular expression features
+required by the JavaScript language specification, and has only the functions
+needed by JavaScriptCore and the rest of WebKit.
+
+                 Originally written by Philip Hazel
+           Copyright (c) 1997-2006 University of Cambridge
+    Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains code for searching the table of Unicode character
+properties. */
+
+#include "pcre_internal.h"
+
+#include "ucpinternal.h"       /* Internal table details */
+#include "ucptable.cpp"        /* The table itself */
+
+/*************************************************
+*       Search table and return other case       *
+*************************************************/
+
+/* If the given character is a letter, and there is another case for the
+letter, return the other case. Otherwise, return -1.
+
+Arguments:
+  c           the character value
+
+Returns:      the other case or -1 if none
+*/
+
+/* Binary-search ucp_table for |c| and return its other-case counterpart.
+   Returns -1 when |c| is not in the table, when the matching entry is a range
+   entry, or when the entry's case offset is zero. */
+int jsc_pcre_ucp_othercase(unsigned c)
+{
+    int low = 0;
+    int high = sizeof(ucp_table) / sizeof(cnode);
+    int hit = -1;
+
+    /* Search the half-open interval [low, high) until an entry matches or the
+       interval is empty. The table expressions are deliberately repeated
+       rather than cached, as in the original code. */
+    while (hit < 0 && low < high) {
+        const int probe = (low + high) >> 1;
+        if (c < (ucp_table[probe].f0 & f0_charmask)) {
+            high = probe;
+        } else if (c == (ucp_table[probe].f0 & f0_charmask)) {
+            hit = probe;
+        } else if ((ucp_table[probe].f0 & f0_rangeflag) && (c <= (ucp_table[probe].f0 & f0_charmask) + (ucp_table[probe].f1 & f1_rangemask))) {
+            /* |c| lies inside the range that starts at this entry. */
+            hit = probe;
+        } else {
+            low = probe + 1;
+        }
+    }
+
+    if (hit < 0)
+        return -1;
+
+    /* Range entries never yield an other case. */
+    if (ucp_table[hit].f0 & f0_rangeflag)
+        return -1;
+
+    /* NOTE(review): OR-ing in f1_caseneg presumably sign-extends a negative
+       offset; the mask values live in ucpinternal.h -- confirm. */
+    int offset = ucp_table[hit].f1 & f1_casemask;
+    if (offset & f1_caseneg)
+        offset |= f1_caseneg;
+    return !offset ? -1 : c + offset;
+}
--- a/js/src/yarr/pcre/pcre_xclass.cpp
+++ b/js/src/yarr/pcre/pcre_xclass.cpp
@ -0,0 +1,114 @@
+/* This is JavaScriptCore's variant of the PCRE library. While this library
+started out as a copy of PCRE, many of the features of PCRE have been
+removed. This library now supports only the regular expression features
+required by the JavaScript language specification, and has only the functions
+needed by JavaScriptCore and the rest of WebKit.
+
+                 Originally written by Philip Hazel
+           Copyright (c) 1997-2006 University of Cambridge
+    Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains an internal function that is used to match an extended
+class (one that contains characters whose values are > 255). */
+
+#include "pcre_internal.h"
+
+/*************************************************
+*       Match character against an XCLASS        *
+*************************************************/
+
+/* This function is called to match a character against an extended class that
+might contain values > 255.
+
+Arguments:
+  c           the character
+  data        points to the flag byte of the XCLASS data
+
+Returns:      true if character matches, else false
+*/
+
+/* Decode the UTF-8 character at |subjectPtr| into |c| and leave |subjectPtr|
+   just past it. A first byte whose top two bits are not both set is returned
+   as-is. No validation of the continuation bytes is performed. */
+
+static inline void getUTF8CharAndAdvancePointer(int& c, const unsigned char*& subjectPtr)
+{
+    c = *subjectPtr++;
+    if ((c & 0xc0) != 0xc0)
+        return;
+
+    int extraBytes = jsc_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
+    int shift = 6 * extraBytes;
+    c = (c & jsc_pcre_utf8_table3[extraBytes]) << shift;
+
+    /* Each following byte contributes its low six bits, most significant first. */
+    for (; extraBytes > 0; --extraBytes) {
+        shift -= 6;
+        c |= (*subjectPtr++ & 0x3f) << shift;
+    }
+}
+
+/* Test |c| against the extended class whose flag byte is at |data|.
+   Layout read here: the flag byte, an optional 32-byte bitmap (XCL_MAP), then
+   a list of XCL_SINGLE / XCL_RANGE items terminated by XCL_END. Returns true
+   on a match, with the sense inverted when XCL_NOT is set. */
+bool jsc_pcre_xclass(int c, const unsigned char* data)
+{
+    const unsigned char flags = *data++;
+    const bool negated = (flags & XCL_NOT) != 0;
+
+    if ((flags & XCL_MAP) != 0) {
+        /* The bitmap only covers values below 256. A miss is not final,
+           because the item list may still hold ranges that start below 256. */
+        if (c < 256 && (data[c / 8] & (1 << (c & 7))) != 0)
+            return !negated;   /* char found */
+        data += 32;
+    }
+
+    /* Walk the item list; item types other than single/range are skipped. */
+    for (int itemType = *data++; itemType != XCL_END; itemType = *data++) {
+        if (itemType == XCL_SINGLE) {
+            int single;
+            getUTF8CharAndAdvancePointer(single, data);
+            if (single == c)
+                return !negated;
+        } else if (itemType == XCL_RANGE) {
+            int first, last;
+            getUTF8CharAndAdvancePointer(first, data);
+            getUTF8CharAndAdvancePointer(last, data);
+            if (first <= c && c <= last)
+                return !negated;
+        }
+    }
+
+    return negated;   /* char did not match */
+}
--- a/js/src/yarr/pcre/ucpinternal.h
+++ b/js/src/yarr/pcre/ucpinternal.h
@ -0,0 +1,126 @@
+/* This is JavaScriptCore's variant of the PCRE library. While this library
+started out as a copy of PCRE, many of the features of PCRE have been
+removed. This library now supports only the regular expression features
+required by the JavaScript language specification, and has only the functions
+needed by JavaScriptCore and the rest of WebKit.
+
+                 Originally written by Philip Hazel
+           Copyright (c) 1997-2006 University of Cambridge
+    Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/*************************************************
+*           Unicode Property Table handler       *
+*************************************************/
+
+/* Internal header file defining the layout of the bits in each pair of 32-bit
+words that form a data item in the table. */
+
+struct cnode {
+  unsigned int f0;  /* script, range flag and code point */
+  unsigned int f1;  /* character type and range/case offset */
+};
+
+/* Things for the f0 field. The range flag is the single 0x00800000 bit: the old
+   0x00f00000 value also covered code point bit 0x00100000 (U+100000 and up). */
+#define f0_scriptmask   0xff000000  /* Mask for script field */
+#define f0_scriptshift          24  /* Shift for script value */
+#define f0_rangeflag    0x00800000  /* Flag for a range item */
+#define f0_charmask     0x001fffff  /* Mask for code point value */
+
+/* Things for the f1 field. Range and case offsets share the same low 16 bits. */
+
+#define f1_typemask     0xfc000000  /* Mask for char type field (top 6 bits) */
+#define f1_typeshift            26  /* Shift for the type field */
+#define f1_rangemask    0x0000ffff  /* Mask for a range offset (unsigned) */
+#define f1_casemask     0x0000ffff  /* Mask for a case offset (signed) */
+#define f1_caseneg      0xffff8000  /* Bits for negation; presumably OR-ed in to sign-extend a negative case offset -- confirm at use sites */
+
+/* The data consists of a vector of structures of type cnode. The two unsigned
+32-bit integers are used as follows:
+
+(f0) (1) The most significant byte holds the script number. The numbers are
+         defined by the enum in ucp.h.
+
+     (2) The 0x00800000 bit is set if this entry defines a range of characters.
+         It is not set if this entry defines a single character.
+
+     (3) The 0x00600000 bits are spare.
+
+     (4) The 0x001fffff bits contain the code point. No Unicode code point will
+         ever be greater than 0x0010ffff, so this should be OK for ever.
+
+(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
+         defined by an enum in ucp.h.
+
+     (2) The 0x03ff0000 bits are spare.
+
+     (3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
+         range if this entry defines a range, OR the *signed* offset to the
+         character's "other case" partner if this entry defines a single
+         character. There is no partner if the value is zero.
+
+-------------------------------------------------------------------------------
+| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
+-------------------------------------------------------------------------------
+              | | |                              | |
+              | | |-> spare                      | |-> spare
+              | |                                |
+              | |-> spare                        |-> spare
+              |
+              |-> range flag
+
+The upper/lower casing information is set only for characters that come in
+pairs. The non-one-to-one mappings in the Unicode data are ignored.
+
+When searching the data, proceed as follows:
+
+(1) Set up for a binary chop search.
+
+(2) If the top is not greater than the bottom, the character is not in the
+    table. Its type must therefore be "Cn" ("Unassigned").
+
+(3) Find the middle vector element.
+
+(4) Extract the code point and compare. If equal, we are done.
+
+(5) If the test character is smaller, set the top to the current point, and
+    goto (2).
+
+(6) If the current entry defines a range, compute the last character by adding
+    the offset, and see if the test character is within the range. If it is,
+    we are done.
+
+(7) Otherwise, set the bottom to one element past the current point and goto
+    (2).
+*/
+
+/* End of ucpinternal.h */
--- a/js/src/yarr/pcre/ucptable.cpp
+++ b/js/src/yarr/pcre/ucptable.cpp