gecko/js/src/jsregexp.cpp

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set sw=4 ts=8 et tw=78:
 *
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Communicator client code, released
 * March 31, 1998.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

/*
 * JS regular expressions, after Perl.
 */
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "jstypes.h"
#include "jsstdint.h"
#include "jsarena.h" /* Added by JSIFY */
#include "jsutil.h" /* Added by JSIFY */
#include "jsapi.h"
#include "jsarray.h"
#include "jsatom.h"
#include "jsbuiltins.h"
#include "jscntxt.h"
#include "jsversion.h"
#include "jsfun.h"
#include "jsgc.h"
#include "jsinterp.h"
#include "jslock.h"
#include "jsnum.h"
#include "jsobj.h"
#include "jsopcode.h"
#include "jsregexp.h"
#include "jsscan.h"
#include "jsscope.h"
#include "jsstaticcheck.h"
#include "jsstr.h"
#include "jsvector.h"

#ifdef JS_TRACER
#include "jstracer.h"
using namespace avmplus;
using namespace nanojit;
#endif

#include "jsobjinlines.h"

using namespace js;

/*
 * Regular-expression opcodes.  The enumerators are generated by expanding
 * every REOP_DEF(opcode, name) entry of jsreops.tbl to its opcode, in table
 * order; REOP_LIMIT follows the last table entry.
 */
typedef enum REOp {
#define REOP_DEF(opcode, name) opcode,
#include "jsreops.tbl"
#undef REOP_DEF
    REOP_LIMIT /* META: no operator >= to this */
} REOp;

/* True when |op| is REOP_NCLASS or comes before it in REOp enumeration order. */
#define REOP_IS_SIMPLE(op)  ((op) <= REOP_NCLASS)

#ifdef REGEXP_DEBUG
/*
 * Debug-only table of opcode names, generated from the same jsreops.tbl
 * entries as the REOp enumeration and terminated by NULL.
 */
const char *reop_names[] = {
#define REOP_DEF(opcode, name) name,
#include "jsreops.tbl"
#undef REOP_DEF
    NULL
};
#endif

/*
 * Declare re_debug ahead of its definition so that GCC-compatible compilers
 * check callers' arguments against the printf-style format string (the format
 * is parameter 1, the variadic arguments start at parameter 2).
 */
#ifdef __GNUC__
static int
re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
#endif

#ifdef REGEXP_DEBUG
/*
 * Debug build: format |fmt| and its variadic arguments to stdout with vprintf
 * and hand back vprintf's return value.
 */
static int
re_debug(const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    const int written = vprintf(fmt, args);
    va_end(args);

    return written;
}

/*
 * Debug build: print up to |length| characters of |chrs| to stdout, wrapped
 * in double quotes and preceded by a space.  Printing stops early at the
 * first NUL character.  Each jschar is narrowed to char for output.
 *
 * The length bound is tested before the character is read, so the element at
 * chrs[length] is never touched, and the counter is a size_t so it compares
 * against |length| without a signed/unsigned mismatch.
 */
static void
re_debug_chars(const jschar *chrs, size_t length)
{
    size_t i;

    printf(" \"");
    for (i = 0; i < length && chrs[i]; i++)
        putchar((char) chrs[i]);
    printf("\"");
}
#else  /* !REGEXP_DEBUG */
/*
 * Non-debug build: accept the same arguments as the debug variant, print
 * nothing and report zero.  This should be optimized to a no-op by our tier-1
 * compilers.
 */
static int
re_debug(const char *fmt, ...)
{
    (void) fmt;     /* intentionally unused */
    return 0;
}

/* Non-debug build: same signature as the debug variant, deliberately empty. */
static void
re_debug_chars(const jschar *chrs, size_t length)
{
}
#endif /* !REGEXP_DEBUG */

/*
 * Node of the parse tree built by the regexp parser.  |op| selects which
 * member of the union |u| is meaningful; |kid| is untyped because it is used
 * both for child nodes and (per the member comments) for character data.
 */
struct RENode {
    REOp            op;         /* r.e. op bytecode */
    RENode          *next;      /* next in concatenation order */
    void            *kid;       /* first operand */
    union {
        void        *kid2;      /* second operand */
        jsint       num;        /* could be a number */
        size_t      parenIndex; /* or a parenthesis index */
        struct {                /* or a quantifier range */
            uintN  min;
            uintN  max;
            JSPackedBool greedy;
        } range;
        struct {                /* or a character class */
            size_t  startIndex;
            size_t  kidlen;     /* length of string at kid, in jschars */
            size_t  index;      /* index into class list */
            uint16  bmsize;     /* bitmap size, based on max char code */
            JSPackedBool sense;
        } ucclass;
        struct {                /* or a literal sequence */
            jschar  chr;        /* of one character */
            size_t  length;     /* or many (via the kid) */
        } flat;
        struct {
            RENode  *kid2;      /* second operand from ALT */
            jschar  ch1;        /* match char for ALTPREREQ */
            jschar  ch2;        /* ditto, or class index for ALTPREREQ2 */
        } altprereq;
    } u;
};

/*
 * Character classification helpers.
 *
 * RE_IS_LETTER(c) is true for the ASCII letters A-Z and a-z only.
 * RE_IS_LINE_TERM(c) is true for '\n', '\r', LINE_SEPARATOR and
 * PARA_SEPARATOR.
 *
 * The argument is fully parenthesized so that expressions containing
 * low-precedence operators (e.g. a conditional expression) are classified
 * correctly.  It is still evaluated more than once, so do not pass an
 * argument with side effects.
 */
#define RE_IS_LETTER(c)     ((((c) >= 'A') && ((c) <= 'Z')) ||                \
                             (((c) >= 'a') && ((c) <= 'z')))
#define RE_IS_LINE_TERM(c)  (((c) == '\n') || ((c) == '\r') ||                \
                             ((c) == LINE_SEPARATOR) ||                       \
                             ((c) == PARA_SEPARATOR))

/* Number of entries in CompilerState.classCache. */
#define CLASS_CACHE_SIZE    4

/*
 * State shared by the parsing functions while one regexp source is being
 * compiled: the source cursor, running counters and size estimates, the most
 * recently parsed node, and a small cache of character-class strings.
 */
typedef struct CompilerState {
    JSContext       *context;
    TokenStream     *tokenStream; /* For reporting errors */
    const jschar    *cpbegin;
    const jschar    *cpend;
    const jschar    *cp;
    size_t          parenCount;
    size_t          classCount;   /* number of [] encountered */
    size_t          treeDepth;    /* maximum depth of parse tree */
    size_t          progLength;   /* estimated bytecode length */
    RENode          *result;
    size_t          classBitmapsMem; /* memory to hold all class bitmaps */
    struct {
        const jschar *start;        /* small cache of class strings */
        size_t length;              /* since they're often the same */
        size_t index;
    } classCache[CLASS_CACHE_SIZE];
    uint16          flags;
} CompilerState;

/*
 * One entry of the stack used while emitting bytecode for nested nodes.  The
 * fields record bytecode positions that must be patched later, plus the node
 * and opcode with which to continue.  TREE_DEPTH_MAX bounds the size of this
 * stack.
 */
typedef struct EmitStateStackEntry {
    jsbytecode      *altHead;       /* start of REOP_ALT* opcode */
    jsbytecode      *nextAltFixup;  /* fixup pointer to next-alt offset */
    jsbytecode      *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
    jsbytecode      *endTermFixup;  /* fixup ptr. to REOP_ALTPREREQ* offset */
    RENode          *continueNode;  /* original REOP_ALT* node being stacked */
    jsbytecode      continueOp;     /* REOP_JUMP or REOP_ENDALT continuation */
    JSPackedBool    jumpToJumpFlag; /* true if we've patched jump-to-jump to
                                       avoid 16-bit unsigned offset overflow */
} EmitStateStackEntry;

/*
 * Immediate operand sizes and getter/setters.  Unlike the ones in jsopcode.h,
 * the getters and setters take the pc of the offset, not of the opcode before
 * the offset.
 */
/* An immediate operand occupies ARG_LEN bytes, most significant byte first. */
#define ARG_LEN             2
/* Read the 16-bit operand stored at pc[0] (high byte) and pc[1] (low byte). */
#define GET_ARG(pc)         ((uint16)(((pc)[0] << 8) | (pc)[1]))
/* Store |arg| at pc[0..1]; each byte is truncated to jsbytecode. */
#define SET_ARG(pc, arg)    ((pc)[0] = (jsbytecode) ((arg) >> 8),       \
                             (pc)[1] = (jsbytecode) (arg))

/* Jump offsets use the same encoding as other immediate operands. */
#define OFFSET_LEN          ARG_LEN
/* Largest offset representable in OFFSET_LEN bytes, assuming JS_BIT(n) is 1 << n. */
#define OFFSET_MAX          (JS_BIT(ARG_LEN * 8) - 1)
#define GET_OFFSET(pc)      GET_ARG(pc)

/*
 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
 * For sanity, we limit it to 2^24 bytes, so the limit expressed in entries is
 * that byte budget divided by the size of one entry.
 */
#define TREE_DEPTH_MAX  (JS_BIT(24) / sizeof(EmitStateStackEntry))

/*
 * The maximum memory that can be allocated for class bitmaps.
 * For sanity, we limit it to 2^24 bytes.
 */
#define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)

/*
 * Functions to get size and write/read bytecode that represent small indexes
 * compactly.
 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
 * indicates that the following byte brings more bits to the index. Otherwise
 * this is the last byte in the index bytecode representing highest index bits.
 */
/*
 * Return how many bytes the compact encoding of |index| occupies: one byte
 * for the low 7 bits, plus one more for every further 7-bit group needed.
 */
static size_t
GetCompactIndexWidth(size_t index)
{
    size_t width = 1;

    while (index > 0x7F) {
        index >>= 7;
        ++width;
    }
    return width;
}

/*
 * Store |index| at |pc| in compact form, least significant 7-bit group first.
 * Every byte except the last has bit 0x80 set to announce that another byte
 * follows.  Returns the address just past the last byte written.
 */
static JS_ALWAYS_INLINE jsbytecode *
WriteCompactIndex(jsbytecode *pc, size_t index)
{
    for (; (index >> 7) != 0; index >>= 7)
        *pc++ = (jsbytecode)(index | 0x80);

    /* Final byte: the remaining high-order bits, continuation flag clear. */
    *pc++ = (jsbytecode)index;
    return pc;
}

/*
 * Decode a compact index starting at |pc| into |*result| and return the
 * address of the byte following the encoding.  Bytes carry 7 payload bits
 * each, low-order group first; bit 0x80 marks that another byte follows.
 */
static JS_ALWAYS_INLINE jsbytecode *
ReadCompactIndex(jsbytecode *pc, size_t *result)
{
    size_t byte = *pc++;

    /* Short-circuit the most common case when compact index <= 127. */
    if ((byte & 0x80) == 0) {
        *result = byte;
        return pc;
    }

    *result = byte & 0x7F;
    for (size_t shift = 7; ; shift += 7) {
        byte = *pc++;
        *result |= (byte & 0x7F) << shift;
        if ((byte & 0x80) == 0)
            break;
    }
    return pc;
}

/* Position and extent of one captured parenthesized group. */
typedef struct RECapture {
    ptrdiff_t index;           /* start of contents, -1 for empty  */
    size_t length;             /* length of capture */
} RECapture;

/*
 * Current match position plus the capture array.  The struct is
 * over-allocated: |parens| is declared with one element but is followed in
 * memory by the remaining captures.
 */
typedef struct REMatchState {
    const jschar *cp;
    RECapture parens[1];      /* first of 're->parenCount' captures,
                                 allocated at end of this struct */
} REMatchState;

struct REBackTrackData;

/*
 * One saved program state (an element of REGlobalData.stateStack).  The union
 * holds either quantifier limits or the saved backtrack-stack state of an
 * assertion; which one applies is not recorded in the struct itself.
 */
typedef struct REProgState {
    jsbytecode *continue_pc;        /* current continuation data */
    jsbytecode continue_op;
    ptrdiff_t index;                /* progress in text */
    size_t parenSoFar;              /* highest indexed paren started */
    union {
        struct {
            uintN min;             /* current quantifier limits */
            uintN max;
        } quantifier;
        struct {
            size_t top;             /* backtrack stack state */
            size_t sz;
        } assertion;
    } u;
} REProgState;

/*
 * Header of one variable-sized backtrack stack entry.  As the trailing
 * comments note, the saved parent states and saved paren contents are stored
 * directly after this header; |sz| records the size of the previous entry.
 */
typedef struct REBackTrackData {
    size_t sz;                      /* size of previous stack entry */
    jsbytecode *backtrack_pc;       /* where to backtrack to */
    jsbytecode backtrack_op;
    const jschar *cp;               /* index in text of match at backtrack */
    size_t parenIndex;              /* start index of saved paren contents */
    size_t parenCount;              /* # of saved paren contents */
    size_t saveStateStackTop;       /* number of parent states */
    /* saved parent states follow */
    /* saved paren contents follow */
} REBackTrackData;

/*
 * NOTE(review): presumably the initial sizes of REGlobalData.stateStack and
 * REGlobalData.backTrackStack; their use and units are not visible in this
 * part of the file -- confirm at the allocation site.
 */
#define INITIAL_STATESTACK  100
#define INITIAL_BACKTRACK   8000

/*
 * Per-execution matcher data: the context and regexp being run, the bounds of
 * the subject text, and the two stacks (program states and backtrack
 * entries) together with their bookkeeping counters.
 */
typedef struct REGlobalData {
    JSContext *cx;
    JSRegExp *regexp;               /* the RE in execution */
    JSBool ok;                      /* runtime error (out_of_memory only?) */
    size_t start;                   /* offset to start at */
    ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */
    const jschar    *cpbegin;       /* text base address */
    const jschar    *cpend;         /* text limit address */

    REProgState *stateStack;        /* stack of state of current parents */
    size_t stateStackTop;
    size_t stateStackLimit;

    REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
    REBackTrackData *backTrackSP;
    size_t backTrackStackSize;
    size_t cursz;                   /* size of current stack entry */
    size_t backTrackCount;          /* how many times we've backtracked */
    size_t backTrackLimit;          /* upper limit on backtrack states */
} REGlobalData;

void
JSRegExpStatics::clearRoots()
{
    input = NULL;
    cx->runtime->gcPoke = JS_TRUE;
}

/*
 * Overwrite this object's statics with those of |other|.
 *
 * The scalar members are assigned first; then |parens| is resized to match
 * other.parens and filled with a raw byte copy.  Returns false if the resize
 * fails, in which case the scalar members have already been overwritten.
 */
bool
JSRegExpStatics::copy(const JSRegExpStatics& other)
{
    clearRoots();

    input        = other.input;
    multiline    = other.multiline;
    lastMatch    = other.lastMatch;
    lastParen    = other.lastParen;
    leftContext  = other.leftContext;
    rightContext = other.rightContext;

    const bool resized = parens.resize(other.parens.length());
    if (resized) {
        memcpy(parens.begin(), other.parens.begin(),
               sizeof(JSSubString) * parens.length());
    }
    return resized;
}

/*
 * Reset the statics: clear the roots, turn multiline off, set the four
 * match substrings to js_EmptySubString and empty the parens vector.
 */
void
JSRegExpStatics::clear()
{
    clearRoots();
    multiline = false;

    rightContext = js_EmptySubString;
    leftContext = js_EmptySubString;
    lastParen = js_EmptySubString;
    lastMatch = js_EmptySubString;

    parens.clear();
}

/*
 * 1. If IgnoreCase is false, return ch.
 * 2. Let u be ch converted to upper case as if by calling
 *    String.prototype.toUpperCase on the one-character string ch.
 * 3. If u does not consist of a single character, return ch.
 * 4. Let cu be u's character.
 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
 *    code point value is less than decimal 128, then return ch.
 * 6. Return cu.
 */
/*
 * Case-folding helper implementing steps 2-6 of the algorithm described in
 * the comment above.  |ch| must fit in a jschar.  ASCII input is folded by
 * hand; other input goes through JS_TOUPPER, whose result is rejected when it
 * would map a non-ASCII character into the ASCII range.
 */
static JS_ALWAYS_INLINE uintN
upcase(uintN ch)
{
    JS_ASSERT((uintN) (jschar) ch == ch);

    if (ch < 128) {
        const bool isLowerAscii = ch >= (uintN) 'a' && ch <= (uintN) 'z';
        return isLowerAscii ? ch - (uintN) ('a' - 'A') : ch;
    }

    const uintN folded = JS_TOUPPER(ch);
    if (folded < 128)
        return ch;
    return folded;
}

/*
 * Return the 'canonical' inverse upcase of |ch|. That is the character
 * |lch| such that |upcase(lch) == ch| and (|lch| is the lower-case form
 * of |ch| or is |ch|).
 */
static inline jschar inverse_upcase(jschar ch)
{
    /* Use the lower-case form only if upcase() maps it back onto |ch|. */
    const jschar lower = JS_TOLOWER(ch);

    if (upcase(lower) != ch)
        return ch;
    return lower;
}

/* Construct and initialize an RENode, returning NULL for out-of-memory */
/*
 * Allocate an RENode from the context's tempPool arena and initialize it with
 * |op| and NULL |next| / |kid|; the union |u| is left uninitialized.  On
 * allocation failure, reports through js_ReportOutOfScriptQuota and returns
 * NULL.
 */
static RENode *
NewRENode(CompilerState *state, REOp op)
{
    JSContext *cx = state->context;
    RENode *node;

    JS_ARENA_ALLOCATE_CAST(node, RENode *, &cx->tempPool, sizeof *node);
    if (node) {
        node->op = op;
        node->next = NULL;
        node->kid = NULL;
        return node;
    }

    js_ReportOutOfScriptQuota(cx);
    return NULL;
}

/*
 * Validates and converts hex ascii value.
 */
/*
 * If |c| is one of 0-9, a-f or A-F, store its numeric value (0..15) in
 * |*digit| and return JS_TRUE.  Otherwise return JS_FALSE and leave |*digit|
 * untouched.
 */
static JSBool
isASCIIHexDigit(jschar c, uintN *digit)
{
    const uintN code = c;

    if (code >= '0' && code <= '9') {
        *digit = code - '0';
        return JS_TRUE;
    }
    if (code >= 'a' && code <= 'f') {
        *digit = code - 'a' + 10;
        return JS_TRUE;
    }
    if (code >= 'A' && code <= 'F') {
        *digit = code - 'A' + 10;
        return JS_TRUE;
    }
    return JS_FALSE;
}


/*
 * Entry of ParseRegExp's operator stack: the pending operator, the source
 * position recorded when it was pushed (used as the argument of error
 * reports) and the paren count at that moment.
 */
typedef struct {
    REOp op;
    const jschar *errPos;
    size_t parenIndex;
} REOpData;

/*
 * Report error |errorNumber| with optional message argument |arg|.  When the
 * compiler state carries a token stream the report goes through
 * ReportCompileErrorNumber (with JSREPORT_UC added to |flags|); otherwise it
 * goes through JS_ReportErrorFlagsAndNumberUC.  Returns whatever the chosen
 * reporter returns.
 */
static JSBool
ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
                        const jschar *arg)
{
    if (!state->tokenStream) {
        return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
                                              js_GetErrorMessage, NULL,
                                              errorNumber, arg);
    }

    return ReportCompileErrorNumber(state->context, state->tokenStream,
                                    NULL, JSREPORT_UC | flags, errorNumber,
                                    arg);
}

/*
 * Convenience wrapper: report |errorNumber| with no message argument by
 * forwarding to ReportRegExpErrorHelper with a NULL |arg|.
 */
static JSBool
ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
{
    return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
}

/*
 * Process the op against the two top operands, reducing them to a single
 * operand in the penultimate slot. Update progLength and treeDepth.
 */
/*
 * Apply the stacked operator |opData| to the two operands at the top of
 * |operandStack| (slots operandSP - 2 and operandSP - 1), leaving the combined
 * operand in slot operandSP - 2.  The caller pops the stack afterwards.
 *
 * REOP_ALT builds a new alternation node, bumps state->treeDepth (failing
 * with JSMSG_REGEXP_TOO_COMPLEX at TREE_DEPTH_MAX) and adds the estimated
 * bytecode size to state->progLength.  REOP_CONCAT appends the second operand
 * to the end of the first operand's |next| chain.  An open-paren or assertion
 * operator reaching this function means its close paren never arrived, which
 * is reported as JSMSG_MISSING_PAREN.  Any other operator is a no-op.
 *
 * Returns JS_FALSE on error or allocation failure, JS_TRUE otherwise.
 */
static JSBool
ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
          intN operandSP)
{
    switch (opData->op) {
      case REOP_ALT: {
        RENode *alt = NewRENode(state, REOP_ALT);
        if (!alt)
            return JS_FALSE;

        RENode *left = operandStack[operandSP - 2];
        RENode *right = operandStack[operandSP - 1];
        alt->kid = left;
        alt->u.kid2 = right;
        operandStack[operandSP - 2] = alt;

        if (state->treeDepth == TREE_DEPTH_MAX) {
            ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
            return JS_FALSE;
        }
        ++state->treeDepth;

        /*
         * Look at both alternates to see if there's a FLAT or a CLASS at
         * the start of each. If so, use a prerequisite match.  This is only
         * done when case folding is off.
         *
         * Plain form:   ALT, <next>, ..., JUMP, <end> ... ENDALT
         * Prereq forms: ALTPREREQ[2], <end>, uch1, uch2, <next>, ...,
         *                                      JUMP, <end> ... ENDALT
         */
        size_t estimate = 7;
        if ((state->flags & JSREG_FOLD) == 0) {
            if (left->op == REOP_FLAT && right->op == REOP_FLAT) {
                alt->op = REOP_ALTPREREQ;
                alt->u.altprereq.ch1 = left->u.flat.chr;
                alt->u.altprereq.ch2 = right->u.flat.chr;
                estimate = 13;
            } else if (left->op == REOP_CLASS &&
                       left->u.ucclass.index < 256 &&
                       right->op == REOP_FLAT) {
                alt->op = REOP_ALTPREREQ2;
                alt->u.altprereq.ch1 = right->u.flat.chr;
                alt->u.altprereq.ch2 = jschar(left->u.ucclass.index);
                estimate = 13;
            } else if (left->op == REOP_FLAT &&
                       right->op == REOP_CLASS &&
                       right->u.ucclass.index < 256) {
                alt->op = REOP_ALTPREREQ2;
                alt->u.altprereq.ch1 = left->u.flat.chr;
                alt->u.altprereq.ch2 = jschar(right->u.ucclass.index);
                estimate = 13;
            }
        }
        state->progLength += estimate;
        break;
      }

      case REOP_CONCAT: {
        /* Walk to the tail of the first operand's chain and link there. */
        RENode *tail = operandStack[operandSP - 2];
        for (; tail->next; tail = tail->next)
            continue;
        tail->next = operandStack[operandSP - 1];
        break;
      }

      case REOP_ASSERT:
      case REOP_ASSERT_NOT:
      case REOP_LPARENNON:
      case REOP_LPAREN:
        /* These should have been processed by a close paren. */
        ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
                                opData->errPos);
        return JS_FALSE;

      default:
        break;
    }
    return JS_TRUE;
}

/*
 * Parser forward declarations.
 */
/* ParseRegExp reads the parsed term back from state->result on success. */
static JSBool ParseTerm(CompilerState *state);
/* ParseRegExp passes the operand in state->result and reads it back after. */
static JSBool ParseQuantifier(CompilerState *state);
/* ParseRegExp treats a negative return as "did not scan as a quantifier". */
static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);

/*
 * Top-down regular expression grammar, based closely on Perl4.
 *
 *  regexp:     altern                  A regular expression is one or more
 *              altern '|' regexp       alternatives separated by vertical bar.
 */
/*
 * Initial capacity, in entries, of ParseRegExp's operator and operand stacks;
 * each stack doubles its capacity whenever it fills up.
 */
#define INITIAL_STACK_SIZE  128

/*
 * Parse the regexp source from state->cp up to state->cpend into an RENode
 * tree, using an explicit operator stack and operand stack instead of
 * recursion.
 *
 * Each pass of the main loop first obtains an operand (a parsed term, or an
 * REOP_EMPTY node where the syntax leaves one out) and then, at
 * |restartOperator|, looks at the following character to decide which
 * operator to push or reduce.
 *
 * On success the root of the tree is stored in state->result and JS_TRUE is
 * returned.  On failure JS_FALSE is returned; syntax errors are reported
 * through ReportRegExpError / ReportRegExpErrorHelper before returning.
 * Both stacks are heap-allocated here and released at |out|.
 */
static JSBool
ParseRegExp(CompilerState *state)
{
    size_t parenIndex;
    RENode *operand;
    REOpData *operatorStack;
    RENode **operandStack;
    REOp op;
    intN i;
    JSBool result = JS_FALSE;

    intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
    intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;

    /* Watch out for empty regexp */
    if (state->cp == state->cpend) {
        state->result = NewRENode(state, REOP_EMPTY);
        return (state->result != NULL);
    }

    operatorStack = (REOpData *)
        state->context->malloc(sizeof(REOpData) * operatorStackSize);
    if (!operatorStack)
        return JS_FALSE;

    /* From here on every exit goes through |out| so the stacks are freed. */
    operandStack = (RENode **)
        state->context->malloc(sizeof(RENode *) * operandStackSize);
    if (!operandStack)
        goto out;

    for (;;) {
        /* Remember the paren count now; it is stored with any operator pushed. */
        parenIndex = state->parenCount;
        if (state->cp == state->cpend) {
            /*
             * If we are at the end of the regexp and we're short one or more
             * operands, the regexp must have the form /x|/ or some such, with
             * left parentheses making us short more than one operand.
             */
            if (operatorSP >= operandSP) {
                operand = NewRENode(state, REOP_EMPTY);
                if (!operand)
                    goto out;
                goto pushOperand;
            }
        } else {
            switch (*state->cp) {
              case '(':
                ++state->cp;
                /* "(?=", "(?!" and "(?:" open non-capturing constructs. */
                if (state->cp + 1 < state->cpend &&
                    *state->cp == '?' &&
                    (state->cp[1] == '=' ||
                     state->cp[1] == '!' ||
                     state->cp[1] == ':')) {
                    switch (state->cp[1]) {
                      case '=':
                        op = REOP_ASSERT;
                        /* ASSERT, <next>, ... ASSERTTEST */
                        state->progLength += 4;
                        break;
                      case '!':
                        op = REOP_ASSERT_NOT;
                        /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
                        state->progLength += 4;
                        break;
                      default:
                        op = REOP_LPARENNON;
                        break;
                    }
                    state->cp += 2;
                } else {
                    op = REOP_LPAREN;
                    /* LPAREN, <index>, ... RPAREN, <index> */
                    state->progLength
                        += 2 * (1 + GetCompactIndexWidth(parenIndex));
                    state->parenCount++;
                    if (state->parenCount == 65535) {
                        ReportRegExpError(state, JSREPORT_ERROR,
                                          JSMSG_TOO_MANY_PARENS);
                        goto out;
                    }
                }
                goto pushOperator;

              case ')':
                /*
                 * If there's no stacked open parenthesis, throw syntax error.
                 */
                for (i = operatorSP - 1; ; i--) {
                    if (i < 0) {
                        ReportRegExpError(state, JSREPORT_ERROR,
                                          JSMSG_UNMATCHED_RIGHT_PAREN);
                        goto out;
                    }
                    if (operatorStack[i].op == REOP_ASSERT ||
                        operatorStack[i].op == REOP_ASSERT_NOT ||
                        operatorStack[i].op == REOP_LPARENNON ||
                        operatorStack[i].op == REOP_LPAREN) {
                        break;
                    }
                }
                /* FALL THROUGH */

              case '|':
                /* Expected an operand before these, so make an empty one */
                operand = NewRENode(state, REOP_EMPTY);
                if (!operand)
                    goto out;
                goto pushOperand;

              default:
                if (!ParseTerm(state))
                    goto out;
                operand = state->result;
pushOperand:
                /* Double the operand stack when it is full. */
                if (operandSP == operandStackSize) {
                    RENode **tmp;
                    operandStackSize += operandStackSize;
                    tmp = (RENode **)
                        state->context->realloc(operandStack,
                                                sizeof(RENode *) * operandStackSize);
                    if (!tmp)
                        goto out;
                    operandStack = tmp;
                }
                operandStack[operandSP++] = operand;
                break;
            }
        }

        /* At the end; process remaining operators. */
restartOperator:
        if (state->cp == state->cpend) {
            while (operatorSP) {
                --operatorSP;
                if (!ProcessOp(state, &operatorStack[operatorSP],
                               operandStack, operandSP))
                    goto out;
                --operandSP;
            }
            JS_ASSERT(operandSP == 1);
            state->result = operandStack[0];
            result = JS_TRUE;
            goto out;
        }

        switch (*state->cp) {
          case '|':
            /* Process any stacked 'concat' operators */
            ++state->cp;
            while (operatorSP &&
                   operatorStack[operatorSP - 1].op == REOP_CONCAT) {
                --operatorSP;
                if (!ProcessOp(state, &operatorStack[operatorSP],
                               operandStack, operandSP)) {
                    goto out;
                }
                --operandSP;
            }
            op = REOP_ALT;
            goto pushOperator;

          case ')':
            /*
             * If there's no stacked open parenthesis, throw syntax error.
             */
            for (i = operatorSP - 1; ; i--) {
                if (i < 0) {
                    ReportRegExpError(state, JSREPORT_ERROR,
                                      JSMSG_UNMATCHED_RIGHT_PAREN);
                    goto out;
                }
                if (operatorStack[i].op == REOP_ASSERT ||
                    operatorStack[i].op == REOP_ASSERT_NOT ||
                    operatorStack[i].op == REOP_LPARENNON ||
                    operatorStack[i].op == REOP_LPAREN) {
                    break;
                }
            }
            ++state->cp;

            /* Process everything on the stack until the open parenthesis. */
            for (;;) {
                JS_ASSERT(operatorSP);
                --operatorSP;
                switch (operatorStack[operatorSP].op) {
                  case REOP_ASSERT:
                  case REOP_ASSERT_NOT:
                  case REOP_LPAREN:
                    /* Wrap the group's contents in a node for the construct. */
                    operand = NewRENode(state, operatorStack[operatorSP].op);
                    if (!operand)
                        goto out;
                    operand->u.parenIndex =
                        operatorStack[operatorSP].parenIndex;
                    JS_ASSERT(operandSP);
                    operand->kid = operandStack[operandSP - 1];
                    operandStack[operandSP - 1] = operand;
                    if (state->treeDepth == TREE_DEPTH_MAX) {
                        ReportRegExpError(state, JSREPORT_ERROR,
                                          JSMSG_REGEXP_TOO_COMPLEX);
                        goto out;
                    }
                    ++state->treeDepth;
                    /* FALL THROUGH */

                  case REOP_LPARENNON:
                    /* A quantifier may follow the closed group. */
                    state->result = operandStack[operandSP - 1];
                    if (!ParseQuantifier(state))
                        goto out;
                    operandStack[operandSP - 1] = state->result;
                    goto restartOperator;
                  default:
                    if (!ProcessOp(state, &operatorStack[operatorSP],
                                   operandStack, operandSP))
                        goto out;
                    --operandSP;
                    break;
                }
            }
            break;

          case '{':
          {
            const jschar *errp = state->cp;

            if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
                /*
                 * This didn't even scan correctly as a quantifier, so we should
                 * treat it as flat.
                 */
                op = REOP_CONCAT;
                goto pushOperator;
            }

            /* Restore the position of the '{' for the error report below. */
            state->cp = errp;
            /* FALL THROUGH */
          }

          case '+':
          case '*':
          case '?':
            ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
                                    state->cp);
            result = JS_FALSE;
            goto out;

          default:
            /* Anything else is the start of the next term. */
            op = REOP_CONCAT;
pushOperator:
            /* Double the operator stack when it is full. */
            if (operatorSP == operatorStackSize) {
                REOpData *tmp;
                operatorStackSize += operatorStackSize;
                tmp = (REOpData *)
                    state->context->realloc(operatorStack,
                                            sizeof(REOpData) * operatorStackSize);
                if (!tmp)
                    goto out;
                operatorStack = tmp;
            }
            operatorStack[operatorSP].op = op;
            operatorStack[operatorSP].errPos = state->cp;
            operatorStack[operatorSP++].parenIndex = parenIndex;
            break;
        }
    }
out:
    /* Common exit: release both stacks; |result| is JS_TRUE only on success. */
    if (operatorStack)
        state->context->free(operatorStack);
    if (operandStack)
        state->context->free(operandStack);
    return result;
}

/*
 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
 * its being on the stack, and to propagate errors to its callers.
 */
/* Set on FindParenCount's temporary state while it re-parses the regexp. */
#define JSREG_FIND_PAREN_COUNT  0x8000
/* Set on the caller's state when that re-parse fails. */
#define JSREG_FIND_PAREN_ERROR  0x4000

/*
 * Magic return value from FindParenCount and GetDecimalValue, to indicate
 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
 * its findMax parameter is non-null.
 */
/* (uintN)-1: every bit set when uintN is an unsigned type. */
#define OVERFLOW_VALUE          ((uintN)-1)

/*
 * Count the capturing parentheses of the whole regexp by re-parsing it from
 * cpbegin on a scratch copy of |state|.
 *
 * Returns the scratch parse's parenCount on success.  Returns OVERFLOW_VALUE
 * when called re-entrantly (the JSREG_FIND_PAREN_COUNT flag is already set on
 * |state|) or when the scratch parse fails; in the failure case
 * JSREG_FIND_PAREN_ERROR is also set on the caller's |state|.
 */
static uintN
FindParenCount(CompilerState *state)
{
    if ((state->flags & JSREG_FIND_PAREN_COUNT) != 0)
        return OVERFLOW_VALUE;

    /*
     * Work on a copy flagged so we never report an invalid backref, with its
     * cursor and counters reset to parse the entire regexp.  This is
     * obviously suboptimal, but GetDecimalValue calls us only if a backref
     * appears to refer to a forward parenthetical, which is rare.
     */
    CompilerState scratch = *state;
    scratch.flags |= JSREG_FIND_PAREN_COUNT;
    scratch.cp = scratch.cpbegin;
    scratch.parenCount = 0;
    scratch.classCount = 0;
    scratch.progLength = 0;
    scratch.treeDepth = 0;
    scratch.classBitmapsMem = 0;
    for (size_t slot = 0; slot != CLASS_CACHE_SIZE; ++slot)
        scratch.classCache[slot].start = NULL;

    if (ParseRegExp(&scratch))
        return scratch.parenCount;

    state->flags |= JSREG_FIND_PAREN_ERROR;
    return OVERFLOW_VALUE;
}

/*
 * Extract and return a decimal value at state->cp.  The initial character c
 * has already been read.  Return OVERFLOW_VALUE if the result exceeds max.
 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
 * state->flags to discover whether an error occurred under findMax.
 */
/*
 * Read the decimal number whose first digit |c| was already consumed,
 * advancing state->cp past every following decimal digit.
 *
 * A value counts as overflowing once it exceeds |max| and, if |findMax| is
 * non-null, also exceeds findMax(state).  findMax is consulted only when the
 * value is above |max| and no overflow has been recorded yet.  All digits are
 * consumed even after overflow.  Returns OVERFLOW_VALUE on overflow, the
 * parsed value otherwise.
 */
static uintN
GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
                CompilerState *state)
{
    uintN value = JS7_UNDEC(c);
    JSBool overflow = JS_FALSE;

    if (value > max && (!findMax || value > findMax(state)))
        overflow = JS_TRUE;

    /* The following restriction allows simpler overflow checks. */
    JS_ASSERT(max <= ((uintN)-1 - 9) / 10);

    for (; state->cp < state->cpend; ++state->cp) {
        const jschar next = *state->cp;

        if (!JS7_ISDEC(next))
            break;
        value = 10 * value + JS7_UNDEC(next);
        if (overflow)
            continue;
        if (value > max && (!findMax || value > findMax(state)))
            overflow = JS_TRUE;
    }

    if (overflow)
        return OVERFLOW_VALUE;
    return value;
}

/*
 * Calculate the total size of the bitmap required for a class expression.
 *
 * Scans the class source text in [src, end) -- the characters between '['
 * and ']' -- and stores in target->u.ucclass.bmsize the highest character
 * code the class can refer to (truncated to uint16).  Also records in
 * target->u.ucclass.sense whether the class is positive (JS_TRUE) or negated
 * by a leading '^' (JS_FALSE).
 *
 * Returns JS_TRUE on success.  Returns JS_FALSE after reporting
 * JSMSG_BAD_CLASS_RANGE when a range ends in a class escape (\d, \D, \s, \S,
 * \w, \W) or when a range's start is greater than its end.
 */
static JSBool
CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
                    const jschar *end)
{
    uintN max = 0;                      /* highest code seen so far */
    JSBool inRange = JS_FALSE;          /* true right after "start-" */
    jschar c, rangeStart = 0;
    uintN n, digit, nDigits, i;

    target->u.ucclass.bmsize = 0;
    target->u.ucclass.sense = JS_TRUE;

    /* An empty class needs no bitmap bits. */
    if (src == end)
        return JS_TRUE;

    if (*src == '^') {
        ++src;
        target->u.ucclass.sense = JS_FALSE;
    }

    while (src != end) {
        JSBool canStartRange = JS_TRUE;
        jschar localMax = 0;            /* code of the member just lexed */

        switch (*src) {
          case '\\':
            ++src;
            c = *src++;
            switch (c) {
              /* Inside a class, \b is lexed as the character 0x8. */
              case 'b':
                localMax = 0x8;
                break;
              case 'f':
                localMax = 0xC;
                break;
              case 'n':
                localMax = 0xA;
                break;
              case 'r':
                localMax = 0xD;
                break;
              case 't':
                localMax = 0x9;
                break;
              case 'v':
                localMax = 0xB;
                break;
              case 'c':
                if (src < end && RE_IS_LETTER(*src)) {
                    localMax = (uintN) (*src++) & 0x1F;
                } else {
                    /* Not a control letter: rescan 'c' and count a '\'. */
                    --src;
                    localMax = '\\';
                }
                break;
              case 'x':
                nDigits = 2;
                goto lexHex;
              case 'u':
                nDigits = 4;
lexHex:
                /* Accumulate up to nDigits hex digits, stopping at end. */
                n = 0;
                for (i = 0; (i < nDigits) && (src < end); i++) {
                    c = *src++;
                    if (!isASCIIHexDigit(c, &digit)) {
                        /*
                         * Back off to accepting the original
                         *'\' as a literal.
                         */
                        src -= i + 1;
                        n = '\\';
                        break;
                    }
                    n = (n << 4) | digit;
                }
                localMax = jschar(n);
                break;
              case 'd':
                /* A class escape can neither start nor end a range. */
                canStartRange = JS_FALSE;
                if (inRange) {
                    JS_ReportErrorNumber(state->context,
                                         js_GetErrorMessage, NULL,
                                         JSMSG_BAD_CLASS_RANGE);
                    return JS_FALSE;
                }
                localMax = '9';
                break;
              case 'D':
              case 's':
              case 'S':
              case 'w':
              case 'W':
                /* A class escape can neither start nor end a range. */
                canStartRange = JS_FALSE;
                if (inRange) {
                    JS_ReportErrorNumber(state->context,
                                         js_GetErrorMessage, NULL,
                                         JSMSG_BAD_CLASS_RANGE);
                    return JS_FALSE;
                }
                /* For these escapes, size the bitmap for every 16-bit code. */
                max = 65535;

                /*
                 * max is already at its ceiling, so this member contributes
                 * nothing further through localMax.
                 */
                localMax = 0;
                break;
              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
                /*
                 *  This is a non-ECMA extension - decimal escapes (in this
                 *  case, octal!) are supposed to be an error inside class
                 *  ranges, but supported here for backwards compatibility.
                 *
                 *  At most three octal digits are taken; a third digit that
                 *  would push the value above 0377 is left unconsumed.
                 */
                n = JS7_UNDEC(c);
                c = *src;
                if ('0' <= c && c <= '7') {
                    src++;
                    n = 8 * n + JS7_UNDEC(c);
                    c = *src;
                    if ('0' <= c && c <= '7') {
                        src++;
                        i = 8 * n + JS7_UNDEC(c);
                        if (i <= 0377)
                            n = i;
                        else
                            src--;
                    }
                }
                localMax = jschar(n);
                break;

              default:
                /* Any other escaped character stands for itself. */
                localMax = c;
                break;
            }
            break;
          default:
            localMax = *src++;
            break;
        }

        if (inRange) {
            /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
            if (rangeStart > localMax) {
                JS_ReportErrorNumber(state->context,
                                     js_GetErrorMessage, NULL,
                                     JSMSG_BAD_CLASS_RANGE);
                return JS_FALSE;
            }
            inRange = JS_FALSE;
        } else {
            /*
             * A '-' begins a range only if at least one more character
             * follows it before the end of the class.
             */
            if (canStartRange && src < end - 1) {
                if (*src == '-') {
                    ++src;
                    inRange = JS_TRUE;
                    rangeStart = (jschar)localMax;
                    continue;
                }
            }
            if (state->flags & JSREG_FOLD)
                rangeStart = localMax;   /* one run of the uc/dc loop below */
        }

        if (state->flags & JSREG_FOLD) {
            /*
             * Under case folding, check upcase() and inverse_upcase() of
             * every character in [rangeStart, localMax] and raise the
             * member's maximum to the largest result.
             */
            jschar maxch = localMax;

            for (i = rangeStart; i <= localMax; i++) {
                jschar uch, dch;

                uch = jschar(upcase(i));
                dch = inverse_upcase(jschar(i));
                maxch = JS_MAX(maxch, uch);
                maxch = JS_MAX(maxch, dch);
            }
            localMax = maxch;
        }

        if (localMax > max)
            max = uintN(localMax);
    }
    target->u.ucclass.bmsize = uint16(max);
    return JS_TRUE;
}

/*
 *  item:       assertion               An item is either an assertion or
 *              quantatom               a quantified atom.
 *
 *  assertion:  '^'                     Assertions match beginning of string
 *                                      (or line if the class static property
 *                                      RegExp.multiline is true).
 *              '$'                     End of string (or line if the class
 *                                      static property RegExp.multiline is
 *                                      true).
 *              '\b'                    Word boundary (between \w and \W).
 *              '\B'                    Word non-boundary.
 *
 *  quantatom:  atom                    An unquantified atom.
 *              quantatom '{' n ',' m '}'
 *                                      Atom must occur between n and m times.
 *              quantatom '{' n ',' '}' Atom must occur at least n times.
 *              quantatom '{' n '}'     Atom must occur exactly n times.
 *              quantatom '*'           Zero or more times (same as {0,}).
 *              quantatom '+'           One or more times (same as {1,}).
 *              quantatom '?'           Zero or one time (same as {0,1}).
 *
 *              any of which can be optionally followed by '?' for ungreedy
 *
 *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
 *                                      can be addressed using a backreference,
 *                                      see '\' n below).
 *              '.'                     Matches any char except '\n'.
 *              '[' classlist ']'       A character class.
 *              '[' '^' classlist ']'   A negated character class.
 *              '\f'                    Form Feed.
 *              '\n'                    Newline (Line Feed).
 *              '\r'                    Carriage Return.
 *              '\t'                    Horizontal Tab.
 *              '\v'                    Vertical Tab.
 *              '\d'                    A digit (same as [0-9]).
 *              '\D'                    A non-digit.
 *              '\w'                    A word character, [0-9a-z_A-Z].
 *              '\W'                    A non-word character.
 *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
 *              '\S'                    A non-whitespace character.
 *              '\' n                   A backreference to the nth (n decimal
 *                                      and positive) parenthesized expression.
 *              '\' octal               An octal escape sequence (octal must be
 *                                      two or three digits long, unless it is
 *                                      0 for the null character).
 *              '\x' hex                A hex escape (hex must be two digits).
 *              '\u' unicode            A unicode escape (must be four digits).
 *              '\c' ctrl               A control character, ctrl is a letter.
 *              '\' literalatomchar     Any character except one of the above
 *                                      that follow '\' in an atom.
 *              otheratomchar           Any character not first among the other
 *                                      atom right-hand sides.
 */
/*
 * Parse one term at state->cp according to the grammar above, leaving the
 * resulting node in state->result and adding its bytecode size to
 * state->progLength.
 *
 * Assertions (^, $, \b, \B) return immediately; every other term is handed
 * to ParseQuantifier so a trailing quantifier can wrap it.  Returns JS_FALSE
 * on error (out of memory, or a reported syntax error), JS_TRUE otherwise.
 * The caller must ensure state->cp < state->cpend on entry.
 */
static JSBool
ParseTerm(CompilerState *state)
{
    jschar c = *state->cp++;
    uintN nDigits;
    uintN num, tmp, n, i;
    const jschar *termStart;

    switch (c) {
    /* assertions and atoms */
      case '^':
        state->result = NewRENode(state, REOP_BOL);
        if (!state->result)
            return JS_FALSE;
        state->progLength++;
        return JS_TRUE;
      case '$':
        state->result = NewRENode(state, REOP_EOL);
        if (!state->result)
            return JS_FALSE;
        state->progLength++;
        return JS_TRUE;
      case '\\':
        if (state->cp >= state->cpend) {
            /* a trailing '\' is an error */
            ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
            return JS_FALSE;
        }
        c = *state->cp++;
        switch (c) {
        /* assertion escapes */
          case 'b' :
            state->result = NewRENode(state, REOP_WBDRY);
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            return JS_TRUE;
          case 'B':
            state->result = NewRENode(state, REOP_WNONBDRY);
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            return JS_TRUE;
          /* Decimal escape */
          case '0':
            /* Give a strict warning. See also the note below. */
            if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
                                   JSMSG_INVALID_BACKREF)) {
                return JS_FALSE;
            }
     doOctal:
            /*
             * Accumulate octal digits while the value still fits in 0377.
             * A digit is consumed only after it is known to fit, so a digit
             * that would overflow is left in the input and parsed as the
             * next term instead of being silently dropped.  This matches the
             * back-off CalculateBitmapSize performs for octal escapes inside
             * character classes.
             */
            num = 0;
            while (state->cp < state->cpend) {
                c = *state->cp;
                if (c < '0' || '7' < c)
                    break;
                tmp = 8 * num + (uintN)JS7_UNDEC(c);
                if (tmp > 0377)
                    break;
                state->cp++;
                num = tmp;
            }
            c = (jschar)num;
    doFlat:
            /* Single literal character with no pointer back into the source. */
            state->result = NewRENode(state, REOP_FLAT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.flat.chr = c;
            state->result->u.flat.length = 1;
            state->progLength += 3;
            break;
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9':
            termStart = state->cp - 1;
            num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
            if (state->flags & JSREG_FIND_PAREN_ERROR)
                return JS_FALSE;
            if (num == OVERFLOW_VALUE) {
                /* Give a strict mode warning. */
                if (!ReportRegExpError(state,
                                       JSREPORT_WARNING | JSREPORT_STRICT,
                                       (c >= '8')
                                       ? JSMSG_INVALID_BACKREF
                                       : JSMSG_BAD_BACKREF)) {
                    return JS_FALSE;
                }

                /*
                 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
                 * error here. However, for compatibility with IE, we treat the
                 * whole backref as flat if the first character in it is not a
                 * valid octal character, and as an octal escape otherwise.
                 */
                state->cp = termStart;
                if (c >= '8') {
                    /* Treat this as flat. termStart - 1 is the \. */
                    c = '\\';
                    goto asFlat;
                }

                /* Treat this as an octal escape. */
                goto doOctal;
            }

            /*
             * When FindParenCount calls the regex parser recursively (to find
             * the number of backrefs) num can be arbitrary and the maximum
             * supported number of backrefs does not bound it.
             */
            JS_ASSERT_IF(!(state->flags & JSREG_FIND_PAREN_COUNT),
                         1 <= num && num <= 0x10000);
            state->result = NewRENode(state, REOP_BACKREF);
            if (!state->result)
                return JS_FALSE;
            /* Backrefs are 1-based in the source; parenIndex is 0-based. */
            state->result->u.parenIndex = num - 1;
            state->progLength
                += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
            break;
          /* Control escape */
          case 'f':
            c = 0xC;
            goto doFlat;
          case 'n':
            c = 0xA;
            goto doFlat;
          case 'r':
            c = 0xD;
            goto doFlat;
          case 't':
            c = 0x9;
            goto doFlat;
          case 'v':
            c = 0xB;
            goto doFlat;
          /* Control letter */
          case 'c':
            if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
                c = (jschar) (*state->cp++ & 0x1F);
            } else {
                /* back off to accepting the original '\' as a literal */
                --state->cp;
                c = '\\';
            }
            goto doFlat;
          /* HexEscapeSequence */
          case 'x':
            nDigits = 2;
            goto lexHex;
          /* UnicodeEscapeSequence */
          case 'u':
            nDigits = 4;
lexHex:
            n = 0;
            for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
                uintN digit;
                c = *state->cp++;
                if (!isASCIIHexDigit(c, &digit)) {
                    /*
                     * Back off to accepting the original 'u' or 'x' as a
                     * literal.
                     */
                    state->cp -= i + 2;
                    n = *state->cp++;
                    break;
                }
                n = (n << 4) | digit;
            }
            c = (jschar) n;
            goto doFlat;
          /* Character class escapes */
          case 'd':
            state->result = NewRENode(state, REOP_DIGIT);
doSimple:
            /* Shared tail for one-byte opcodes that take no operands. */
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            break;
          case 'D':
            state->result = NewRENode(state, REOP_NONDIGIT);
            goto doSimple;
          case 's':
            state->result = NewRENode(state, REOP_SPACE);
            goto doSimple;
          case 'S':
            state->result = NewRENode(state, REOP_NONSPACE);
            goto doSimple;
          case 'w':
            state->result = NewRENode(state, REOP_ALNUM);
            goto doSimple;
          case 'W':
            state->result = NewRENode(state, REOP_NONALNUM);
            goto doSimple;
          /* IdentityEscape */
          default:
            state->result = NewRENode(state, REOP_FLAT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.flat.chr = c;
            state->result->u.flat.length = 1;
            /* Remember where the escaped character sits in the source. */
            state->result->kid = (void *) (state->cp - 1);
            state->progLength += 3;
            break;
        }
        break;
      case '[':
        state->result = NewRENode(state, REOP_CLASS);
        if (!state->result)
            return JS_FALSE;
        termStart = state->cp;
        state->result->u.ucclass.startIndex = termStart - state->cpbegin;
        /* Find the closing ']', stepping over backslash-escaped characters. */
        for (;;) {
            if (state->cp == state->cpend) {
                ReportRegExpErrorHelper(state, JSREPORT_ERROR,
                                        JSMSG_UNTERM_CLASS, termStart);

                return JS_FALSE;
            }
            if (*state->cp == '\\') {
                state->cp++;
                if (state->cp != state->cpend)
                    state->cp++;
                continue;
            }
            if (*state->cp == ']') {
                state->result->u.ucclass.kidlen = state->cp - termStart;
                break;
            }
            state->cp++;
        }
        /*
         * Look for an identical class body in the cache so that it can share
         * a class index.  The first empty slot, if reached, records this
         * class under the next fresh index.
         */
        for (i = 0; i < CLASS_CACHE_SIZE; i++) {
            if (!state->classCache[i].start) {
                state->classCache[i].start = termStart;
                state->classCache[i].length = state->result->u.ucclass.kidlen;
                state->classCache[i].index = state->classCount;
                break;
            }
            if (state->classCache[i].length ==
                state->result->u.ucclass.kidlen) {
                for (n = 0; ; n++) {
                    if (n == state->classCache[i].length) {
                        state->result->u.ucclass.index
                            = state->classCache[i].index;
                        goto claim;
                    }
                    if (state->classCache[i].start[n] != termStart[n])
                        break;
                }
            }
        }
        state->result->u.ucclass.index = state->classCount++;

    claim:
        /*
         * Call CalculateBitmapSize now as we want any errors it finds
         * to be reported during the parse phase, not at execution.
         * The post-increment steps state->cp past the closing ']'.
         */
        if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
            return JS_FALSE;
        /*
         * Update classBitmapsMem with number of bytes to hold bmsize bits,
         * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
         * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
         */
        n = (state->result->u.ucclass.bmsize >> 3) + 1;
        if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
            ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
            return JS_FALSE;
        }
        state->classBitmapsMem += n;
        /* CLASS, <index> */
        state->progLength
            += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
        break;

      case '.':
        state->result = NewRENode(state, REOP_DOT);
        goto doSimple;

      case '{':
      {
        /*
         * A '{' with nothing to quantify.  Probe whether it has the shape of
         * a {n}, {n,} or {n,m} quantifier (values ignored): if it does not,
         * treat the '{' as a literal; if it does, report a bad quantifier.
         * Either way state->cp ends up just past the '{'.
         */
        const jschar *errp = state->cp--;
        intN err;

        err = ParseMinMaxQuantifier(state, JS_TRUE);
        state->cp = errp;

        if (err < 0)
            goto asFlat;

        /* FALL THROUGH */
      }
      case '*':
      case '+':
      case '?':
        ReportRegExpErrorHelper(state, JSREPORT_ERROR,
                                JSMSG_BAD_QUANTIFIER, state->cp - 1);
        return JS_FALSE;
      default:
asFlat:
        state->result = NewRENode(state, REOP_FLAT);
        if (!state->result)
            return JS_FALSE;
        state->result->u.flat.chr = c;
        state->result->u.flat.length = 1;
        /* Remember where this character sits in the source. */
        state->result->kid = (void *) (state->cp - 1);
        state->progLength += 3;
        break;
    }
    return ParseQuantifier(state);
}

/*
 * If a quantifier ('+', '*', '?' or '{n[,[m]]}', each optionally followed by
 * '?' for non-greedy matching) starts at state->cp, consume it and replace
 * state->result with a REOP_QUANT node whose kid is the term parsed so far.
 * Otherwise leave state->result and state->cp alone.
 *
 * Returns JS_FALSE on out-of-memory, on a reported brace-quantifier error, or
 * when the tree would exceed TREE_DEPTH_MAX; JS_TRUE in every other case.
 */
static JSBool
ParseQuantifier(CompilerState *state)
{
    RENode *operand = state->result;
    jschar mark;

    if (state->cp >= state->cpend)
        return JS_TRUE;

    mark = *state->cp;
    if (mark == '{') {      /* balance '}' */
        const jschar *bracePos = state->cp;
        intN status = ParseMinMaxQuantifier(state, JS_FALSE);

        /* -1: not a quantifier after all; the '{' will be parsed as a term. */
        if (status == -1)
            return JS_TRUE;
        if (status != 0) {
            ReportRegExpErrorHelper(state, JSREPORT_ERROR, status, bracePos);
            return JS_FALSE;
        }
        /* On success the QUANT node is in state->result and cp is at '}'. */
    } else {
        uintN lo, hi;

        if (mark == '+') {
            lo = 1;
            hi = (uintN)-1;
        } else if (mark == '*') {
            lo = 0;
            hi = (uintN)-1;
        } else if (mark == '?') {
            lo = 0;
            hi = 1;
        } else {
            return JS_TRUE;
        }

        state->result = NewRENode(state, REOP_QUANT);
        if (!state->result)
            return JS_FALSE;
        state->result->u.range.min = lo;
        state->result->u.range.max = hi;
        /* <PLUS|STAR|OPT>, <next> ... <ENDCHILD> */
        state->progLength += 4;
    }

    /* Every quantifier adds one level to the tree. */
    if (state->treeDepth == TREE_DEPTH_MAX) {
        ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
        return JS_FALSE;
    }
    ++state->treeDepth;

    /* Step over the quantifier's last character and adopt the term. */
    ++state->cp;
    state->result->kid = operand;

    /* A '?' directly after the quantifier selects non-greedy matching. */
    if (state->cp < state->cpend && *state->cp == '?') {
        ++state->cp;
        state->result->u.range.greedy = JS_FALSE;
    } else {
        state->result->u.range.greedy = JS_TRUE;
    }
    return JS_TRUE;
}

/*
 * Try to parse a brace quantifier -- {n}, {n,} or {n,m} -- with state->cp
 * pointing at the '{'.
 *
 * Returns:
 *    0   success: state->result is a new REOP_QUANT node with min/max set,
 *        state->progLength is updated, and state->cp points at the '}'.
 *   -1   the text is not a brace quantifier; state->cp is restored to the
 *        '{'.
 *   >0   an error message number (JSMSG_MIN_TOO_BIG, JSMSG_MAX_TOO_BIG,
 *        JSMSG_OUT_OF_ORDER or JSMSG_OUT_OF_MEMORY); state->cp is left where
 *        parsing stopped.
 *
 * When ignoreValues is true the numeric range checks are skipped, so only
 * the syntactic shape (and allocation failure) is reported.
 *
 * Every read of *state->cp is guarded by state->cpend: running off the end
 * of the pattern is treated as seeing a NUL character, which is neither a
 * digit, ',' nor '}'.
 */
static intN
ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
{
    uintN min, max;
    jschar c;
    const jschar *errp = state->cp++;

    c = (state->cp < state->cpend) ? *state->cp : (jschar)0;
    if (JS7_ISDEC(c)) {
        ++state->cp;
        min = GetDecimalValue(c, 0xFFFF, NULL, state);
        c = (state->cp < state->cpend) ? *state->cp : (jschar)0;

        if (!ignoreValues && min == OVERFLOW_VALUE)
            return JSMSG_MIN_TOO_BIG;

        if (c == ',') {
            ++state->cp;
            c = (state->cp < state->cpend) ? *state->cp : (jschar)0;
            if (JS7_ISDEC(c)) {
                ++state->cp;
                max = GetDecimalValue(c, 0xFFFF, NULL, state);
                c = (state->cp < state->cpend) ? *state->cp : (jschar)0;
                if (!ignoreValues && max == OVERFLOW_VALUE)
                    return JSMSG_MAX_TOO_BIG;
                if (!ignoreValues && min > max)
                    return JSMSG_OUT_OF_ORDER;
            } else {
                /* "{n,}": no upper bound. */
                max = (uintN)-1;
            }
        } else {
            /* "{n}": exactly n times. */
            max = min;
        }
        if (c == '}') {
            state->result = NewRENode(state, REOP_QUANT);
            if (!state->result)
                return JSMSG_OUT_OF_MEMORY;
            state->result->u.range.min = min;
            state->result->u.range.max = max;
            /*
             * QUANT, <min>, <max>, <next> ... <ENDCHILD>
             * where <max> is written as compact(max+1) to make
             * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
             */
            state->progLength += (1 + GetCompactIndexWidth(min)
                                  + GetCompactIndexWidth(max + 1)
                                  +3);
            return 0;
        }
    }

    /* Not a quantifier: rewind so the '{' can be parsed as a literal. */
    state->cp = errp;
    return -1;
}

/*
 * Store the forward distance from jump to target into the two offset bytes
 * at jump (high byte first).  Returns JS_FALSE, writing nothing, if the
 * distance does not fit in OFFSET_MAX.
 */
static JSBool
SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
{
    ptrdiff_t distance = target - jump;

    /* The target must really lie ahead of the jump. */
    JS_ASSERT(distance >= 2);

    if ((size_t)distance <= OFFSET_MAX) {
        jump[0] = JUMP_OFFSET_HI(distance);
        jump[1] = JUMP_OFFSET_LO(distance);
        return JS_TRUE;
    }
    return JS_FALSE;
}

/*
 * Fill in the regexp's charset-list entry selected by a character class
 * node's index.  The entry is marked as not yet converted and receives the
 * node's bitmap size, source start index, source length and sense.
 * Returns a pointer to that entry.
 */
static JS_ALWAYS_INLINE RECharSet *
InitNodeCharSet(JSRegExp *re, RENode *node)
{
    RECharSet *entry = re->classList + node->u.ucclass.index;

    entry->sense = node->u.ucclass.sense;
    entry->length = node->u.ucclass.bmsize;
    entry->u.src.length = node->u.ucclass.kidlen;
    entry->u.src.startIndex = node->u.ucclass.startIndex;
    entry->converted = JS_FALSE;
    return entry;
}

/*
 * Generate bytecode for the tree rooted at t using an explicit stack instead
 * of recursion.
 */
static jsbytecode *
EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
               jsbytecode *pc, RENode *t)
{
    EmitStateStackEntry *emitStateSP, *emitStateStack;
    REOp op;

    if (treeDepth == 0) {
        emitStateStack = NULL;
    } else {
        emitStateStack =
            (EmitStateStackEntry *)
            state->context->malloc(sizeof(EmitStateStackEntry) * treeDepth);
        if (!emitStateStack)
            return NULL;
    }
    emitStateSP = emitStateStack;
    op = t->op;
    JS_ASSERT(op < REOP_LIMIT);

    for (;;) {
        *pc++ = op;
        switch (op) {
          case REOP_EMPTY:
            --pc;
            break;

          case REOP_ALTPREREQ2:
          case REOP_ALTPREREQ:
            JS_ASSERT(emitStateSP);
            emitStateSP->altHead = pc - 1;
            emitStateSP->endTermFixup = pc;
            pc += OFFSET_LEN;
            SET_ARG(pc, t->u.altprereq.ch1);
            pc += ARG_LEN;
            SET_ARG(pc, t->u.altprereq.ch2);
            pc += ARG_LEN;

            emitStateSP->nextAltFixup = pc;    /* offset to next alternate */
            pc += OFFSET_LEN;

            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_JUMP;
            emitStateSP->jumpToJumpFlag = JS_FALSE;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            JS_ASSERT(op < REOP_LIMIT);
            continue;

          case REOP_JUMP:
            emitStateSP->nextTermFixup = pc;    /* offset to following term */
            pc += OFFSET_LEN;
            if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
                goto jump_too_big;
            emitStateSP->continueOp = REOP_ENDALT;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->u.kid2;
            op = t->op;
            JS_ASSERT(op < REOP_LIMIT);
            continue;

          case REOP_ENDALT:
            /*
             * If we already patched emitStateSP->nextTermFixup to jump to
             * a nearer jump, to avoid 16-bit immediate offset overflow, we
             * are done here.
             */
            if (emitStateSP->jumpToJumpFlag)
                break;

            /*
             * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
             * REOP_ENDALT is executed only on successful match of the last
             * alternate in a group.
             */
            if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
                goto jump_too_big;
            if (t->op != REOP_ALT) {
                if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
                    goto jump_too_big;
            }

            /*
             * If the program is bigger than the REOP_JUMP offset range, then
             * we must check for alternates before this one that are part of
             * the same group, and fix up their jump offsets to target jumps
             * close enough to fit in a 16-bit unsigned offset immediate.
             */
            if ((size_t)(pc - re->program) > OFFSET_MAX &&
                emitStateSP > emitStateStack) {
                EmitStateStackEntry *esp, *esp2;
                jsbytecode *alt, *jump;
                ptrdiff_t span, header;

                esp2 = emitStateSP;
                alt = esp2->altHead;
                for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
                    if (esp->continueOp == REOP_ENDALT &&
                        !esp->jumpToJumpFlag &&
                        esp->nextTermFixup + OFFSET_LEN == alt &&
                        (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
                                       ? esp->endTermFixup
                                       : esp->nextTermFixup)) > OFFSET_MAX) {
                        alt = esp->altHead;
                        jump = esp->nextTermFixup;

                        /*
                         * The span must be 1 less than the distance from
                         * jump offset to jump offset, so we actually jump
                         * to a REOP_JUMP bytecode, not to its offset!
                         */
                        for (;;) {
                            JS_ASSERT(jump < esp2->nextTermFixup);
                            span = esp2->nextTermFixup - jump - 1;
                            if ((size_t)span <= OFFSET_MAX)
                                break;
                            do {
                                if (--esp2 == esp)
                                    goto jump_too_big;
                            } while (esp2->continueOp != REOP_ENDALT);
                        }

                        jump[0] = JUMP_OFFSET_HI(span);
                        jump[1] = JUMP_OFFSET_LO(span);

                        if (esp->continueNode->op != REOP_ALT) {
                            /*
                             * We must patch the offset at esp->endTermFixup
                             * as well, for the REOP_ALTPREREQ{,2} opcodes.
                             * If we're unlucky and endTermFixup is more than
                             * OFFSET_MAX bytes from its target, we cheat by
                             * jumping 6 bytes to the jump whose offset is at
                             * esp->nextTermFixup, which has the same target.
                             */
                            jump = esp->endTermFixup;
                            header = esp->nextTermFixup - jump;
                            span += header;
                            if ((size_t)span > OFFSET_MAX)
                                span = header;

                            jump[0] = JUMP_OFFSET_HI(span);
                            jump[1] = JUMP_OFFSET_LO(span);
                        }

                        esp->jumpToJumpFlag = JS_TRUE;
                    }
                }
            }
            break;

          case REOP_ALT:
            JS_ASSERT(emitStateSP);
            emitStateSP->altHead = pc - 1;
            emitStateSP->nextAltFixup = pc;     /* offset to next alternate */
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_JUMP;
            emitStateSP->jumpToJumpFlag = JS_FALSE;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            JS_ASSERT(op < REOP_LIMIT);
            continue;

          case REOP_FLAT:
            /*
             * Coalesce FLATs if possible and if it would not increase bytecode
             * beyond preallocated limit. The latter happens only when bytecode
             * size for coalesced string with offset p and length 2 exceeds 6
             * bytes preallocated for 2 single char nodes, i.e. when
             * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
             * GetCompactIndexWidth(p) > 4.
             * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
             * nodes strictly decreases bytecode size, the check has to be
             * done only for the first coalescing.
             */
            if (t->kid &&
                GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
            {
                while (t->next &&
                       t->next->op == REOP_FLAT &&
                       (jschar*)t->kid + t->u.flat.length ==
                       (jschar*)t->next->kid) {
                    t->u.flat.length += t->next->u.flat.length;
                    t->next = t->next->next;
                }
            }
            if (t->kid && t->u.flat.length > 1) {
                pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
                pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
                pc = WriteCompactIndex(pc, t->u.flat.length);
            } else if (t->u.flat.chr < 256) {
                pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
                *pc++ = (jsbytecode) t->u.flat.chr;
            } else {
                pc[-1] = (state->flags & JSREG_FOLD)
                         ? REOP_UCFLAT1i
                         : REOP_UCFLAT1;
                SET_ARG(pc, t->u.flat.chr);
                pc += ARG_LEN;
            }
            break;

          case REOP_LPAREN:
            JS_ASSERT(emitStateSP);
            pc = WriteCompactIndex(pc, t->u.parenIndex);
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_RPAREN;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            continue;

          case REOP_RPAREN:
            pc = WriteCompactIndex(pc, t->u.parenIndex);
            break;

          case REOP_BACKREF:
            pc = WriteCompactIndex(pc, t->u.parenIndex);
            break;

          case REOP_ASSERT:
            JS_ASSERT(emitStateSP);
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ASSERTTEST;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            continue;

          case REOP_ASSERTTEST:
          case REOP_ASSERTNOTTEST:
            if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
                goto jump_too_big;
            break;

          case REOP_ASSERT_NOT:
            JS_ASSERT(emitStateSP);
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ASSERTNOTTEST;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            continue;

          case REOP_QUANT:
            JS_ASSERT(emitStateSP);
            if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
                pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
            } else if (t->u.range.min == 0 && t->u.range.max == 1) {
                pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
            } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
                pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
            } else {
                if (!t->u.range.greedy)
                    pc[-1] = REOP_MINIMALQUANT;
                pc = WriteCompactIndex(pc, t->u.range.min);
                /*
                 * Write max + 1 to avoid using size_t(max) + 1 bytes
                 * for (uintN)-1 sentinel.
                 */
                pc = WriteCompactIndex(pc, t->u.range.max + 1);
            }
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ENDCHILD;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            continue;

          case REOP_ENDCHILD:
            if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
                goto jump_too_big;
            break;

          case REOP_CLASS:
            if (!t->u.ucclass.sense)
                pc[-1] = REOP_NCLASS;
            pc = WriteCompactIndex(pc, t->u.ucclass.index);
            InitNodeCharSet(re, t);
            break;

          default:
            break;
        }

        t = t->next;
        if (t) {
            op = t->op;
        } else {
            if (emitStateSP == emitStateStack)
                break;
            --emitStateSP;
            t = emitStateSP->continueNode;
            op = (REOp) emitStateSP->continueOp;
        }
    }

  cleanup:
    if (emitStateStack)
        state->context->free(emitStateStack);
    return pc;

  jump_too_big:
    ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
    pc = NULL;
    goto cleanup;
}

/*
 * Prepare |state| for compiling the regexp source held in |str| and build the
 * parse tree for it.
 *
 * If JSREG_FLAT is set in |flags| and the source is non-empty, the whole
 * source becomes a single REOP_FLAT node stored in state.result and the
 * parser is not run. In every other case the result of ParseRegExp is
 * returned. JS_FALSE is returned when the string cannot be undepended or the
 * flat node cannot be created.
 */
static JSBool
CompileRegExpToAST(JSContext* cx, TokenStream* ts,
                   JSString* str, uintN flags, CompilerState& state)
{
    const size_t len = str->length();

    state.context = cx;
    state.tokenStream = ts;

    /* Source text cursor and its bounds. */
    state.cp = js_UndependString(cx, str);
    if (!state.cp)
        return JS_FALSE;
    state.cpbegin = state.cp;
    state.cpend = state.cp + len;

    /* Reset all counters accumulated during parsing. */
    state.flags = uint16(flags);
    state.parenCount = 0;
    state.classCount = 0;
    state.progLength = 0;
    state.treeDepth = 0;
    state.classBitmapsMem = 0;
    for (uintN slot = 0; slot < CLASS_CACHE_SIZE; slot++)
        state.classCache[slot].start = NULL;

    const bool literalSource = len != 0 && (flags & JSREG_FLAT) != 0;
    if (!literalSource)
        return ParseRegExp(&state);

    state.result = NewRENode(&state, REOP_FLAT);
    if (!state.result)
        return JS_FALSE;
    state.result->kid = (void *) state.cpbegin;
    state.result->u.flat.chr = *state.cpbegin;
    state.result->u.flat.length = len;

    /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
    state.progLength += 1 + GetCompactIndexWidth(0)
                      + GetCompactIndexWidth(len);
    return JS_TRUE;
}

#ifdef JS_TRACER
typedef js::Vector<LIns *, 4, js::ContextAllocPolicy> LInsList;

namespace js {

/*
 * Fragment type used for natively compiled regular expressions. It adds no
 * state of its own; the constructor only forwards to nanojit::Fragment.
 */
struct REFragment : public nanojit::Fragment
{
    /*
     * Forward the constructor argument |_ip| to the base class. Spelling it
     * |ip| here would not name the parameter; it would resolve to some other
     * |ip| in scope (presumably the inherited Fragment member, which is not
     * yet initialized while the base class is being constructed).
     */
    REFragment(const void* _ip verbose_only(, uint32_t profFragID))
      : nanojit::Fragment(_ip verbose_only(, profFragID))
    {}
};

} /* namespace js */

/*
 * Look up the fragment registered for the regexp described by |re_flags| and
 * the |re_length| characters at |re_chars|. If none is registered yet, a new
 * empty REFragment is allocated, entered into the table and returned.
 */
static Fragment*
LookupNativeRegExp(JSContext* cx, uint16 re_flags,
                   const jschar* re_chars, size_t re_length)
{
    TraceMonitor *tm = &JS_TRACE_MONITOR(cx);
    VMAllocator &alloc = *tm->dataAlloc;
    REHashMap &table = *tm->reFragments;

    REHashKey k(re_length, re_flags, re_chars);
    REFragment *frag = table.get(k);
    if (frag)
        return frag;

    verbose_only(
    uint32_t profFragID = (LogController.lcbits & LC_FragProfile)
                          ? (++(tm->lastFragID)) : 0;
    )
    frag = new (alloc) REFragment(0 verbose_only(, profFragID));

    /*
     * The key stored in the table must not point into the regexp's own
     * character storage, so give it a private copy taken from the same
     * allocator as the fragment.
     */
    jschar *ownedChars = new (alloc) jschar[re_length];
    memcpy((void*) ownedChars, re_chars, re_length * sizeof(jschar));
    k.re_chars = (const jschar*) ownedChars;

    table.put(k, frag);
    return frag;
}

static JSBool
ProcessCharSet(JSContext *cx, JSRegExp *re, RECharSet *charSet);

/* Utilities for the RegExpNativeCompiler */

namespace {
  /*
   * An efficient way to simultaneously statically guard that the sizeof(bool) is a
   * small power of 2 and take its log2.
   *
   * StaticLog2<N>::result is the base-2 logarithm of N for N in {1, 2, 4, 8}.
   * The primary template deliberately has no |result| member, so using any
   * other N fails to compile.
   */
  template <int> struct StaticLog2 {};
  template <> struct StaticLog2<1> { static const int result = 0; };
  template <> struct StaticLog2<2> { static const int result = 1; };
  template <> struct StaticLog2<4> { static const int result = 2; };
  template <> struct StaticLog2<8> { static const int result = 3; };
}

/*
 * This table allows efficient testing for the ASCII portion of \s during a
 * trace. ECMA-262 15.10.2.12 defines the following characters below 128 to be
 * whitespace: 0x9 (9), 0xA (10), 0xB (11), 0xC (12), 0xD (13), 0x20 (32). The
 * index must be <= 32, since the table has exactly 33 entries (0 through 32).
 */
static const bool js_ws[] = {
/*       0      1      2      3      4      5      6      7      8      9      */
/*  0 */ false, false, false, false, false, false, false, false, false, true,
/*  1 */ true,  true,  true,  true,  false, false, false, false, false, false,
/*  2 */ false, false, false, false, false, false, false, false, false, false,
/*  3 */ false, false, true
};

/*
 * A set of characters described by two parts: up to sBufSize individually
 * listed characters, plus a bitmask of the predefined character classes
 * below.
 */
class CharSet {
  public:
    CharSet() : charEnd(charBuf), classes(0) {}

    /* Capacity of the individual-character buffer. */
    static const uintN sBufSize = 8;

    /* True once the individual-character buffer has no room left. */
    bool full() { return charBuf + sBufSize == charEnd; }

    /*
     * Append one character to the individual-character buffer. Returns false,
     * leaving the set untouched, if the buffer is already full.
     */
    bool addChar(jschar c)
    {
        if (!full()) {
            *charEnd++ = c;
            return true;
        }
        return false;
    }

    /* The classes partition all characters; each one owns a single bit. */
    enum Class {
        LineTerms  = 0x01,    /* Line Terminators (E262 7.3) */
        OtherSpace = 0x02,    /* \s (E262 15.10.2.12) - LineTerms */
        Digit      = 0x04,    /* \d (E262 15.10.2.12) */
        OtherAlnum = 0x08,    /* \w (E262 15.10.2.12) - Digit */
        Other      = 0x10,    /* all other characters */
        All        = LineTerms | OtherSpace | Digit | OtherAlnum | Other,

        /* Unions corresponding to regexp escapes. */
        Space = LineTerms | OtherSpace,
        AlNum = Digit | OtherAlnum,
        Dot   = All & ~LineTerms
    };

    /* Merge every class whose bit is set in |c| into the set. */
    void addClass(Class c) { classes = classes | c; }

    /* Return whether this set and the given one have no character in common. */
    bool disjoint(const CharSet &) const;

  private:
    static bool disjoint(const jschar *beg, const jschar *end, uintN classes);

    /* Mutable so that the const disjoint() may sort the buffer in place. */
    mutable jschar charBuf[sBufSize];
    jschar *charEnd;    /* one past the last used slot of charBuf */
    uintN classes;      /* bitwise OR of the Class values added so far */
};

/*
 * Bitwise OR of two CharSet::Class values, yielding a CharSet::Class so the
 * result can be passed where that enum type is expected.
 */
static inline CharSet::Class
operator|(CharSet::Class c1, CharSet::Class c2) {
    const int merged = (int)c1 | (int)c2;
    return (CharSet::Class)merged;
}
/* Bitwise complement of a CharSet::Class value, kept typed as the enum. */
static inline CharSet::Class
operator~(CharSet::Class c) {
    const int inverted = ~(int)c;
    return (CharSet::Class)inverted;
}

/*
 * Return whether the characters in the range [beg, end) fall within any of the
 * classes with a bit set in 'classes'.
 */
bool
CharSet::disjoint(const jschar *beg, const jschar *end, uintN classes)
{
    for (const jschar *p = beg; p != end; ++p) {
        if (JS7_ISDEC(*p)) {
            if (classes & Digit)
                return false;
        } else if (JS_ISWORD(*p)) {
            if (classes & OtherAlnum)
                return false;
        } else if (RE_IS_LINE_TERM(*p)) {
            if (classes & LineTerms)
                return false;
        } else if (JS_ISSPACE(*p)) {
            if (classes & OtherSpace)
                return false;
        } else {
            if (classes & Other)
                return false;
        }
    }
    return true;
}

/*
 * Return true if the sorted ranges [p1, end1) and [p2, end2) share no
 * element, false as soon as a common element is found. Modelled on the STL's
 * set_intersection: both ranges are walked in step, so the running time is
 * linear in their combined length.
 *
 * FIXME: This is a reusable algorithm, perhaps it should be put somewhere.
 */
template <class InputIterator1, class InputIterator2>
bool
set_disjoint(InputIterator1 p1, InputIterator1 end1,
             InputIterator2 p2, InputIterator2 end2)
{
    for (;;) {
        /* Once either range is exhausted nothing can match any more. */
        if (p1 == end1 || p2 == end2)
            return true;

        if (!(*p1 != *p2))
            return false;

        /* Advance whichever side currently holds the smaller element. */
        if (*p1 < *p2)
            ++p1;
        else if (*p2 < *p1)
            ++p2;
    }
}

/*
 * Comparator handed to js_MergeSort for ordering jschar values. Stores a
 * negative, zero or positive number in |*result| according to whether the
 * character at |a| is less than, equal to or greater than the one at |b|.
 * |arg| is unused. Always returns JS_TRUE.
 */
static JSBool
CharCmp(void *arg, const void *a, const void *b, int *result)
{
    const jschar *lhs = (const jschar *)a;
    const jschar *rhs = (const jschar *)b;
    *result = *lhs - *rhs;
    return JS_TRUE;
}

bool
CharSet::disjoint(const CharSet &other) const
{
    /* Check overlap between classes. */
    if (classes & other.classes)
        return false;

    /*
     * Check char-class overlap. Compare this->charBuf with other.classes and
     * vice versa with a loop.
     */
    if (!disjoint(this->charBuf, this->charEnd, other.classes) ||
        !disjoint(other.charBuf, other.charEnd, this->classes))
        return false;

    /* Check char-char overlap. */
    jschar tmp[CharSet::sBufSize];
    js_MergeSort(charBuf, charEnd - charBuf, sizeof(jschar),
                 CharCmp, 0, tmp);
    js_MergeSort(other.charBuf, other.charEnd - other.charBuf, sizeof(jschar),
                 CharCmp, 0, tmp);
    return set_disjoint(charBuf, charEnd, other.charBuf, other.charEnd);
}

/*
 * Return true if the given subexpression may match the empty string; |true|
 * is also the conservative answer for anything not analysed here (including
 * a null |node|).
 *
 * For ALT and QUANT nodes, |next| selects the extent of the subexpression:
 * when it is true the nodes chained through |node->next| are included, when
 * it is false only |node| itself is examined. The other node kinds are
 * answered without looking at |node->next|.
 */
static bool
mayMatchEmpty(RENode *node, bool next = true)
{
    if (!node)
        return true;

    bool nodeAloneMayBeEmpty;
    switch (node->op) {
      case REOP_FLAT:
      case REOP_CLASS:
      case REOP_ALNUM:
        /* These always consume a character. */
        return false;

      case REOP_ALT:
        nodeAloneMayBeEmpty = mayMatchEmpty((RENode *)node->kid) ||
                              mayMatchEmpty((RENode *)node->u.kid2);
        break;

      case REOP_QUANT:
        nodeAloneMayBeEmpty = node->u.range.min == 0 ||
                              mayMatchEmpty((RENode *)node->kid);
        break;

      default:
        /* REOP_EMPTY and every op not listed above. */
        return true;
    }

    if (!nodeAloneMayBeEmpty)
        return false;
    return !next || mayMatchEmpty(node->next);
}

/*
 * Add to |set| the characters that the given subexpression, taken in
 * isolation, may consume next. Returns true on success. Returns false if the
 * recursion check trips, if the individual-character buffer of |set| is
 * full, or if a node kind that is not handled here is encountered.
 */
static bool
enumerateNextChars(JSContext *cx, RENode *node, CharSet &set)
{
    JS_CHECK_RECURSION(cx, return JS_FALSE);

    if (!node)
        return true;

    switch (node->op) {
      /* Predefined escapes map directly onto class bits. */
      case REOP_DOT:
        set.addClass(CharSet::Dot);
        return true;
      case REOP_DIGIT:
        set.addClass(CharSet::Digit);
        return true;
      case REOP_NONDIGIT:
        set.addClass(~CharSet::Digit);
        return true;
      case REOP_ALNUM:
        set.addClass(CharSet::AlNum);
        return true;
      case REOP_NONALNUM:
        set.addClass(~CharSet::AlNum);
        return true;
      case REOP_SPACE:
        set.addClass(CharSet::Space);
        return true;
      case REOP_NONSPACE:
        set.addClass(~CharSet::Space);
        return true;

      /* A literal contributes the character stored in the node. */
      case REOP_FLAT:
        return set.addChar(node->u.flat.chr);

      case REOP_EMPTY:
        return true;

      /* Control structures: collect from the children first. */
      case REOP_ALT:
      case REOP_ALTPREREQ:
        if (!enumerateNextChars(cx, (RENode *)node->kid, set))
            return false;
        if (!enumerateNextChars(cx, (RENode *)node->u.kid2, set))
            return false;
        break;
      case REOP_QUANT:
        if (!enumerateNextChars(cx, (RENode *)node->kid, set))
            return false;
        break;

      /* Arbitrary character classes and oddities. */
      default:
        return false;
    }

    /*
     * Only control structures reach this point. If the node by itself may
     * match nothing, whatever follows it can supply the next character too.
     */
    if (!mayMatchEmpty(node, false))
        return true;
    return enumerateNextChars(cx, (RENode *)node->next, set);
}

class RegExpNativeCompiler {
 private:
    VMAllocator&     tempAlloc;
    JSContext*       cx;
    JSRegExp*        re;
    CompilerState*   cs;            /* RegExp to compile */
    Fragment*        fragment;
    LirWriter*       lir;
#ifdef DEBUG
    LirWriter*       validate_writer;
#endif
#ifdef NJ_VERBOSE
    LirWriter*       verbose_filter;
#endif
    LirBufWriter*    lirBufWriter;  /* for skip */

    LIns*            state;
    LIns*            start;
    LIns*            cpend;

    LirBuffer* const lirbuf;

    /* True if either the temporary or the trace-monitor data allocator reports OOM. */
    bool outOfMemory() {
        if (tempAlloc.outOfMemory())
            return true;
        return JS_TRACE_MONITOR(cx).dataAlloc->outOfMemory();
    }

    /* Whether the regexp being compiled has the JSREG_FOLD flag set. */
    JSBool isCaseInsensitive() const
    {
        return (cs->flags & JSREG_FOLD) ? JS_TRUE : JS_FALSE;
    }

    /* Emit a label at the current point and make |ins| target it. */
    void targetCurrentPoint(LIns *ins)
    {
        LIns *here = lir->ins0(LIR_label);
        ins->setTarget(here);
    }

    /*
     * Emit one label at the current point, make every instruction collected
     * in |fails| target it, and then empty the list.
     */
    void targetCurrentPoint(LInsList &fails)
    {
        LIns *here = lir->ins0(LIR_label);
        const size_t pending = fails.length();
        for (size_t n = 0; n != pending; n++)
            fails[n]->setTarget(here);
        fails.clear();
    }

    /*
     * These functions return the new position after their match operation,
     * or NULL if there was an error.
     */
    /*
     * Compile an empty node: nothing is emitted and the position is returned
     * unchanged. |node| and |fails| are not used.
     */
    LIns* compileEmpty(RENode* node, LIns* pos, LInsList& fails)
    {
        return pos;
    }

#if defined(AVMPLUS_ARM) || defined(AVMPLUS_SPARC)
/* We can't do this on ARM or SPARC, since it relies on doing a 32-bit load from
 * a pointer which is only 2-byte aligned.
 */
#undef USE_DOUBLE_CHAR_MATCH
#else
#define USE_DOUBLE_CHAR_MATCH
#endif

    /*
     * Emit code that matches the single character |ch| at |pos|.
     *
     * Two kinds of failure branches are appended to |fails|: one taken when
     * |pos| is not below |cpend|, and one taken when the loaded text character
     * matches neither |ch| nor any of its alternative forms. When JSREG_FOLD
     * is set, alternative case forms of |ch| are also accepted.
     *
     * Returns the position advanced by 2 (presumably the byte size of one
     * jschar -- TODO confirm), or NULL if appending to |fails| fails.
     */
    LIns* compileFlatSingleChar(jschar ch, LIns* pos, LInsList& fails)
    {
        /* Bounds check: fail unless pos < cpend. */
        LIns* to_fail = lir->insBranch(LIR_jf, lir->ins2(LIR_ltp, pos, cpend), 0);
        if (!fails.append(to_fail))
            return NULL;
        LIns* text_ch = lir->insLoad(LIR_ldus2ui, pos, 0, ACC_READONLY);

        // Extra characters that need to be compared against when doing folding.
        struct extra {
            jschar ch;
            LIns   *match;
        };
        /* The branches below record at most three entries. */
        extra extras[5];
        int   nextras = 0;

        if (cs->flags & JSREG_FOLD) {
            ch = JS_TOUPPER(ch);
            jschar lch = inverse_upcase(ch);

            if (ch != lch) {
                if (L'A' <= ch && ch <= L'Z') {
                    // Fast conversion of text character to lower case by OR-ing with 32.
                    text_ch = lir->ins2(LIR_ori, text_ch, lir->insImmI(32));
                    // These ASCII letters have 2 lower-case forms. We put the ASCII one in
                    // |extras| so it is tested first, because we expect that to be the common
                    // case. Note that the code points of the non-ASCII forms both have the
                    // 32 bit set, so it is OK to compare against the OR-32-converted text char.
                    ch = lch;
                    if (ch == L'i') {
                        extras[nextras++].ch = ch;
                        ch = 0x131;
                    } else if (ch == L's') {
                        extras[nextras++].ch = ch;
                        ch = 0x17f;
                    }
                    /* Skip the generic |lch| entry below: |ch| already holds it. */
                    goto gen;
                } else if (0x01c4 <= ch && ch <= 0x1e60) {
                    // The following group of conditionals handles characters that have 1 or 2
                    // lower-case forms in addition to JS_TOLOWER(ch).
                    if (ch <= 0x1f1) {                 // DZ,LJ,NJ
                        if (ch == 0x01c4) {
                            extras[nextras++].ch = 0x01c5;
                        } else if (ch == 0x01c7) {
                            extras[nextras++].ch = 0x01c8;
                        } else if (ch == 0x01ca) {
                            extras[nextras++].ch = 0x01cb;
                        } else if (ch == 0x01f1) {
                            extras[nextras++].ch = 0x01f2;
                        }
                    } else if (ch < 0x0392) {          // no extra lower-case forms in this range
                    } else if (ch <= 0x03a6) {         // Greek
                        if (ch == 0x0392) {
                            extras[nextras++].ch = 0x03d0;
                        } else if (ch == 0x0395) {
                            extras[nextras++].ch = 0x03f5;
                        } else if (ch == 0x0398) {
                            extras[nextras++].ch = 0x03d1;
                        } else if (ch == 0x0399) {
                            extras[nextras++].ch = 0x0345;
                            extras[nextras++].ch = 0x1fbe;
                        } else if (ch == 0x039a) {
                            extras[nextras++].ch = 0x03f0;
                        } else if (ch == 0x039c) {
                            extras[nextras++].ch = 0xb5;
                        } else if (ch == 0x03a0) {
                            extras[nextras++].ch = 0x03d6;
                        } else if (ch == 0x03a1) {
                            extras[nextras++].ch = 0x03f1;
                        } else if (ch == 0x03a3) {
                            extras[nextras++].ch = 0x03c2;
                        } else if (ch == 0x03a6) {
                            extras[nextras++].ch = 0x03d5;
                        }
                    } else if (ch == 0x1e60) {         // S with dot above
                        extras[nextras++].ch = 0x1e9b;
                    }
                }

                /* Every non-ASCII cased character also matches |lch|. */
                extras[nextras++].ch = lch;
            }
        }

    gen:
        /* Each extra form gets a branch that jumps ahead to the success point. */
        for (int i = 0; i < nextras; ++i) {
            LIns *test = lir->ins2(LIR_eqi, text_ch, lir->insImmI(extras[i].ch));
            LIns *branch = lir->insBranch(LIR_jt, test, 0);
            extras[i].match = branch;
        }

        /* The final comparison against |ch| fails the match when unequal. */
        if (!fails.append(lir->insBranch(LIR_jf, lir->ins2(LIR_eqi, text_ch, lir->insImmI(ch)), 0)))
            return NULL;

        /* Success point: the extra-form branches emitted above land here. */
        for (int i = 0; i < nextras; ++i)
            targetCurrentPoint(extras[i].match);
        return lir->ins2(LIR_addp, pos, lir->insImmWord(2));
    }

    /* Whether |ch| is cased, i.e. its lower- and upper-case mappings differ. */
    JS_INLINE bool hasCases(jschar ch)
    {
        if (JS_TOLOWER(ch) == JS_TOUPPER(ch))
            return false;
        return true;
    }

    /*
     * Emit code that matches the two consecutive characters |ch1| |ch2| at
     * |pos| with a single 32-bit load and compare.
     *
     * With JSREG_FOLD set, a cased character that cannot be handled by the
     * OR-32 trick makes this fall back to two compileFlatSingleChar calls.
     * Failure branches are appended to |fails|. Returns the advanced
     * position, or NULL on failure.
     */
    LIns* compileFlatDoubleChar(jschar ch1, jschar ch2, LIns* pos, LInsList& fails)
    {
        /*
         * Build the expected 32-bit value in the order the two characters
         * appear in memory. Widen to uint32 before shifting: a 16-bit jschar
         * would otherwise be promoted to signed int, and shifting a value
         * >= 0x8000 left by 16 overflows that.
         */
#ifdef IS_BIG_ENDIAN
        uint32 word = (uint32(ch1) << 16) | uint32(ch2);
#else
        uint32 word = (uint32(ch2) << 16) | uint32(ch1);
#endif
        /*
         * Fast case-insensitive test for ASCII letters: convert text
         * char to lower case by bit-or-ing in 32 and compare.
         */
        JSBool useFastCI = JS_FALSE;
        /* Only written when folding; only read when useFastCI is set. */
        union { jschar c[2]; uint32 i; } mask;
        if (cs->flags & JSREG_FOLD) {
            jschar uch1 = JS_TOUPPER(ch1);
            jschar uch2 = JS_TOUPPER(ch2);
            /* 'I' and 'S' are excluded: they are not handled by the mask trick. */
            JSBool mask1 = (L'A' <= uch1 && uch1 <= L'Z' && uch1 != L'I' && uch1 != L'S');
            JSBool mask2 = (L'A' <= uch2 && uch2 <= L'Z' && uch2 != L'I' && uch2 != L'S');
            if ((!mask1 && hasCases(ch1)) || (!mask2 && hasCases(ch2))) {
                pos = compileFlatSingleChar(ch1, pos, fails);
                if (!pos) return NULL;
                return compileFlatSingleChar(ch2, pos, fails);
            }

            mask.c[0] = mask1 ? 0x0020 : 0x0;
            mask.c[1] = mask2 ? 0x0020 : 0x0;

            if (mask.i) {
                word |= mask.i;
                useFastCI = JS_TRUE;
            }
        }

        /* Bounds check: fail unless pos < cpend - 2. */
        LIns* to_fail = lir->insBranch(LIR_jf,
                                       lir->ins2(LIR_ltp,
                                                 pos,
                                                 lir->ins2(LIR_addp,
                                                           cpend,
                                                           lir->insImmWord(-2))),
                                       0);
        if (!fails.append(to_fail))
            return NULL;
        LIns* text_word = lir->insLoad(LIR_ldi, pos, 0, ACC_OTHER);
        LIns* comp_word = useFastCI ?
            lir->ins2(LIR_ori, text_word, lir->insImmI(mask.i)) :
            text_word;
        if (!fails.append(lir->insBranch(LIR_jf, lir->ins2(LIR_eqi, comp_word, lir->insImmI(word)), 0)))
            return NULL;

        return lir->ins2(LIR_addp, pos, lir->insImmWord(4));
    }

    /*
     * Emit code that matches the literal text of the REOP_FLAT |node| at
     * |pos|. A node of length 1 takes its character from u.flat.chr; longer
     * nodes read their characters through |node->kid|.
     *
     * When USE_DOUBLE_CHAR_MATCH is defined, characters are matched two at a
     * time where possible. In that mode a length-1 node immediately followed
     * by another length-1 REOP_FLAT node is matched together with it, and
     * |node| is advanced to that second node so the caller does not compile
     * it again.
     *
     * Returns the new position, or NULL/0 on failure.
     */
    LIns* compileFlat(RENode *&node, LIns* pos, LInsList& fails)
    {
        const size_t length = node->u.flat.length;
#ifdef USE_DOUBLE_CHAR_MATCH
        if (length == 1) {
            if (node->next && node->next->op == REOP_FLAT &&
                node->next->u.flat.length == 1) {
                pos = compileFlatDoubleChar(node->u.flat.chr,
                                            node->next->u.flat.chr,
                                            pos, fails);
                node = node->next;
            } else {
                pos = compileFlatSingleChar(node->u.flat.chr, pos, fails);
            }
            return pos;
        }

        const jschar *chars = (const jschar *) node->kid;
        size_t i;
        /*
         * Consume the text in pairs. The bound is written as |i + 1 < length|
         * rather than |i < length - 1| so that a zero length cannot wrap
         * around and run the loop past the end of the text.
         */
        for (i = 0; i + 1 < length; i += 2) {
            if (outOfMemory())
                return 0;
            pos = compileFlatDoubleChar(chars[i], chars[i + 1], pos, fails);
            if (!pos)
                return 0;
        }
        JS_ASSERT(pos != 0);
        /* An odd length leaves one trailing character. */
        if (i < length)
            pos = compileFlatSingleChar(chars[i], pos, fails);
        return pos;
#else
        if (length == 1)
            return compileFlatSingleChar(node->u.flat.chr, pos, fails);

        const jschar *chars = (const jschar *) node->kid;
        for (size_t i = 0; i < length; i++) {
            if (outOfMemory())
                return 0;
            pos = compileFlatSingleChar(chars[i], pos, fails);
            if (!pos)
                return 0;
        }
        return pos;
#endif
    }

    /*
     * Emit matching code for a REOP_CLASS node (a bracketed character class).
     *
     * The class bitmap is copied into trace-monitor data memory and the
     * generated code tests the bit for the current character. Branches taken
     * on a failed match are appended to 'fails'.
     *
     * Returns the position after the matched character (pos + 2 bytes), or
     * NULL when the class cannot be compiled: classes whose 'sense' is false,
     * bitmaps larger than 1024 bytes, charset conversion failure, or
     * out-of-memory.
     */
    LIns* compileClass(RENode* node, LIns* pos, LInsList& fails)
    {
        /* Classes with a false 'sense' are not supported; NULL means "cannot compile". */
        if (!node->u.ucclass.sense)
            return NULL;
        /*
         * If we share generated native code, we need to make a copy
         * of the bitmap because the original regexp's copy is destroyed when
         * that regexp is.
         */
        RECharSet *charSet = &re->classList[node->u.ucclass.index];
        size_t bitmapLen = (charSet->length >> 3) + 1;
        /* Arbitrary size limit on bitmap. */
        if (bitmapLen > 1024)
            return NULL;
        Allocator &alloc = *JS_TRACE_MONITOR(cx).dataAlloc;
        /* The following line allocates charSet.u.bits if successful. */
        if (!charSet->converted && !ProcessCharSet(cx, re, charSet))
            return NULL;
        void* bitmapData = alloc.alloc(bitmapLen);
        if (outOfMemory())
            return NULL;
        memcpy(bitmapData, charSet->u.bits, bitmapLen);

        /* Fail unless there is at least one character left: pos < cpend. */
        LIns* to_fail = lir->insBranch(LIR_jf, lir->ins2(LIR_ltp, pos, cpend), 0);
        if (!fails.append(to_fail))
            return NULL;
        /* Load the 16-bit character and fail if it lies beyond the bitmap. */
        LIns* text_ch = lir->insLoad(LIR_ldus2ui, pos, 0, ACC_READONLY);
        if (!fails.append(lir->insBranch(LIR_jf,
                                         lir->ins2(LIR_lei, text_ch, lir->insImmI(charSet->length)),
                                         0))) {
            return NULL;
        }
        /* Test bit (text_ch & 7) of byte (text_ch >> 3) in the copied bitmap. */
        LIns* byteIndex = lir->insI2P(lir->ins2(LIR_rshi, text_ch, lir->insImmI(3)));
        LIns* bitmap = lir->insImmP(bitmapData);
        LIns* byte = lir->insLoad(LIR_lduc2ui, lir->ins2(LIR_addp, bitmap, byteIndex), (int) 0,
                                  ACC_READONLY);
        LIns* bitMask = lir->ins2(LIR_lshi, lir->insImmI(1),
                               lir->ins2(LIR_andi, text_ch, lir->insImmI(0x7)));
        LIns* test = lir->ins2(LIR_eqi, lir->ins2(LIR_andi, byte, bitMask), lir->insImmI(0));

        /* A clear bit means the character is not in the class: fail. */
        LIns* to_next = lir->insBranch(LIR_jt, test, 0);
        if (!fails.append(to_next))
            return NULL;
        return lir->ins2(LIR_addp, pos, lir->insImmWord(2));
    }

    /*
     * Factor out common code to index a bool lookup table (callers in this
     * class pass js_alnum and js_ws).
     *
     * Emits LIR that computes the address tbl + chr * sizeof(bool) and loads
     * one unsigned byte from it. The index is only scaled when sizeof(bool)
     * is not 1. Returns the load instruction; no range check on 'chr' is
     * emitted here, so callers must guard the index themselves.
     */
    LIns *compileTableRead(LIns *chr, const bool *tbl)
    {
        if (sizeof(bool) != 1) {
            /* Scale the index by the element size (a left shift by log2). */
            LIns *sizeLog2 = lir->insImmI(StaticLog2<sizeof(bool)>::result);
            chr = lir->ins2(LIR_lshi, chr, sizeLog2);
        }
        LIns *addr = lir->ins2(LIR_addp, lir->insImmP(tbl), lir->insUI2P(chr));
        return lir->insLoad(LIR_lduc2ui, addr, 0, ACC_READONLY);
    }

    /*
     * Compile a builtin character class: REOP_DOT, REOP_DIGIT, REOP_NONDIGIT,
     * REOP_ALNUM, REOP_NONALNUM, REOP_SPACE or REOP_NONSPACE.
     *
     * The generated code first checks that a character is available
     * (pos < cpend), loads it, and then branches to failure (via 'fails') if
     * the character is not in the class. On success the code falls through.
     *
     * Returns the position after the consumed character (pos + 2 bytes), or
     * NULL if a branch could not be recorded in 'fails' or node->op is not
     * one of the builtins handled here.
     */
    LIns *compileBuiltinClass(RENode *node, LIns *pos, LInsList &fails)
    {
        /* All the builtins checked below consume one character. */
        if (!fails.append(lir->insBranch(LIR_jf, lir->ins2(LIR_ltp, pos, cpend), 0)))
            return NULL;
        LIns *chr = lir->insLoad(LIR_ldus2ui, pos, 0, ACC_READONLY);

        switch (node->op) {
          case REOP_DOT:
          {
            /* Accept any character except those in ECMA-262 15.10.2.8. */
            LIns *eq1 = lir->ins2(LIR_eqi, chr, lir->insImmI('\n'));
            if (!fails.append(lir->insBranch(LIR_jt, eq1, NULL)))
                return NULL;
            LIns *eq2 = lir->ins2(LIR_eqi, chr, lir->insImmI('\r'));
            if (!fails.append(lir->insBranch(LIR_jt, eq2, NULL)))
                return NULL;
            LIns *eq3 = lir->ins2(LIR_eqi, chr, lir->insImmI(LINE_SEPARATOR));
            if (!fails.append(lir->insBranch(LIR_jt, eq3, NULL)))
                return NULL;
            LIns *eq4 = lir->ins2(LIR_eqi, chr, lir->insImmI(PARA_SEPARATOR));
            if (!fails.append(lir->insBranch(LIR_jt, eq4, NULL)))
                return NULL;
            break;
          }
          case REOP_DIGIT:
          {
            /* Fail unless '0' <= chr <= '9'. */
            LIns *ge = lir->ins2(LIR_gei, chr, lir->insImmI('0'));
            if (!fails.append(lir->insBranch(LIR_jf, ge, NULL)))
                return NULL;
            LIns *le = lir->ins2(LIR_lei, chr, lir->insImmI('9'));
            if (!fails.append(lir->insBranch(LIR_jf, le, NULL)))
                return NULL;
            break;
          }
          case REOP_NONDIGIT:
          {
            /* Use 'and' to give a predictable branch for success path. */
            LIns *ge = lir->ins2(LIR_gei, chr, lir->insImmI('0'));
            LIns *le = lir->ins2(LIR_lei, chr, lir->insImmI('9'));
            LIns *both = lir->ins2(LIR_andi, ge, le);
            /* Fail when 'both' is non-zero, i.e. when chr is a digit. */
            if (!fails.append(lir->insBranch(LIR_jf, lir->insEqI_0(both), NULL)))
                return NULL;
            break;
          }
          case REOP_ALNUM:
          {
            /*
             * Compile the condition:
             *   ((uint)*cp) < 128 && js_alnum[(uint)*cp]
             */
            LIns *rangeCnd = lir->ins2(LIR_ltui, chr, lir->insImmI(128));
            if (!fails.append(lir->insBranch(LIR_jf, rangeCnd, NULL)))
                return NULL;
            LIns *tableVal = compileTableRead(chr, js_alnum);
            /* Fail when the table entry is zero. */
            if (!fails.append(lir->insBranch(LIR_jt, lir->insEqI_0(tableVal), NULL)))
                return NULL;
            break;
          }
          case REOP_NONALNUM:
          {
            /*
             * Compile the condition:
             *   ((uint)*cp) >= 128 || !js_alnum[(uint)*cp]
             */
            LIns *rangeCnd = lir->ins2(LIR_geui, chr, lir->insImmI(128));
            /* Out-of-table characters succeed at once; skip the table read. */
            LIns *rangeBr = lir->insBranch(LIR_jt, rangeCnd, NULL);
            LIns *tableVal = compileTableRead(chr, js_alnum);
            /* Fail when the table entry is non-zero. */
            if (!fails.append(lir->insBranch(LIR_jf, lir->insEqI_0(tableVal), NULL)))
                return NULL;
            LIns *success = lir->ins0(LIR_label);
            rangeBr->setTarget(success);
            break;
          }
          case REOP_SPACE:
          case REOP_NONSPACE:
          {
            /*
             * ECMA-262 7.2, 7.3, and 15.10.2.12 define a bunch of Unicode code
             * points for whitespace. We optimize here for the common case of
             * ASCII characters using a table lookup for the lower block that
             * can actually contain spaces. For the rest, use a (more or less)
             * binary search to minimize tests.
             *
             *   [0000,0020]: 9, A, B, C, D, 20
             *   (0020,00A0): none
             *   [00A0,2000): A0, 1680, 180E
             *   [2000,200A]: all
             *   (200A, max): 2028, 2029, 202F, 205F, 3000
             *
             * The code below classifies the character as "match" (it is
             * whitespace) or "miss" (it is not). For REOP_SPACE a miss is a
             * failure; for REOP_NONSPACE a match is a failure.
             */
            /* At or below 0x20? */
            LIns *tableRangeCnd = lir->ins2(LIR_leui, chr, lir->insImmI(0x20));
            LIns *tableRangeBr = lir->insBranch(LIR_jt, tableRangeCnd, NULL);
            /* Fall through means *chr > 0x20. */

            /* Handle (0x20,0xA0). */
            LIns *asciiCnd = lir->ins2(LIR_ltui, chr, lir->insImmI(0xA0));
            LIns *asciiMissBr = lir->insBranch(LIR_jt, asciiCnd, NULL);
            /* Fall through means *chr >= 0xA0. */

            /* Partition around [0x2000,0x200A]. */
            LIns *belowCnd = lir->ins2(LIR_ltui, chr, lir->insImmI(0x2000));
            LIns *belowBr = lir->insBranch(LIR_jt, belowCnd, NULL);
            LIns *aboveCnd = lir->ins2(LIR_gtui, chr, lir->insImmI(0x200A));
            LIns *aboveBr = lir->insBranch(LIR_jt, aboveCnd, NULL);
            /* Neither below nor above: the character is inside the interval. */
            LIns *intervalMatchBr = lir->insBranch(LIR_j, NULL, NULL);

            /* Handle [0xA0,0x2000). */
            LIns *belowLbl = lir->ins0(LIR_label);
            belowBr->setTarget(belowLbl);
            LIns *eq1Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0xA0));
            LIns *eq1Br = lir->insBranch(LIR_jt, eq1Cnd, NULL);
            LIns *eq2Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x1680));
            LIns *eq2Br = lir->insBranch(LIR_jt, eq2Cnd, NULL);
            LIns *eq3Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x180E));
            LIns *eq3Br = lir->insBranch(LIR_jt, eq3Cnd, NULL);
            LIns *belowMissBr = lir->insBranch(LIR_j, NULL, NULL);

            /* Handle (0x200A, max). */
            LIns *aboveLbl = lir->ins0(LIR_label);
            aboveBr->setTarget(aboveLbl);
            LIns *eq4Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x2028));
            LIns *eq4Br = lir->insBranch(LIR_jt, eq4Cnd, NULL);
            LIns *eq5Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x2029));
            LIns *eq5Br = lir->insBranch(LIR_jt, eq5Cnd, NULL);
            LIns *eq6Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x202F));
            LIns *eq6Br = lir->insBranch(LIR_jt, eq6Cnd, NULL);
            LIns *eq7Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x205F));
            LIns *eq7Br = lir->insBranch(LIR_jt, eq7Cnd, NULL);
            LIns *eq8Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x3000));
            LIns *eq8Br = lir->insBranch(LIR_jt, eq8Cnd, NULL);
            LIns *aboveMissBr = lir->insBranch(LIR_j, NULL, NULL);

            /* Handle [0,0x20]. */
            LIns *tableLbl = lir->ins0(LIR_label);
            tableRangeBr->setTarget(tableLbl);
            LIns *tableVal = compileTableRead(chr, js_ws);
            LIns *tableCnd = lir->insEqI_0(tableVal);
            /* Non-zero table entry is a match; a zero entry falls into the misses. */
            LIns *tableMatchBr = lir->insBranch(LIR_jf, tableCnd, NULL);

            /* Collect misses. */
            LIns *missLbl = lir->ins0(LIR_label);
            asciiMissBr->setTarget(missLbl);
            belowMissBr->setTarget(missLbl);
            aboveMissBr->setTarget(missLbl);
            LIns *missBr = lir->insBranch(LIR_j, NULL, NULL);
            if (node->op == REOP_SPACE) {
                /* For \s a miss is a failed match. */
                if (!fails.append(missBr))
                    return NULL;
            }

            /* Collect matches. */
            LIns *matchLbl = lir->ins0(LIR_label);
            intervalMatchBr->setTarget(matchLbl);
            tableMatchBr->setTarget(matchLbl);
            eq1Br->setTarget(matchLbl); eq2Br->setTarget(matchLbl);
            eq3Br->setTarget(matchLbl); eq4Br->setTarget(matchLbl);
            eq5Br->setTarget(matchLbl); eq6Br->setTarget(matchLbl);
            eq7Br->setTarget(matchLbl); eq8Br->setTarget(matchLbl);
            if (node->op == REOP_NONSPACE) {
                /* For \S a whitespace match is a failed match. */
                LIns *matchBr = lir->insBranch(LIR_j, NULL, NULL);
                if (!fails.append(matchBr))
                    return NULL;
            }
            /* Fall through means match == success. */

            /* Collect successes to fall through. */
            LIns *success = lir->ins0(LIR_label);
            if (node->op == REOP_NONSPACE)
                missBr->setTarget(success);
            break;
          }
          default:
            /* Not a builtin class handled by this method. */
            return NULL;
        }

        /* Advance past the one jschar that was consumed. */
        return lir->ins2(LIR_addp, pos, lir->insImmWord(2));
    }

    /*
     * Compile an alternation node (left | right). The left alternative is
     * node->kid and the right alternative is node->u.kid2.
     *
     * Generated code tries the left branch first; its failure branches are
     * targeted at the start of the right branch, whose failures are added to
     * the caller's 'fails'. Both branches store their end position in
     * REGlobalData::stateStack and the joined position is reloaded from there.
     *
     * 'atEnd' tells whether nothing follows this node in the regexp. When
     * something does follow, the alternation is only compiled if it can be
     * shown not to need backtracking; otherwise NULL is returned.
     *
     * Returns the position after the alternation, or NULL if it cannot be
     * compiled.
     */
    LIns *compileAlt(RENode *node, LIns *pos, bool atEnd, LInsList &fails)
    {
        RENode *leftRe = (RENode *)node->kid, *rightRe = (RENode *)node->u.kid2;

        /*
         * If the RE continues after the alternative, we need to ensure that no
         * backtracking is required. Recursive calls to compileNode will fail
         * on capturing parens, so the only thing we have to check here is that,
         * if the left subexpression matches, we can keep going without later
         * deciding we need to try the right subexpression.
         */
        if (!atEnd) {
            /*
             * If there is no character overlap between left and right, then
             * there is only one possible path through the alternative.
             */
            CharSet leftSet, rightSet;
            if (!enumerateNextChars(cx, leftRe, leftSet) ||
                !enumerateNextChars(cx, rightRe, rightSet) ||
                !leftSet.disjoint(rightSet))
                return NULL;

            /*
             * If there is an empty path through either subexpression, the above
             * check is incomplete; we need to include |node->next| as well.
             */
            bool epsLeft = mayMatchEmpty(leftRe),
                 epsRight = mayMatchEmpty(rightRe);
            if (epsRight && epsLeft) {
                return NULL;
            } else if (epsLeft || epsRight) {
                /*
                 * The branch that can match empty hands control to
                 * |node->next|, so what follows must not overlap with the
                 * other branch.
                 */
                CharSet nextSet;
                if (!enumerateNextChars(cx, node->next, nextSet) ||
                    (epsLeft && !nextSet.disjoint(rightSet)) ||
                    (epsRight && !nextSet.disjoint(leftSet))) {
                    return NULL;
                }
            }
        }

        /* Try left branch. */
        LInsList kidFails(cx);
        LIns *branchEnd = compileNode(leftRe, pos, atEnd, kidFails);
        if (!branchEnd)
            return NULL;

        /*
         * Since there are no phis, simulate by writing to and reading from
         * memory (REGlobalData::stateStack, since it is unused).
         */
        lir->insStore(branchEnd, state,
                       offsetof(REGlobalData, stateStack), ACC_OTHER);
        /* Left branch matched: jump over the right branch to the join point. */
        LIns *leftSuccess = lir->insBranch(LIR_j, NULL, NULL);

        /* Try right branch. Failures of the left branch land here. */
        targetCurrentPoint(kidFails);
        if (!(branchEnd = compileNode(rightRe, pos, atEnd, fails)))
            return NULL;
        lir->insStore(branchEnd, state,
                       offsetof(REGlobalData, stateStack), ACC_OTHER);

        /* Land success on the left branch. */
        targetCurrentPoint(leftSuccess);
        return addName(fragment->lirbuf,
                       lir->insLoad(LIR_ldp, state, offsetof(REGlobalData, stateStack), ACC_OTHER),
                       "pos");
    }

    /*
     * Compile an optional body ('?'): match 'node' if possible, otherwise
     * continue at the original position.
     *
     * The incoming position is stored first, so that if the body fails the
     * value reloaded at the join point is the unchanged position. If the body
     * matches, the new position overwrites it. Failures of the body are
     * collected locally and targeted at the join point; the 'fails'
     * parameter is not used by this method.
     *
     * Returns the reloaded position, or NULL if the body cannot be compiled.
     */
    LIns *compileOpt(RENode *node, LIns *pos, bool atEnd, LInsList &fails)
    {
        /*
         * Since there are no phis, simulate by writing to and reading from
         * memory (REGlobalData::stateStack, since it is unused).
         */
        lir->insStore(pos, state, offsetof(REGlobalData, stateStack), ACC_OTHER);

        /* Try ? body. */
        LInsList kidFails(cx);
        if (!(pos = compileNode(node, pos, atEnd, kidFails)))
            return NULL;
        lir->insStore(pos, state, offsetof(REGlobalData, stateStack), ACC_OTHER);

        /* Join success and failure and get new position. */
        targetCurrentPoint(kidFails);
        pos = addName(fragment->lirbuf,
                      lir->insLoad(LIR_ldp, state, offsetof(REGlobalData, stateStack), ACC_OTHER),
                      "pos");

        return pos;
    }

    /*
     * Compile a quantifier node. Only greedy quantifiers with min <= 1 and
     * max equal to 0, 1 or (uintN)-1 are supported (this covers *, +, ? and
     * {1,1}); anything else returns NULL. The quantified body is node->kid.
     *
     * 'atEnd' tells whether nothing follows this node in the regexp. When
     * something does follow, the quantifier is only compiled if the set of
     * characters that can start the body is disjoint from the set that can
     * start what follows, so that no backtracking is needed.
     *
     * Returns the position after the quantified matches, or NULL if the
     * quantifier cannot be compiled.
     */
    LIns *compileQuant(RENode *node, LIns *pos, bool atEnd, LInsList &fails)
    {
        /* Only support greedy *, +, ?. */
        if (!node->u.range.greedy ||
            node->u.range.min > 1 ||
            (node->u.range.max > 1 && node->u.range.max < (uintN)-1)) {
            return NULL;
        }

        RENode *bodyRe = (RENode *)node->kid;

        /*
         * If the RE continues after the quantifier, we need to ensure that no
         * backtracking is required. Recursive calls to compileNode will fail
         * on capturing parens, so the only thing we have to check here is that,
         * if the quantifier body matches, we can continue matching the body
         * without later deciding we need to undo the body matches.
         */
        if (!atEnd) {
            /*
             * If there is no character overlap between the body and
             * |node->next|, then all possible body matches are used.
             */
            CharSet bodySet, nextSet;
            if (!enumerateNextChars(cx, bodyRe, bodySet) ||
                !enumerateNextChars(cx, node->next, nextSet) ||
                !bodySet.disjoint(nextSet)) {
                return NULL;
            }
        }

        /* Fork off ? and {1,1}. */
        if (node->u.range.max == 1) {
            if (node->u.range.min == 1)
                return compileNode(bodyRe, pos, atEnd, fails);
            else
                return compileOpt(bodyRe, pos, atEnd, fails);
        }

        /* For +, compile a copy of the body where failure is real failure. */
        if (node->u.range.min == 1) {
            if (!(pos = compileNode(bodyRe, pos, atEnd, fails)))
                return NULL;
        }

        /*
         * Since there are no phis, simulate by writing to and reading from
         * memory (REGlobalData::stateStack, since it is unused).
         */
        lir->insStore(pos, state, offsetof(REGlobalData, stateStack), ACC_OTHER);

        /* Begin iteration: load loop variables. */
        LIns *loopTop = lir->ins0(LIR_label);
        LIns *iterBegin = addName(fragment->lirbuf,
                                  lir->insLoad(LIR_ldp, state,
                                               offsetof(REGlobalData, stateStack), ACC_OTHER),
                                  "pos");

        /* Match quantifier body. Body failures exit the loop, not the match. */
        LInsList kidFails(cx);
        LIns *iterEnd = compileNode(bodyRe, iterBegin, atEnd, kidFails);
        if (!iterEnd)
            return NULL;

        /*
         * If there is an epsilon path through the body then, when it is taken,
         * we need to abort the loop or else we will loop forever.
         */
        if (mayMatchEmpty(bodyRe)) {
            LIns *eqCnd = lir->ins2(LIR_eqp, iterBegin, iterEnd);
            if (!kidFails.append(lir->insBranch(LIR_jt, eqCnd, NULL)))
                return NULL;
        }

        /* End iteration: store loop variables, increment, jump */
        lir->insStore(iterEnd, state, offsetof(REGlobalData, stateStack), ACC_OTHER);
        lir->insBranch(LIR_j, NULL, loopTop);

        /*
         * Using '+' as branch, the intended control flow is:
         *
         *     ...
         * A -> |
         *      |<---.
         * B -> |    |
         *      +--. |
         * C -> |  | |
         *      +--. |
         * D -> |  | |
         *      +--|-'
         * X -> |  |
         *      |<-'
         * E -> |
         *     ...
         *
         * We are currently at point X. Since the regalloc makes a single,
         * linear, backwards sweep over the IR (going from E to A), point X
         * must tell the regalloc what LIR insns are live at the end of D.
         * Thus, we need to report *all* insns defined *before* the end of D
         * that may be used *after* D. This means insns defined in A, B, C, or
         * D and used in B, C, D, or E. Since insns in B, C, and D are
         * conditionally executed, and we (currently) don't have real phi
         * nodes, we need only consider insns defined in A and used in E.
         */
        lir->ins1(LIR_livep, state);
        lir->ins1(LIR_livep, cpend);
        lir->ins1(LIR_livep, start);

        /*
         * After the loop: body failures land here. Continue from 'iterBegin',
         * the position that was loaded from memory at the top of the loop.
         */
        targetCurrentPoint(kidFails);
        return iterBegin;
    }

    /*
     * Compile the chain of regular expression nodes starting at 'node',
     * following the 'next' links. Returns 0 if compilation failed. Otherwise
     * the generated code falls through on success (the returned LIns* is the
     * current 'pos') and jumps away on failure through the guard instructions
     * added to 'fails'.
     *
     * 'atEnd' is true when nothing follows this chain in the enclosing
     * regexp; it is forwarded to alternation and quantifier children only
     * for the last node of the chain.
     */
    LIns *compileNode(RENode *node, LIns *pos, bool atEnd, LInsList &fails)
    {
        while (pos && node) {
            if (outOfMemory())
                return NULL;

            /* Computed up front: compileFlat may advance 'node'. */
            const bool isLastAndAtEnd = atEnd && !node->next;

            switch (node->op) {
              case REOP_EMPTY:
                pos = compileEmpty(node, pos, fails);
                break;

              case REOP_FLAT:
                pos = compileFlat(node, pos, fails);
                break;

              case REOP_ALT:
              case REOP_ALTPREREQ:
                pos = compileAlt(node, pos, isLastAndAtEnd, fails);
                break;

              case REOP_QUANT:
                pos = compileQuant(node, pos, isLastAndAtEnd, fails);
                break;

              case REOP_CLASS:
                pos = compileClass(node, pos, fails);
                break;

              case REOP_DOT:
              case REOP_DIGIT:
              case REOP_NONDIGIT:
              case REOP_ALNUM:
              case REOP_NONALNUM:
              case REOP_SPACE:
              case REOP_NONSPACE:
                pos = compileBuiltinClass(node, pos, fails);
                break;

              default:
                /* Any other node kind is not supported by the native compiler. */
                return NULL;
            }

            node = node->next;
        }
        return pos;
    }

    /*
     * This function kicks off recursive compileNode compilation, finishes the
     * success path, and lets the failed-match path fall through.
     *
     * On the success path the final position is stored in
     * REGlobalData::stateStack and the generated code returns 1. If
     * 'anchorFail' is non-NULL, that branch is targeted at code returning 0.
     * All match-failure branches are targeted at the point where this method
     * stops emitting, so the caller decides what happens after a failed match.
     *
     * Returns false if the regexp body could not be compiled.
     */
    bool compileRootNode(RENode *root, LIns *pos, LIns *anchorFail)
    {
        /* Compile the regular expression body. */
        LInsList fails(cx);
        pos = compileNode(root, pos, true, fails);
        if (!pos)
            return false;

        /* Fall-through from compileNode means success. */
        lir->insStore(pos, state, offsetof(REGlobalData, stateStack), ACC_OTHER);
        lir->ins0(LIR_regfence);
        lir->ins1(LIR_reti, lir->insImmI(1));

        /* Stick return here so we don't have to jump over it every time. */
        if (anchorFail) {
            targetCurrentPoint(anchorFail);
            lir->ins0(LIR_regfence);
            lir->ins1(LIR_reti, lir->insImmI(0));
        }

        /* Target failed matches. */
        targetCurrentPoint(fails);
        return true;
    }

    /*
     * Compile a regular expression that can only match at the starting
     * position (the JSREG_STICKY case). Returns false if the body could not
     * be compiled or memory ran out.
     */
    bool compileSticky(RENode *root, LIns *start)
    {
        bool compiled = compileRootNode(root, start, NULL);
        if (compiled) {
            /* Match failures land here: no retry, the whole match fails. */
            lir->ins0(LIR_regfence);
            lir->ins1(LIR_reti, lir->insImmI(0));
            compiled = !outOfMemory();
        }
        return compiled;
    }

    /*
     * Compile normal regular expressions that can match starting at any char.
     *
     * Emits a guard that ends the search (returning 0, see compileRootNode)
     * once 'start' has moved past 'cpend'. After a failed match attempt the
     * generated code stores start + 2 bytes (one jschar) into
     * REGlobalData::skipped; compile() then calls insertGuard, which emits
     * the jump back to the loop label so the next attempt reloads 'start'.
     *
     * Returns false if the body could not be compiled or memory ran out.
     */
    bool compileAnchoring(RENode *root, LIns *start)
    {
        /* Guard outer anchoring loop. Use <= to allow empty regexp match. */
        LIns *anchorFail = lir->insBranch(LIR_jf, lir->ins2(LIR_lep, start, cpend), 0);

        if (!compileRootNode(root, start, anchorFail))
            return false;

        /* Outer loop increment. */
        lir->insStore(lir->ins2(LIR_addp, start, lir->insImmWord(2)), state,
                       offsetof(REGlobalData, skipped), ACC_OTHER);

        return !outOfMemory();
    }

    /*
     * Attach a human-readable name to a LIR instruction for verbose output.
     * The name is only recorded when NJ_VERBOSE is defined (inside
     * debug_only_stmt); otherwise this is a no-op. Always returns 'ins' so
     * that the call can wrap an expression.
     */
    inline LIns*
    addName(LirBuffer* lirbuf, LIns* ins, const char* name)
    {
#ifdef NJ_VERBOSE
        debug_only_stmt(lirbuf->printer->lirNameMap->addName(ins, name);)
#endif
        return ins;
    }

    /*
     * Insert the side exit and guard record for a compiled regexp. Most
     * of the fields are not used.
     *
     * If 'loopLabel' is non-NULL, a jump back to it is emitted first, followed
     * by liveness markers for the fragment's 'state' and 'param1'
     * instructions. A GuardRecord followed by a VMSideExit is then allocated
     * from the trace monitor's data allocator and attached to an
     * unconditional LIR_x guard, which becomes the fragment's last
     * instruction.
     *
     * 're_chars' is not read here; 're_length' only sizes the allocation.
     * Returns the new guard record.
     */
    GuardRecord* insertGuard(LIns* loopLabel, const jschar* re_chars, size_t re_length)
    {
        if (loopLabel) {
            lir->insBranch(LIR_j, NULL, loopLabel);
            LirBuffer* lirbuf = fragment->lirbuf;
            lir->ins1(LIR_livep, lirbuf->state);
            lir->ins1(LIR_livep, lirbuf->param1);
        }

        Allocator &alloc = *JS_TRACE_MONITOR(cx).dataAlloc;

        /*
         * An empty regexp source has re_length == 0. Clamp so the unsigned
         * subtraction cannot wrap around and make the allocation smaller
         * than the GuardRecord + VMSideExit pair written below.
         */
        size_t extraChars = re_length ? re_length - 1 : 0;

        /* Must only create a VMSideExit; see StackFilter::getTops. */
        size_t len = (sizeof(GuardRecord) +
                      sizeof(VMSideExit) +
                      extraChars * sizeof(jschar));
        GuardRecord* guard = (GuardRecord *) alloc.alloc(len);
        /* The side exit lives immediately after the guard record. */
        VMSideExit* exit = (VMSideExit*)(guard+1);
        guard->exit = exit;
        guard->exit->target = fragment;
        fragment->lastIns = lir->insGuard(LIR_x, NULL, guard);
        // guard->profCount is calloc'd to zero
        verbose_only(
            guard->profGuardID = fragment->guardNumberer++;
            guard->nextInFrag = fragment->guardsForFrag;
            fragment->guardsForFrag = guard;
        )
        return guard;
    }

 public:
    /*
     * Set up a compiler for regexp 're' that emits into 'fragment'.
     *
     * A fresh LirBuffer is allocated from the trace monitor's regexp
     * temporary allocator and installed as the fragment's buffer. The LIR
     * writers themselves are created later, in compile().
     */
    RegExpNativeCompiler(JSContext* cx, JSRegExp* re, CompilerState* cs, Fragment* fragment)
        : tempAlloc(*JS_TRACE_MONITOR(cx).reTempAlloc), cx(cx),
          re(re), cs(cs), fragment(fragment), lir(NULL), lirBufWriter(NULL),
          lirbuf(new (tempAlloc) LirBuffer(tempAlloc))
    {
        fragment->lirbuf = lirbuf;
#ifdef DEBUG
        /*
         * compile() deletes validate_writer on its failure path, which can be
         * reached before the writer has been created; start from NULL so that
         * delete is a harmless no-op in that case.
         */
        validate_writer = NULL;
        lirbuf->printer = new (tempAlloc) LInsPrinter(tempAlloc);
#endif
#ifdef NJ_VERBOSE
        /* Same reasoning as for validate_writer above. */
        debug_only_stmt( verbose_filter = NULL; )
#endif
    }

    /*
     * Release everything that was allocated from the temporary allocator
     * while this compiler was alive (the constructor allocates the LirBuffer
     * from it).
     */
    ~RegExpNativeCompiler() {
        /* Purge the tempAlloc used during recording. */
        tempAlloc.reset();
    }

    /*
     * Compile the regexp AST in cs->result to native code in 'fragment'.
     *
     * Sets up the LIR writer pipeline, emits the function prologue (saved
     * registers, the 'state' and 'cpend' parameters, the loop label and the
     * load of the start position), compiles the regexp body in sticky or
     * anchoring mode, appends the guard record and finally runs the assembler.
     *
     * Returns JS_TRUE on success. On failure returns JS_FALSE; if the failure
     * was caused by memory exhaustion or an overfull JIT cache the cache is
     * flushed, otherwise the regexp is flagged JSREG_NOCOMPILE so that native
     * compilation is not attempted for it again.
     */
    JSBool compile()
    {
        GuardRecord* guard = NULL;
        const jschar* re_chars;
        size_t re_length;
        TraceMonitor* tm = &JS_TRACE_MONITOR(cx);
        Assembler *assm = tm->assembler;
        LIns* loopLabel = NULL;

        if (outOfMemory() || OverfullJITCache(tm))
            return JS_FALSE;

        re->source->getCharsAndLength(re_chars, re_length);
        /*
         * If the regexp is too long nanojit will assert when we
         * try to insert the guard record.
         */
        if (re_length > 1024) {
            re->flags |= JSREG_NOCOMPILE;
            return JS_FALSE;
        }

        /* At this point we have an empty fragment. */
        LirBuffer* lirbuf = fragment->lirbuf;
        if (outOfMemory())
            goto fail;
        /* FIXME Use bug 463260 smart pointer when available. */
        lir = lirBufWriter = new LirBufWriter(lirbuf, nanojit::AvmCore::config);

        /* FIXME Use bug 463260 smart pointer when available. */
#ifdef NJ_VERBOSE
        debug_only_stmt(
            if (LogController.lcbits & LC_TMRegexp) {
                lir = verbose_filter = new VerboseWriter(tempAlloc, lir, lirbuf->printer,
                                                         &LogController);
            }
        )
#endif
#ifdef DEBUG
        lir = validate_writer = new ValidateWriter(lir, lirbuf->printer, "regexp writer pipeline");
#endif

        /*
         * Although we could just load REGlobalData::cpend from 'state', by
         * passing it as a parameter, we avoid loading it every iteration.
         */
        lir->ins0(LIR_start);

        for (int i = 0; i < NumSavedRegs; ++i)
            lir->insParam(i, 1);
#ifdef DEBUG
        for (int i = 0; i < NumSavedRegs; ++i)
            addName(lirbuf, lirbuf->savedRegs[i], regNames[Assembler::savedRegs[i]]);
#endif

        /* Parameter 0 is the match state, parameter 1 the end of the input. */
        lirbuf->state = state = addName(lirbuf, lir->insParam(0, 0), "state");
        lirbuf->param1 = cpend = addName(lirbuf, lir->insParam(1, 0), "cpend");

        loopLabel = lir->ins0(LIR_label);
        // If profiling, record where the loop label is, so that the
        // assembler can insert a frag-entry-counter increment at that
        // point
        verbose_only( if (LogController.lcbits & LC_FragProfile) {
            NanoAssert(!fragment->loopLabel);
            fragment->loopLabel = loopLabel;
        })

        /* The position at which this match attempt starts. */
        start = addName(lirbuf,
                      lir->insLoad(LIR_ldp, state, offsetof(REGlobalData, skipped), ACC_OTHER),
                      "start");

        if (cs->flags & JSREG_STICKY) {
            if (!compileSticky(cs->result, start))
                goto fail;
        } else {
            if (!compileAnchoring(cs->result, start))
                goto fail;
        }

        guard = insertGuard(loopLabel, re_chars, re_length);

        if (outOfMemory())
            goto fail;

        /*
         * Deep in the nanojit compiler, the StackFilter is trying to throw
         * away stores above the VM interpreter/native stacks. We have no such
         * stacks, so rely on the fact that lirbuf->sp and lirbuf->rp are null
         * to ensure our stores are ignored.
         */
        JS_ASSERT(!lirbuf->sp && !lirbuf->rp);

        assm->compile(fragment, tempAlloc, /*optimize*/true verbose_only(, lirbuf->printer));
        if (assm->error() != nanojit::None)
            goto fail;

        /* Success: tear down the writer pipeline. */
        delete lirBufWriter;
#ifdef DEBUG
        delete validate_writer;
#endif
#ifdef NJ_VERBOSE
        debug_only_stmt( if (LogController.lcbits & LC_TMRegexp)
                             delete verbose_filter; )
#endif
        return JS_TRUE;
    fail:
        if (outOfMemory() || OverfullJITCache(tm)) {
            /* Resource exhaustion: flush the JIT cache. */
            delete lirBufWriter;
            // recover profiling data from expiring Fragments
            verbose_only(
                REHashMap::Iter iter(*(tm->reFragments));
                while (iter.next()) {
                    nanojit::Fragment* frag = iter.value();
                    FragProfiling_FragFinalizer(frag, tm);
                }
            )
            FlushJITCache(cx);
        } else {
            /*
             * The regexp itself could not be compiled: make sure the fragment
             * has a guard and flag the regexp so it is not tried again.
             */
            if (!guard) insertGuard(loopLabel, re_chars, re_length);
            re->flags |= JSREG_NOCOMPILE;
            delete lirBufWriter;
        }
#ifdef DEBUG
        delete validate_writer;
#endif
#ifdef NJ_VERBOSE
        debug_only_stmt( if (LogController.lcbits & LC_TMRegexp)
                             delete verbose_filter; )
#endif
        return JS_FALSE;
    }
};

/*
 * Compile a regexp to native code in the given fragment, which must not
 * contain code yet. The regexp source is parsed to an AST first; the AST
 * lives in cx->tempPool, which is restored to its previous mark before
 * returning. Returns JS_FALSE if parsing or native compilation fails.
 */
static inline JSBool
CompileRegExpToNative(JSContext* cx, JSRegExp* re, Fragment* fragment)
{
    CompilerState state;
    RegExpNativeCompiler rc(cx, re, &state, fragment);

    JS_ASSERT(!fragment->code());

    void *mark = JS_ARENA_MARK(&cx->tempPool);
    JSBool ok = JS_FALSE;
    if (CompileRegExpToAST(cx, NULL, re->source, re->flags, state))
        ok = rc.compile();
    JS_ARENA_RELEASE(&cx->tempPool, mark);
    return ok;
}

/*
 * Function type for a compiled native regexp. The two arguments correspond
 * to the parameters that RegExpNativeCompiler::compile names "state" and
 * "cpend".
 */
typedef void *(FASTCALL *NativeRegExp)(REGlobalData*, const jschar *);

/*
 * Return the native entry point for 're', compiling it on first use.
 *
 * The fragment is looked up by the regexp's flags and source text.
 * Compilation is attempted only once per fragment (tracked through
 * recordAttempts). Returns NULL when that compilation fails, or when the
 * fragment has no code because an earlier attempt failed.
 */
static NativeRegExp
GetNativeRegExp(JSContext* cx, JSRegExp* re)
{
    const jschar *srcChars;
    size_t srcLength;
    re->source->getCharsAndLength(srcChars, srcLength);

    Fragment *frag = LookupNativeRegExp(cx, re->flags, srcChars, srcLength);
    JS_ASSERT(frag);

    bool neverAttempted = !frag->code() && frag->recordAttempts == 0;
    if (neverAttempted) {
        frag->recordAttempts++;
        if (!CompileRegExpToNative(cx, re, frag))
            return NULL;
    }

    /* Convert the code pointer to a function pointer through a union. */
    union { NIns *code; NativeRegExp func; } pun;
    pun.code = frag->code();
    return pun.func;
}
#endif

/*
 * Create a new JSRegExp from the source string 'str' with the given flags.
 *
 * The source is parsed to an AST (allocated in cx->tempPool, which is
 * restored before returning) and then emitted as bytecode into the new
 * object. A non-zero 'flat' adds JSREG_FLAT to the flags. 'ts' is passed
 * through to the parser.
 *
 * Returns the new regexp with a reference count of 1, or NULL if parsing,
 * allocation or bytecode emission fails.
 */
JSRegExp *
js_NewRegExp(JSContext *cx, TokenStream *ts,
             JSString *str, uintN flags, JSBool flat)
{
    JSRegExp *regexp = NULL;
    CompilerState state;
    size_t allocSize;
    jsbytecode *progEnd;
    uintN idx;
    void *mark = JS_ARENA_MARK(&cx->tempPool);

    /*
     * Parsing the string as flat is now expressed internally using
     * a flag, so that we keep this information in the JSRegExp, but
     * we keep the 'flat' parameter for now for compatibility.
     */
    if (flat)
        flags |= JSREG_FLAT;
    if (!CompileRegExpToAST(cx, ts, str, flags, state))
        goto out;

    /* Room for the header, the estimated program and the REOP_END byte. */
    allocSize = offsetof(JSRegExp, program) + state.progLength + 1;
    regexp = (JSRegExp *) cx->malloc(allocSize);
    if (!regexp)
        goto out;

    regexp->nrefs = 1;
    JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
    regexp->classCount = state.classCount;
    regexp->classList = NULL;
    if (regexp->classCount) {
        regexp->classList = (RECharSet *)
            cx->malloc(regexp->classCount * sizeof(RECharSet));
        if (!regexp->classList)
            goto destroy;
        /* Mark every class as not yet converted. */
        for (idx = 0; idx < regexp->classCount; idx++)
            regexp->classList[idx].converted = JS_FALSE;
    }

    /* Compile the bytecode version. */
    progEnd = EmitREBytecode(&state, regexp, state.treeDepth, regexp->program,
                             state.result);
    if (!progEnd)
        goto destroy;
    *progEnd++ = REOP_END;

    /*
     * Check whether size was overestimated and shrink using realloc.
     * This is safe since no pointers to newly parsed regexp or its parts
     * besides regexp exist here.
     */
    if ((size_t)(progEnd - regexp->program) != state.progLength + 1) {
        JS_ASSERT((size_t)(progEnd - regexp->program) < state.progLength + 1);
        allocSize = offsetof(JSRegExp, program) + (progEnd - regexp->program);
        JSRegExp *shrunk = (JSRegExp *) cx->realloc(regexp, allocSize);
        /* If shrinking fails, the original (larger) block is kept. */
        if (shrunk)
            regexp = shrunk;
    }

    regexp->flags = uint16(flags);
    regexp->parenCount = state.parenCount;
    regexp->source = str;
    goto out;

destroy:
    js_DestroyRegExp(cx, regexp);
    regexp = NULL;

out:
    JS_ARENA_RELEASE(&cx->tempPool, mark);
    return regexp;
}

/*
 * Create a new JSRegExp from source 'str' and an optional flag string 'opt'.
 *
 * Each character of 'opt' must be one of 'g', 'i', 'm' or 'y' and may
 * appear at most once. An unknown or repeated flag reports
 * JSMSG_BAD_REGEXP_FLAG for that character and returns NULL. Otherwise the
 * collected flags are passed on to js_NewRegExp.
 */
JSRegExp *
js_NewRegExpOpt(JSContext *cx, JSString *str, JSString *opt, JSBool flat)
{
    uintN flags = 0;

    if (opt) {
        const jschar *optChars;
        size_t optLength;
        opt->getCharsAndLength(optChars, optLength);

        for (size_t idx = 0; idx < optLength; idx++) {
            uintN bit = 0;
            bool recognized = true;

            switch (optChars[idx]) {
              case 'g':
                bit = JSREG_GLOB;
                break;
              case 'i':
                bit = JSREG_FOLD;
                break;
              case 'm':
                bit = JSREG_MULTILINE;
                break;
              case 'y':
                bit = JSREG_STICKY;
                break;
              default:
                recognized = false;
                break;
            }

            /* Reject unknown flags and flags given more than once. */
            if (!recognized || (flags & bit)) {
                char charBuf[2];
                charBuf[0] = (char)optChars[idx];
                charBuf[1] = '\0';
                JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR,
                                             js_GetErrorMessage, NULL,
                                             JSMSG_BAD_REGEXP_FLAG, charBuf);
                return NULL;
            }
            flags |= bit;
        }
    }
    return js_NewRegExp(cx, NULL, str, flags, flat);
}

/*
 * Save the current state of the match - the position in the input
 * text as well as the position in the bytecode. The state of any
 * parent expressions is also saved (preceding state).
 * Contents of parenCount parentheses from parenIndex are also saved.
 *
 * A backtrack record is variable-sized: a REBackTrackData header, followed
 * by a copy of the first stateStackTop entries of gData->stateStack,
 * followed by a copy of parenCount captures starting at
 * x->parens[parenIndex]. After being saved, those captures are reset to
 * "unset" (index == -1) in x.
 *
 * The record is placed directly after the current top record; the backtrack
 * stack is grown from cx->regexpPool when it does not fit.
 *
 * Returns the new record, or NULL after reporting the failure and setting
 * gData->ok to JS_FALSE if the stack could not be grown.
 */
static REBackTrackData *
PushBackTrackState(REGlobalData *gData, REOp op,
                   jsbytecode *target, REMatchState *x, const jschar *cp,
                   size_t parenIndex, size_t parenCount)
{
    size_t i;
    /* The new record starts where the current top record (cursz bytes) ends. */
    REBackTrackData *result =
        (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);

    /* Total size of the new record: header + saved states + saved captures. */
    size_t sz = sizeof(REBackTrackData) +
                gData->stateStackTop * sizeof(REProgState) +
                parenCount * sizeof(RECapture);

    /* btincr > 0 means the new record would run past the end of the stack. */
    ptrdiff_t btsize = gData->backTrackStackSize;
    ptrdiff_t btincr = ((char *)result + sz) -
                       ((char *)gData->backTrackStack + btsize);

    re_debug("\tBT_Push: %lu,%lu",
             (unsigned long) parenIndex, (unsigned long) parenCount);

    if (btincr > 0) {
        /*
         * Remember result as an offset: growing may move the stack, after
         * which result is recomputed from the new base.
         */
        ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;

        /* NOTE(review): presumably rounds btincr up to a multiple of btsize. */
        btincr = JS_ROUNDUP(btincr, btsize);
        JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
                           &gData->cx->regexpPool, btsize, btincr);
        if (!gData->backTrackStack) {
            js_ReportOutOfScriptQuota(gData->cx);
            gData->ok = JS_FALSE;
            return NULL;
        }
        gData->backTrackStackSize = btsize + btincr;
        result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
    }
    gData->backTrackSP = result;
    /* Each record stores the size of the record below it; cursz tracks ours. */
    result->sz = gData->cursz;
    gData->cursz = sz;

    result->backtrack_op = op;
    result->backtrack_pc = target;
    result->cp = cp;
    result->parenCount = parenCount;
    result->parenIndex = parenIndex;

    /* Copy the program-state stack into the space right after the header. */
    result->saveStateStackTop = gData->stateStackTop;
    JS_ASSERT(gData->stateStackTop);
    memcpy(result + 1, gData->stateStack,
           sizeof(REProgState) * result->saveStateStackTop);

    if (parenCount != 0) {
        /* Saved captures follow the saved program states. */
        memcpy((char *)(result + 1) +
               sizeof(REProgState) * result->saveStateStackTop,
               &x->parens[parenIndex],
               sizeof(RECapture) * parenCount);
        /* Mark the live captures as unset now that they have been saved. */
        for (i = 0; i != parenCount; i++)
            x->parens[parenIndex + i].index = -1;
    }

    return result;
}


/*
 *   Consecutive literal characters.
 *
 *   NOTE: this case-sensitive matcher is compiled out by the #if 0 below.
 *   SimpleMatch performs the equivalent comparison inline in its REOP_FLAT
 *   case; only the case-insensitive variant, FlatNIMatcher, is still built.
 */
#if 0
static REMatchState *
FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
             size_t length)
{
    size_t i;
    if (length > gData->cpend - x->cp)
        return NULL;
    for (i = 0; i != length; i++) {
        if (matchChars[i] != x->cp[i])
            return NULL;
    }
    x->cp += length;
    return x;
}
#endif

/*
 * Case-insensitive match of length literal characters from matchChars
 * against the input at x->cp. Characters are compared after upcase().
 *
 * On success x->cp is advanced by length and x is returned. Returns NULL,
 * leaving x untouched, if fewer than length characters remain or any
 * character differs.
 */
static JS_ALWAYS_INLINE REMatchState *
FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
              size_t length)
{
    JS_ASSERT(gData->cpend >= x->cp);

    size_t remaining = (size_t)(gData->cpend - x->cp);
    if (remaining < length)
        return NULL;

    const jschar *input = x->cp;
    const jschar *literal = matchChars;
    const jschar *literalEnd = matchChars + length;
    for (; literal != literalEnd; ++literal, ++input) {
        if (upcase(*literal) != upcase(*input))
            return NULL;
    }

    x->cp += length;
    return x;
}

/*
 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
 * 2. If E is not a character then go to step 6.
 * 3. Let ch be E's character.
 * 4. Let A be a one-element RECharSet containing the character ch.
 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
 * 6. E must be an integer. Let n be that integer.
 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
 * 8. Return an internal Matcher closure that takes two arguments, a State x
 *    and a Continuation c, and performs the following:
 *     1. Let cap be x's captures internal array.
 *     2. Let s be cap[n].
 *     3. If s is undefined, then call c(x) and return its result.
 *     4. Let e be x's endIndex.
 *     5. Let len be s's length.
 *     6. Let f be e+len.
 *     7. If f>InputLength, return failure.
 *     8. If there exists an integer i between 0 (inclusive) and len (exclusive)
 *        such that Canonicalize(s[i]) is not the same character as
 *        Canonicalize(Input [e+i]), then return failure.
 *     9. Let y be the State (f, cap).
 *     10. Call c(y) and return its result.
 */
/*
 * Match a backreference to capture group parenIndex at x->cp.
 *
 * If the group has not captured anything (index == -1) the backreference
 * matches the empty string and x is returned unchanged. Otherwise the
 * captured text must appear at x->cp, compared through upcase() when the
 * regexp has JSREG_FOLD set; on success x->cp is advanced past it and x is
 * returned. Returns NULL, leaving x untouched, if the input is too short or
 * the text differs.
 */
static REMatchState *
BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
{
    size_t len, i;
    const jschar *parenContent;
    RECapture *cap = &x->parens[parenIndex];

    if (cap->index == -1)
        return x;

    len = cap->length;
    /*
     * Compare lengths rather than computing x->cp + len: that pointer could
     * lie beyond the end of the input buffer, which is undefined behaviour
     * and may wrap. This is the same test FlatNIMatcher uses.
     */
    JS_ASSERT(gData->cpend >= x->cp);
    if (len > (size_t)(gData->cpend - x->cp))
        return NULL;

    parenContent = &gData->cpbegin[cap->index];
    if (gData->regexp->flags & JSREG_FOLD) {
        for (i = 0; i < len; i++) {
            if (upcase(parenContent[i]) != upcase(x->cp[i]))
                return NULL;
        }
    } else {
        for (i = 0; i < len; i++) {
            if (parenContent[i] != x->cp[i])
                return NULL;
        }
    }
    x->cp += len;
    return x;
}


/*
 * Set the bit for character c in the bitmap of cs. The bitmap stores eight
 * characters per byte, least significant bit first; c must not exceed
 * cs->length.
 */
static void
AddCharacterToCharSet(RECharSet *cs, jschar c)
{
    uintN whichByte = (uintN)(c >> 3);

    JS_ASSERT(c <= cs->length);
    cs->u.bits[whichByte] |= 1 << (c & 0x7);
}


/*
 * Set the bits for every character from c1 to c2 (both inclusive) in the
 * bitmap of cs. Requires c1 <= c2 <= cs->length.
 */
static void
AddCharacterRangeToCharSet(RECharSet *cs, uintN c1, uintN c2)
{
    uintN firstByte = c1 >> 3;
    uintN lastByte = c2 >> 3;
    uintN firstBit, lastBit, mid;

    JS_ASSERT(c2 <= cs->length && c1 <= c2);

    firstBit = c1 & 0x7;
    lastBit = c2 & 0x7;

    if (firstByte == lastByte) {
        /* Whole range lives in one byte: a run of bits firstBit..lastBit. */
        cs->u.bits[firstByte] |=
            ((uint8)0xFF >> (7 - (lastBit - firstBit))) << firstBit;
        return;
    }

    /* Upper part of the first byte, every byte in between, lower part of the last. */
    cs->u.bits[firstByte] |= 0xFF << firstBit;
    for (mid = firstByte + 1; mid < lastByte; mid++)
        cs->u.bits[mid] = 0xFF;
    cs->u.bits[lastByte] |= (uint8)0xFF >> (7 - lastBit);
}

/*
 * A contiguous range of characters, start to end with both ends included
 * (the bounds are passed as-is to AddCharacterRangeToCharSet).
 */
struct CharacterRange {
    jschar start;   /* first character in the range */
    jschar end;     /* last character in the range */
};

/*
 * The following characters are taken from the ECMA-262 standard, section 7.2
 * and 7.3, and the Unicode 3 standard, Table 6-1.
 *
 * ProcessCharSet uses this table for \s and \S inside a character class.
 * Entries must stay in ascending order and must not overlap, the first entry
 * must not start at 0 and the last must not end at 0xFFFF:
 * AddInvertedCharacterRanges fills the gaps between consecutive entries
 * using start - 1 and end + 1.
 */
static const CharacterRange WhiteSpaceRanges[] = {
    /* TAB, LF, VT, FF, CR */
    { 0x0009, 0x000D },
    /* SPACE */
    { 0x0020, 0x0020 },
    /* NO-BREAK SPACE */
    { 0x00A0, 0x00A0 },
    /*
     * EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM
     * SPACE, SIX-PER-EM SPACE, FIGURE SPACE, PUNCTUATION SPACE, THIN SPACE,
     * HAIR SPACE, ZERO WIDTH SPACE
     */
    { 0x2000, 0x200B },
    /* LS, PS */
    { 0x2028, 0x2029 },
    /* NARROW NO-BREAK SPACE */
    { 0x202F, 0x202F },
    /* IDEOGRAPHIC SPACE */
    { 0x3000, 0x3000 }
};

/*
 * ECMA-262 standard, section 15.10.2.6.
 *
 * Word characters: digits, upper-case letters, underscore and lower-case
 * letters. ProcessCharSet uses this table for \w and \W inside a character
 * class. Entries are in ascending order and non-overlapping, which
 * AddInvertedCharacterRanges depends on when it fills the gaps between them.
 */
static const CharacterRange WordRanges[] = {
    { jschar('0'), jschar('9') },
    { jschar('A'), jschar('Z') },
    { jschar('_'), jschar('_') },
    { jschar('a'), jschar('z') }
};

/*
 * Add every range in the array [range, end) to charSet.
 */
static void
AddCharacterRanges(RECharSet *charSet,
                   const CharacterRange *range,
                   const CharacterRange *end)
{
    while (range < end) {
        AddCharacterRangeToCharSet(charSet, range->start, range->end);
        ++range;
    }
}

/*
 * Add to charSet every character NOT covered by the ranges in the array
 * [range, end): the gap before each range, then everything from the last
 * range's end + 1 up to charSet->length.
 */
static void
AddInvertedCharacterRanges(RECharSet *charSet,
                           const CharacterRange *range,
                           const CharacterRange *end)
{
    uint16 gapStart = 0;

    while (range < end) {
        AddCharacterRangeToCharSet(charSet, gapStart, range->start - 1);
        gapStart = range->end + 1;
        ++range;
    }
    AddCharacterRangeToCharSet(charSet, gapStart, charSet->length);
}

/*
 * Compile the source of the class into a RECharSet.
 *
 * On entry charSet is unconverted: charSet->u.src gives the position and
 * length of the class body (the text between '[' and ']') inside re->source.
 * A zeroed bitmap of (charSet->length >> 3) + 1 bytes is allocated and one
 * bit is set for every character the class lists, honouring escapes, ranges
 * and, when re->flags has JSREG_FOLD, both cases of each character. A leading
 * '^' is only skipped here; negation is tracked separately in charSet->sense.
 *
 * Returns JS_TRUE on success, with charSet->converted set and
 * charSet->u.bits owning the bitmap. On allocation failure, reports out of
 * memory and returns JS_FALSE; charSet is then left untouched (still
 * unconverted, u.src intact) so it is never seen as converted with a NULL
 * bitmap.
 */
static JSBool
ProcessCharSet(JSContext *cx, JSRegExp *re, RECharSet *charSet)
{
    const jschar *src, *end;
    JSBool inRange = JS_FALSE;
    jschar rangeStart = 0;
    uintN byteLength, n;
    jschar c, thisCh;
    intN nDigits, i;
    uint8 *bits;

    JS_ASSERT(!charSet->converted);
    /*
     * Assert that startIndex and length points to chars inside [] inside
     * source string.
     */
    JS_ASSERT(1 <= charSet->u.src.startIndex);
    JS_ASSERT(charSet->u.src.startIndex < re->source->length());
    JS_ASSERT(charSet->u.src.length <= re->source->length()
                                       - 1 - charSet->u.src.startIndex);

    src = re->source->chars() + charSet->u.src.startIndex;
    end = src + charSet->u.src.length;
    JS_ASSERT(src[-1] == '[');
    JS_ASSERT(end[0] == ']');

    /*
     * Allocate into a local first. Only once the allocation has succeeded do
     * we replace the source description in charSet->u and mark the set as
     * converted; a failed conversion must not leave a "converted" set whose
     * bitmap pointer is NULL for later matches to dereference.
     */
    byteLength = (charSet->length >> 3) + 1;
    bits = (uint8 *)cx->malloc(byteLength);
    if (!bits) {
        JS_ReportOutOfMemory(cx);
        return JS_FALSE;
    }
    memset(bits, 0, byteLength);
    charSet->u.bits = bits;
    charSet->converted = JS_TRUE;

    if (src == end)
        return JS_TRUE;

    if (*src == '^') {
        JS_ASSERT(charSet->sense == JS_FALSE);
        ++src;
    } else {
        JS_ASSERT(charSet->sense == JS_TRUE);
    }

    while (src != end) {
        switch (*src) {
          case '\\':
            ++src;
            c = *src++;
            switch (c) {
              case 'b':
                thisCh = 0x8;
                break;
              case 'f':
                thisCh = 0xC;
                break;
              case 'n':
                thisCh = 0xA;
                break;
              case 'r':
                thisCh = 0xD;
                break;
              case 't':
                thisCh = 0x9;
                break;
              case 'v':
                thisCh = 0xB;
                break;
              case 'c':
                if (src < end && JS_ISWORD(*src)) {
                    thisCh = (jschar)(*src++ & 0x1F);
                } else {
                    /* Not a control escape: treat the '\' as a literal. */
                    --src;
                    thisCh = '\\';
                }
                break;
              case 'x':
                nDigits = 2;
                goto lexHex;
              case 'u':
                nDigits = 4;
            lexHex:
                n = 0;
                for (i = 0; (i < nDigits) && (src < end); i++) {
                    uintN digit;
                    c = *src++;
                    if (!isASCIIHexDigit(c, &digit)) {
                        /*
                         * Back off to accepting the original '\'
                         * as a literal
                         */
                        src -= i + 1;
                        n = '\\';
                        break;
                    }
                    n = (n << 4) | digit;
                }
                thisCh = (jschar)n;
                break;
              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
                /*
                 *  This is a non-ECMA extension - decimal escapes (in this
                 *  case, octal!) are supposed to be an error inside class
                 *  ranges, but supported here for backwards compatibility.
                 */
                n = JS7_UNDEC(c);
                c = *src;
                if ('0' <= c && c <= '7') {
                    src++;
                    n = 8 * n + JS7_UNDEC(c);
                    c = *src;
                    if ('0' <= c && c <= '7') {
                        src++;
                        i = 8 * n + JS7_UNDEC(c);
                        /* A third digit is only used if the value fits in a byte. */
                        if (i <= 0377)
                            n = i;
                        else
                            src--;
                    }
                }
                thisCh = (jschar)n;
                break;

              case 'd':
                AddCharacterRangeToCharSet(charSet, '0', '9');
                continue;   /* don't need range processing */
              case 'D':
                AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
                AddCharacterRangeToCharSet(charSet,
                                           (jschar)('9' + 1),
                                           (jschar)charSet->length);
                continue;
              case 's':
                AddCharacterRanges(charSet, WhiteSpaceRanges,
                                   WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges));
                continue;
              case 'S':
                AddInvertedCharacterRanges(charSet, WhiteSpaceRanges,
                                           WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges));
                continue;
              case 'w':
                AddCharacterRanges(charSet, WordRanges,
                                   WordRanges + JS_ARRAY_LENGTH(WordRanges));
                continue;
              case 'W':
                AddInvertedCharacterRanges(charSet, WordRanges,
                                           WordRanges + JS_ARRAY_LENGTH(WordRanges));
                continue;
              default:
                thisCh = c;
                break;

            }
            break;

          default:
            thisCh = *src++;
            break;

        }
        if (inRange) {
            /* thisCh closes the range opened by rangeStart. */
            if (re->flags & JSREG_FOLD) {
                int code;

                JS_ASSERT(rangeStart <= thisCh);
                for (code = rangeStart; code <= thisCh; code++) {
                    jschar uch, dch;

                    AddCharacterToCharSet(charSet, jschar(code));
                    uch = jschar(upcase(code));
                    dch = inverse_upcase(jschar(code));
                    if (code != uch)
                        AddCharacterToCharSet(charSet, uch);
                    if (code != dch)
                        AddCharacterToCharSet(charSet, dch);
                }
            } else {
                AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
            }
            inRange = JS_FALSE;
        } else {
            if (re->flags & JSREG_FOLD) {
                AddCharacterToCharSet(charSet, jschar(upcase(thisCh)));
                AddCharacterToCharSet(charSet, inverse_upcase(thisCh));
            } else {
                AddCharacterToCharSet(charSet, thisCh);
            }
            /*
             * A '-' starts a range only if at least one more character
             * follows it; a '-' in last position is handled as a literal by
             * the next iteration.
             */
            if (src < end - 1) {
                if (*src == '-') {
                    ++src;
                    inRange = JS_TRUE;
                    rangeStart = thisCh;
                }
            }
        }
    }
    return JS_TRUE;
}

/*
 * Convert charSet for the regexp being executed in gData. On failure the
 * match is flagged as failed by clearing gData->ok. Returns the result of
 * ProcessCharSet.
 */
static inline JSBool
MatcherProcessCharSet(REGlobalData *gData, RECharSet *charSet) {
    JSBool converted = ProcessCharSet(gData->cx, gData->regexp, charSet);

    if (converted)
        return converted;
    gData->ok = JS_FALSE;
    return converted;
}

/*
 * Drop one reference to re. When the reference count reaches zero, free the
 * bitmap of every converted character class, the class list itself and
 * finally re.
 */
void
js_DestroyRegExp(JSContext *cx, JSRegExp *re)
{
    if (JS_ATOMIC_DECREMENT(&re->nrefs) != 0)
        return;

    if (re->classList) {
        for (uintN k = 0; k < re->classCount; k++) {
            RECharSet *cs = &re->classList[k];

            /* Only converted sets own a bitmap. */
            if (cs->converted)
                cx->free(cs->u.bits);
            cs->u.bits = NULL;
        }
        cx->free(re->classList);
    }
    cx->free(re);
}

/*
 * Double the capacity of gData->stateStack, growing it in cx->regexpPool.
 * Returns JS_TRUE on success. On failure the error is reported, gData->ok
 * is cleared and JS_FALSE is returned.
 */
static JSBool
ReallocStateStack(REGlobalData *gData)
{
    size_t oldLimit = gData->stateStackLimit;
    size_t oldBytes = oldLimit * sizeof(REProgState);

    /* Grow by the current size, i.e. double the allocation. */
    JS_ARENA_GROW_CAST(gData->stateStack, REProgState *,
                       &gData->cx->regexpPool, oldBytes, oldBytes);
    if (gData->stateStack) {
        gData->stateStackLimit = oldLimit + oldLimit;
        return JS_TRUE;
    }

    js_ReportOutOfScriptQuota(gData->cx);
    gData->ok = JS_FALSE;
    return JS_FALSE;
}

/*
 * Advance the top of the state stack of data by one entry. If the new top
 * reaches stateStackLimit, the stack is grown with ReallocStateStack.
 *
 * NB: when growing fails this macro executes "return NULL" in the function
 * that uses it, so it may only appear in functions returning a pointer.
 */
#define PUSH_STATE_STACK(data)                                                \
    JS_BEGIN_MACRO                                                            \
        ++(data)->stateStackTop;                                              \
        if ((data)->stateStackTop == (data)->stateStackLimit &&               \
            !ReallocStateStack((data))) {                                     \
            return NULL;                                                      \
        }                                                                     \
    JS_END_MACRO

/*
 * Apply the single "simple" opcode op against the input at x->cp to see if
 * it is going to match or fail.
 *
 * On entry *startpc points just past the opcode itself, i.e. at its operands
 * if it has any. If the op matches, x is returned and *startpc is updated to
 * point at the next op; x->cp is advanced past the matched text only if
 * updatecp is true, otherwise it is restored to its value on entry. If the
 * op does not match, NULL is returned, x->cp keeps its value on entry and
 * *startpc is left unchanged.
 */
static JS_ALWAYS_INLINE REMatchState *
SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
            jsbytecode **startpc, JSBool updatecp)
{
    REMatchState *result = NULL;
    jschar matchCh;
    size_t parenIndex;
    size_t offset, length, index;
    jsbytecode *pc = *startpc;  /* pc has already been incremented past op */
    jschar *source;
    const jschar *startcp = x->cp;
    jschar ch;
    RECharSet *charSet;

#ifdef REGEXP_DEBUG
    const char *opname = reop_names[op];
    re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
             gData->stateStackTop * 2, "", opname);
#endif
    switch (op) {
      case REOP_EMPTY:
        result = x;
        break;
      case REOP_BOL:
        /* Matches at the start of input, or after a line terminator in multiline mode. */
        if (x->cp != gData->cpbegin) {
            if (!gData->cx->regExpStatics.multiline &&
                !(gData->regexp->flags & JSREG_MULTILINE)) {
                break;
            }
            if (!RE_IS_LINE_TERM(x->cp[-1]))
                break;
        }
        result = x;
        break;
      case REOP_EOL:
        /* Matches at the end of input, or before a line terminator in multiline mode. */
        if (x->cp != gData->cpend) {
            if (!gData->cx->regExpStatics.multiline &&
                !(gData->regexp->flags & JSREG_MULTILINE)) {
                break;
            }
            if (!RE_IS_LINE_TERM(*x->cp))
                break;
        }
        result = x;
        break;
      case REOP_WBDRY:
        /* Matches when exactly one of the neighbouring characters is a word character. */
        if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
            !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
            result = x;
        }
        break;
      case REOP_WNONBDRY:
        if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
            (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
            result = x;
        }
        break;
      case REOP_DOT:
        if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_DIGIT:
        if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_NONDIGIT:
        if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_ALNUM:
        if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_NONALNUM:
        if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_SPACE:
        if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_NONSPACE:
        if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_BACKREF:
        pc = ReadCompactIndex(pc, &parenIndex);
        JS_ASSERT(parenIndex < gData->regexp->parenCount);
        result = BackrefMatcher(gData, x, parenIndex);
        break;
      case REOP_FLAT:
        /* Operands: offset and length of the literal text inside the regexp source. */
        pc = ReadCompactIndex(pc, &offset);
        JS_ASSERT(offset < gData->regexp->source->length());
        pc = ReadCompactIndex(pc, &length);
        JS_ASSERT(1 <= length);
        JS_ASSERT(length <= gData->regexp->source->length() - offset);
        if (length <= (size_t)(gData->cpend - x->cp)) {
            source = gData->regexp->source->chars() + offset;
            re_debug_chars(source, length);
            for (index = 0; index != length; index++) {
                /* x->cp has not been modified yet, so returning directly is safe. */
                if (source[index] != x->cp[index])
                    return NULL;
            }
            x->cp += length;
            result = x;
        }
        break;
      case REOP_FLAT1:
        /* Operand: the single character to match, stored in one bytecode byte. */
        matchCh = *pc++;
        re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
        if (x->cp != gData->cpend && *x->cp == matchCh) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_FLATi:
        pc = ReadCompactIndex(pc, &offset);
        JS_ASSERT(offset < gData->regexp->source->length());
        pc = ReadCompactIndex(pc, &length);
        JS_ASSERT(1 <= length);
        JS_ASSERT(length <= gData->regexp->source->length() - offset);
        source = gData->regexp->source->chars();
        result = FlatNIMatcher(gData, x, source + offset, length);
        break;
      case REOP_FLAT1i:
        matchCh = *pc++;
        if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_UCFLAT1:
        /* Operand: the character to match, read with GET_ARG (ARG_LEN bytes). */
        matchCh = GET_ARG(pc);
        re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
        pc += ARG_LEN;
        if (x->cp != gData->cpend && *x->cp == matchCh) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_UCFLAT1i:
        matchCh = GET_ARG(pc);
        pc += ARG_LEN;
        if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
            result = x;
            result->cp++;
        }
        break;
      case REOP_CLASS:
        /* Operand: index into the regexp's classList; the set must already be converted. */
        pc = ReadCompactIndex(pc, &index);
        JS_ASSERT(index < gData->regexp->classCount);
        if (x->cp != gData->cpend) {
            charSet = &gData->regexp->classList[index];
            JS_ASSERT(charSet->converted);
            ch = *x->cp;
            /* index is reused here as the byte offset of ch in the bitmap. */
            index = ch >> 3;
            if (ch <= charSet->length &&
                (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
                result = x;
                result->cp++;
            }
        }
        break;
      case REOP_NCLASS:
        /* Negated class: matches characters outside the bitmap or with a clear bit. */
        pc = ReadCompactIndex(pc, &index);
        JS_ASSERT(index < gData->regexp->classCount);
        if (x->cp != gData->cpend) {
            charSet = &gData->regexp->classList[index];
            JS_ASSERT(charSet->converted);
            ch = *x->cp;
            index = ch >> 3;
            if (ch > charSet->length ||
                !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
                result = x;
                result->cp++;
            }
        }
        break;

      default:
        JS_ASSERT(JS_FALSE);
    }
    if (result) {
        /* Matched: commit the new pc, and undo the cp advance if not requested. */
        if (!updatecp)
            x->cp = startcp;
        *startpc = pc;
        re_debug(" * ");
        return result;
    }
    /* No match: restore the input position; *startpc is left as it was. */
    x->cp = startcp;
    return NULL;
}

/*
 * Interpret the compiled program of gData->regexp against the input,
 * starting at x->cp.
 *
 * Simple ops are delegated to SimpleMatch(); every other op is handled in
 * the switch below.  Per-op bookkeeping (quantifier min/max, assertion
 * restore points, parenSoFar) lives in gData->stateStack, and alternatives
 * to retry after a failure are recorded with PushBackTrackState().
 *
 * Returns the match state once REOP_END is reached.  Returns NULL when:
 *   - a leading simple op cannot be matched at any remaining position;
 *   - an op fails and no backtrack entry is left (gData->cursz == 0);
 *   - PushBackTrackState() or MatcherProcessCharSet() fails;
 *   - JS_CHECK_OPERATION_LIMIT() fails or gData->backTrackLimit is reached
 *     (both of these set gData->ok to JS_FALSE here).
 */
static JS_ALWAYS_INLINE REMatchState *
ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
{
    REMatchState *result = NULL;
    REBackTrackData *backTrackData;
    jsbytecode *nextpc, *testpc;
    REOp nextop;
    RECapture *cap;
    REProgState *curState;
    const jschar *startcp;
    size_t parenIndex, k;
    /* One more than the highest capture index opened so far on this path. */
    size_t parenSoFar = 0;

    jschar matchCh1, matchCh2;
    RECharSet *charSet;

    JSBool anchor;
    jsbytecode *pc = gData->regexp->program;
    REOp op = (REOp) *pc++;

    /*
     * If the first node is a simple match, step the index into the string
     * until that match is made, or fail if it can't be found at all.
     * Every position stepped over is counted in gData->skipped, which the
     * callers add to the start offset to locate the beginning of the match.
     * Sticky regexps must match at the starting position, so they skip this.
     */
    if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
        anchor = JS_FALSE;
        while (x->cp <= gData->cpend) {
            nextpc = pc;    /* reset back to start each time */
            result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
            if (result) {
                anchor = JS_TRUE;
                x = result;
                pc = nextpc;    /* accept skip to next opcode */
                op = (REOp) *pc++;
                JS_ASSERT(op < REOP_LIMIT);
                break;
            }
            gData->skipped++;
            x->cp++;
        }
        if (!anchor)
            goto bad;
    }

    /*
     * Main dispatch loop.  Inside the switch, `continue` means "op and pc
     * are already set up, dispatch again", while `break` (or a jump to
     * break_switch) falls to the code after the switch, which either
     * advances to the next op on success or backtracks on failure.
     */
    for (;;) {
#ifdef REGEXP_DEBUG
        const char *opname = reop_names[op];
        re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
                 gData->stateStackTop * 2, "", opname);
#endif
        if (REOP_IS_SIMPLE(op)) {
            result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
        } else {
            /* Slot that the next PUSH_STATE_STACK() will make live. */
            curState = &gData->stateStack[gData->stateStackTop];
            switch (op) {
              case REOP_END:
                goto good;
              case REOP_ALTPREREQ2:
                /*
                 * Operands: <offset to next op> <char> <class index>.
                 * Enter the alternation only if the current character is
                 * the given char or passes the class test below; otherwise
                 * fail without trying any alternative.
                 */
                nextpc = pc + GET_OFFSET(pc);   /* start of next op */
                pc += ARG_LEN;
                matchCh2 = GET_ARG(pc);
                pc += ARG_LEN;
                k = GET_ARG(pc);
                pc += ARG_LEN;

                if (x->cp != gData->cpend) {
                    if (*x->cp == matchCh2)
                        goto doAlt;

                    charSet = &gData->regexp->classList[k];
                    if (!charSet->converted && !MatcherProcessCharSet(gData, charSet))
                        goto bad;
                    matchCh1 = *x->cp;
                    /* k is reused here as the byte index into the bitmap. */
                    k = matchCh1 >> 3;
                    if ((matchCh1 > charSet->length ||
                         !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
                        charSet->sense) {
                        goto doAlt;
                    }
                }
                result = NULL;
                break;

              case REOP_ALTPREREQ:
                /*
                 * Operands: <offset to next op> <char1> <char2>.  Fail at
                 * once unless the current character is one of the two.
                 */
                nextpc = pc + GET_OFFSET(pc);   /* start of next op */
                pc += ARG_LEN;
                matchCh1 = GET_ARG(pc);
                pc += ARG_LEN;
                matchCh2 = GET_ARG(pc);
                pc += ARG_LEN;
                if (x->cp == gData->cpend ||
                    (*x->cp != matchCh1 && *x->cp != matchCh2)) {
                    result = NULL;
                    break;
                }
                /* else fall thru... */

              case REOP_ALT:
              doAlt:
                nextpc = pc + GET_OFFSET(pc);   /* start of next alternate */
                pc += ARG_LEN;                  /* start of this alternate */
                curState->parenSoFar = parenSoFar;
                PUSH_STATE_STACK(gData);
                op = (REOp) *pc++;
                startcp = x->cp;
                if (REOP_IS_SIMPLE(op)) {
                    /*
                     * Try a leading simple op right away: if it fails, go
                     * straight to the next alternate without recording a
                     * backtrack entry for this one.
                     */
                    if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
                        op = (REOp) *nextpc++;
                        pc = nextpc;
                        continue;
                    }
                    result = x;
                    op = (REOp) *pc++;
                }
                /* Remember the next alternate in case this one fails later. */
                nextop = (REOp) *nextpc++;
                if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
                    goto bad;
                continue;

              /*
               * Occurs at (successful) end of REOP_ALT,
               */
              case REOP_JUMP:
                /*
                 * If we have not gotten a result here, it is because of an
                 * empty match.  Do the same thing REOP_EMPTY would do.
                 */
                if (!result)
                    result = x;

                --gData->stateStackTop;
                pc += GET_OFFSET(pc);
                op = (REOp) *pc++;
                continue;

              /*
               * Occurs at last (successful) end of REOP_ALT,
               */
              case REOP_ENDALT:
                /*
                 * If we have not gotten a result here, it is because of an
                 * empty match.  Do the same thing REOP_EMPTY would do.
                 */
                if (!result)
                    result = x;

                --gData->stateStackTop;
                op = (REOp) *pc++;
                continue;

              case REOP_LPAREN:
                /* Open capture parenIndex at the current input position. */
                pc = ReadCompactIndex(pc, &parenIndex);
                re_debug("[ %lu ]", (unsigned long) parenIndex);
                JS_ASSERT(parenIndex < gData->regexp->parenCount);
                if (parenIndex + 1 > parenSoFar)
                    parenSoFar = parenIndex + 1;
                x->parens[parenIndex].index = x->cp - gData->cpbegin;
                x->parens[parenIndex].length = 0;
                op = (REOp) *pc++;
                continue;

              case REOP_RPAREN:
              {
                ptrdiff_t delta;

                /* Close capture parenIndex; a negative span is clamped to 0. */
                pc = ReadCompactIndex(pc, &parenIndex);
                JS_ASSERT(parenIndex < gData->regexp->parenCount);
                cap = &x->parens[parenIndex];
                delta = x->cp - (gData->cpbegin + cap->index);
                cap->length = (delta < 0) ? 0 : (size_t) delta;
                op = (REOp) *pc++;

                if (!result)
                    result = x;
                continue;
              }
              case REOP_ASSERT:
                nextpc = pc + GET_OFFSET(pc);  /* start of term after ASSERT */
                pc += ARG_LEN;                 /* start of ASSERT child */
                op = (REOp) *pc++;
                testpc = pc;
                /*
                 * Quick reject: probe a leading simple op without moving
                 * x->cp (updatecp is JS_FALSE) or pc (testpc is a copy).
                 */
                if (REOP_IS_SIMPLE(op) &&
                    !SimpleMatch(gData, x, op, &testpc, JS_FALSE)) {
                    result = NULL;
                    break;
                }
                /*
                 * Record the backtrack stack position and input index so
                 * that REOP_ASSERTTEST can restore both once the assertion
                 * body has run.
                 */
                curState->u.assertion.top =
                    (char *)gData->backTrackSP - (char *)gData->backTrackStack;
                curState->u.assertion.sz = gData->cursz;
                curState->index = x->cp - gData->cpbegin;
                curState->parenSoFar = parenSoFar;
                PUSH_STATE_STACK(gData);
                if (!PushBackTrackState(gData, REOP_ASSERTTEST,
                                        nextpc, x, x->cp, 0, 0)) {
                    goto bad;
                }
                continue;

              case REOP_ASSERT_NOT:
                nextpc = pc + GET_OFFSET(pc);
                pc += ARG_LEN;
                op = (REOp) *pc++;
                testpc = pc;
                /*
                 * Quick reject: if the whole body is one simple op (it is
                 * followed directly by REOP_ASSERTNOTTEST) and that op
                 * matches, the negative assertion fails.
                 */
                if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
                    SimpleMatch(gData, x, op, &testpc, JS_FALSE) &&
                    *testpc == REOP_ASSERTNOTTEST) {
                    result = NULL;
                    break;
                }
                curState->u.assertion.top
                    = (char *)gData->backTrackSP -
                      (char *)gData->backTrackStack;
                curState->u.assertion.sz = gData->cursz;
                curState->index = x->cp - gData->cpbegin;
                curState->parenSoFar = parenSoFar;
                PUSH_STATE_STACK(gData);
                if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
                                        nextpc, x, x->cp, 0, 0)) {
                    goto bad;
                }
                continue;

              case REOP_ASSERTTEST:
                /*
                 * End of a positive assertion body: rewind the input to
                 * where the assertion started and restore the backtrack
                 * stack to its saved position.  The result is passed on
                 * unchanged in truth value.
                 */
                --gData->stateStackTop;
                --curState;
                x->cp = gData->cpbegin + curState->index;
                gData->backTrackSP =
                    (REBackTrackData *) ((char *)gData->backTrackStack +
                                         curState->u.assertion.top);
                gData->cursz = curState->u.assertion.sz;
                if (result)
                    result = x;
                break;

              case REOP_ASSERTNOTTEST:
                /* As REOP_ASSERTTEST, but with the outcome inverted. */
                --gData->stateStackTop;
                --curState;
                x->cp = gData->cpbegin + curState->index;
                gData->backTrackSP =
                    (REBackTrackData *) ((char *)gData->backTrackStack +
                                         curState->u.assertion.top);
                gData->cursz = curState->u.assertion.sz;
                result = (!result) ? x : NULL;
                break;
              /* Greedy quantifiers: (uintN)-1 as max means "no upper bound". */
              case REOP_STAR:
                curState->u.quantifier.min = 0;
                curState->u.quantifier.max = (uintN)-1;
                goto quantcommon;
              case REOP_PLUS:
                curState->u.quantifier.min = 1;
                curState->u.quantifier.max = (uintN)-1;
                goto quantcommon;
              case REOP_OPT:
                curState->u.quantifier.min = 0;
                curState->u.quantifier.max = 1;
                goto quantcommon;
              case REOP_QUANT:
                pc = ReadCompactIndex(pc, &k);
                curState->u.quantifier.min = k;
                pc = ReadCompactIndex(pc, &k);
                /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
                curState->u.quantifier.max = k - 1;
                JS_ASSERT(curState->u.quantifier.min
                          <= curState->u.quantifier.max);
              quantcommon:
                /* A zero maximum matches nothing: skip the child entirely. */
                if (curState->u.quantifier.max == 0) {
                    pc = pc + GET_OFFSET(pc);
                    op = (REOp) *pc++;
                    result = x;
                    continue;
                }
                /* Step over <next> */
                nextpc = pc + ARG_LEN;
                op = (REOp) *nextpc++;
                startcp = x->cp;
                if (REOP_IS_SIMPLE(op)) {
                    if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
                        /*
                         * The first iteration failed: that is a success
                         * only when no iterations are required.
                         */
                        if (curState->u.quantifier.min == 0)
                            result = x;
                        else
                            result = NULL;
                        pc = pc + GET_OFFSET(pc);
                        break;
                    }
                    op = (REOp) *nextpc++;
                    result = x;
                }
                /*
                 * Arrange for REOP_ENDCHILD to resume at REOP_REPEAT with
                 * pc pointing at this quantifier's <next> offset.
                 */
                curState->index = startcp - gData->cpbegin;
                curState->continue_op = REOP_REPEAT;
                curState->continue_pc = pc;
                curState->parenSoFar = parenSoFar;
                PUSH_STATE_STACK(gData);
                if (curState->u.quantifier.min == 0 &&
                    !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
                                        0, 0)) {
                    goto bad;
                }
                pc = nextpc;
                continue;

              case REOP_ENDCHILD: /* marks the end of a quantifier child */
                pc = curState[-1].continue_pc;
                op = (REOp) curState[-1].continue_op;

                if (!result)
                    result = x;
                continue;

              case REOP_REPEAT:
                /*
                 * One iteration of a greedy quantifier has finished (or we
                 * backtracked into it).  The do/while keeps iterating in
                 * place for as long as the child is a single simple op
                 * immediately followed by REOP_ENDCHILD.
                 */
                --curState;
                do {
                    --gData->stateStackTop;
                    if (!result) {
                        /* Failed, see if we have enough children. */
                        if (curState->u.quantifier.min == 0)
                            goto repeatDone;
                        goto break_switch;
                    }
                    if (curState->u.quantifier.min == 0 &&
                        x->cp == gData->cpbegin + curState->index) {
                        /* matched an empty string, that'll get us nowhere */
                        result = NULL;
                        goto break_switch;
                    }
                    if (curState->u.quantifier.min != 0)
                        curState->u.quantifier.min--;
                    if (curState->u.quantifier.max != (uintN) -1)
                        curState->u.quantifier.max--;
                    if (curState->u.quantifier.max == 0)
                        goto repeatDone;
                    nextpc = pc + ARG_LEN;
                    nextop = (REOp) *nextpc;
                    startcp = x->cp;
                    if (REOP_IS_SIMPLE(nextop)) {
                        nextpc++;
                        if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
                            if (curState->u.quantifier.min == 0)
                                goto repeatDone;
                            result = NULL;
                            goto break_switch;
                        }
                        result = x;
                    }
                    curState->index = startcp - gData->cpbegin;
                    PUSH_STATE_STACK(gData);
                    /*
                     * Once the minimum is satisfied, each further iteration
                     * is optional: save a backtrack entry together with the
                     * captures opened inside the child.
                     */
                    if (curState->u.quantifier.min == 0 &&
                        !PushBackTrackState(gData, REOP_REPEAT,
                                            pc, x, startcp,
                                            curState->parenSoFar,
                                            parenSoFar -
                                            curState->parenSoFar)) {
                        goto bad;
                    }
                } while (*nextpc == REOP_ENDCHILD);
                pc = nextpc;
                op = (REOp) *pc++;
                parenSoFar = curState->parenSoFar;
                continue;

              repeatDone:
                /* The quantifier is satisfied: continue after its child. */
                result = x;
                pc += GET_OFFSET(pc);
                goto break_switch;

              /* Non-greedy quantifiers, same min/max encoding as above. */
              case REOP_MINIMALSTAR:
                curState->u.quantifier.min = 0;
                curState->u.quantifier.max = (uintN)-1;
                goto minimalquantcommon;
              case REOP_MINIMALPLUS:
                curState->u.quantifier.min = 1;
                curState->u.quantifier.max = (uintN)-1;
                goto minimalquantcommon;
              case REOP_MINIMALOPT:
                curState->u.quantifier.min = 0;
                curState->u.quantifier.max = 1;
                goto minimalquantcommon;
              case REOP_MINIMALQUANT:
                pc = ReadCompactIndex(pc, &k);
                curState->u.quantifier.min = k;
                pc = ReadCompactIndex(pc, &k);
                /* See REOP_QUANT comments about k - 1. */
                curState->u.quantifier.max = k - 1;
                JS_ASSERT(curState->u.quantifier.min
                          <= curState->u.quantifier.max);
              minimalquantcommon:
                curState->index = x->cp - gData->cpbegin;
                curState->parenSoFar = parenSoFar;
                PUSH_STATE_STACK(gData);
                if (curState->u.quantifier.min != 0) {
                    /* At least one iteration is required: run the child. */
                    curState->continue_op = REOP_MINIMALREPEAT;
                    curState->continue_pc = pc;
                    /* step over <next> */
                    pc += OFFSET_LEN;
                    op = (REOp) *pc++;
                } else {
                    /*
                     * No iteration is required: first try what follows the
                     * quantifier, leaving a backtrack entry that will come
                     * back to REOP_MINIMALREPEAT if that fails.
                     */
                    if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
                                            pc, x, x->cp, 0, 0)) {
                        goto bad;
                    }
                    --gData->stateStackTop;
                    pc = pc + GET_OFFSET(pc);
                    op = (REOp) *pc++;
                }
                continue;

              case REOP_MINIMALREPEAT:
                --gData->stateStackTop;
                --curState;

                re_debug("{%d,%d}", curState->u.quantifier.min,
                         curState->u.quantifier.max);
#define PREPARE_REPEAT()                                                      \
    JS_BEGIN_MACRO                                                            \
        curState->index = x->cp - gData->cpbegin;                             \
        curState->continue_op = REOP_MINIMALREPEAT;                           \
        curState->continue_pc = pc;                                           \
        pc += ARG_LEN;                                                        \
        for (k = curState->parenSoFar; k < parenSoFar; k++)                   \
            x->parens[k].index = -1;                                          \
        PUSH_STATE_STACK(gData);                                              \
        op = (REOp) *pc++;                                                    \
        JS_ASSERT(op < REOP_LIMIT);                                           \
    JS_END_MACRO

                if (!result) {
                    re_debug(" - ");
                    /*
                     * Non-greedy failure - try to consume another child.
                     */
                    if (curState->u.quantifier.max == (uintN) -1 ||
                        curState->u.quantifier.max > 0) {
                        PREPARE_REPEAT();
                        continue;
                    }
                    /* Don't need to adjust pc since we're going to pop. */
                    break;
                }
                if (curState->u.quantifier.min == 0 &&
                    x->cp == gData->cpbegin + curState->index) {
                    /* Matched an empty string, that'll get us nowhere. */
                    result = NULL;
                    break;
                }
                if (curState->u.quantifier.min != 0)
                    curState->u.quantifier.min--;
                if (curState->u.quantifier.max != (uintN) -1)
                    curState->u.quantifier.max--;
                if (curState->u.quantifier.min != 0) {
                    /* Still below the required minimum: iterate again. */
                    PREPARE_REPEAT();
                    continue;
                }
                /*
                 * Minimum reached: try the rest of the pattern first and
                 * leave a backtrack entry for one more iteration.
                 */
                curState->index = x->cp - gData->cpbegin;
                curState->parenSoFar = parenSoFar;
                PUSH_STATE_STACK(gData);
                if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
                                        pc, x, x->cp,
                                        curState->parenSoFar,
                                        parenSoFar - curState->parenSoFar)) {
                    goto bad;
                }
                --gData->stateStackTop;
                pc = pc + GET_OFFSET(pc);
                op = (REOp) *pc++;
                JS_ASSERT(op < REOP_LIMIT);
                continue;
              default:
                JS_ASSERT(JS_FALSE);
                result = NULL;
            }
          break_switch:;
        }

        /*
         *  If the match failed and there's a backtrack option, take it.
         *  Otherwise this is a complete and utter failure.
         */
        if (!result) {
            if (gData->cursz == 0)
                return NULL;
            if (!JS_CHECK_OPERATION_LIMIT(gData->cx)) {
                gData->ok = JS_FALSE;
                return NULL;
            }

            /* Potentially detect explosive regex here. */
            gData->backTrackCount++;
            if (gData->backTrackLimit &&
                gData->backTrackCount >= gData->backTrackLimit) {
                JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
                                     JSMSG_REGEXP_TOO_COMPLEX);
                gData->ok = JS_FALSE;
                return NULL;
            }

            /*
             * Pop the top backtrack entry.  Its header is followed by the
             * saved state stack and then by any saved captures; restore
             * the input position, pc/op and both of those from it.
             */
            backTrackData = gData->backTrackSP;
            gData->cursz = backTrackData->sz;
            gData->backTrackSP =
                (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
            x->cp = backTrackData->cp;
            pc = backTrackData->backtrack_pc;
            op = (REOp) backTrackData->backtrack_op;
            JS_ASSERT(op < REOP_LIMIT);
            gData->stateStackTop = backTrackData->saveStateStackTop;
            JS_ASSERT(gData->stateStackTop);

            memcpy(gData->stateStack, backTrackData + 1,
                   sizeof(REProgState) * backTrackData->saveStateStackTop);
            curState = &gData->stateStack[gData->stateStackTop - 1];

            if (backTrackData->parenCount) {
                memcpy(&x->parens[backTrackData->parenIndex],
                       (char *)(backTrackData + 1) +
                       sizeof(REProgState) * backTrackData->saveStateStackTop,
                       sizeof(RECapture) * backTrackData->parenCount);
                parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
            } else {
                /* No captures were saved: undo those opened since then. */
                for (k = curState->parenSoFar; k < parenSoFar; k++)
                    x->parens[k].index = -1;
                parenSoFar = curState->parenSoFar;
            }

            re_debug("\tBT_Pop: %ld,%ld",
                     (unsigned long) backTrackData->parenIndex,
                     (unsigned long) backTrackData->parenCount);
            continue;
        }
        x = result;

        /*
         *  Continue with the expression.
         */
        op = (REOp)*pc++;
        JS_ASSERT(op < REOP_LIMIT);
    }

bad:
    re_debug("\n");
    return NULL;

good:
    re_debug("\n");
    return x;
}

/*
 * Find the first match at or after x->cp.
 *
 * With JS_TRACER, a native (JIT-compiled) matcher is used when tracing is
 * enabled, the regexp is not flagged JSREG_NOCOMPILE and a native version is
 * available.  Otherwise the bytecode interpreter is run at successive input
 * positions.  In both cases gData->skipped ends up holding the distance from
 * the original x->cp to the position where the match starts.
 *
 * Returns the match state on success, or NULL on mismatch or error (check
 * gData->ok to tell the interpreter's two NULL cases apart).
 */
static REMatchState *
MatchRegExp(REGlobalData *gData, REMatchState *x)
{
    const jschar *origin = x->cp;

#ifdef JS_TRACER
    NativeRegExp native = NULL;

    /* Only look for native code when it is permitted for this regexp. */
    if (TRACING_ENABLED(gData->cx) &&
        !(gData->regexp->flags & JSREG_NOCOMPILE)) {
        native = GetNativeRegExp(gData->cx, gData->regexp);
    }

    if (native) {
        /*
         * The native code wants `skipped` to be a raw pointer into the
         * buffer while it runs; it is turned back into an offset below.
         */
        gData->skipped = (ptrdiff_t)origin;

#ifdef JS_JIT_SPEW
        debug_only_stmt({
            VOUCH_DOES_NOT_REQUIRE_STACK();
            JSStackFrame *caller = (JS_ON_TRACE(gData->cx))
                                   ? NULL
                                   : js_GetScriptedCaller(gData->cx, NULL);
            debug_only_printf(LC_TMRegexp,
                              "entering REGEXP trace at %s:%u@%u, code: %p\n",
                              caller ? caller->script->filename : "<unknown>",
                              caller ? js_FramePCToLineNumber(gData->cx, caller) : 0,
                              caller ? FramePCOffset(gData->cx, caller) : 0,
                              JS_FUNC_TO_DATA_PTR(void *, native));
        })
#endif

        void *result;
#if defined(JS_NO_FASTCALL) && defined(NANOJIT_IA32)
        /*
         * A NativeRegExp takes a single argument; the extra one handed over
         * by SIMULATE_FASTCALL lands in 'edx' and is harmlessly ignored.
         */
        SIMULATE_FASTCALL(result, gData, gData->cpend, native);
#else
        result = native(gData, gData->cpend);
#endif
        debug_only_print0(LC_TMRegexp, "leaving REGEXP trace\n");
        if (!result)
            return NULL;

        /*
         * Translate the native results: the end-of-match pointer is read
         * back from gData->stateStack, and `skipped` becomes an offset
         * relative to the starting position again.
         */
        x->cp = (const jschar *)gData->stateStack;
        gData->skipped = (const jschar *)gData->skipped - origin;
        return x;
    }
#endif

    /*
     * Interpreter path.  The position one past the last character is tried
     * too, so that end-of-input/line conditions can be detected.
     */
    const jschar *pos = origin;
    while (pos <= gData->cpend) {
        gData->skipped = pos - origin;
        x->cp = pos;

        /* Every capture starts out unset for this attempt. */
        uintN parenTotal = gData->regexp->parenCount;
        for (uintN n = 0; n < parenTotal; n++)
            x->parens[n].index = -1;

        REMatchState *matched = ExecuteREBytecode(gData, x);
        if (matched || !gData->ok || (gData->regexp->flags & JSREG_STICKY))
            return matched;

        /* Mismatch here: reset the interpreter stacks for the next try. */
        gData->backTrackSP = gData->backTrackStack;
        gData->cursz = 0;
        gData->stateStackTop = 0;

        /*
         * The interpreter may have advanced `skipped` past positions it
         * already ruled out; resume right after them.
         */
        pos = origin + gData->skipped + 1;
    }
    return NULL;
}

/* Smallest backtrack limit ever applied when JSOPTION_RELIMIT is set. */
#define MIN_BACKTRACK_LIMIT 400000

/*
 * Set up gData for matching re against an input of `length` characters and
 * allocate the match state, all from cx->regexpPool.
 *
 * Initializes the backtrack stack, the state stack, the backtrack counters
 * and the cx/regexp/ok fields of gData, then makes sure every character
 * class of re has been converted.
 *
 * When JSOPTION_RELIMIT is set, the backtrack limit is length^3, but never
 * less than MIN_BACKTRACK_LIMIT; otherwise it is 0, which the interpreter
 * treats as "no limit".
 *
 * Returns the new match state (its cp and parens are left for the caller to
 * fill in), or NULL on failure.  On allocation failure the error is reported
 * through js_ReportOutOfScriptQuota() and gData->ok is cleared.
 */
static REMatchState *
InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re, size_t length)
{
    REMatchState *result;
    uintN i;

    gData->backTrackStackSize = INITIAL_BACKTRACK;
    JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
                           &cx->regexpPool,
                           INITIAL_BACKTRACK);
    if (!gData->backTrackStack)
        goto bad;

    gData->backTrackSP = gData->backTrackStack;
    gData->cursz = 0;
    gData->backTrackCount = 0;
    gData->backTrackLimit = 0;
    if (JS_GetOptions(cx) & JSOPTION_RELIMIT) {
        /*
         * O(n^3) limit.  A plain length * length * length wraps around for
         * long inputs and could yield an arbitrarily small limit, making
         * harmless regexps fail as "too complex".  Saturate at the largest
         * size_t value instead.
         */
        const size_t maxLimit = (size_t) -1;
        size_t limit = length;

        for (i = 0; i < 2; i++) {
            if (length != 0 && limit > maxLimit / length) {
                limit = maxLimit;
                break;
            }
            limit *= length;
        }
        if (limit < MIN_BACKTRACK_LIMIT)
            limit = MIN_BACKTRACK_LIMIT;
        gData->backTrackLimit = limit;
    }

    gData->stateStackLimit = INITIAL_STATESTACK;
    JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
                           &cx->regexpPool,
                           sizeof(REProgState) * INITIAL_STATESTACK);
    if (!gData->stateStack)
        goto bad;

    gData->stateStackTop = 0;
    gData->cx = cx;
    gData->regexp = re;
    gData->ok = JS_TRUE;

    /* The match state ends with one RECapture per capturing group. */
    JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
                           &cx->regexpPool,
                           offsetof(REMatchState, parens)
                           + re->parenCount * sizeof(RECapture));
    if (!result)
        goto bad;

    /*
     * Convert any character class that has not been converted yet.  A
     * failure here returns NULL directly, without the out-of-quota report
     * used for the allocation failures above.
     */
    for (i = 0; i < re->classCount; i++) {
        if (!re->classList[i].converted &&
            !MatcherProcessCharSet(gData, &re->classList[i])) {
            return NULL;
        }
    }

    return result;

bad:
    js_ReportOutOfScriptQuota(cx);
    gData->ok = JS_FALSE;
    return NULL;
}

/*
 * Execute re against str and publish the outcome.
 *
 *   indexp  in: index in str at which to start (clamped to the string
 *           length); out: on a match, the index just past the matched text.
 *   test    when true, only report whether there was a match; when false,
 *           build the result array.
 *   rval    out: JSVAL_NULL on mismatch; on a match, JSVAL_TRUE when
 *           testing, otherwise the result array object.
 *
 * On a match this also updates cx->regExpStatics (input, parens, lastParen,
 * lastMatch, leftContext and rightContext).
 *
 * Returns JS_FALSE on error and JS_TRUE otherwise; a mismatch is not an
 * error.  Everything allocated from cx->regexpPool during the call is
 * released before returning, except on the early timestamp-allocation
 * failure, which returns before the arena mark is taken.
 */
JSBool
js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
                 JSBool test, jsval *rval)
{
    REGlobalData gData;
    REMatchState *x, *result;

    const jschar *cp, *ep;
    size_t i, length, start;
    JSBool ok;
    JSRegExpStatics *res;
    ptrdiff_t matchlen;
    uintN num;
    JSString *parstr, *matchstr;
    JSObject *obj;

    RECapture *parsub = NULL;
    void *mark;
    int64 *timestamp;

    /*
     * It's safe to load from cp because JSStrings have a zero at the end,
     * and we never let cp get beyond cpend.
     */
    start = *indexp;
    str->getCharsAndLength(cp, length);
    if (start > length)
        start = length;
    gData.cpbegin = cp;
    gData.cpend = cp + length;
    cp += start;
    gData.start = start;
    gData.skipped = 0;

    if (!cx->regexpPool.first.next) {
        /*
         * The first arena in the regexpPool must have a timestamp at its base.
         */
        JS_ARENA_ALLOCATE_CAST(timestamp, int64 *,
                               &cx->regexpPool, sizeof *timestamp);
        if (!timestamp)
            return JS_FALSE;
        *timestamp = JS_Now();
    }
    /* Everything allocated after this mark is released at `out`. */
    mark = JS_ARENA_MARK(&cx->regexpPool);

    x = InitMatch(cx, &gData, re, length);

    if (!x) {
        ok = JS_FALSE;
        goto out;
    }
    x->cp = cp;

    /*
     * Call the recursive matcher to do the real work.  Return null on mismatch
     * whether testing or not.  On match, return an extended Array object.
     */
    result = MatchRegExp(&gData, x);
    ok = gData.ok;
    if (!ok)
        goto out;
    if (!result) {
        *rval = JSVAL_NULL;
        goto out;
    }
    /*
     * result->cp is the end of the match and start + gData.skipped is its
     * beginning.  From here on, cp points at the first matched character
     * and ep just past the last one.
     */
    cp = result->cp;
    i = cp - gData.cpbegin;
    *indexp = i;
    matchlen = i - (start + gData.skipped);
    JS_ASSERT(matchlen >= 0);
    ep = cp;
    cp -= matchlen;

    if (test) {
        /*
         * Testing for a match and updating cx->regExpStatics: don't allocate
         * an array object, do return true.
         */
        *rval = JSVAL_TRUE;

        /* Avoid warning.  (gcc doesn't detect that obj is needed iff !test); */
        obj = NULL;
    } else {
        /*
         * The array returned on match has element 0 bound to the matched
         * string, elements 1 through state.parenCount bound to the paren
         * matches, an index property telling the length of the left context,
         * and an input property referring to the input string.
         */
        obj = js_NewSlowArrayObject(cx);
        if (!obj) {
            ok = JS_FALSE;
            goto out;
        }
        *rval = OBJECT_TO_JSVAL(obj);

/* Define an enumerable property on obj; jumps to `out` on failure. */
#define DEFVAL(val, id) {                                                     \
    ok = js_DefineProperty(cx, obj, id, val,                                  \
                           JS_PropertyStub, JS_PropertyStub,                  \
                           JSPROP_ENUMERATE);                                 \
    if (!ok)                                                                  \
        goto out;                                                             \
}

        matchstr = js_NewDependentString(cx, str, cp - str->chars(),
                                         matchlen);
        if (!matchstr) {
            ok = JS_FALSE;
            goto out;
        }
        DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
    }

    res = &cx->regExpStatics;
    res->input = str;
    if (!res->parens.resize(re->parenCount)) {
        ok = JS_FALSE;
        goto out;
    }
    if (re->parenCount == 0) {
        res->lastParen = js_EmptySubString;
    } else {
        /*
         * Copy each capture into the statics and, unless only testing,
         * define it as element num + 1 of the result array.  A capture
         * whose index is -1 did not participate in the match: it becomes a
         * NULL/0 substring in the statics and undefined in the array.
         */
        for (num = 0; num < re->parenCount; num++) {
            JSSubString *sub = &res->parens[num];
            parsub = &result->parens[num];
            if (parsub->index == -1) {
                sub->chars = NULL;
                sub->length = 0;
            } else {
                sub->chars = gData.cpbegin + parsub->index;
                sub->length = parsub->length;
            }
            if (test)
                continue;
            if (parsub->index == -1) {
                ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1), JSVAL_VOID, NULL, NULL,
                                       JSPROP_ENUMERATE);
            } else {
                parstr = js_NewDependentString(cx, str,
                                               gData.cpbegin + parsub->index -
                                               str->chars(),
                                               parsub->length);
                if (!parstr) {
                    ok = JS_FALSE;
                    goto out;
                }
                ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1), STRING_TO_JSVAL(parstr),
                                       NULL, NULL, JSPROP_ENUMERATE);
            }
            if (!ok)
                goto out;
        }
        /* parsub now refers to the last capture group of the regexp. */
        if (parsub->index == -1) {
            res->lastParen = js_EmptySubString;
        } else {
            res->lastParen.chars = gData.cpbegin + parsub->index;
            res->lastParen.length = parsub->length;
        }
    }

    if (!test) {
        /*
         * Define the index and input properties last for better for/in loop
         * order (so they come after the elements).
         */
        DEFVAL(INT_TO_JSVAL(start + gData.skipped),
               ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
        DEFVAL(STRING_TO_JSVAL(str),
               ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
    }

#undef DEFVAL

    res->lastMatch.chars = cp;
    res->lastMatch.length = matchlen;

    /*
     * For JS1.3 and ECMAv2, emulate Perl5 exactly:
     *
     * js1.3        "hi", "hi there"            "hihitherehi therebye"
     */
    res->leftContext.chars = str->chars();
    res->leftContext.length = start + gData.skipped;
    res->rightContext.chars = ep;
    res->rightContext.length = gData.cpend - ep;

out:
    JS_ARENA_RELEASE(&cx->regexpPool, mark);
    return ok;
}

/************************************************************************/

/*
 * Store lastIndex, as a number value, into the lastIndex slot of the RegExp
 * object obj.  Returns whatever JS_NewNumberValue() returns.
 */
static JSBool
SetRegExpLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
{
    JS_ASSERT(obj->isRegExp());

    jsval *slot = obj->addressOfRegExpLastIndex();
    return JS_NewNumberValue(cx, lastIndex, slot);
}

/*
 * Define a static property getter called `name`.
 *
 * The generated function walks up the prototype chain from obj until it
 * reaches an object whose class is js_RegExpClass; if the chain ends first it
 * returns true without touching *vp.  Otherwise it locks that object, loads
 * its private data as `JSRegExp *re`, runs `code` (which is expected to store
 * the property value in *vp and may refer to cx, obj, re and vp), unlocks
 * the object and returns true.
 */
#define DEFINE_GETTER(name, code)                                              \
    static JSBool                                                              \
    name(JSContext *cx, JSObject *obj, jsval id, jsval *vp)                    \
    {                                                                          \
        while (obj->getClass() != &js_RegExpClass) {                           \
            obj = obj->getProto();                                             \
            if (!obj)                                                          \
                return true;                                                   \
        }                                                                      \
        JS_LOCK_OBJ(cx, obj);                                                  \
        JSRegExp *re = (JSRegExp *) obj->getPrivate();                         \
        code;                                                                  \
        JS_UNLOCK_OBJ(cx, obj);                                                \
        return true;                                                           \
    }

/* lastIndex is stored in the object, re = re silences the compiler warning. */
DEFINE_GETTER(lastIndex_getter,  re = re; *vp = obj->getRegExpLastIndex())
/* The remaining getters read the source string and flag bits of the JSRegExp. */
DEFINE_GETTER(source_getter,     *vp = STRING_TO_JSVAL(re->source))
DEFINE_GETTER(global_getter,     *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0))
DEFINE_GETTER(ignoreCase_getter, *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0))
DEFINE_GETTER(multiline_getter,  *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0))
DEFINE_GETTER(sticky_getter,     *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_STICKY) != 0))

/*
 * Setter for the lastIndex instance property.
 *
 * Finds the first object on obj's prototype chain (starting with obj itself)
 * whose class is js_RegExpClass; if there is none the assignment is silently
 * ignored and true is returned. Otherwise *vp is converted to a number,
 * passed through js_DoubleToInteger and stored via SetRegExpLastIndex.
 * Returns false if the numeric conversion fails.
 */
static JSBool
lastIndex_setter(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
{
    JSObject *target = obj;
    for (;;) {
        if (target->getClass() == &js_RegExpClass)
            break;
        target = target->getProto();
        if (!target)
            return true;
    }

    jsdouble d;
    if (!JS_ValueToNumber(cx, *vp, &d))
        return false;

    return SetRegExpLastIndex(cx, target, js_DoubleToInteger(d));
}

/*
 * Attribute sets for the RegExp instance properties below: all of them are
 * permanent and shared; every property except lastIndex is also read-only.
 */
#define REGEXP_PROP_ATTRS     (JSPROP_PERMANENT | JSPROP_SHARED)
#define RO_REGEXP_PROP_ATTRS  (REGEXP_PROP_ATTRS | JSPROP_READONLY)

/*
 * Property table handed to js_InitClass by js_InitRegExpClass. lastIndex is
 * the only entry with a setter; the others are getter-only.
 */
static JSPropertySpec regexp_props[] = {
    {"source",     0, RO_REGEXP_PROP_ATTRS, source_getter,     NULL},
    {"global",     0, RO_REGEXP_PROP_ATTRS, global_getter,     NULL},
    {"ignoreCase", 0, RO_REGEXP_PROP_ATTRS, ignoreCase_getter, NULL},
    {"lastIndex",  0, REGEXP_PROP_ATTRS,    lastIndex_getter,
                                            lastIndex_setter},
    {"multiline",  0, RO_REGEXP_PROP_ATTRS, multiline_getter,  NULL},
    {"sticky",     0, RO_REGEXP_PROP_ATTRS, sticky_getter,     NULL},
    {0,0,0,0,0}
};

/*
 * RegExp class static properties and their Perl counterparts:
 *
 *  RegExp.input                $_
 *  RegExp.multiline            $*
 *  RegExp.lastMatch            $&
 *  RegExp.lastParen            $+
 *  RegExp.leftContext          $`
 *  RegExp.rightContext         $'
 */

/*
 * Set up the per-context regexp arena pool and reset the context's RegExp
 * statics.
 */
void
js_InitRegExpStatics(JSContext *cx)
{
    /*
     * To avoid multiple allocations in InitMatch(), the arena size should be
     * at least as big as:
     *   INITIAL_BACKTRACK
     *   + (sizeof(REProgState) * INITIAL_STATESTACK)
     *   + (offsetof(REMatchState, parens) + avgParenSize * sizeof(RECapture))
     */
    const size_t arenaSize = 12 * 1024 - 40;    /* FIXME: bug 421435 */

    JS_InitArenaPool(&cx->regexpPool, "regexp", arenaSize, sizeof(void *),
                     &cx->scriptStackQuota);
    JS_ClearRegExpStatics(cx);
}

/*
 * Copy the context's current RegExp statics into *statics, hand the saved
 * input string (if any) to tvr, and then clear the context's statics.
 */
JS_FRIEND_API(void)
js_SaveAndClearRegExpStatics(JSContext *cx, JSRegExpStatics *statics,
                             AutoValueRooter *tvr)
{
    statics->copy(cx->regExpStatics);

    JSString *savedInput = statics->input;
    if (savedInput)
        tvr->setString(savedInput);

    JS_ClearRegExpStatics(cx);
}

/*
 * Overwrite the context's RegExp statics with the previously saved *statics.
 * The tvr parameter is accepted for symmetry with
 * js_SaveAndClearRegExpStatics but is not used here.
 */
JS_FRIEND_API(void)
js_RestoreRegExpStatics(JSContext *cx, JSRegExpStatics *statics,
                        AutoValueRooter *tvr)
{
    JSRegExpStatics &current = cx->regExpStatics;
    current.copy(*statics);
}

/*
 * Trace the input string held in acx's RegExp statics, when there is one.
 */
void
js_TraceRegExpStatics(JSTracer *trc, JSContext *acx)
{
    JSString *input = acx->regExpStatics.input;
    if (!input)
        return;

    JS_CALL_STRING_TRACER(trc, input, "res->input");
}

/*
 * Tear-down counterpart of js_InitRegExpStatics: clear the context's RegExp
 * statics, then finish the regexp arena pool.
 */
void
js_FreeRegExpStatics(JSContext *cx)
{
    JS_ClearRegExpStatics(cx);

    JSArenaPool *pool = &cx->regexpPool;
    JS_FinishArenaPool(pool);
}

/*
 * Define a getter called |name| for a RegExp static property. The generated
 * function binds |res| to the context's RegExp statics and then runs |code|.
 * The macro itself supplies no return statement, so |code| must end by
 * returning the JSBool result.
 */
#define DEFINE_STATIC_GETTER(name, code)                                       \
    static JSBool                                                              \
    name(JSContext *cx, JSObject *obj, jsval id, jsval *vp)                    \
    {                                                                          \
        JSRegExpStatics *res = &cx->regExpStatics;                             \
        code;                                                                  \
    }

/*
 * Create a new string holding a copy of the characters described by sub and
 * store it in *vp. Returns false (leaving *vp untouched) if the string could
 * not be created.
 */
static bool
MakeString(JSContext *cx, JSSubString *sub, jsval *vp)
{
    if (JSString *copy = js_NewStringCopyN(cx, sub->chars, sub->length)) {
        *vp = STRING_TO_JSVAL(copy);
        return true;
    }
    return false;
}

/*
 * Getters for the non-paren RegExp statics. input yields the saved input
 * string, or the empty string value when none is set; multiline yields the
 * boolean flag; the remaining four each build a fresh string from the
 * corresponding substring via MakeString and fail if that allocation fails.
 */
DEFINE_STATIC_GETTER(static_input_getter,
                     *vp = res->input
                           ? STRING_TO_JSVAL(res->input)
                           : JS_GetEmptyStringValue(cx);
                     return true)
DEFINE_STATIC_GETTER(static_multiline_getter,    *vp = BOOLEAN_TO_JSVAL(res->multiline); return true)
DEFINE_STATIC_GETTER(static_lastMatch_getter,    return MakeString(cx, &res->lastMatch, vp))
DEFINE_STATIC_GETTER(static_lastParen_getter,    return MakeString(cx, &res->lastParen, vp))
DEFINE_STATIC_GETTER(static_leftContext_getter,  return MakeString(cx, &res->leftContext, vp))
DEFINE_STATIC_GETTER(static_rightContext_getter, return MakeString(cx, &res->rightContext, vp))

/*
 * Store in *vp a string for the n-th (zero-based) saved paren substring of
 * res. An index past the end of res->parens yields the empty substring.
 * Returns false if the string could not be created.
 */
static bool
Paren(JSContext *cx, JSRegExpStatics *res, size_t n, jsval *vp)
{
    if (n < res->parens.length())
        return MakeString(cx, &res->parens[n], vp);
    return MakeString(cx, &js_EmptySubString, vp);
}

/*
 * Getters for the nine numbered paren statics. static_parenN_getter reads
 * paren index N - 1, because Paren() takes a zero-based index.
 */
DEFINE_STATIC_GETTER(static_paren1_getter,       return Paren(cx, res, 0, vp))
DEFINE_STATIC_GETTER(static_paren2_getter,       return Paren(cx, res, 1, vp))
DEFINE_STATIC_GETTER(static_paren3_getter,       return Paren(cx, res, 2, vp))
DEFINE_STATIC_GETTER(static_paren4_getter,       return Paren(cx, res, 3, vp))
DEFINE_STATIC_GETTER(static_paren5_getter,       return Paren(cx, res, 4, vp))
DEFINE_STATIC_GETTER(static_paren6_getter,       return Paren(cx, res, 5, vp))
DEFINE_STATIC_GETTER(static_paren7_getter,       return Paren(cx, res, 6, vp))
DEFINE_STATIC_GETTER(static_paren8_getter,       return Paren(cx, res, 7, vp))
DEFINE_STATIC_GETTER(static_paren9_getter,       return Paren(cx, res, 8, vp))

/*
 * Define a setter called |name| for a RegExp static property. The generated
 * function binds |res| to the context's RegExp statics, runs |code| and then
 * returns true. |code| may return false itself to report failure.
 */
#define DEFINE_STATIC_SETTER(name, code)                                       \
    static JSBool                                                              \
    name(JSContext *cx, JSObject *obj, jsval id, jsval *vp)                    \
    {                                                                          \
        JSRegExpStatics *res = &cx->regExpStatics;                             \
        code;                                                                  \
        return true;                                                           \
    }

/*
 * input: convert *vp to a string in place unless it already is one, then
 * remember that string as the statics' input. Fails if conversion fails.
 */
DEFINE_STATIC_SETTER(static_input_setter,
                     if (!JSVAL_IS_STRING(*vp) && !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp))
                         return false;
                     res->input = JSVAL_TO_STRING(*vp))
/*
 * multiline: convert *vp to a boolean in place unless it already is one, then
 * store it as the statics' multiline flag. Fails if conversion fails.
 */
DEFINE_STATIC_SETTER(static_multiline_setter,
                     if (!JSVAL_IS_BOOLEAN(*vp) && !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp))
                         return false;
                     res->multiline = JSVAL_TO_BOOLEAN(*vp))

/*
 * Attribute sets for the RegExp static properties: the instance-property
 * attributes plus JSPROP_ENUMERATE, with a read-only variant for the
 * getter-only entries.
 */
#define REGEXP_STATIC_PROP_ATTRS    (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE)
#define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY)

/*
 * Static property table handed to js_InitClass by js_InitRegExpClass. Only
 * input and multiline have setters. The Perl-style short names ($_, $*, ...)
 * are not listed here; js_InitRegExpClass adds them as aliases.
 */
static JSPropertySpec regexp_static_props[] = {
    {"input",        0, REGEXP_STATIC_PROP_ATTRS,    static_input_getter,
                                                     static_input_setter},
    {"multiline",    0, REGEXP_STATIC_PROP_ATTRS,    static_multiline_getter,
                                                     static_multiline_setter},
    {"lastMatch",    0, RO_REGEXP_STATIC_PROP_ATTRS, static_lastMatch_getter,    NULL},
    {"lastParen",    0, RO_REGEXP_STATIC_PROP_ATTRS, static_lastParen_getter,    NULL},
    {"leftContext",  0, RO_REGEXP_STATIC_PROP_ATTRS, static_leftContext_getter,  NULL},
    {"rightContext", 0, RO_REGEXP_STATIC_PROP_ATTRS, static_rightContext_getter, NULL},
    {"$1",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren1_getter,       NULL},
    {"$2",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren2_getter,       NULL},
    {"$3",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren3_getter,       NULL},
    {"$4",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren4_getter,       NULL},
    {"$5",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren5_getter,       NULL},
    {"$6",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren6_getter,       NULL},
    {"$7",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren7_getter,       NULL},
    {"$8",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren8_getter,       NULL},
    {"$9",           0, RO_REGEXP_STATIC_PROP_ATTRS, static_paren9_getter,       NULL},
    {0,0,0,0,0}
};

/*
 * Class finalizer: destroy the JSRegExp stored as obj's private data, if the
 * object has one.
 */
static void
regexp_finalize(JSContext *cx, JSObject *obj)
{
    if (JSRegExp *re = (JSRegExp *) obj->getPrivate())
        js_DestroyRegExp(cx, re);
}

/* Defined further down; declared here so regexp_call can use it. */
static JSBool
regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
                JSBool test, jsval *rval);

/*
 * Class call hook: invoking a RegExp object as a function runs a non-test
 * exec on the object found in argv[-2], ignoring the obj parameter.
 */
static JSBool
regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    JSObject *callee = JSVAL_TO_OBJECT(argv[-2]);

    return regexp_exec_sub(cx, callee, argc, argv, JS_FALSE, rval);
}

#if JS_HAS_XDR

#include "jsxdrapi.h"

/*
 * XDR hook for RegExp objects. The serialized form is the source string
 * followed by the flags as a uint32.
 *
 * When encoding, both are taken from the JSRegExp private of *objp; an object
 * without private data cannot be encoded. When decoding, a fresh RegExp
 * object with no parent and no prototype is created, a JSRegExp is compiled
 * from the decoded source and flags, and the object is returned in *objp.
 */
JSBool
js_XDRRegExpObject(JSXDRState *xdr, JSObject **objp)
{
    JSString *source = NULL;
    uint32 flagsword = 0;

    if (xdr->mode == JSXDR_ENCODE) {
        JSRegExp *re = (JSRegExp *) (*objp)->getPrivate();
        if (!re)
            return JS_FALSE;
        source = re->source;
        flagsword = (uint32)re->flags;
    }

    if (!JS_XDRString(xdr, &source))
        return JS_FALSE;
    if (!JS_XDRUint32(xdr, &flagsword))
        return JS_FALSE;

    if (xdr->mode == JSXDR_DECODE) {
        JSContext *cx = xdr->cx;

        JSObject *obj = NewObject(cx, &js_RegExpClass, NULL, NULL);
        if (!obj)
            return JS_FALSE;
        obj->clearParent();
        obj->clearProto();

        JSRegExp *re = js_NewRegExp(cx, NULL, source, (uint8)flagsword,
                                    JS_FALSE);
        if (!re)
            return JS_FALSE;

        obj->setPrivate(re);
        obj->zeroRegExpLastIndex();
        *objp = obj;
    }
    return JS_TRUE;
}

#else  /* !JS_HAS_XDR */

#define js_XDRRegExpObject NULL

#endif /* !JS_HAS_XDR */

/*
 * Class trace hook: trace the source string of obj's JSRegExp, when both the
 * private data and its source are present.
 */
static void
regexp_trace(JSTracer *trc, JSObject *obj)
{
    JSRegExp *re = (JSRegExp *) obj->getPrivate();
    if (!re)
        return;
    if (!re->source)
        return;

    JS_CALL_STRING_TRACER(trc, re->source, "source");
}

/*
 * The RegExp class. Instances carry private data (a JSRegExp) plus
 * JSObject::REGEXP_FIXED_RESERVED_SLOTS reserved slots, and the class has a
 * cached prototype under JSProto_RegExp.
 *
 * The hooks in this initializer that are defined in this file are
 * regexp_finalize, regexp_call, js_XDRRegExpObject and regexp_trace; every
 * other entry is a JS_*Stub, NULL or 0.
 * NOTE(review): the initializer is positional; confirm which JSClass member
 * each entry lands in against the JSClass declaration before reordering.
 */
JSClass js_RegExpClass = {
    js_RegExp_str,
    JSCLASS_HAS_PRIVATE |
    JSCLASS_HAS_RESERVED_SLOTS(JSObject::REGEXP_FIXED_RESERVED_SLOTS) |
    JSCLASS_MARK_IS_TRACE | JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
    JS_PropertyStub,    JS_PropertyStub,
    JS_PropertyStub,    JS_PropertyStub,
    JS_EnumerateStub,   JS_ResolveStub,
    JS_ConvertStub,     regexp_finalize,
    NULL,               NULL,
    regexp_call,        NULL,
    js_XDRRegExpObject, NULL,
    JS_CLASS_TRACE(regexp_trace), 0
};

/* Source text substituted when a regexp's own source is empty: "(?:)". */
static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};

/*
 * Produce the "/source/flags" string form of the RegExp object obj in *vp.
 *
 * Fails if obj is not a RegExp (vp + 2 is passed to JS_InstanceOf as argv)
 * or on allocation failure. A RegExp without private data yields the empty
 * string. An empty source is rendered as "(?:)". Flags are emitted in the
 * order g, i, m, y.
 */
JSBool
js_regexp_toString(JSContext *cx, JSObject *obj, jsval *vp)
{
    if (!JS_InstanceOf(cx, obj, &js_RegExpClass, vp + 2))
        return JS_FALSE;

    JS_LOCK_OBJ(cx, obj);
    JSRegExp *re = (JSRegExp *) obj->getPrivate();
    if (!re) {
        JS_UNLOCK_OBJ(cx, obj);
        *vp = STRING_TO_JSVAL(cx->runtime->emptyString);
        return JS_TRUE;
    }

    const jschar *source;
    size_t srclen;
    re->source->getCharsAndLength(source, srclen);
    if (srclen == 0) {
        source = empty_regexp_ucstr;
        srclen = JS_ARRAY_LENGTH(empty_regexp_ucstr) - 1;
    }

    /* One output character is reserved per bit set in re->flags. */
    size_t nflags = 0;
    for (uintN bits = re->flags; bits != 0; bits &= bits - 1)
        nflags++;

    /* Two slashes, the source, the flags and a terminating NUL. */
    size_t capacity = srclen + 2 + nflags + 1;
    jschar *chars = (jschar*) cx->malloc(capacity * sizeof(jschar));
    if (!chars) {
        JS_UNLOCK_OBJ(cx, obj);
        return JS_FALSE;
    }

    size_t pos = 0;
    chars[pos++] = '/';
    js_strncpy(chars + pos, source, srclen);
    pos += srclen;
    chars[pos++] = '/';
    if (re->flags & JSREG_GLOB)
        chars[pos++] = 'g';
    if (re->flags & JSREG_FOLD)
        chars[pos++] = 'i';
    if (re->flags & JSREG_MULTILINE)
        chars[pos++] = 'm';
    if (re->flags & JSREG_STICKY)
        chars[pos++] = 'y';
    JS_UNLOCK_OBJ(cx, obj);
    chars[pos] = 0;

    /* On success the new string takes the buffer; on failure free it here. */
    JSString *str = js_NewString(cx, chars, pos);
    if (!str) {
        cx->free(chars);
        return JS_FALSE;
    }
    *vp = STRING_TO_JSVAL(str);
    return JS_TRUE;
}

/*
 * Native for toString (and toSource): resolve the |this| object and delegate
 * to js_regexp_toString. Fails if |this| cannot be obtained.
 */
static JSBool
regexp_toString(JSContext *cx, uintN argc, jsval *vp)
{
    JSObject *thisObj = JS_THIS_OBJECT(cx, vp);
    if (!thisObj)
        return JS_FALSE;

    return js_regexp_toString(cx, thisObj, vp);
}

/*
 * (Re)compile the regular expression stored in the RegExp object obj.
 *
 * Arguments, taken from argv[0 .. argc-1]:
 *   - none: compile the empty pattern with no flags;
 *   - argv[0] is a RegExp object: duplicate it by recompiling its source with
 *     its own flags. Passing a non-undefined argv[1] in this case is an error
 *     (JSMSG_NEWREGEXP_FLAGGED);
 *   - otherwise argv[0] is converted to the pattern string and argv[1], when
 *     present and not undefined, to the flags string. Unescaped '/'
 *     characters in the pattern are escaped first.
 *
 * Converted strings are stored back into argv[0] and argv[1], which keeps
 * them reachable through the caller's argument vector. argv is not touched
 * when argc is 0 (js_InitRegExpClass passes NULL in that case).
 *
 * On success the new JSRegExp replaces obj's private data, lastIndex is reset
 * to zero, the previous JSRegExp (if any) is destroyed, and *rval is set to
 * obj. Returns JS_FALSE on any failure, leaving obj unchanged.
 */
static JSBool
regexp_compile_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
                   jsval *rval)
{
    JSString *opt, *str;
    JSRegExp *oldre, *re;
    JSObject *obj2;
    size_t length, nslashes;
    const jschar *cp, *start, *end;
    jschar *nstart, *ncp;

    if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
        return JS_FALSE;
    opt = NULL;
    if (argc == 0) {
        str = cx->runtime->emptyString;
    } else {
        if (JSVAL_IS_OBJECT(argv[0])) {
            /*
             * If we get passed in a RegExp object we construct a new
             * RegExp that is a duplicate of it by re-compiling the
             * original source code. ECMA requires that it be an error
             * here if the flags are specified. (We must use the flags
             * from the original RegExp also).
             */
            obj2 = JSVAL_TO_OBJECT(argv[0]);
            if (obj2 && obj2->getClass() == &js_RegExpClass) {
                if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
                    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
                                         JSMSG_NEWREGEXP_FLAGGED);
                    return JS_FALSE;
                }
                JS_LOCK_OBJ(cx, obj2);
                re = (JSRegExp *) obj2->getPrivate();
                if (!re) {
                    JS_UNLOCK_OBJ(cx, obj2);
                    return JS_FALSE;
                }
                re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
                JS_UNLOCK_OBJ(cx, obj2);
                goto created;
            }
        }
        str = js_ValueToString(cx, argv[0]);
        if (!str)
            return JS_FALSE;
        argv[0] = STRING_TO_JSVAL(str);
        if (argc > 1) {
            if (JSVAL_IS_VOID(argv[1])) {
                opt = NULL;
            } else {
                opt = js_ValueToString(cx, argv[1]);
                if (!opt)
                    return JS_FALSE;
                argv[1] = STRING_TO_JSVAL(opt);
            }
        }

        /*
         * Escape any naked slashes in the regexp source. A slash counts as
         * naked when it is the first character or when the character right
         * before it is not a backslash; only that one preceding character is
         * inspected.
         *
         * Do this in two passes: count the naked slashes first, then build
         * the escaped copy in a single allocation of exactly the right size.
         * This avoids growing the buffer once per slash and never needs to
         * rebase a write cursor against a reallocated (and therefore invalid)
         * buffer address.
         */
        str->getCharsAndLength(start, length);
        end = start + length;
        nslashes = 0;
        for (cp = start; cp < end; cp++) {
            if (*cp == '/' && (cp == start || cp[-1] != '\\'))
                nslashes++;
        }

        if (nslashes != 0) {
            /* One extra backslash per naked slash, plus the backstop. */
            length += nslashes;
            nstart = (jschar *) cx->malloc((length + 1) * sizeof(jschar));
            if (!nstart)
                return JS_FALSE;

            ncp = nstart;
            for (cp = start; cp < end; cp++) {
                if (*cp == '/' && (cp == start || cp[-1] != '\\'))
                    *ncp++ = '\\';
                *ncp++ = *cp;
            }

            /* Don't forget to store the backstop after the new string. */
            JS_ASSERT((size_t)(ncp - nstart) == length);
            *ncp = 0;

            /* On success the new string takes nstart; otherwise free it. */
            str = js_NewString(cx, nstart, length);
            if (!str) {
                cx->free(nstart);
                return JS_FALSE;
            }
            argv[0] = STRING_TO_JSVAL(str);
        }
    }

    re = js_NewRegExpOpt(cx, str, opt, JS_FALSE);
created:
    if (!re)
        return JS_FALSE;

    /* Swap in the new regexp under the object lock, then drop the old one. */
    JS_LOCK_OBJ(cx, obj);
    oldre = (JSRegExp *) obj->getPrivate();
    obj->setPrivate(re);
    obj->zeroRegExpLastIndex();
    JS_UNLOCK_OBJ(cx, obj);
    if (oldre)
        js_DestroyRegExp(cx, oldre);
    *rval = OBJECT_TO_JSVAL(obj);
    return JS_TRUE;
}

/*
 * Native for RegExp.prototype.compile: resolve the |this| object and
 * delegate to regexp_compile_sub with the call's arguments. Fails if |this|
 * cannot be obtained.
 */
static JSBool
regexp_compile(JSContext *cx, uintN argc, jsval *vp)
{
    JSObject *thisObj = JS_THIS_OBJECT(cx, vp);
    if (!thisObj)
        return JS_FALSE;

    return regexp_compile_sub(cx, thisObj, argc, vp + 2, vp);
}

/*
 * Common implementation behind regexp_exec, regexp_test and regexp_call.
 *
 * obj must be a RegExp object (otherwise JS_InstanceOf fails the call, with
 * argv passed along to it). The string to match is argv[0] converted to a
 * string, or, when argc is 0, the input string saved in the context's RegExp
 * statics; if there is no saved input, JSMSG_NO_INPUT is reported and the
 * call fails. |test| is forwarded unchanged to js_ExecuteRegExp and the
 * result is delivered through *rval.
 *
 * lastIndex handling: the stored lastIndex is only consulted for regexps with
 * the global or sticky flag; all others start at 0. A start index that is
 * negative or beyond the end of the string resets lastIndex to zero and
 * yields JSVAL_NULL without running the matcher. After a successful
 * js_ExecuteRegExp call, lastIndex is updated when the regexp is global, or
 * when it is sticky and the result is not JSVAL_NULL: it is zeroed on a null
 * result and otherwise set to the index js_ExecuteRegExp left in |i|.
 */
static JSBool
regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
                JSBool test, jsval *rval)
{
    JSBool ok, sticky;
    JSRegExp *re;
    jsdouble lastIndex;
    JSString *str;
    size_t i;

    ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
    if (!ok)
        return JS_FALSE;
    JS_LOCK_OBJ(cx, obj);
    re = (JSRegExp *) obj->getPrivate();
    if (!re) {
        /*
         * NOTE(review): this path reports success without storing anything
         * in *rval, so the caller sees whatever value was already there.
         */
        JS_UNLOCK_OBJ(cx, obj);
        return JS_TRUE;
    }

    /*
     * NB: from here on every exit must go through the out: label, so that
     * the reference taken on re by HOLD_REGEXP is dropped again.
     */
    HOLD_REGEXP(cx, re);
    sticky = (re->flags & JSREG_STICKY) != 0;
    if (re->flags & (JSREG_GLOB | JSREG_STICKY)) {
        /* The slot holds either an int or a double jsval. */
        jsval v = obj->getRegExpLastIndex();
        if (JSVAL_IS_INT(v)) {
            lastIndex = JSVAL_TO_INT(v);
        } else {
            JS_ASSERT(JSVAL_IS_DOUBLE(v));
            lastIndex = *JSVAL_TO_DOUBLE(v);
        }
    } else {
        lastIndex = 0;
    }
    JS_UNLOCK_OBJ(cx, obj);

    /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
    if (argc == 0) {
        str = cx->regExpStatics.input;
        if (!str) {
            const char *bytes = js_GetStringBytes(cx, re->source);

            /* If bytes is null no error is reported here, but we still fail. */
            if (bytes) {
                JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
                                     JSMSG_NO_INPUT,
                                     bytes,
                                     (re->flags & JSREG_GLOB) ? "g" : "",
                                     (re->flags & JSREG_FOLD) ? "i" : "",
                                     (re->flags & JSREG_MULTILINE) ? "m" : "",
                                     (re->flags & JSREG_STICKY) ? "y" : "");
            }
            ok = JS_FALSE;
            goto out;
        }
    } else {
        str = js_ValueToString(cx, argv[0]);
        if (!str) {
            ok = JS_FALSE;
            goto out;
        }
        /* Store the converted string back into the argument vector. */
        argv[0] = STRING_TO_JSVAL(str);
    }

    if (lastIndex < 0 || str->length() < lastIndex) {
        /* Start position out of range: no match, and lastIndex resets. */
        obj->zeroRegExpLastIndex();
        *rval = JSVAL_NULL;
    } else {
        i = (size_t) lastIndex;
        ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
        if (ok &&
            ((re->flags & JSREG_GLOB) || (*rval != JSVAL_NULL && sticky))) {
            if (*rval == JSVAL_NULL)
                obj->zeroRegExpLastIndex();
            else
                ok = SetRegExpLastIndex(cx, obj, i);
        }
    }

out:
    DROP_REGEXP(cx, re);
    return ok;
}

/*
 * Native for RegExp.prototype.exec: run regexp_exec_sub on |this| in
 * non-test mode, with the result delivered through *vp.
 */
static JSBool
regexp_exec(JSContext *cx, uintN argc, jsval *vp)
{
    JSObject *thisObj = JS_THIS_OBJECT(cx, vp);

    return regexp_exec_sub(cx, thisObj, argc, vp + 2, JS_FALSE, vp);
}

/*
 * Native for RegExp.prototype.test: run regexp_exec_sub on |this| in test
 * mode, then turn any result other than JSVAL_TRUE into JSVAL_FALSE so the
 * caller always receives a boolean.
 */
static JSBool
regexp_test(JSContext *cx, uintN argc, jsval *vp)
{
    JSObject *thisObj = JS_THIS_OBJECT(cx, vp);

    if (!regexp_exec_sub(cx, thisObj, argc, vp + 2, JS_TRUE, vp))
        return JS_FALSE;

    if (*vp == JSVAL_TRUE)
        return JS_TRUE;

    *vp = JSVAL_FALSE;
    return JS_TRUE;
}

/*
 * Method table handed to js_InitClass by js_InitRegExpClass. toSource (when
 * JS_HAS_TOSOURCE is set) and toString share one implementation.
 * NOTE(review): the two trailing numbers in each JS_FN are presumably the
 * declared argument count and flags; confirm against the JS_FN definition.
 */
static JSFunctionSpec regexp_methods[] = {
#if JS_HAS_TOSOURCE
    JS_FN(js_toSource_str,  regexp_toString,    0,0),
#endif
    JS_FN(js_toString_str,  regexp_toString,    0,0),
    JS_FN("compile",        regexp_compile,     2,0),
    JS_FN("exec",           regexp_exec,        1,0),
    JS_FN("test",           regexp_test,        1,0),
    JS_FS_END
};

/*
 * The RegExp constructor, also callable as a plain function.
 *
 * When constructing, obj is compiled from the arguments. When called as a
 * function, a RegExp argument given without flags is returned unchanged;
 * otherwise a fresh RegExp object is created and compiled.
 */
static JSBool
RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    if (JS_IsConstructing(cx))
        return regexp_compile_sub(cx, obj, argc, argv, rval);

    /*
     * If first arg is regexp and no flags are given, just return the arg.
     * (regexp_compile_sub detects the regexp + flags case and throws a
     * TypeError.)  See 15.10.3.1.
     */
    bool flagsOmitted = argc < 2 || JSVAL_IS_VOID(argv[1]);
    if (flagsOmitted &&
        !JSVAL_IS_PRIMITIVE(argv[0]) &&
        JSVAL_TO_OBJECT(argv[0])->getClass() == &js_RegExpClass) {
        *rval = argv[0];
        return JS_TRUE;
    }

    /* Otherwise, compile into a brand new RegExp object instead of obj. */
    JSObject *fresh = NewObject(cx, &js_RegExpClass, NULL, NULL);
    if (!fresh)
        return JS_FALSE;

    /*
     * regexp_compile_sub does not use rval to root its temporaries so we
     * can use it to root the new object.
     */
    *rval = OBJECT_TO_JSVAL(fresh);
    return regexp_compile_sub(cx, fresh, argc, argv, rval);
}

/*
 * Initialize the RegExp class on obj: create the constructor and prototype
 * with their property and method tables, add the Perl-style aliases for the
 * static properties to the constructor, and compile the prototype itself.
 * Returns the prototype, or NULL if any step fails.
 */
JSObject *
js_InitRegExpClass(JSContext *cx, JSObject *obj)
{
    /* Long static property name -> Perl-style alias. */
    static const struct {
        const char *name;
        const char *alias;
    } aliases[] = {
        {"input",        "$_"},
        {"multiline",    "$*"},
        {"lastMatch",    "$&"},
        {"lastParen",    "$+"},
        {"leftContext",  "$`"},
        {"rightContext", "$'"}
    };

    JSObject *proto = js_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
                                   regexp_props, regexp_methods,
                                   regexp_static_props, NULL);
    if (!proto)
        return NULL;

    JSObject *ctor = JS_GetConstructor(cx, proto);
    if (!ctor)
        return NULL;

    for (size_t k = 0; k < JS_ARRAY_LENGTH(aliases); k++) {
        if (!JS_AliasProperty(cx, ctor, aliases[k].name, aliases[k].alias))
            return NULL;
    }

    /* Give RegExp.prototype private data so it matches the empty string. */
    jsval rval;
    if (!regexp_compile_sub(cx, proto, 0, NULL, &rval))
        return NULL;

    return proto;
}

/*
 * Create a RegExp object from the given source characters and flags.
 *
 * The characters are copied into a new string, which stays rooted for the
 * rest of the function, and compiled with js_NewRegExp (ts is forwarded to
 * it). The compiled regexp becomes the private data of a new RegExp object
 * whose lastIndex is zero. Returns NULL on failure; a regexp that was
 * compiled but could not be attached to an object is destroyed.
 */
JSObject *
js_NewRegExpObject(JSContext *cx, TokenStream *ts,
                   const jschar *chars, size_t length, uintN flags)
{
    JSString *source = js_NewStringCopyN(cx, chars, length);
    if (!source)
        return NULL;
    AutoValueRooter tvr(cx, source);

    JSRegExp *re = js_NewRegExp(cx, ts, source, flags, JS_FALSE);
    if (!re)
        return NULL;

    JSObject *result = NewObject(cx, &js_RegExpClass, NULL, NULL);
    if (result) {
        result->setPrivate(re);
        result->zeroRegExpLastIndex();
    } else {
        js_DestroyRegExp(cx, re);
    }
    return result;
}

/*
 * Create a new RegExp object with prototype proto that shares obj's JSRegExp.
 * The clone's lastIndex starts at zero and a hold is taken on the shared
 * regexp on the clone's behalf. Both obj and proto must be RegExp objects.
 * Returns NULL if the new object cannot be created.
 */
JSObject * JS_FASTCALL
js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *proto)
{
    JS_ASSERT(obj->getClass() == &js_RegExpClass);
    JS_ASSERT(proto);
    JS_ASSERT(proto->getClass() == &js_RegExpClass);

    JSObject *copy = NewObjectWithGivenProto(cx, &js_RegExpClass, proto, NULL);
    if (copy) {
        JSRegExp *shared = static_cast<JSRegExp *>(obj->getPrivate());
        copy->setPrivate(shared);
        copy->zeroRegExpLastIndex();
        HOLD_REGEXP(cx, shared);
    }
    return copy;
}

/*
 * When JS_TRACER is defined, declare call info for js_CloneRegExpObject: an
 * OBJECT result and three arguments (CONTEXT, OBJECT, OBJECT).
 * NOTE(review): the meaning of the trailing 0 and ACC_STORE_ANY is defined by
 * the JS_DEFINE_CALLINFO_3 macro; confirm there before changing them.
 */
#ifdef JS_TRACER
JS_DEFINE_CALLINFO_3(extern, OBJECT, js_CloneRegExpObject, CONTEXT, OBJECT, OBJECT, 0,
                     ACC_STORE_ANY)
#endif

bool
js_ContainsRegExpMetaChars(const jschar *chars, size_t length)
{
    for (size_t i = 0; i < length; ++i) {
        jschar c = chars[i];
        switch (c) {
          /* Taken from the PatternCharacter production in 15.10.1. */
          case '^': case '$': case '\\': case '.': case '*': case '+':
          case '?': case '(': case ')': case '[': case ']': case '{':
          case '}': case '|':
            return true;
          default:;
        }
    }
    return false;
}

/* JSBool-returning wrapper around JSObject::isRegExp. */
JSBool
js_ObjectIsRegExp(JSObject *obj)
{
    return obj->isRegExp() ? JS_TRUE : JS_FALSE;
}