Encapsulate RegExpStatics more. (r=gal, b=610223)

This commit is contained in:
Chris Leary 2010-11-10 17:02:08 -08:00
parent d0066f07ca
commit 0715f9cef8
6 changed files with 212 additions and 113 deletions

View File

@ -0,0 +1,8 @@
// After a successful exec, assigning to RegExp.input must leave the statics
// derived from that match ($1, lastMatch, lastParen, rightContext) reporting
// values computed from the string that was actually matched.
var subject = "patternpatternpattern";
var globalPattern = /(pattern)/g;
globalPattern.exec(subject);

// Overwrite the input after the match has been recorded.
RegExp.input = "satturn";

var expectations = [
    ["$1", "pattern"],
    ["lastMatch", "pattern"],
    ["lastParen", "pattern"],
    ["rightContext", "patternpattern"]
];
for (var i = 0; i < expectations.length; i++)
    assertEq(RegExp[expectations[i][0]], expectations[i][1]);

View File

@ -0,0 +1,8 @@
// Sticky (/y) matching where the optional capture group never participates:
// every successful exec must report the whole match as 'abcdef' and the
// capture as undefined.
var re = /abc(WHOO!)?def/y;
var input = 'abcdefabcdefabcdef';
var count = 0;
// Declared explicitly so the loop does not create an implicit global.
var match;
while ((match = re.exec(input)) !== null) {
    print(count++);
    assertEq(match[0], 'abcdef');
    assertEq(match[1], undefined);
}
// The input is exactly three back-to-back copies of 'abcdef'; without this
// check a loop that stopped early (or never ran) would pass silently.
assertEq(count, 3);

View File

@ -393,7 +393,7 @@ regexp_resolve(JSContext *cx, JSObject *obj, jsid id, uint32 flags, JSObject **o
code; \
}
DEFINE_STATIC_GETTER(static_input_getter, return res->createInput(cx, Valueify(vp)))
DEFINE_STATIC_GETTER(static_input_getter, return res->createPendingInput(cx, Valueify(vp)))
DEFINE_STATIC_GETTER(static_multiline_getter, *vp = BOOLEAN_TO_JSVAL(res->multiline());
return true)
DEFINE_STATIC_GETTER(static_lastMatch_getter, return res->createLastMatch(cx, Valueify(vp)))
@ -423,7 +423,7 @@ DEFINE_STATIC_GETTER(static_paren9_getter, return res->createParen(cx, 8,
DEFINE_STATIC_SETTER(static_input_setter,
if (!JSVAL_IS_STRING(*vp) && !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp))
return false;
res->setInput(JSVAL_TO_STRING(*vp)))
res->setPendingInput(JSVAL_TO_STRING(*vp)))
DEFINE_STATIC_SETTER(static_multiline_setter,
if (!JSVAL_IS_BOOLEAN(*vp) && !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp))
return false;
@ -783,7 +783,7 @@ regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, Value *argv, JSBool te
argv[0] = StringValue(str);
} else {
/* Need to grab input from statics. */
str = res->getInput();
str = res->getPendingInput();
if (!str) {
const char *sourceBytes = js_GetStringBytes(cx, re->getSource());
if (sourceBytes) {

View File

@ -59,11 +59,14 @@ namespace js {
class RegExpStatics
{
typedef Vector<int, 20, SystemAllocPolicy> MatchPairs;
MatchPairs matchPairs;
JSString *input;
uintN flags;
RegExpStatics *bufferLink;
bool copied;
MatchPairs matchPairs;
/* The input that was used to produce matchPairs. */
JSString *matchPairsInput;
/* The input last set on the statics. */
JSString *pendingInput;
uintN flags;
RegExpStatics *bufferLink;
bool copied;
bool createDependent(JSContext *cx, size_t start, size_t end, Value *out) const;
@ -76,7 +79,8 @@ class RegExpStatics
dst.matchPairs.clear();
/* 'save' has already reserved space in matchPairs */
JS_ALWAYS_TRUE(dst.matchPairs.append(matchPairs));
dst.input = input;
dst.matchPairsInput = matchPairsInput;
dst.pendingInput = pendingInput;
dst.flags = flags;
}
@ -87,25 +91,6 @@ class RegExpStatics
}
}
/*
* Check whether the index at |checkValidIndex| is valid (>= 0).
* If so, construct a string for it and place it in |*out|.
* If not, place undefined in |*out|.
*/
bool makeMatch(JSContext *cx, size_t checkValidIndex, size_t pairNum, Value *out) const;
static const uintN allFlags = JSREG_FOLD | JSREG_GLOB | JSREG_STICKY | JSREG_MULTILINE;
friend class RegExp;
public:
RegExpStatics() : bufferLink(NULL), copied(false) { clear(); }
struct InitBuffer {};
explicit RegExpStatics(InitBuffer) : bufferLink(NULL), copied(false) {}
static RegExpStatics *extractFrom(JSObject *global);
/* Mutators. */
bool save(JSContext *cx, RegExpStatics *buffer) {
JS_ASSERT(!buffer->copied && !buffer->bufferLink);
buffer->bufferLink = bufferLink;
@ -123,6 +108,76 @@ class RegExpStatics
bufferLink = bufferLink->bufferLink;
}
/*
 * Debug-only consistency check between |matchPairs| and |matchPairsInput|.
 * With no pairs recorded there must be no match input; otherwise the input
 * must exist, pair 0 must be present, and every present pair must describe a
 * non-negative, ordered range that ends within the input.
 */
void checkInvariants() {
#if DEBUG
    const size_t numPairs = pairCount();
    if (numPairs == 0) {
        /* Nothing was matched, so there is no input the pairs could refer to. */
        JS_ASSERT(!matchPairsInput);
        return;
    }

    /* At least one pair is recorded, so the input it indexes into must be set. */
    JS_ASSERT(matchPairsInput);
    const size_t inputLength = matchPairsInput->length();
    JS_ASSERT(pairIsPresent(0));

    /* Pairs that are present must be valid ranges inside the input. */
    for (size_t pairNum = 0; pairNum != numPairs; ++pairNum) {
        if (pairIsPresent(pairNum)) {
            const int begin = get(pairNum, 0);
            const int end = get(pairNum, 1);
            JS_ASSERT(inputLength >= size_t(end) && end >= begin && begin >= 0);
        }
    }
#endif
}
/*
 * Fetch one index of pair |pairNum| from the flat |matchPairs| vector:
 * |which| == false selects the first (start) slot, true the second (limit).
 */
int get(size_t pairNum, bool which) const {
    JS_ASSERT(pairNum < pairCount());
    const size_t slot = 2 * pairNum + (which ? 1 : 0);
    return matchPairs[slot];
}
/*
* Check whether the index at |checkValidIndex| is valid (>= 0).
* If so, construct a string for it and place it in |*out|.
* If not, place undefined in |*out|.
*/
bool makeMatch(JSContext *cx, size_t checkValidIndex, size_t pairNum, Value *out) const;
static const uintN allFlags = JSREG_FOLD | JSREG_GLOB | JSREG_STICKY | JSREG_MULTILINE;
struct InitBuffer {};
explicit RegExpStatics(InitBuffer) : bufferLink(NULL), copied(false) {}
friend class PreserveRegExpStatics;
public:
RegExpStatics() : bufferLink(NULL), copied(false) { clear(); }
static RegExpStatics *extractFrom(JSObject *global);
/* Mutators. */
/*
 * Record the result of a match against |input|. The |matchItemCount| ints in
 * |buf| are copied verbatim into |matchPairs| (no offset is applied here), and
 * |input| becomes both the pending input and the input the pairs refer to.
 *
 * Returns false after reporting out-of-memory on |cx| if |matchPairs| cannot
 * be resized; in that case |pendingInput| has already been updated but
 * |matchPairsInput| is left as it was.
 */
bool updateFromMatch(JSContext *cx, JSString *input, int *buf, size_t matchItemCount) {
    aboutToWrite();
    pendingInput = input;

    if (matchPairs.resizeUninitialized(matchItemCount)) {
        const int *src = buf;
        for (size_t slot = 0; slot != matchItemCount; ++slot, ++src)
            matchPairs[slot] = *src;
        matchPairsInput = input;
        return true;
    }

    js_ReportOutOfMemory(cx);
    return false;
}
void setMultiline(bool enabled) {
aboutToWrite();
if (enabled)
@ -133,43 +188,61 @@ class RegExpStatics
void clear() {
aboutToWrite();
input = 0;
flags = 0;
pendingInput = NULL;
matchPairsInput = NULL;
matchPairs.clear();
}
void checkInvariants() {
if (pairCount() > 0) {
JS_ASSERT(input);
JS_ASSERT(get(0, 0) <= get(0, 1));
JS_ASSERT(get(0, 1) <= int(input->length()));
}
}
bool pairIsPresent(size_t pairNum) { return get(0, 0) != -1; }
/* Corresponds to JSAPI functionality to set the pending RegExp input. */
void reset(JSString *newInput, bool newMultiline) {
aboutToWrite();
clear();
input = newInput;
pendingInput = newInput;
setMultiline(newMultiline);
checkInvariants();
}
void setInput(JSString *newInput) {
void setPendingInput(JSString *newInput) {
aboutToWrite();
input = newInput;
pendingInput = newInput;
}
/* Accessors. */
JSString *getInput() const { return input; }
JSString *getPendingInput() const { return pendingInput; }
uintN getFlags() const { return flags; }
bool multiline() const { return flags & JSREG_MULTILINE; }
bool matched() const { JS_ASSERT(pairCount() > 0); return get(0, 1) - get(0, 0) > 0; }
size_t getParenCount() const { JS_ASSERT(pairCount() > 0); return pairCount() - 1; }
/* Start index of pair 0 (the overall match); asserted to be non-negative. */
size_t matchStart() const {
    const int begin = get(0, 0);
    JS_ASSERT(begin >= 0);
    return static_cast<size_t>(begin);
}
/*
 * Limit index of pair 0 (the overall match); asserted to be non-negative and
 * not before matchStart().
 */
size_t matchLimit() const {
    const int end = get(0, 1);
    JS_ASSERT(size_t(end) >= matchStart() && end >= 0);
    return static_cast<size_t>(end);
}
/* True when pair 0 spans at least one character; requires a recorded pair. */
bool matched() const {
    JS_ASSERT(pairCount() > 0);
    const int begin = get(0, 0);
    const int end = get(0, 1);
    return end - begin > 0;
}
/* Number of recorded pairs beyond pair 0; requires at least one recorded pair. */
size_t getParenCount() const {
    const size_t numPairs = pairCount();
    JS_ASSERT(numPairs > 0);
    return numPairs - 1;
}
void mark(JSTracer *trc) const {
if (input)
JS_CALL_STRING_TRACER(trc, input, "res->input");
if (pendingInput)
JS_CALL_STRING_TRACER(trc, pendingInput, "res->pendingInput");
if (matchPairsInput)
JS_CALL_STRING_TRACER(trc, matchPairsInput, "res->matchPairsInput");
}
size_t getParenLength(size_t parenNum) const {
@ -178,14 +251,9 @@ class RegExpStatics
return get(parenNum + 1, 1) - get(parenNum + 1, 0);
}
int get(size_t pairNum, bool which) const {
JS_ASSERT(pairNum < pairCount());
return matchPairs[2 * pairNum + which];
}
/* Value creators. */
bool createInput(JSContext *cx, Value *out) const;
bool createPendingInput(JSContext *cx, Value *out) const;
bool createLastMatch(JSContext *cx, Value *out) const { return makeMatch(cx, 0, 0, out); }
bool createLastParen(JSContext *cx, Value *out) const;
bool createLeftContext(JSContext *cx, Value *out) const;
@ -204,6 +272,26 @@ class RegExpStatics
void getRightContext(JSSubString *out) const;
};
/*
 * Scoped helper pairing RegExpStatics::save with RegExpStatics::restore:
 * init() saves |original| into an internal buffer, and the destructor calls
 * restore() on |original|.
 *
 * NOTE(review): the destructor restores unconditionally, including when
 * init() was never called or returned false -- confirm save()/restore()
 * tolerate that, and that instances are never copied.
 */
class PreserveRegExpStatics
{
/* The statics being preserved; not owned by this object. */
RegExpStatics *const original;
/* Storage handed to save(); built with the InitBuffer constructor. */
RegExpStatics buffer;
public:
/* Only records |original|; nothing is saved until init() is called. */
explicit PreserveRegExpStatics(RegExpStatics *original)
: original(original),
buffer(RegExpStatics::InitBuffer())
{}
/* Forwards to original->save(cx, &buffer) and returns its result. */
bool init(JSContext *cx) {
return original->save(cx, &buffer);
}
/* Forwards to original->restore(). */
~PreserveRegExpStatics() {
original->restore();
}
};
}
static inline bool

View File

@ -93,9 +93,8 @@ class RegExp
void handlePCREError(JSContext *cx, int error);
void handleYarrError(JSContext *cx, int error);
static inline bool initArena(JSContext *cx);
static inline void checkMatchPairs(int *buf, size_t matchItemCount);
JSObject *createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount,
size_t inputOffset);
static inline void checkMatchPairs(JSString *input, int *buf, size_t matchItemCount);
static JSObject *createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount);
inline bool executeInternal(JSContext *cx, RegExpStatics *res, JSString *input,
size_t *lastIndex, bool test, Value *rval);
@ -218,19 +217,29 @@ RegExp::initArena(JSContext *cx)
}
inline void
RegExp::checkMatchPairs(int *buf, size_t matchItemCount)
RegExp::checkMatchPairs(JSString *input, int *buf, size_t matchItemCount)
{
#if DEBUG
for (size_t i = 0; i < matchItemCount; i += 2)
JS_ASSERT(buf[i + 1] >= buf[i]); /* Limit index must be larger than the start index. */
size_t inputLength = input->length();
for (size_t i = 0; i < matchItemCount; i += 2) {
    int start = buf[i];
    int limit = buf[i + 1];
    JS_ASSERT(limit >= start); /* The limit index must not precede the start index. */
    if (start == -1)
        continue; /* This pair did not participate in the match. */
    JS_ASSERT(start >= 0);
    JS_ASSERT(size_t(limit) <= inputLength);
    /*
     * Deliberately no ordering check between pairs: start indices are not
     * monotonic in pair number. A capture inside a lookahead can begin after
     * a later-numbered capture, e.g. /(?=.(b))(a)/ on "ab" produces the
     * pairs (0,1), (1,2), (0,1).
     */
}
#endif
}
inline JSObject *
RegExp::createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount,
size_t inputOffset)
RegExp::createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount)
{
#define MATCH_VALUE(__index) (buf[(__index)] + inputOffset)
/*
* Create the result array for a match. Array contents:
* 0: matched string
@ -242,13 +251,13 @@ RegExp::createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemC
RegExpMatchBuilder builder(cx, array);
for (size_t i = 0; i < matchItemCount; i += 2) {
int start = MATCH_VALUE(i);
int end = MATCH_VALUE(i + 1);
int start = buf[i];
int end = buf[i + 1];
JSString *captured;
if (start >= 0) {
JS_ASSERT(start <= end);
JS_ASSERT((unsigned) end <= input->length());
JS_ASSERT(unsigned(end) <= input->length());
captured = js_NewDependentString(cx, input, start, end - start);
if (!(captured && builder.append(i / 2, captured)))
return NULL;
@ -261,12 +270,11 @@ RegExp::createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemC
}
}
if (!builder.appendIndex(MATCH_VALUE(0)) ||
if (!builder.appendIndex(buf[0]) ||
!builder.appendInput(input))
return NULL;
return array;
#undef MATCH_VALUE
}
inline bool
@ -297,6 +305,11 @@ RegExp::executeInternal(JSContext *cx, RegExpStatics *res, JSString *input,
const jschar *chars = input->chars();
size_t len = input->length();
/*
* inputOffset emulates sticky mode by matching from this offset into the char buf and
* subtracting the delta off at the end.
*/
size_t inputOffset = 0;
if (sticky()) {
@ -318,27 +331,29 @@ RegExp::executeInternal(JSContext *cx, RegExpStatics *res, JSString *input,
return true;
}
checkMatchPairs(buf, matchItemCount);
if (res) {
res->aboutToWrite();
res->input = input;
if (!res->matchPairs.resizeUninitialized(matchItemCount)) {
js_ReportOutOfMemory(cx);
return false;
}
/*
* Adjust buf for the inputOffset. Use of sticky is rare and the matchItemCount is small, so
* just do another pass.
*/
if (JS_UNLIKELY(inputOffset)) {
for (size_t i = 0; i < matchItemCount; ++i)
res->matchPairs[i] = buf[i] + inputOffset;
buf[i] = buf[i] < 0 ? -1 : buf[i] + inputOffset;
}
*lastIndex = buf[1] + inputOffset;
/* Make sure the populated contents of |buf| are sane values against |input|. */
checkMatchPairs(input, buf, matchItemCount);
/*
 * updateFromMatch reports out-of-memory on |cx| and returns false when it
 * cannot store the pairs; propagate that instead of continuing and returning
 * success with an error pending.
 */
if (res && !res->updateFromMatch(cx, input, buf, matchItemCount))
    return false;
*lastIndex = buf[1];
if (test) {
*rval = BooleanValue(true);
return true;
}
JSObject *array = createResult(cx, input, buf, matchItemCount, inputOffset);
JSObject *array = createResult(cx, input, buf, matchItemCount);
if (!array)
return false;
@ -530,8 +545,8 @@ inline bool
RegExpStatics::createDependent(JSContext *cx, size_t start, size_t end, Value *out) const
{
JS_ASSERT(start <= end);
JS_ASSERT(end <= input->length());
JSString *str = js_NewDependentString(cx, input, start, end - start);
JS_ASSERT(end <= matchPairsInput->length());
JSString *str = js_NewDependentString(cx, matchPairsInput, start, end - start);
if (!str)
return false;
*out = StringValue(str);
@ -539,9 +554,9 @@ RegExpStatics::createDependent(JSContext *cx, size_t start, size_t end, Value *o
}
inline bool
RegExpStatics::createInput(JSContext *cx, Value *out) const
RegExpStatics::createPendingInput(JSContext *cx, Value *out) const
{
out->setString(input ? input : cx->runtime->emptyString);
out->setString(pendingInput ? pendingInput : cx->runtime->emptyString);
return true;
}
@ -599,13 +614,13 @@ RegExpStatics::createRightContext(JSContext *cx, Value *out) const
*out = UndefinedValue();
return true;
}
return createDependent(cx, matchPairs[1], input->length(), out);
return createDependent(cx, matchPairs[1], matchPairsInput->length(), out);
}
inline void
RegExpStatics::getParen(size_t num, JSSubString *out) const
{
out->chars = input->chars() + get(num + 1, 0);
out->chars = matchPairsInput->chars() + get(num + 1, 0);
out->length = getParenLength(num);
}
@ -616,8 +631,8 @@ RegExpStatics::getLastMatch(JSSubString *out) const
*out = js_EmptySubString;
return;
}
JS_ASSERT(input);
out->chars = input->chars() + get(0, 0);
JS_ASSERT(matchPairsInput);
out->chars = matchPairsInput->chars() + get(0, 0);
JS_ASSERT(get(0, 1) >= get(0, 0));
out->length = get(0, 1) - get(0, 0);
}
@ -630,7 +645,7 @@ RegExpStatics::getLastParen(JSSubString *out) const
return;
}
size_t num = pairCount() - 1;
out->chars = input->chars() + get(num, 0);
out->chars = matchPairsInput->chars() + get(num, 0);
JS_ASSERT(get(num, 1) >= get(num, 0));
out->length = get(num, 1) - get(num, 0);
}
@ -642,7 +657,7 @@ RegExpStatics::getLeftContext(JSSubString *out) const
*out = js_EmptySubString;
return;
}
out->chars = input->chars();
out->chars = matchPairsInput->chars();
out->length = get(0, 0);
}
@ -653,9 +668,9 @@ RegExpStatics::getRightContext(JSSubString *out) const
*out = js_EmptySubString;
return;
}
out->chars = input->chars() + get(0, 1);
JS_ASSERT(get(0, 1) <= int(input->length()));
out->length = input->length() - get(0, 1);
out->chars = matchPairsInput->chars() + get(0, 1);
JS_ASSERT(get(0, 1) <= int(matchPairsInput->length()));
out->length = matchPairsInput->length() - get(0, 1);
}
}

View File

@ -1955,7 +1955,7 @@ str_search(JSContext *cx, uintN argc, Value *vp)
return false;
if (vp->isTrue())
vp->setInt32(res->get(0, 0));
vp->setInt32(res->matchStart());
else
vp->setInt32(-1);
return true;
@ -2045,26 +2045,6 @@ InterpretDollar(JSContext *cx, RegExpStatics *res, jschar *dp, jschar *ep, Repla
return false;
}
class PreserveRegExpStatics
{
js::RegExpStatics *const original;
js::RegExpStatics buffer;
public:
explicit PreserveRegExpStatics(RegExpStatics *original)
: original(original),
buffer(RegExpStatics::InitBuffer())
{}
bool init(JSContext *cx) {
return original->save(cx, &buffer);
}
~PreserveRegExpStatics() {
original->restore();
}
};
static bool
FindReplaceLength(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
{
@ -2155,7 +2135,7 @@ FindReplaceLength(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t
}
/* Push match index and input string. */
session[argi++].setInt32(res->get(0, 0));
session[argi++].setInt32(res->matchStart());
session[argi].setString(rdata.str);
if (!session.invoke(cx))
@ -2222,8 +2202,8 @@ ReplaceCallback(JSContext *cx, RegExpStatics *res, size_t count, void *p)
JSString *str = rdata.str;
size_t leftoff = rdata.leftIndex;
const jschar *left = str->chars() + leftoff;
size_t leftlen = res->get(0, 0) - leftoff;
rdata.leftIndex = res->get(0, 1);
size_t leftlen = res->matchStart() - leftoff;
rdata.leftIndex = res->matchLimit();
size_t replen = 0; /* silence 'unused' warning */
if (!FindReplaceLength(cx, res, rdata, &replen))