/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * vim: set sw=4 ts=8 et tw=78: * * ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Mozilla Communicator client code, released * March 31, 1998. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ /* * JS lexical scanner. */ #include "jsstddef.h" #include /* first to avoid trouble on some systems */ #include #include #include #ifdef HAVE_MEMORY_H #include #endif #include #include #include #include "jstypes.h" #include "jsarena.h" /* Added by JSIFY */ #include "jsutil.h" /* Added by JSIFY */ #include "jsdtoa.h" #include "jsprf.h" #include "jsapi.h" #include "jsatom.h" #include "jscntxt.h" #include "jsconfig.h" #include "jsemit.h" #include "jsexn.h" #include "jsnum.h" #include "jsopcode.h" #include "jsparse.h" #include "jsregexp.h" #include "jsscan.h" #include "jsscript.h" #if JS_HAS_XML_SUPPORT #include "jsxml.h" #endif #define JS_KEYWORD(keyword, type, op, version) \ const char js_##keyword##_str[] = #keyword; #include "jskeyword.tbl" #undef JS_KEYWORD struct keyword { const char *chars; /* C string with keyword text */ JSTokenType tokentype; /* JSTokenType */ JSOp op; /* JSOp */ JSVersion version; /* JSVersion */ }; static const struct keyword keyword_defs[] = { #define JS_KEYWORD(keyword, type, op, version) \ {js_##keyword##_str, type, op, version}, #include "jskeyword.tbl" #undef JS_KEYWORD }; #define KEYWORD_COUNT (sizeof keyword_defs / sizeof keyword_defs[0]) static const struct keyword * FindKeyword(const jschar *s, size_t length) { register size_t i; const struct keyword *kw; const char *chars; JS_ASSERT(length != 0); #define JSKW_LENGTH() length #define JSKW_AT(column) s[column] #define JSKW_GOT_MATCH(index) i = (index); goto got_match; #define JSKW_TEST_GUESS(index) i = (index); goto test_guess; #define JSKW_NO_MATCH() goto no_match; #include "jsautokw.h" #undef JSKW_NO_MATCH #undef JSKW_TEST_GUESS #undef JSKW_GOT_MATCH #undef JSKW_AT #undef JSKW_LENGTH got_match: return &keyword_defs[i]; test_guess: kw = &keyword_defs[i]; chars = kw->chars; do { if (*s++ != (unsigned char)(*chars++)) goto no_match; } while (--length != 0); return kw; no_match: return NULL; } JSTokenType js_CheckKeyword(const jschar *str, size_t length) { const struct keyword *kw; JS_ASSERT(length != 0); kw = FindKeyword(str, length); return kw ? kw->tokentype : TOK_EOF; } JS_FRIEND_API(void) js_MapKeywords(void (*mapfun)(const char *)) { size_t i; for (i = 0; i != KEYWORD_COUNT; ++i) mapfun(keyword_defs[i].chars); } JSBool js_IsIdentifier(JSString *str) { size_t length; jschar c, *chars, *end; length = JSSTRING_LENGTH(str); if (length == 0) return JS_FALSE; chars = JSSTRING_CHARS(str); c = *chars; if (!JS_ISIDSTART(c)) return JS_FALSE; end = chars + length; while (++chars != end) { c = *chars; if (!JS_ISIDENT(c)) return JS_FALSE; } return JS_TRUE; } JSTokenStream * js_NewTokenStream(JSContext *cx, const jschar *base, size_t length, const char *filename, uintN lineno, JSPrincipals *principals) { JSTokenStream *ts; ts = js_NewBufferTokenStream(cx, base, length); if (!ts) return NULL; ts->filename = filename; ts->lineno = lineno; if (principals) JSPRINCIPALS_HOLD(cx, principals); ts->principals = principals; return ts; } #define TBMIN 64 static JSBool GrowTokenBuf(JSStringBuffer *sb, size_t newlength) { JSContext *cx; jschar *base; ptrdiff_t offset, length; size_t tbsize; JSArenaPool *pool; cx = sb->data; base = sb->base; offset = PTRDIFF(sb->ptr, base, jschar); pool = &cx->tempPool; if (!base) { tbsize = TBMIN * sizeof(jschar); length = TBMIN - 1; JS_ARENA_ALLOCATE_CAST(base, jschar *, pool, tbsize); } else { length = PTRDIFF(sb->limit, base, jschar); if ((size_t)length >= ~(size_t)0 / sizeof(jschar)) { base = NULL; } else { tbsize = (length + 1) * sizeof(jschar); length += length + 1; JS_ARENA_GROW_CAST(base, jschar *, pool, tbsize, tbsize); } } if (!base) { JS_ReportOutOfMemory(cx); sb->base = STRING_BUFFER_ERROR_BASE; return JS_FALSE; } sb->base = base; sb->limit = base + length; sb->ptr = base + offset; return JS_TRUE; } JS_FRIEND_API(JSTokenStream *) js_NewBufferTokenStream(JSContext *cx, const jschar *base, size_t length) { size_t nb; JSTokenStream *ts; nb = sizeof(JSTokenStream) + JS_LINE_LIMIT * sizeof(jschar); JS_ARENA_ALLOCATE_CAST(ts, JSTokenStream *, &cx->tempPool, nb); if (!ts) { JS_ReportOutOfMemory(cx); return NULL; } memset(ts, 0, nb); ts->lineno = 1; ts->linebuf.base = ts->linebuf.limit = ts->linebuf.ptr = (jschar *)(ts + 1); ts->userbuf.base = (jschar *)base; ts->userbuf.limit = (jschar *)base + length; ts->userbuf.ptr = (jschar *)base; ts->tokenbuf.grow = GrowTokenBuf; ts->tokenbuf.data = cx; ts->listener = cx->debugHooks->sourceHandler; ts->listenerData = cx->debugHooks->sourceHandlerData; return ts; } JS_FRIEND_API(JSTokenStream *) js_NewFileTokenStream(JSContext *cx, const char *filename, FILE *defaultfp) { jschar *base; JSTokenStream *ts; FILE *file; JS_ARENA_ALLOCATE_CAST(base, jschar *, &cx->tempPool, JS_LINE_LIMIT * sizeof(jschar)); if (!base) return NULL; ts = js_NewBufferTokenStream(cx, base, JS_LINE_LIMIT); if (!ts) return NULL; if (!filename || strcmp(filename, "-") == 0) { file = defaultfp; } else { file = fopen(filename, "r"); if (!file) { JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_CANT_OPEN, filename, "No such file or directory"); return NULL; } } ts->userbuf.ptr = ts->userbuf.limit; ts->file = file; ts->filename = filename; return ts; } JS_FRIEND_API(JSBool) js_CloseTokenStream(JSContext *cx, JSTokenStream *ts) { if (ts->flags & TSF_OWNFILENAME) JS_free(cx, (void *) ts->filename); if (ts->principals) JSPRINCIPALS_DROP(cx, ts->principals); return !ts->file || fclose(ts->file) == 0; } JS_FRIEND_API(int) js_fgets(char *buf, int size, FILE *file) { int n, i, c; JSBool crflag; n = size - 1; if (n < 0) return -1; crflag = JS_FALSE; for (i = 0; i < n && (c = getc(file)) != EOF; i++) { buf[i] = c; if (c == '\n') { /* any \n ends a line */ i++; /* keep the \n; we know there is room for \0 */ break; } if (crflag) { /* \r not followed by \n ends line at the \r */ ungetc(c, file); break; /* and overwrite c in buf with \0 */ } crflag = (c == '\r'); } buf[i] = '\0'; return i; } static int32 GetChar(JSTokenStream *ts) { int32 c; ptrdiff_t i, j, len, olen; JSBool crflag; char cbuf[JS_LINE_LIMIT]; jschar *ubuf, *nl; if (ts->ungetpos != 0) { c = ts->ungetbuf[--ts->ungetpos]; } else { if (ts->linebuf.ptr == ts->linebuf.limit) { len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar); if (len <= 0) { if (!ts->file) { ts->flags |= TSF_EOF; return EOF; } /* Fill ts->userbuf so that \r and \r\n convert to \n. */ crflag = (ts->flags & TSF_CRFLAG) != 0; len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file); if (len <= 0) { ts->flags |= TSF_EOF; return EOF; } olen = len; ubuf = ts->userbuf.base; i = 0; if (crflag) { ts->flags &= ~TSF_CRFLAG; if (cbuf[0] != '\n') { ubuf[i++] = '\n'; len++; ts->linepos--; } } for (j = 0; i < len; i++, j++) ubuf[i] = (jschar) (unsigned char) cbuf[j]; ts->userbuf.limit = ubuf + len; ts->userbuf.ptr = ubuf; } if (ts->listener) { ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len, &ts->listenerTSData, ts->listenerData); } nl = ts->saveEOL; if (!nl) { /* * Any one of \n, \r, or \r\n ends a line (the longest * match wins). Also allow the Unicode line and paragraph * separators. */ for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) { /* * Try to prevent value-testing on most characters by * filtering out characters that aren't 000x or 202x. */ if ((*nl & 0xDFD0) == 0) { if (*nl == '\n') break; if (*nl == '\r') { if (nl + 1 < ts->userbuf.limit && nl[1] == '\n') nl++; break; } if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) break; } } } /* * If there was a line terminator, copy thru it into linebuf. * Else copy JS_LINE_LIMIT-1 bytes into linebuf. */ if (nl < ts->userbuf.limit) len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1; if (len >= JS_LINE_LIMIT) { len = JS_LINE_LIMIT - 1; ts->saveEOL = nl; } else { ts->saveEOL = NULL; } js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len); ts->userbuf.ptr += len; olen = len; /* * Make sure linebuf contains \n for EOL (don't do this in * userbuf because the user's string might be readonly). */ if (nl < ts->userbuf.limit) { if (*nl == '\r') { if (ts->linebuf.base[len-1] == '\r') { /* * Does the line segment end in \r? We must check * for a \n at the front of the next segment before * storing a \n into linebuf. This case matters * only when we're reading from a file. */ if (nl + 1 == ts->userbuf.limit && ts->file) { len--; ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */ if (len == 0) { /* * This can happen when a segment ends in * \r\r. Start over. ptr == limit in this * case, so we'll fall into buffer-filling * code. */ return GetChar(ts); } } else { ts->linebuf.base[len-1] = '\n'; } } } else if (*nl == '\n') { if (nl > ts->userbuf.base && nl[-1] == '\r' && ts->linebuf.base[len-2] == '\r') { len--; JS_ASSERT(ts->linebuf.base[len] == '\n'); ts->linebuf.base[len-1] = '\n'; } } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) { ts->linebuf.base[len-1] = '\n'; } } /* Reset linebuf based on adjusted segment length. */ ts->linebuf.limit = ts->linebuf.base + len; ts->linebuf.ptr = ts->linebuf.base; /* Update position of linebuf within physical userbuf line. */ if (!(ts->flags & TSF_NLFLAG)) ts->linepos += ts->linelen; else ts->linepos = 0; if (ts->linebuf.limit[-1] == '\n') ts->flags |= TSF_NLFLAG; else ts->flags &= ~TSF_NLFLAG; /* Update linelen from original segment length. */ ts->linelen = olen; } c = *ts->linebuf.ptr++; } if (c == '\n') ts->lineno++; return c; } static void UngetChar(JSTokenStream *ts, int32 c) { if (c == EOF) return; JS_ASSERT(ts->ungetpos < sizeof ts->ungetbuf / sizeof ts->ungetbuf[0]); if (c == '\n') ts->lineno--; ts->ungetbuf[ts->ungetpos++] = (jschar)c; } static int32 PeekChar(JSTokenStream *ts) { int32 c; c = GetChar(ts); UngetChar(ts, c); return c; } /* * Peek n chars ahead into ts. Return true if n chars were read, false if * there weren't enough characters in the input stream. This function cannot * be used to peek into or past a newline. */ static JSBool PeekChars(JSTokenStream *ts, intN n, jschar *cp) { intN i, j; int32 c; for (i = 0; i < n; i++) { c = GetChar(ts); if (c == EOF) break; if (c == '\n') { UngetChar(ts, c); break; } cp[i] = (jschar)c; } for (j = i - 1; j >= 0; j--) UngetChar(ts, cp[j]); return i == n; } static void SkipChars(JSTokenStream *ts, intN n) { while (--n >= 0) GetChar(ts); } static JSBool MatchChar(JSTokenStream *ts, int32 expect) { int32 c; c = GetChar(ts); if (c == expect) return JS_TRUE; UngetChar(ts, c); return JS_FALSE; } static JSBool ReportCompileErrorNumber(JSContext *cx, void *handle, uintN flags, uintN errorNumber, JSErrorReport *report, JSBool charArgs, va_list ap) { size_t linelength; jschar *linechars = NULL; char *linebytes = NULL; JSTokenStream *ts = NULL; JSCodeGenerator *cg = NULL; JSParseNode *pn = NULL; JSErrorReporter onError; JSTokenPos *tp; JSStackFrame *fp; uintN index; char *message; JSBool warning; memset(report, 0, sizeof (struct JSErrorReport)); report->flags = flags; report->errorNumber = errorNumber; message = NULL; if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL, errorNumber, &message, report, &warning, charArgs, ap)) { return JS_FALSE; } switch (flags & JSREPORT_HANDLE) { case JSREPORT_TS: ts = handle; break; case JSREPORT_CG: cg = handle; break; case JSREPORT_PN: pn = handle; ts = pn->pn_ts; break; } JS_ASSERT(!ts || ts->linebuf.limit < ts->linebuf.base + JS_LINE_LIMIT); /* * We are typically called with non-null ts and null cg from jsparse.c. * We can be called with null ts from the regexp compilation functions. * The code generator (jsemit.c) may pass null ts and non-null cg. */ if (ts) { report->filename = ts->filename; if (pn) { report->lineno = pn->pn_pos.begin.lineno; if (report->lineno != ts->lineno) goto report; } report->lineno = ts->lineno; linelength = PTRDIFF(ts->linebuf.limit, ts->linebuf.base, jschar); linechars = (jschar *)JS_malloc(cx, (linelength + 1) * sizeof(jschar)); if (!linechars) { warning = JS_FALSE; goto out; } memcpy(linechars, ts->linebuf.base, linelength * sizeof(jschar)); linechars[linelength] = 0; linebytes = js_DeflateString(cx, linechars, linelength); if (!linebytes) { warning = JS_FALSE; goto out; } report->linebuf = linebytes; tp = &ts->tokens[(ts->cursor+ts->lookahead) & NTOKENS_MASK].pos; if (pn) tp = &pn->pn_pos; /* * FIXME: What should instead happen here is that we should * find error-tokens in userbuf, if !ts->file. That will * allow us to deliver a more helpful error message, which * includes all or part of the bad string or bad token. The * code here yields something that looks truncated. * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970 */ index = 0; if (tp->begin.lineno == tp->end.lineno) { if (tp->begin.index < ts->linepos) goto report; index = tp->begin.index - ts->linepos; } report->tokenptr = report->linebuf + index; report->uclinebuf = linechars; report->uctokenptr = report->uclinebuf + index; } else if (cg) { report->filename = cg->filename; report->lineno = CG_CURRENT_LINE(cg); } else { /* * If we can't find out where the error was based on the current * frame, see if the next frame has a script/pc combo we can use. */ for (fp = cx->fp; fp; fp = fp->down) { if (fp->script && fp->pc) { report->filename = fp->script->filename; report->lineno = js_PCToLineNumber(cx, fp->script, fp->pc); break; } } } /* * If there's a runtime exception type associated with this error * number, set that as the pending exception. For errors occuring at * compile time, this is very likely to be a JSEXN_SYNTAXERR. * * If an exception is thrown but not caught, the JSREPORT_EXCEPTION * flag will be set in report.flags. Proper behavior for an error * reporter is to ignore a report with this flag for all but top-level * compilation errors. The exception will remain pending, and so long * as the non-top-level "load", "eval", or "compile" native function * returns false, the top-level reporter will eventually receive the * uncaught exception report. * * XXX it'd probably be best if there was only one call to this * function, but there seem to be two error reporter call points. */ report: onError = cx->errorReporter; /* * Try to raise an exception only if there isn't one already set -- * otherwise the exception will describe the last compile-time error, * which is likely spurious. */ if (!ts || !(ts->flags & TSF_ERROR)) { if (js_ErrorToException(cx, message, report)) onError = NULL; } /* * Suppress any compile-time errors that don't occur at the top level. * This may still fail, as interplevel may be zero in contexts where we * don't really want to call the error reporter, as when js is called * by other code which could catch the error. */ if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags)) onError = NULL; if (onError) { JSDebugErrorHook hook = cx->debugHooks->debugErrorHook; /* * If debugErrorHook is present then we give it a chance to veto * sending the error on to the regular error reporter. */ if (hook && !hook(cx, message, report, cx->debugHooks->debugErrorHookData)) { onError = NULL; } } if (onError) (*onError)(cx, message, report); out: if (linebytes) JS_free(cx, linebytes); if (linechars) JS_free(cx, linechars); if (message) JS_free(cx, message); if (report->ucmessage) JS_free(cx, (void *)report->ucmessage); if (ts && !JSREPORT_IS_WARNING(flags)) { /* Set the error flag to suppress spurious reports. */ ts->flags |= TSF_ERROR; } return warning; } JSBool js_ReportCompileErrorNumber(JSContext *cx, void *handle, uintN flags, uintN errorNumber, ...) { va_list ap; JSErrorReport report; JSBool warning; if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx)) return JS_TRUE; va_start(ap, errorNumber); warning = ReportCompileErrorNumber(cx, handle, flags, errorNumber, &report, JS_TRUE, ap); va_end(ap); /* * We have to do this here because js_ReportCompileErrorNumberUC doesn't * need to do this. */ if (report.messageArgs) { int i = 0; while (report.messageArgs[i]) JS_free(cx, (void *)report.messageArgs[i++]); JS_free(cx, (void *)report.messageArgs); } return warning; } JSBool js_ReportCompileErrorNumberUC(JSContext *cx, void *handle, uintN flags, uintN errorNumber, ...) { va_list ap; JSErrorReport report; JSBool warning; if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx)) return JS_TRUE; va_start(ap, errorNumber); warning = ReportCompileErrorNumber(cx, handle, flags, errorNumber, &report, JS_FALSE, ap); va_end(ap); if (report.messageArgs) JS_free(cx, (void *)report.messageArgs); return warning; } static JSBool GrowStringBuffer(JSStringBuffer *sb, size_t newlength) { ptrdiff_t offset; jschar *bp; offset = PTRDIFF(sb->ptr, sb->base, jschar); JS_ASSERT(offset >= 0); newlength += offset + 1; if ((size_t)offset < newlength && newlength < ~(size_t)0 / sizeof(jschar)) bp = realloc(sb->base, newlength * sizeof(jschar)); else bp = NULL; if (!bp) { free(sb->base); sb->base = STRING_BUFFER_ERROR_BASE; return JS_FALSE; } sb->base = bp; sb->ptr = bp + offset; sb->limit = bp + newlength - 1; return JS_TRUE; } static void FreeStringBuffer(JSStringBuffer *sb) { JS_ASSERT(STRING_BUFFER_OK(sb)); if (sb->base) free(sb->base); } void js_InitStringBuffer(JSStringBuffer *sb) { sb->base = sb->limit = sb->ptr = NULL; sb->data = NULL; sb->grow = GrowStringBuffer; sb->free = FreeStringBuffer; } void js_FinishStringBuffer(JSStringBuffer *sb) { sb->free(sb); } #define ENSURE_STRING_BUFFER(sb,n) \ ((sb)->ptr + (n) <= (sb)->limit || sb->grow(sb, n)) static void FastAppendChar(JSStringBuffer *sb, jschar c) { if (!STRING_BUFFER_OK(sb)) return; if (!ENSURE_STRING_BUFFER(sb, 1)) return; *sb->ptr++ = c; } void js_AppendChar(JSStringBuffer *sb, jschar c) { jschar *bp; if (!STRING_BUFFER_OK(sb)) return; if (!ENSURE_STRING_BUFFER(sb, 1)) return; bp = sb->ptr; *bp++ = c; *bp = 0; sb->ptr = bp; } #if JS_HAS_XML_SUPPORT void js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count) { jschar *bp; if (!STRING_BUFFER_OK(sb) || count == 0) return; if (!ENSURE_STRING_BUFFER(sb, count)) return; for (bp = sb->ptr; count; --count) *bp++ = c; *bp = 0; sb->ptr = bp; } void js_AppendCString(JSStringBuffer *sb, const char *asciiz) { size_t length; jschar *bp; if (!STRING_BUFFER_OK(sb) || *asciiz == '\0') return; length = strlen(asciiz); if (!ENSURE_STRING_BUFFER(sb, length)) return; for (bp = sb->ptr; length; --length) *bp++ = (jschar) *asciiz++; *bp = 0; sb->ptr = bp; } void js_AppendJSString(JSStringBuffer *sb, JSString *str) { size_t length; jschar *bp; if (!STRING_BUFFER_OK(sb)) return; length = JSSTRING_LENGTH(str); if (length == 0 || !ENSURE_STRING_BUFFER(sb, length)) return; bp = sb->ptr; js_strncpy(bp, JSSTRING_CHARS(str), length); bp += length; *bp = 0; sb->ptr = bp; } static JSBool GetXMLEntity(JSContext *cx, JSTokenStream *ts) { ptrdiff_t offset, length, i; int32 c, d; JSBool ispair; jschar *bp, digit; char *bytes; JSErrNum msg; /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */ offset = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar); FastAppendChar(&ts->tokenbuf, '&'); while ((c = GetChar(ts)) != ';') { if (c == EOF || c == '\n') { js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY); return JS_FALSE; } FastAppendChar(&ts->tokenbuf, (jschar) c); } /* Let length be the number of jschars after the '&', including the ';'. */ length = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar) - offset; bp = ts->tokenbuf.base + offset; c = d = 0; ispair = JS_FALSE; if (length > 2 && bp[1] == '#') { /* Match a well-formed XML Character Reference. */ i = 2; if (length > 3 && JS_TOLOWER(bp[i]) == 'x') { if (length > 9) /* at most 6 hex digits allowed */ goto badncr; while (++i < length) { digit = bp[i]; if (!JS7_ISHEX(digit)) goto badncr; c = (c << 4) + JS7_UNHEX(digit); } } else { while (i < length) { digit = bp[i++]; if (!JS7_ISDEC(digit)) goto badncr; c = (c * 10) + JS7_UNDEC(digit); if (c < 0) goto badncr; } } if (0x10000 <= c && c <= 0x10FFFF) { /* Form a surrogate pair (c, d) -- c is the high surrogate. */ d = 0xDC00 + (c & 0x3FF); c = 0xD7C0 + (c >> 10); ispair = JS_TRUE; } else { /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */ if (c != 0x9 && c != 0xA && c != 0xD && !(0x20 <= c && c <= 0xD7FF) && !(0xE000 <= c && c <= 0xFFFD)) { goto badncr; } } } else { /* Try to match one of the five XML 1.0 predefined entities. */ switch (length) { case 3: if (bp[2] == 't') { if (bp[1] == 'l') c = '<'; else if (bp[1] == 'g') c = '>'; } break; case 4: if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p') c = '&'; break; case 5: if (bp[3] == 'o') { if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's') c = '\''; else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't') c = '"'; } break; } if (c == 0) { msg = JSMSG_UNKNOWN_XML_ENTITY; goto bad; } } /* If we matched, retract ts->tokenbuf and store the entity's value. */ *bp++ = (jschar) c; if (ispair) *bp++ = (jschar) d; *bp = 0; ts->tokenbuf.ptr = bp; return JS_TRUE; badncr: msg = JSMSG_BAD_XML_NCR; bad: /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */ bytes = js_DeflateString(cx, bp + 1, PTRDIFF(ts->tokenbuf.ptr, bp, jschar) - 1); if (bytes) { js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, msg, bytes); JS_free(cx, bytes); } return JS_FALSE; } #endif /* JS_HAS_XML_SUPPORT */ JSTokenType js_PeekToken(JSContext *cx, JSTokenStream *ts) { JSTokenType tt; if (ts->lookahead != 0) { tt = ts->tokens[(ts->cursor + ts->lookahead) & NTOKENS_MASK].type; } else { tt = js_GetToken(cx, ts); js_UngetToken(ts); } return tt; } JSTokenType js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts) { JSTokenType tt; if (!ON_CURRENT_LINE(ts, CURRENT_TOKEN(ts).pos)) return TOK_EOL; ts->flags |= TSF_NEWLINES; tt = js_PeekToken(cx, ts); ts->flags &= ~TSF_NEWLINES; return tt; } /* * We have encountered a '\': check for a Unicode escape sequence after it, * returning the character code value if we found a Unicode escape sequence. * Otherwise, non-destructively return the original '\'. */ static int32 GetUnicodeEscape(JSTokenStream *ts) { jschar cp[5]; int32 c; if (PeekChars(ts, 5, cp) && cp[0] == 'u' && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) { c = (((((JS7_UNHEX(cp[1]) << 4) + JS7_UNHEX(cp[2])) << 4) + JS7_UNHEX(cp[3])) << 4) + JS7_UNHEX(cp[4]); SkipChars(ts, 5); return c; } return '\\'; } static JSToken * NewToken(JSTokenStream *ts, ptrdiff_t adjust) { JSToken *tp; ts->cursor = (ts->cursor + 1) & NTOKENS_MASK; tp = &CURRENT_TOKEN(ts); tp->ptr = ts->linebuf.ptr + adjust; tp->pos.begin.index = ts->linepos + PTRDIFF(tp->ptr, ts->linebuf.base, jschar) - ts->ungetpos; tp->pos.begin.lineno = tp->pos.end.lineno = (uint16)ts->lineno; return tp; } JSTokenType js_GetToken(JSContext *cx, JSTokenStream *ts) { JSTokenType tt; int32 c, qc; JSToken *tp; JSAtom *atom; JSBool hadUnicodeEscape; const struct keyword *kw; #define INIT_TOKENBUF() (ts->tokenbuf.ptr = ts->tokenbuf.base) #define TOKENBUF_LENGTH() PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar) #define TOKENBUF_OK() STRING_BUFFER_OK(&ts->tokenbuf) #define TOKENBUF_TO_ATOM() (TOKENBUF_OK() \ ? js_AtomizeChars(cx, \ TOKENBUF_BASE(), \ TOKENBUF_LENGTH(), \ 0) \ : NULL) #define ADD_TO_TOKENBUF(c) FastAppendChar(&ts->tokenbuf, (jschar) (c)) /* The following 4 macros should only be used when TOKENBUF_OK() is true. */ #define TOKENBUF_BASE() (ts->tokenbuf.base) #define TOKENBUF_CHAR(i) (ts->tokenbuf.base[i]) #define TRIM_TOKENBUF(i) (ts->tokenbuf.ptr = ts->tokenbuf.base + i) #define NUL_TERM_TOKENBUF() (*ts->tokenbuf.ptr = 0) /* Check for a pushed-back token resulting from mismatching lookahead. */ while (ts->lookahead != 0) { JS_ASSERT(!(ts->flags & TSF_XMLTEXTMODE)); ts->lookahead--; ts->cursor = (ts->cursor + 1) & NTOKENS_MASK; tt = CURRENT_TOKEN(ts).type; if (tt != TOK_EOL || (ts->flags & TSF_NEWLINES)) return tt; } /* If there was a fatal error, keep returning TOK_ERROR. */ if (ts->flags & TSF_ERROR) return TOK_ERROR; #if JS_HAS_XML_SUPPORT if (ts->flags & TSF_XMLTEXTMODE) { tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */ tp = NewToken(ts, 0); INIT_TOKENBUF(); qc = (ts->flags & TSF_XMLONLYMODE) ? '<' : '{'; while ((c = GetChar(ts)) != qc && c != '<' && c != EOF) { if (c == '&' && qc == '<') { if (!GetXMLEntity(cx, ts)) goto error; tt = TOK_XMLTEXT; continue; } if (!JS_ISXMLSPACE(c)) tt = TOK_XMLTEXT; ADD_TO_TOKENBUF(c); } UngetChar(ts, c); if (TOKENBUF_LENGTH() == 0) { atom = NULL; } else { atom = TOKENBUF_TO_ATOM(); if (!atom) goto error; } tp->pos.end.lineno = (uint16)ts->lineno; tp->t_op = JSOP_STRING; tp->t_atom = atom; goto out; } if (ts->flags & TSF_XMLTAGMODE) { tp = NewToken(ts, 0); c = GetChar(ts); if (JS_ISXMLSPACE(c)) { do { c = GetChar(ts); } while (JS_ISXMLSPACE(c)); UngetChar(ts, c); tt = TOK_XMLSPACE; goto out; } if (c == EOF) { tt = TOK_EOF; goto out; } INIT_TOKENBUF(); if (JS_ISXMLNSSTART(c)) { JSBool sawColon = JS_FALSE; ADD_TO_TOKENBUF(c); while ((c = GetChar(ts)) != EOF && JS_ISXMLNAME(c)) { if (c == ':') { int nextc; if (sawColon || (nextc = PeekChar(ts), ((ts->flags & TSF_XMLONLYMODE) || nextc != '{') && !JS_ISXMLNAME(nextc))) { js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, JSMSG_BAD_XML_QNAME); goto error; } sawColon = JS_TRUE; } ADD_TO_TOKENBUF(c); } UngetChar(ts, c); atom = TOKENBUF_TO_ATOM(); if (!atom) goto error; tp->t_op = JSOP_STRING; tp->t_atom = atom; tt = TOK_XMLNAME; goto out; } switch (c) { case '{': if (ts->flags & TSF_XMLONLYMODE) goto bad_xml_char; tt = TOK_LC; goto out; case '=': tt = TOK_ASSIGN; goto out; case '"': case '\'': qc = c; while ((c = GetChar(ts)) != qc) { if (c == EOF) { js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, JSMSG_UNTERMINATED_STRING); goto error; } /* * XML attribute values are double-quoted when pretty-printed, * so escape " if it is expressed directly in a single-quoted * attribute value. */ if (c == '"' && !(ts->flags & TSF_XMLONLYMODE)) { JS_ASSERT(qc == '\''); js_AppendCString(&ts->tokenbuf, js_quot_entity_str); continue; } if (c == '&' && (ts->flags & TSF_XMLONLYMODE)) { if (!GetXMLEntity(cx, ts)) goto error; continue; } ADD_TO_TOKENBUF(c); } atom = TOKENBUF_TO_ATOM(); if (!atom) goto error; tp->pos.end.lineno = (uint16)ts->lineno; tp->t_op = JSOP_STRING; tp->t_atom = atom; tt = TOK_XMLATTR; goto out; case '>': tt = TOK_XMLTAGC; goto out; case '/': if (MatchChar(ts, '>')) { tt = TOK_XMLPTAGC; goto out; } /* FALL THROUGH */ bad_xml_char: default: js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER); goto error; } /* NOTREACHED */ } #endif /* JS_HAS_XML_SUPPORT */ retry: do { c = GetChar(ts); if (c == '\n') { ts->flags &= ~TSF_DIRTYLINE; if (ts->flags & TSF_NEWLINES) break; } } while (JS_ISSPACE(c)); tp = NewToken(ts, -1); if (c == EOF) { tt = TOK_EOF; goto out; } hadUnicodeEscape = JS_FALSE; if (JS_ISIDSTART(c) || (c == '\\' && (c = GetUnicodeEscape(ts), hadUnicodeEscape = JS_ISIDSTART(c)))) { INIT_TOKENBUF(); for (;;) { ADD_TO_TOKENBUF(c); c = GetChar(ts); if (c == '\\') { c = GetUnicodeEscape(ts); if (!JS_ISIDENT(c)) break; hadUnicodeEscape = JS_TRUE; } else { if (!JS_ISIDENT(c)) break; } } UngetChar(ts, c); /* * Check for keywords unless we saw Unicode escape or parser asks * to ignore keywords. */ if (!hadUnicodeEscape && !(ts->flags & TSF_KEYWORD_IS_NAME) && TOKENBUF_OK() && (kw = FindKeyword(TOKENBUF_BASE(), TOKENBUF_LENGTH()))) { if (kw->tokentype == TOK_RESERVED) { if (!js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_WARNING | JSREPORT_STRICT, JSMSG_RESERVED_ID, kw->chars)) { goto error; } } else if (kw->version <= JSVERSION_NUMBER(cx)) { tt = kw->tokentype; tp->t_op = (JSOp) kw->op; goto out; } } atom = TOKENBUF_TO_ATOM(); if (!atom) goto error; tp->t_op = JSOP_NAME; tp->t_atom = atom; tt = TOK_NAME; goto out; } if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(PeekChar(ts)))) { jsint radix; const jschar *endptr; jsdouble dval; radix = 10; INIT_TOKENBUF(); if (c == '0') { ADD_TO_TOKENBUF(c); c = GetChar(ts); if (JS_TOLOWER(c) == 'x') { ADD_TO_TOKENBUF(c); c = GetChar(ts); radix = 16; } else if (JS7_ISDEC(c)) { radix = 8; } } while (JS7_ISHEX(c)) { if (radix < 16) { if (JS7_ISLET(c)) break; /* * We permit 08 and 09 as decimal numbers, which makes our * behaviour a superset of the ECMA numeric grammar. We might * not always be so permissive, so we warn about it. */ if (radix == 8 && c >= '8') { if (!js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_WARNING, JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) { goto error; } radix = 10; } } ADD_TO_TOKENBUF(c); c = GetChar(ts); } if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) { if (c == '.') { do { ADD_TO_TOKENBUF(c); c = GetChar(ts); } while (JS7_ISDEC(c)); } if (JS_TOLOWER(c) == 'e') { ADD_TO_TOKENBUF(c); c = GetChar(ts); if (c == '+' || c == '-') { ADD_TO_TOKENBUF(c); c = GetChar(ts); } if (!JS7_ISDEC(c)) { js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, JSMSG_MISSING_EXPONENT); goto error; } do { ADD_TO_TOKENBUF(c); c = GetChar(ts); } while (JS7_ISDEC(c)); } } /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */ UngetChar(ts, c); ADD_TO_TOKENBUF(0); if (!TOKENBUF_OK()) goto error; if (radix == 10) { if (!js_strtod(cx, TOKENBUF_BASE(), &endptr, &dval)) { js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY); goto error; } } else { if (!js_strtointeger(cx, TOKENBUF_BASE(), &endptr, radix, &dval)) { js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY); goto error; } } tp->t_dval = dval; tt = TOK_NUMBER; goto out; } if (c == '"' || c == '\'') { qc = c; INIT_TOKENBUF(); while ((c = GetChar(ts)) != qc) { if (c == '\n' || c == EOF) { UngetChar(ts, c); js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR, JSMSG_UNTERMINATED_STRING); goto error; } if (c == '\\') { switch (c = GetChar(ts)) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; default: if ('0' <= c && c < '8') { int32 val = JS7_UNDEC(c); c = PeekChar(ts); if ('0' <= c && c < '8') { val = 8 * val + JS7_UNDEC(c); GetChar(ts); c = PeekChar(ts); if ('0' <= c && c < '8') { int32 save = val; val = 8 * val + JS7_UNDEC(c); if (val <= 0377) GetChar(ts); else val = save; } } c = (jschar)val; } else if (c == 'u') { jschar cp[4]; if (PeekChars(ts, 4, cp) && JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) { c = (((((JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1])) << 4) + JS7_UNHEX(cp[2])) << 4) + JS7_UNHEX(cp[3]); SkipChars(ts, 4); } } else if (c == 'x') { jschar cp[2]; if (PeekChars(ts, 2, cp) && JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) { c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]); SkipChars(ts, 2); } } else if (c == '\n') { /* ECMA follows C by removing escaped newlines. */ continue; } break; } } ADD_TO_TOKENBUF(c); } atom = TOKENBUF_TO_ATOM(); if (!atom) goto error; tp->pos.end.lineno = (uint16)ts->lineno; tp->t_op = JSOP_STRING; tp->t_atom = atom; tt = TOK_STRING; goto out; } switch (c) { case '\n': tt = TOK_EOL; goto eol_out; case ';': tt = TOK_SEMI; break; case '[': tt = TOK_LB; break; case ']': tt = TOK_RB; break; case '{': tt = TOK_LC; break; case '}': tt = TOK_RC; break; case '(': tt = TOK_LP; break; case ')': tt = TOK_RP; break; case ',': tt = TOK_COMMA; break; case '?': tt = TOK_HOOK; break; case '.': #if JS_HAS_XML_SUPPORT if (MatchChar(ts, c)) tt = TOK_DBLDOT; else #endif tt = TOK_DOT; break; case ':': #if JS_HAS_XML_SUPPORT if (MatchChar(ts, c)) { tt = TOK_DBLCOLON; break; } #endif /* * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an * object initializer, likewise for setter. */ tp->t_op = JSOP_NOP; tt = TOK_COLON; break; case '|': if (MatchChar(ts, c)) { tt = TOK_OR; } else if (MatchChar(ts, '=')) { tp->t_op = JSOP_BITOR; tt = TOK_ASSIGN; } else { tt = TOK_BITOR; } break; case '^': if (MatchChar(ts, '=')) { tp->t_op = JSOP_BITXOR; tt = TOK_ASSIGN; } else { tt = TOK_BITXOR; } break; case '&': if (MatchChar(ts, c)) { tt = TOK_AND; } else if (MatchChar(ts, '=')) { tp->t_op = JSOP_BITAND; tt = TOK_ASSIGN; } else { tt = TOK_BITAND; } break; case '=': if (MatchChar(ts, c)) { tp->t_op = MatchChar(ts, c) ? JSOP_STRICTEQ : JSOP_EQ; tt = TOK_EQOP; } else { tp->t_op = JSOP_NOP; tt = TOK_ASSIGN; } break; case '!': if (MatchChar(ts, '=')) { tp->t_op = MatchChar(ts, '=') ? JSOP_STRICTNE : JSOP_NE; tt = TOK_EQOP; } else { tp->t_op = JSOP_NOT; tt = TOK_UNARYOP; } break; #if JS_HAS_XML_SUPPORT case '@': tt = TOK_AT; break; #endif case '<': #if JS_HAS_XML_SUPPORT /* * After much testing, it's clear that Postel's advice to protocol * designers ("be liberal in what you accept, and conservative in what * you send") invites a natural-law repercussion for JS as "protocol": * * "If you are liberal in what you accept, others will utterly fail to * be conservative in what they send." * * Which means you will get