gecko/js/src/jsstr.h

1035 lines
34 KiB
C
Raw Normal View History

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code, released
* March 31, 1998.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef jsstr_h___
#define jsstr_h___
/*
* JS string type implementation.
*
* A JS string is a counted array of unicode characters. To support handoff
* of API client memory, the chars are allocated separately from the length,
* necessitating a pointer after the count, to form a separately allocated
* string descriptor. String descriptors are GC'ed, while their chars are
* allocated from the malloc heap.
*/
#include <ctype.h>
2010-07-14 23:19:36 -07:00
#include "jsapi.h"
#include "jsprvtd.h"
#include "jshashtable.h"
#include "jslock.h"
#include "jsobj.h"
2010-07-14 23:19:36 -07:00
#include "jsvalue.h"
2010-09-24 10:54:39 -07:00
#include "jscell.h"
enum {
UNIT_STRING_LIMIT = 256U,
SMALL_CHAR_LIMIT = 128U, /* Bigger chars cannot be in a length-2 string. */
NUM_SMALL_CHARS = 64U,
INT_STRING_LIMIT = 256U,
NUM_HUNDRED_STRINGS = 156U
};
extern jschar *
js_GetDependentStringChars(JSString *str);
extern JSString * JS_FASTCALL
js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
JS_STATIC_ASSERT(JS_BITS_PER_WORD >= 32);
2010-07-16 17:41:22 -07:00
struct JSRopeBufferInfo {
/* Number of jschars we can hold, not including null terminator. */
size_t capacity;
2010-07-16 17:41:22 -07:00
};
/* Forward declaration for friending. */
namespace js { namespace mjit {
class Compiler;
}}
/*
* The GC-thing "string" type.
*
2010-07-16 17:41:22 -07:00
* In FLAT strings, the mChars field points to a flat character array owned by
* its GC-thing descriptor. The array is terminated at index length by a zero
* character and the size of the array in bytes is
* (length + 1) * sizeof(jschar). The terminator is purely a backstop, in case
* the chars pointer flows out to native code that requires \u0000 termination.
*
* A flat string with the ATOMIZED flag means that the string is hashed as
* an atom. This flag is used to avoid re-hashing the already-atomized string.
*
* A flat string with the EXTENSIBLE flag means that the string may change into
* a dependent string as part of an optimization with js_ConcatStrings:
* extending |str1 = "abc"| with the character |str2 = str1 + "d"| will place
* "d" in the extra capacity from |str1|, make that the buffer for |str2|, and
* turn |str1| into a dependent string of |str2|.
*
* Flat strings without the EXTENSIBLE flag can be safely accessed by multiple
* threads.
2010-07-16 17:41:22 -07:00
*
* When the string is DEPENDENT, the string depends on characters of another
* string strongly referenced by the base field. The base member may point to
* another dependent string if chars() has not been called yet.
*
* When a string is a ROPE, it represents the lazy concatenation of other
* strings. In general, the nodes reachable from any rope form a dag.
*/
struct JSString
{
friend class js::TraceRecorder;
friend class js::mjit::Compiler;
friend JSAtom *js_AtomizeString(JSContext *cx, JSString *str, uintN flags);
/*
* Not private because we want to be able to use static initializers for
* them. Don't use these directly! FIXME bug 614459.
*/
size_t lengthAndFlags; /* in all strings */
union {
jschar *chars; /* in non-rope strings */
JSString *left; /* in rope strings */
} u;
2010-07-16 17:41:22 -07:00
union {
jschar inlineStorage[4]; /* in short strings */
struct {
union {
JSString *right; /* in rope strings */
JSString *base; /* in dependent strings */
size_t capacity; /* in extensible flat strings */
};
union {
JSString *parent; /* temporarily used during flatten */
size_t reserved; /* may use for bug 615290 */
};
} s;
size_t externalStringType; /* in external strings */
};
/*
* The lengthAndFlags field in string headers has data arranged in the
2010-07-16 17:41:22 -07:00
* following way:
*
* [ length (bits 4-31) ][ flags (bits 2-3) ][ type (bits 0-1) ]
*
* The length is packed in lengthAndFlags, even in string types that don't
2010-07-16 17:41:22 -07:00
* need 3 other fields, to make the length check simpler.
*
* When the string type is FLAT, the flags can contain ATOMIZED or
* EXTENSIBLE.
*/
static const size_t TYPE_FLAGS_MASK = JS_BITMASK(4);
static const size_t LENGTH_SHIFT = 4;
2010-07-16 17:41:22 -07:00
static const size_t TYPE_MASK = JS_BITMASK(2);
static const size_t FLAT = 0x0;
static const size_t DEPENDENT = 0x1;
static const size_t ROPE = 0x2;
2010-07-16 17:41:22 -07:00
/* Allow checking 1 bit for dependent/rope strings. */
static const size_t DEPENDENT_BIT = JS_BIT(0);
static const size_t ROPE_BIT = JS_BIT(1);
2010-07-16 17:41:22 -07:00
static const size_t ATOMIZED = JS_BIT(2);
static const size_t EXTENSIBLE = JS_BIT(3);
2010-07-16 17:41:22 -07:00
size_t buildLengthAndFlags(size_t length, size_t flags) {
return (length << LENGTH_SHIFT) | flags;
}
2010-09-24 10:54:39 -07:00
inline js::gc::Cell *asCell() {
return reinterpret_cast<js::gc::Cell *>(this);
}
2010-09-24 10:54:39 -07:00
inline js::gc::FreeCell *asFreeCell() {
return reinterpret_cast<js::gc::FreeCell *>(this);
}
/*
* Generous but sane length bound; the "-1" is there for comptibility with
* OOM tests.
*/
static const size_t MAX_LENGTH = (1 << 28) - 1;
JS_ALWAYS_INLINE bool isDependent() const {
return lengthAndFlags & DEPENDENT_BIT;
}
JS_ALWAYS_INLINE bool isFlat() const {
return (lengthAndFlags & TYPE_MASK) == FLAT;
}
JS_ALWAYS_INLINE bool isExtensible() const {
JS_ASSERT_IF(lengthAndFlags & EXTENSIBLE, isFlat());
return lengthAndFlags & EXTENSIBLE;
}
JS_ALWAYS_INLINE bool isAtomized() const {
JS_ASSERT_IF(lengthAndFlags & ATOMIZED, isFlat());
return lengthAndFlags & ATOMIZED;
2010-07-16 17:41:22 -07:00
}
JS_ALWAYS_INLINE bool isRope() const {
return lengthAndFlags & ROPE_BIT;
}
JS_ALWAYS_INLINE jschar *chars() {
ensureNotRope();
return u.chars;
}
JS_ALWAYS_INLINE size_t length() const {
return lengthAndFlags >> LENGTH_SHIFT;
}
JS_ALWAYS_INLINE bool empty() const {
return lengthAndFlags <= TYPE_FLAGS_MASK;
}
JS_ALWAYS_INLINE void getCharsAndLength(const jschar *&chars, size_t &length) {
chars = this->chars();
length = this->length();
}
JS_ALWAYS_INLINE void getCharsAndEnd(const jschar *&chars, const jschar *&end) {
end = length() + (chars = this->chars());
}
2010-12-08 08:33:00 -08:00
JS_ALWAYS_INLINE void initFlatNotTerminated(jschar *chars, size_t length) {
JS_ASSERT(length <= MAX_LENGTH);
JS_ASSERT(!isStatic(this));
lengthAndFlags = buildLengthAndFlags(length, FLAT);
u.chars = chars;
2010-07-16 17:41:22 -07:00
}
2010-12-08 08:33:00 -08:00
/* Specific flat string initializer and accessor methods. */
JS_ALWAYS_INLINE void initFlat(jschar *chars, size_t length) {
initFlatNotTerminated(chars, length);
JS_ASSERT(chars[length] == jschar(0));
}
JS_ALWAYS_INLINE void initShortString(jschar *chars, size_t length) {
JS_ASSERT(length <= MAX_LENGTH);
JS_ASSERT(chars >= inlineStorage && chars < (jschar *)(this + 2));
JS_ASSERT(!isStatic(this));
lengthAndFlags = buildLengthAndFlags(length, FLAT);
u.chars = chars;
}
JS_ALWAYS_INLINE void initFlatExtensible(jschar *chars, size_t length, size_t cap) {
2010-07-16 17:41:22 -07:00
JS_ASSERT(length <= MAX_LENGTH);
2010-12-08 08:33:00 -08:00
JS_ASSERT(chars[length] == jschar(0));
JS_ASSERT(!isStatic(this));
lengthAndFlags = buildLengthAndFlags(length, FLAT | EXTENSIBLE);
u.chars = chars;
s.capacity = cap;
}
JS_ALWAYS_INLINE jschar *flatChars() const {
JS_ASSERT(isFlat());
return u.chars;
}
JS_ALWAYS_INLINE size_t flatLength() const {
JS_ASSERT(isFlat());
return length();
}
inline void flatSetAtomized() {
2010-07-16 17:41:22 -07:00
JS_ASSERT(isFlat());
JS_ASSERT(!isStatic(this));
lengthAndFlags |= ATOMIZED;
}
inline void flatClearExtensible() {
/*
* N.B. This may be called on static strings, which may be in read-only
* memory, so we cannot unconditionally apply the mask.
*/
JS_ASSERT(isFlat());
if (lengthAndFlags & EXTENSIBLE)
lengthAndFlags &= ~EXTENSIBLE;
}
/*
* The chars pointer should point somewhere inside the buffer owned by base.
* The caller still needs to pass base for GC purposes.
*/
inline void initDependent(JSString *base, jschar *chars, size_t length) {
JS_ASSERT(!isStatic(this));
JS_ASSERT(base->isFlat());
JS_ASSERT(chars >= base->chars() && chars < base->chars() + base->length());
JS_ASSERT(length <= base->length() - (chars - base->chars()));
lengthAndFlags = buildLengthAndFlags(length, DEPENDENT);
u.chars = chars;
s.base = base;
}
inline JSString *dependentBase() const {
JS_ASSERT(isDependent());
return s.base;
}
JS_ALWAYS_INLINE jschar *dependentChars() {
JS_ASSERT(isDependent());
return u.chars;
}
inline size_t dependentLength() const {
JS_ASSERT(isDependent());
return length();
}
const jschar *undepend(JSContext *cx);
const jschar *getCharsZ(JSContext *cx) {
if (!isFlat())
return undepend(cx);
return flatChars();
}
inline bool ensureNotDependent(JSContext *cx) {
return !isDependent() || undepend(cx);
2010-07-16 17:41:22 -07:00
}
const jschar *nonRopeChars() const {
JS_ASSERT(!isRope());
return u.chars;
2010-07-16 17:41:22 -07:00
}
/* Rope-related initializers and accessors. */
inline void initRopeNode(JSString *left, JSString *right, size_t length) {
JS_ASSERT(left->length() + right->length() == length);
lengthAndFlags = buildLengthAndFlags(length, ROPE);
u.left = left;
s.right = right;
2010-07-16 17:41:22 -07:00
}
inline JSString *ropeLeft() const {
JS_ASSERT(isRope());
return u.left;
2010-07-16 17:41:22 -07:00
}
inline JSString *ropeRight() const {
JS_ASSERT(isRope());
return s.right;
2010-07-16 17:41:22 -07:00
}
inline void finishTraversalConversion(JSString *base, jschar *baseBegin, jschar *end) {
JS_ASSERT(baseBegin <= u.chars && u.chars <= end);
lengthAndFlags = buildLengthAndFlags(end - u.chars, DEPENDENT);
s.base = base;
2010-07-16 17:41:22 -07:00
}
void flatten();
2010-07-16 17:41:22 -07:00
void ensureNotRope() {
2010-07-16 17:41:22 -07:00
if (isRope())
flatten();
}
typedef uint8 SmallChar;
static inline bool fitsInSmallChar(jschar c) {
return c < SMALL_CHAR_LIMIT && toSmallChar[c] != INVALID_SMALL_CHAR;
}
static inline bool isUnitString(void *ptr) {
jsuword delta = reinterpret_cast<jsuword>(ptr) -
reinterpret_cast<jsuword>(unitStringTable);
if (delta >= UNIT_STRING_LIMIT * sizeof(JSString))
return false;
/* If ptr points inside the static array, it must be well-aligned. */
JS_ASSERT(delta % sizeof(JSString) == 0);
return true;
}
static inline bool isLength2String(void *ptr) {
jsuword delta = reinterpret_cast<jsuword>(ptr) -
reinterpret_cast<jsuword>(length2StringTable);
if (delta >= NUM_SMALL_CHARS * NUM_SMALL_CHARS * sizeof(JSString))
return false;
/* If ptr points inside the static array, it must be well-aligned. */
JS_ASSERT(delta % sizeof(JSString) == 0);
return true;
}
static inline bool isHundredString(void *ptr) {
jsuword delta = reinterpret_cast<jsuword>(ptr) -
reinterpret_cast<jsuword>(hundredStringTable);
if (delta >= NUM_HUNDRED_STRINGS * sizeof(JSString))
return false;
/* If ptr points inside the static array, it must be well-aligned. */
JS_ASSERT(delta % sizeof(JSString) == 0);
return true;
}
static inline bool isStatic(void *ptr) {
return isUnitString(ptr) || isLength2String(ptr) || isHundredString(ptr);
}
#ifdef __SUNPRO_CC
#pragma align 8 (__1cIJSStringPunitStringTable_, __1cIJSStringSlength2StringTable_, __1cIJSStringShundredStringTable_)
#endif
static const SmallChar INVALID_SMALL_CHAR = -1;
static const jschar fromSmallChar[];
static const SmallChar toSmallChar[];
static const JSString unitStringTable[];
static const JSString length2StringTable[];
static const JSString hundredStringTable[];
/*
* Since int strings can be unit strings, length-2 strings, or hundred
* strings, we keep a table to map from integer to the correct string.
*/
static const JSString *const intStringTable[];
static JSString *unitString(jschar c);
static JSString *getUnitString(JSContext *cx, JSString *str, size_t index);
static JSString *length2String(jschar c1, jschar c2);
static JSString *length2String(uint32 i);
static JSString *intString(jsint i);
static JSString *lookupStaticString(const jschar *chars, size_t length);
JS_ALWAYS_INLINE void finalize(JSContext *cx);
static size_t offsetOfLengthAndFlags() {
return offsetof(JSString, lengthAndFlags);
}
static size_t offsetOfChars() {
return offsetof(JSString, u.chars);
}
static void staticAsserts() {
JS_STATIC_ASSERT(((JSString::MAX_LENGTH << JSString::LENGTH_SHIFT) >>
JSString::LENGTH_SHIFT) == JSString::MAX_LENGTH);
}
};
struct JSFlatString : JSString
{
};
JS_STATIC_ASSERT(sizeof(JSFlatString) == sizeof(JSString));
struct JSExternalString : JSString
{
static const uintN TYPE_LIMIT = 8;
static JSStringFinalizeOp str_finalizers[TYPE_LIMIT];
static intN changeFinalizer(JSStringFinalizeOp oldop,
JSStringFinalizeOp newop) {
for (uintN i = 0; i != JS_ARRAY_LENGTH(str_finalizers); i++) {
if (str_finalizers[i] == oldop) {
str_finalizers[i] = newop;
return intN(i);
}
}
return -1;
}
void finalize(JSContext *cx);
void finalize();
};
JS_STATIC_ASSERT(sizeof(JSString) == sizeof(JSExternalString));
/*
* Short strings should be created in cases where it's worthwhile to avoid
* mallocing the string buffer for a small string. We keep 2 string headers'
* worth of space in short strings so that more strings can be stored this way.
*/
class JSShortString : public js::gc::Cell
{
JSString mHeader;
JSString mDummy;
public:
/*
* Set the length of the string, and return a buffer for the caller to write
* to. This buffer must be written immediately, and should not be modified
* afterward.
*/
inline jschar *init(size_t length) {
JS_ASSERT(length <= MAX_SHORT_STRING_LENGTH);
mHeader.initShortString(mHeader.inlineStorage, length);
return mHeader.inlineStorage;
}
inline jschar *getInlineStorageBeforeInit() {
return mHeader.inlineStorage;
}
inline void initAtOffsetInBuffer(jschar *p, size_t length) {
JS_ASSERT(p >= mHeader.inlineStorage && p < mHeader.inlineStorage + MAX_SHORT_STRING_LENGTH);
mHeader.initShortString(p, length);
}
inline void resetLength(size_t length) {
mHeader.initShortString(mHeader.flatChars(), length);
}
inline JSString *header() {
return &mHeader;
}
static const size_t FREE_STRING_WORDS = 2;
static const size_t MAX_SHORT_STRING_LENGTH =
((sizeof(JSString) + FREE_STRING_WORDS * sizeof(size_t)) / sizeof(jschar)) - 1;
static inline bool fitsIntoShortString(size_t length) {
return length <= MAX_SHORT_STRING_LENGTH;
}
JS_ALWAYS_INLINE void finalize(JSContext *cx);
static void staticAsserts() {
JS_STATIC_ASSERT(offsetof(JSString, inlineStorage) ==
sizeof(JSString) - JSShortString::FREE_STRING_WORDS * sizeof(void *));
JS_STATIC_ASSERT(offsetof(JSShortString, mDummy) == sizeof(JSString));
JS_STATIC_ASSERT(offsetof(JSString, inlineStorage) +
sizeof(jschar) * (JSShortString::MAX_SHORT_STRING_LENGTH + 1) ==
sizeof(JSShortString));
}
};
namespace js {
2010-07-16 17:41:22 -07:00
/*
* When an algorithm does not need a string represented as a single linear
* array of characters, this range utility may be used to traverse the string a
* sequence of linear arrays of characters. This avoids flattening ropes.
2010-07-16 17:41:22 -07:00
*
* Implemented in jsstrinlines.h.
2010-07-16 17:41:22 -07:00
*/
class StringSegmentRange;
2010-07-16 17:41:22 -07:00
/*
* Utility for building a rope (lazy concatenation) of strings.
*/
class RopeBuilder;
2010-07-16 17:41:22 -07:00
} /* namespace js */
2010-07-16 17:41:22 -07:00
extern const jschar *
js_GetStringChars(JSContext *cx, JSString *str);
extern const jschar *
js_UndependString(JSContext *cx, JSString *str);
extern JSBool
js_MakeStringImmutable(JSContext *cx, JSString *str);
extern JSString * JS_FASTCALL
js_toLowerCase(JSContext *cx, JSString *str);
extern JSString * JS_FASTCALL
js_toUpperCase(JSContext *cx, JSString *str);
struct JSSubString {
size_t length;
const jschar *chars;
};
extern jschar js_empty_ucstr[];
extern JSSubString js_EmptySubString;
/* Unicode character attribute lookup tables. */
extern const uint8 js_X[];
extern const uint8 js_Y[];
extern const uint32 js_A[];
/* Enumerated Unicode general category types. */
typedef enum JSCharType {
JSCT_UNASSIGNED = 0,
JSCT_UPPERCASE_LETTER = 1,
JSCT_LOWERCASE_LETTER = 2,
JSCT_TITLECASE_LETTER = 3,
JSCT_MODIFIER_LETTER = 4,
JSCT_OTHER_LETTER = 5,
JSCT_NON_SPACING_MARK = 6,
JSCT_ENCLOSING_MARK = 7,
JSCT_COMBINING_SPACING_MARK = 8,
JSCT_DECIMAL_DIGIT_NUMBER = 9,
JSCT_LETTER_NUMBER = 10,
JSCT_OTHER_NUMBER = 11,
JSCT_SPACE_SEPARATOR = 12,
JSCT_LINE_SEPARATOR = 13,
JSCT_PARAGRAPH_SEPARATOR = 14,
JSCT_CONTROL = 15,
JSCT_FORMAT = 16,
JSCT_PRIVATE_USE = 18,
JSCT_SURROGATE = 19,
JSCT_DASH_PUNCTUATION = 20,
JSCT_START_PUNCTUATION = 21,
JSCT_END_PUNCTUATION = 22,
JSCT_CONNECTOR_PUNCTUATION = 23,
JSCT_OTHER_PUNCTUATION = 24,
JSCT_MATH_SYMBOL = 25,
JSCT_CURRENCY_SYMBOL = 26,
JSCT_MODIFIER_SYMBOL = 27,
JSCT_OTHER_SYMBOL = 28
} JSCharType;
/* Character classifying and mapping macros, based on java.lang.Character. */
#define JS_CCODE(c) (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]])
#define JS_CTYPE(c) (JS_CCODE(c) & 0x1F)
#define JS_ISALPHA(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
(1 << JSCT_LOWERCASE_LETTER) | \
(1 << JSCT_TITLECASE_LETTER) | \
(1 << JSCT_MODIFIER_LETTER) | \
(1 << JSCT_OTHER_LETTER)) \
>> JS_CTYPE(c)) & 1)
#define JS_ISALNUM(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
(1 << JSCT_LOWERCASE_LETTER) | \
(1 << JSCT_TITLECASE_LETTER) | \
(1 << JSCT_MODIFIER_LETTER) | \
(1 << JSCT_OTHER_LETTER) | \
(1 << JSCT_DECIMAL_DIGIT_NUMBER)) \
>> JS_CTYPE(c)) & 1)
/* A unicode letter, suitable for use in an identifier. */
#define JS_ISLETTER(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
(1 << JSCT_LOWERCASE_LETTER) | \
(1 << JSCT_TITLECASE_LETTER) | \
(1 << JSCT_MODIFIER_LETTER) | \
(1 << JSCT_OTHER_LETTER) | \
(1 << JSCT_LETTER_NUMBER)) \
>> JS_CTYPE(c)) & 1)
/*
* 'IdentifierPart' from ECMA grammar, is Unicode letter or combining mark or
* digit or connector punctuation.
*/
#define JS_ISIDPART(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
(1 << JSCT_LOWERCASE_LETTER) | \
(1 << JSCT_TITLECASE_LETTER) | \
(1 << JSCT_MODIFIER_LETTER) | \
(1 << JSCT_OTHER_LETTER) | \
(1 << JSCT_LETTER_NUMBER) | \
(1 << JSCT_NON_SPACING_MARK) | \
(1 << JSCT_COMBINING_SPACING_MARK) | \
(1 << JSCT_DECIMAL_DIGIT_NUMBER) | \
(1 << JSCT_CONNECTOR_PUNCTUATION)) \
>> JS_CTYPE(c)) & 1)
/* Unicode control-format characters, ignored in input */
#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
/*
* This table is used in JS_ISWORD. The definition has external linkage to
* allow the raw table data to be used in the regular expression compiler.
*/
extern const bool js_alnum[];
/*
* This macro performs testing for the regular expression word class \w, which
* is defined by ECMA-262 15.10.2.6 to be [0-9A-Z_a-z]. If we want a
* Unicode-friendlier definition of "word", we should rename this macro to
* something regexp-y.
*/
#define JS_ISWORD(c) ((c) < 128 && js_alnum[(c)])
#define JS_ISIDSTART(c) (JS_ISLETTER(c) || (c) == '_' || (c) == '$')
#define JS_ISIDENT(c) (JS_ISIDPART(c) || (c) == '_' || (c) == '$')
#define JS_ISXMLSPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\r' || \
(c) == '\n')
#define JS_ISXMLNSSTART(c) ((JS_CCODE(c) & 0x00000100) || (c) == '_')
#define JS_ISXMLNS(c) ((JS_CCODE(c) & 0x00000080) || (c) == '.' || \
(c) == '-' || (c) == '_')
#define JS_ISXMLNAMESTART(c) (JS_ISXMLNSSTART(c) || (c) == ':')
#define JS_ISXMLNAME(c) (JS_ISXMLNS(c) || (c) == ':')
#define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
const jschar BYTE_ORDER_MARK = 0xFEFF;
const jschar NO_BREAK_SPACE = 0x00A0;
static inline bool
JS_ISSPACE(jschar c)
{
2009-08-29 14:19:44 -07:00
unsigned w = c;
if (w < 256)
return (w <= ' ' && (w == ' ' || (9 <= w && w <= 0xD))) || w == NO_BREAK_SPACE;
return w == BYTE_ORDER_MARK || (JS_CCODE(w) & 0x00070000) == 0x00040000;
}
#define JS_ISPRINT(c) ((c) < 128 && isprint(c))
#define JS_ISUPPER(c) (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
#define JS_ISLOWER(c) (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)
#define JS_TOUPPER(c) ((jschar) ((JS_CCODE(c) & 0x00100000) \
? (c) - ((int32)JS_CCODE(c) >> 22) \
: (c)))
#define JS_TOLOWER(c) ((jschar) ((JS_CCODE(c) & 0x00200000) \
? (c) + ((int32)JS_CCODE(c) >> 22) \
: (c)))
/*
* Shorthands for ASCII (7-bit) decimal and hex conversion.
* Manually inline isdigit for performance; MSVC doesn't do this for us.
*/
#define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
#define JS7_UNDEC(c) ((c) - '0')
#define JS7_ISHEX(c) ((c) < 128 && isxdigit(c))
#define JS7_UNHEX(c) (uintN)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a')
#define JS7_ISLET(c) ((c) < 128 && isalpha(c))
/* Initialize the String class, returning its prototype object. */
2010-07-14 23:19:36 -07:00
extern js::Class js_StringClass;
inline bool
JSObject::isString() const
{
return getClass() == &js_StringClass;
}
extern JSObject *
js_InitStringClass(JSContext *cx, JSObject *obj);
extern const char js_escape_str[];
extern const char js_unescape_str[];
extern const char js_uneval_str[];
extern const char js_decodeURI_str[];
extern const char js_encodeURI_str[];
extern const char js_decodeURIComponent_str[];
extern const char js_encodeURIComponent_str[];
/* GC-allocate a string descriptor for the given malloc-allocated chars. */
extern JSString *
js_NewString(JSContext *cx, jschar *chars, size_t length);
/*
* GC-allocate a string descriptor and steal the char buffer held by |cb|.
* This function takes responsibility for adding the terminating '\0' required
* by js_NewString.
*/
extern JSString *
js_NewStringFromCharBuffer(JSContext *cx, JSCharBuffer &cb);
extern JSString *
js_NewDependentString(JSContext *cx, JSString *base, size_t start,
size_t length);
/* Copy a counted string and GC-allocate a descriptor for it. */
extern JSString *
js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n);
extern JSString *
js_NewStringCopyN(JSContext *cx, const char *s, size_t n);
/* Copy a C string and GC-allocate a descriptor for it. */
extern JSString *
js_NewStringCopyZ(JSContext *cx, const jschar *s);
extern JSString *
js_NewStringCopyZ(JSContext *cx, const char *s);
/*
* Convert a value to a printable C string.
*/
extern const char *
js_ValueToPrintable(JSContext *cx, const js::Value &,
JSAutoByteString *bytes, bool asSource = false);
/*
* Convert a value to a string, returning null after reporting an error,
* otherwise returning a new string reference.
*/
2010-07-14 23:19:36 -07:00
extern JSString *
js_ValueToString(JSContext *cx, const js::Value &v);
namespace js {
/*
* Most code that calls js_ValueToString knows the value is (probably) not a
* string, so it does not make sense to put this inline fast path into
* js_ValueToString.
*/
static JS_ALWAYS_INLINE JSString *
ValueToString_TestForStringInline(JSContext *cx, const Value &v)
{
if (v.isString())
return v.toString();
return js_ValueToString(cx, v);
}
}
/*
* This function implements E-262-3 section 9.8, toString. Convert the given
* value to a string of jschars appended to the given buffer. On error, the
* passed buffer may have partial results appended.
*/
2010-07-14 23:19:36 -07:00
extern JSBool
js_ValueToCharBuffer(JSContext *cx, const js::Value &v, JSCharBuffer &cb);
/*
* Convert a value to its source expression, returning null after reporting
* an error, otherwise returning a new string reference.
*/
extern JS_FRIEND_API(JSString *)
2010-07-14 23:19:36 -07:00
js_ValueToSource(JSContext *cx, const js::Value &v);
/*
* Compute a hash function from str. The caller can call this function even if
* str is not a GC-allocated thing.
*/
extern uint32
js_HashString(JSString *str);
/*
* Test if strings are equal. The caller can call the function even if str1
* or str2 are not GC-allocated things.
*/
extern JSBool JS_FASTCALL
js_EqualStrings(JSString *str1, JSString *str2);
/*
* Return less than, equal to, or greater than zero depending on whether
* str1 is less than, equal to, or greater than str2.
*/
extern int32 JS_FASTCALL
js_CompareStrings(JSString *str1, JSString *str2);
namespace js {
/*
* Return true if the string matches the given sequence of ASCII bytes.
*/
extern JSBool
MatchStringAndAscii(JSString *str, const char *asciiBytes);
} /* namespacejs */
/*
* Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
* The patlen argument must be positive and no greater than sBMHPatLenMax.
*
* Return the index of pat in text, or -1 if not found.
*/
static const jsuint sBMHCharSetSize = 256; /* ISO-Latin-1 */
static const jsuint sBMHPatLenMax = 255; /* skip table element is uint8 */
static const jsint sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
extern jsint
js_BoyerMooreHorspool(const jschar *text, jsuint textlen,
const jschar *pat, jsuint patlen);
extern size_t
js_strlen(const jschar *s);
extern jschar *
js_strchr(const jschar *s, jschar c);
extern jschar *
js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
#define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar))
inline void
js_short_strncpy(jschar *dest, const jschar *src, size_t num)
{
/*
* It isn't strictly necessary here for |num| to be small, but this function
* is currently only called on buffers for short strings.
*/
JS_ASSERT(JSShortString::fitsIntoShortString(num));
for (size_t i = 0; i < num; i++)
dest[i] = src[i];
}
/*
* Return s advanced past any Unicode white space characters.
*/
static inline const jschar *
js_SkipWhiteSpace(const jschar *s, const jschar *end)
{
JS_ASSERT(s <= end);
while (s != end && JS_ISSPACE(*s))
s++;
return s;
}
/*
* Inflate bytes to JS chars and vice versa. Report out of memory via cx and
* return null on error, otherwise return the jschar or byte vector that was
* JS_malloc'ed. length is updated to the length of the new string in jschars.
*/
extern jschar *
js_InflateString(JSContext *cx, const char *bytes, size_t *length);
extern char *
js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
/*
* Inflate bytes to JS chars into a buffer. 'chars' must be large enough for
* 'length' jschars. The buffer is NOT null-terminated. The destination length
* must be be initialized with the buffer size and will contain on return the
* number of copied chars. Conversion behavior depends on js_CStringsAreUTF8.
*/
extern JSBool
js_InflateStringToBuffer(JSContext *cx, const char *bytes, size_t length,
jschar *chars, size_t *charsLength);
/*
* Same as js_InflateStringToBuffer, but always treats 'bytes' as UTF-8.
*/
extern JSBool
js_InflateUTF8StringToBuffer(JSContext *cx, const char *bytes, size_t length,
jschar *chars, size_t *charsLength);
/*
* Get number of bytes in the deflated sequence of characters. Behavior depends
* on js_CStringsAreUTF8.
*/
extern size_t
js_GetDeflatedStringLength(JSContext *cx, const jschar *chars,
size_t charsLength);
/*
* Same as js_GetDeflatedStringLength, but always treats the result as UTF-8.
*/
extern size_t
js_GetDeflatedUTF8StringLength(JSContext *cx, const jschar *chars,
size_t charsLength);
/*
* Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
* 'length chars. The buffer is NOT null-terminated. The destination length
* must to be initialized with the buffer size and will contain on return the
* number of copied bytes. Conversion behavior depends on js_CStringsAreUTF8.
*/
extern JSBool
js_DeflateStringToBuffer(JSContext *cx, const jschar *chars,
size_t charsLength, char *bytes, size_t *length);
/*
* Same as js_DeflateStringToBuffer, but always treats 'bytes' as UTF-8.
*/
extern JSBool
js_DeflateStringToUTF8Buffer(JSContext *cx, const jschar *chars,
size_t charsLength, char *bytes, size_t *length);
/* Export a few natives and a helper to other files in SpiderMonkey. */
extern JSBool
2010-07-14 23:19:36 -07:00
js_str_escape(JSContext *cx, JSObject *obj, uintN argc, js::Value *argv,
js::Value *rval);
/*
* The String.prototype.replace fast-native entry point is exported for joined
* function optimization in js{interp,tracer}.cpp.
*/
namespace js {
extern JSBool
str_replace(JSContext *cx, uintN argc, js::Value *vp);
}
extern JSBool
2010-07-14 23:19:36 -07:00
js_str_toString(JSContext *cx, uintN argc, js::Value *vp);
extern JSBool
js_str_charAt(JSContext *cx, uintN argc, js::Value *vp);
extern JSBool
js_str_charCodeAt(JSContext *cx, uintN argc, js::Value *vp);
/*
* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
* least 6 bytes long. Return the number of UTF-8 bytes of data written.
*/
extern int
js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
namespace js {
extern size_t
PutEscapedStringImpl(char *buffer, size_t size, FILE *fp, JSString *str, uint32 quote);
/*
* Write str into buffer escaping any non-printable or non-ASCII character
* using \escapes for JS string literals.
* Guarantees that a NUL is at the end of the buffer unless size is 0. Returns
* the length of the written output, NOT including the NUL. Thus, a return
* value of size or more means that the output was truncated. If buffer
* is null, just returns the length of the output. If quote is not 0, it must
* be a single or double quote character that will quote the output.
*/
inline size_t
PutEscapedString(char *buffer, size_t size, JSString *str, uint32 quote)
{
size_t n = PutEscapedStringImpl(buffer, size, NULL, str, quote);
/* PutEscapedStringImpl can only fail with a file. */
JS_ASSERT(n != size_t(-1));
return n;
}
/*
* Write str into file escaping any non-printable or non-ASCII character.
* If quote is not 0, it must be a single or double quote character that
* will quote the output.
*/
inline bool
FileEscapedString(FILE *fp, JSString *str, uint32 quote)
{
return PutEscapedStringImpl(NULL, 0, fp, str, quote) != size_t(-1);
}
} /* namespace js */
extern JSBool
js_String(JSContext *cx, uintN argc, js::Value *vp);
#endif /* jsstr_h___ */