Bug 543151, part A1: Preliminary cleanups to the scanner/parser interface and the organization of nsCSSScanner.cpp. r=heycam

This commit is contained in:
Zack Weinberg 2013-02-16 18:27:53 -05:00
parent 1162fa289b
commit ad3efbd714
3 changed files with 438 additions and 445 deletions

View File

@ -340,15 +340,6 @@ protected:
bool GetToken(bool aSkipWS);
void UngetToken();
// get the part in parentheses of the url() function, which is really a
// part of a token in the CSS grammar, but we're using a combination
// of the parser and the scanner to do it to handle the backtracking
// required by the error handling of the tokenization (since if we
// fail to scan the full token, we should fall back to tokenizing as
// FUNCTION ... ')').
// Note that this function WILL WRITE TO aURL IN SOME FAILURE CASES.
bool GetURLInParens(nsString& aURL);
bool ExpectSymbol(PRUnichar aSymbol, bool aSkipWS);
bool ExpectEndProperty();
bool CheckEndProperty();
@ -1437,44 +1428,13 @@ CSSParserImpl::EvaluateSupportsCondition(const nsAString& aDeclaration,
bool
CSSParserImpl::GetToken(bool aSkipWS)
{
for (;;) {
if (!mHavePushBack) {
if (!mScanner->Next(mToken)) {
break;
}
}
if (mHavePushBack) {
mHavePushBack = false;
if (aSkipWS && (eCSSToken_WhiteSpace == mToken.mType)) {
continue;
if (!aSkipWS || mToken.mType != eCSSToken_Whitespace) {
return true;
}
return true;
}
return false;
}
bool
CSSParserImpl::GetURLInParens(nsString& aURL)
{
NS_ASSERTION(!mHavePushBack, "mustn't have pushback at this point");
if (! mScanner->NextURL(mToken)) {
// EOF
return false;
}
aURL = mToken.mIdent;
if (eCSSToken_URL != mToken.mType) {
// In the failure case (which gives a token of type
// eCSSToken_Bad_URL), we do not have to match parentheses *inside*
// the Bad_URL token, since this is now an invalid URL token. But
// we do need to match the closing parenthesis to match the 'url('.
NS_ABORT_IF_FALSE(mToken.mType == eCSSToken_Bad_URL,
"unexpected token type");
SkipUntil(')');
return false;
}
return true;
return mScanner->Next(mToken, aSkipWS);
}
void
@ -2207,9 +2167,10 @@ CSSParserImpl::ParseMozDocumentRule(RuleAppendFunc aAppendFunc, void* aData)
cur->func = css::DocumentRule::eDomain;
}
nsAutoString url;
if (!GetURLInParens(url)) {
NS_ASSERTION(!mHavePushBack, "mustn't have pushback at this point");
if (!mScanner->NextURL(mToken) || mToken.mType != eCSSToken_URL) {
REPORT_UNEXPECTED_TOKEN(PEMozDocRuleNotURI);
SkipUntil(')');
delete urls;
return false;
}
@ -2217,7 +2178,7 @@ CSSParserImpl::ParseMozDocumentRule(RuleAppendFunc aAppendFunc, void* aData)
// We could try to make the URL (as long as it's not domain())
// canonical and absolute with NS_NewURI and GetSpec, but I'm
// inclined to think we shouldn't.
CopyUTF16toUTF8(url, cur->url);
CopyUTF16toUTF8(mToken.mIdent, cur->url);
}
} while (ExpectSymbol(',', true));
@ -3039,7 +3000,7 @@ CSSParserImpl::ParseSelectorGroup(nsCSSSelectorList*& aList)
}
combinator = PRUnichar(0);
if (mToken.mType == eCSSToken_WhiteSpace) {
if (mToken.mType == eCSSToken_Whitespace) {
if (!GetToken(true)) {
break; // EOF ok here
}
@ -4121,7 +4082,7 @@ CSSParserImpl::ParseColor(nsCSSValue& aValue)
nscolor rgba;
switch (tk->mType) {
case eCSSToken_ID:
case eCSSToken_Ref:
case eCSSToken_Hash:
// #xxyyzz
if (NS_HexToRGB(tk->mIdent, &rgba)) {
aValue.SetColorValue(rgba);
@ -5031,7 +4992,7 @@ CSSParserImpl::ParseVariant(nsCSSValue& aValue,
if ((aVariantMask & VARIANT_COLOR) != 0) {
if (mHashlessColorQuirk || // NONSTANDARD: Nav interprets 'xxyyzz' values even without '#' prefix
(eCSSToken_ID == tk->mType) ||
(eCSSToken_Ref == tk->mType) ||
(eCSSToken_Hash == tk->mType) ||
(eCSSToken_Ident == tk->mType) ||
((eCSSToken_Function == tk->mType) &&
(tk->mIdent.LowerCaseEqualsLiteral("rgb") ||
@ -5748,7 +5709,7 @@ CSSParserImpl::IsLegacyGradientLine(const nsCSSTokenType& aType,
}
// fall through
case eCSSToken_ID:
case eCSSToken_Ref:
case eCSSToken_Hash:
// this is a color
break;
@ -8021,7 +7982,7 @@ CSSParserImpl::RequireWhitespace()
{
if (!GetToken(false))
return false;
if (mToken.mType != eCSSToken_WhiteSpace) {
if (mToken.mType != eCSSToken_Whitespace) {
UngetToken();
return false;
}
@ -8427,7 +8388,7 @@ CSSParserImpl::ParseOneFamily(nsAString& aFamily, bool& aOneKeyword)
if (eCSSToken_Ident == tk->mType) {
aOneKeyword = false;
aFamily.Append(tk->mIdent);
} else if (eCSSToken_WhiteSpace == tk->mType) {
} else if (eCSSToken_Whitespace == tk->mType) {
// Lookahead one token and drop whitespace if we are ending the
// font name.
if (!GetToken(true))

View File

@ -121,11 +121,6 @@ HexDigitValue(int32_t ch)
}
}
nsCSSToken::nsCSSToken()
{
mType = eCSSToken_Symbol;
}
void
nsCSSToken::AppendToString(nsString& aBuffer) const
{
@ -140,7 +135,7 @@ nsCSSToken::AppendToString(nsString& aBuffer) const
break;
case eCSSToken_ID:
case eCSSToken_Ref:
case eCSSToken_Hash:
aBuffer.Append('#');
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
break;
@ -200,7 +195,7 @@ nsCSSToken::AppendToString(nsString& aBuffer) const
aBuffer.Append(mSymbol);
break;
case eCSSToken_WhiteSpace:
case eCSSToken_Whitespace:
aBuffer.Append(' ');
break;
@ -232,7 +227,7 @@ nsCSSToken::AppendToString(nsString& aBuffer) const
}
nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
: mReadPointer(aBuffer.BeginReading())
: mBuffer(aBuffer.BeginReading())
, mOffset(0)
, mCount(aBuffer.Length())
, mPushback(mLocalPushback)
@ -259,6 +254,43 @@ nsCSSScanner::~nsCSSScanner()
}
}
// Begin recording the input stream. Sets mRecording and remembers the
// start position as the current read offset minus the number of
// characters currently held in the pushback buffer. Asserts (debug
// only) that recording is not already in progress.
void
nsCSSScanner::StartRecording()
{
NS_ASSERTION(!mRecording, "already started recording");
mRecording = true;
// Characters sitting in the pushback buffer are subtracted from mOffset
// so that they fall inside the recorded range.
mRecordStartOffset = mOffset - mPushbackCount;
}
// Stop recording and discard what was recorded: only the mRecording
// flag is cleared. Asserts (debug only) that recording was in progress.
void
nsCSSScanner::StopRecording()
{
NS_ASSERTION(mRecording, "haven't started recording");
mRecording = false;
}
// Stop recording and append the recorded text to aBuffer. The recorded
// range runs from mRecordStartOffset up to the current read offset,
// less any characters currently held in the pushback buffer. Existing
// contents of aBuffer are kept (the text is appended, not assigned).
void
nsCSSScanner::StopRecording(nsString& aBuffer)
{
  NS_ASSERTION(mRecording, "haven't started recording");

  // First recorded character in the underlying buffer.
  const PRUnichar* const recordedText = mBuffer + mRecordStartOffset;
  aBuffer.Append(recordedText,
                 mOffset - mPushbackCount - mRecordStartOffset);

  mRecording = false;
}
// Return a dependent substring of the input buffer that starts at
// mTokenLineOffset and extends to the first line terminator ('\n',
// '\r' or '\f') found at or after mTokenOffset, or to the end of the
// buffer if there is none. The terminator itself is not included.
nsDependentSubstring
nsCSSScanner::GetCurrentLine() const
{
  uint32_t lineEnd = mTokenOffset;
  for (; lineEnd < mCount; ++lineEnd) {
    const PRUnichar c = mBuffer[lineEnd];
    if (c == '\n' || c == '\r' || c == '\f') {
      break;
    }
  }

  const PRUnichar* const first = mBuffer + mTokenLineOffset;
  const PRUnichar* const last = mBuffer + lineEnd;
  return nsDependentSubstring(first, last);
}
// Returns -1 on error or eof
int32_t
nsCSSScanner::Read()
@ -270,11 +302,11 @@ nsCSSScanner::Read()
if (mOffset == mCount) {
return -1;
}
rv = int32_t(mReadPointer[mOffset++]);
rv = int32_t(mBuffer[mOffset++]);
// There are four types of newlines in CSS: "\r", "\n", "\r\n", and "\f".
// To simplify dealing with newlines, they are all normalized to "\n" here
if (rv == '\r') {
if (mOffset < mCount && mReadPointer[mOffset] == '\n') {
if (mOffset < mCount && mBuffer[mOffset] == '\n') {
mOffset++;
}
rv = '\n';
@ -323,43 +355,6 @@ nsCSSScanner::Pushback(PRUnichar aChar)
mPushback[mPushbackCount++] = aChar;
}
void
nsCSSScanner::StartRecording()
{
NS_ASSERTION(!mRecording, "already started recording");
mRecording = true;
mRecordStartOffset = mOffset - mPushbackCount;
}
void
nsCSSScanner::StopRecording()
{
NS_ASSERTION(mRecording, "haven't started recording");
mRecording = false;
}
void
nsCSSScanner::StopRecording(nsString& aBuffer)
{
NS_ASSERTION(mRecording, "haven't started recording");
mRecording = false;
aBuffer.Append(mReadPointer + mRecordStartOffset,
mOffset - mPushbackCount - mRecordStartOffset);
}
nsDependentSubstring
nsCSSScanner::GetCurrentLine() const
{
uint32_t end = mTokenOffset;
while (end < mCount &&
mReadPointer[end] != '\n' && mReadPointer[end] != '\r' &&
mReadPointer[end] != '\f') {
end++;
}
return nsDependentSubstring(mReadPointer + mTokenLineOffset,
mReadPointer + end);
}
bool
nsCSSScanner::LookAhead(PRUnichar aChar)
{
@ -389,7 +384,7 @@ nsCSSScanner::LookAheadOrEOF(PRUnichar aChar)
}
void
nsCSSScanner::EatWhiteSpace()
nsCSSScanner::SkipWhitespace()
{
for (;;) {
int32_t ch = Read();
@ -403,233 +398,28 @@ nsCSSScanner::EatWhiteSpace()
}
}
bool
nsCSSScanner::Next(nsCSSToken& aToken)
void
nsCSSScanner::SkipComment()
{
for (;;) { // Infinite loop so we can restart after comments.
mTokenOffset = mOffset;
mTokenLineOffset = mLineOffset;
mTokenLineNumber = mLineNumber;
int32_t ch = Read();
if (ch < 0) {
return false;
}
// UNICODE-RANGE
if ((ch == 'u' || ch == 'U') && Peek() == '+')
return ParseURange(ch, aToken);
// IDENT
if (StartsIdent(ch, Peek()))
return ParseIdent(ch, aToken);
// AT_KEYWORD
if (ch == '@') {
return ParseAtKeyword(aToken);
}
// NUMBER or DIM
if ((ch == '.') || (ch == '+') || (ch == '-')) {
int32_t nextChar = Peek();
if (IsDigit(nextChar)) {
return ParseNumber(ch, aToken);
}
else if (('.' == nextChar) && ('.' != ch)) {
nextChar = Read();
int32_t followingChar = Peek();
Pushback(nextChar);
if (IsDigit(followingChar))
return ParseNumber(ch, aToken);
}
}
if (IsDigit(ch)) {
return ParseNumber(ch, aToken);
}
// ID
if (ch == '#') {
return ParseRef(ch, aToken);
}
// STRING
if ((ch == '"') || (ch == '\'')) {
return ParseString(ch, aToken);
}
// WS
if (IsWhitespace(ch)) {
aToken.mType = eCSSToken_WhiteSpace;
aToken.mIdent.Assign(PRUnichar(ch));
EatWhiteSpace();
return true;
}
if (ch == '/' && !IsSVGMode()) {
int32_t nextChar = Peek();
if (nextChar == '*') {
Read();
// FIXME: Editor wants comments to be preserved (bug 60290).
if (!SkipCComment()) {
return false;
}
continue; // start again at the beginning
}
}
if (ch == '<') { // consume HTML comment tags
if (LookAhead('!')) {
if (LookAhead('-')) {
if (LookAhead('-')) {
aToken.mType = eCSSToken_HTMLComment;
aToken.mIdent.AssignLiteral("<!--");
return true;
}
Pushback('-');
}
Pushback('!');
}
}
if (ch == '-') { // check for HTML comment end
if (LookAhead('-')) {
if (LookAhead('>')) {
aToken.mType = eCSSToken_HTMLComment;
aToken.mIdent.AssignLiteral("-->");
return true;
}
Pushback('-');
}
}
// INCLUDES ("~=") and DASHMATCH ("|=")
if (( ch == '|' ) || ( ch == '~' ) || ( ch == '^' ) ||
( ch == '$' ) || ( ch == '*' )) {
int32_t nextChar = Read();
if ( nextChar == '=' ) {
if (ch == '~') {
aToken.mType = eCSSToken_Includes;
}
else if (ch == '|') {
aToken.mType = eCSSToken_Dashmatch;
}
else if (ch == '^') {
aToken.mType = eCSSToken_Beginsmatch;
}
else if (ch == '$') {
aToken.mType = eCSSToken_Endsmatch;
}
else if (ch == '*') {
aToken.mType = eCSSToken_Containsmatch;
}
return true;
} else if (nextChar >= 0) {
Pushback(nextChar);
}
}
aToken.mType = eCSSToken_Symbol;
aToken.mSymbol = ch;
return true;
}
}
bool
nsCSSScanner::NextURL(nsCSSToken& aToken)
{
EatWhiteSpace();
int32_t ch = Read();
if (ch < 0) {
return false;
}
// STRING
if ((ch == '"') || (ch == '\'')) {
#ifdef DEBUG
bool ok =
#endif
ParseString(ch, aToken);
NS_ABORT_IF_FALSE(ok, "ParseString should never fail, "
"since there's always something read");
NS_ABORT_IF_FALSE(aToken.mType == eCSSToken_String ||
aToken.mType == eCSSToken_Bad_String,
"unexpected token type");
if (MOZ_LIKELY(aToken.mType == eCSSToken_String)) {
EatWhiteSpace();
if (LookAheadOrEOF(')')) {
aToken.mType = eCSSToken_URL;
} else {
aToken.mType = eCSSToken_Bad_URL;
}
} else {
aToken.mType = eCSSToken_Bad_URL;
}
return true;
}
// Process a url lexical token. A CSS1 url token can contain
// characters beyond identifier characters (e.g. '/', ':', etc.)
// Because of this the normal rules for tokenizing the input don't
// apply very well. To simplify the parser and relax some of the
// requirements on the scanner we parse url's here. If we find a
// malformed URL then we emit a token of type "Bad_URL" so that
// the CSS1 parser can ignore the invalid input. The parser must
// treat a Bad_URL token like a Function token, and process
// tokens until a matching parenthesis.
aToken.mType = eCSSToken_Bad_URL;
aToken.mSymbol = PRUnichar(0);
nsString& ident = aToken.mIdent;
ident.SetLength(0);
// start of a non-quoted url (which may be empty)
bool ok = true;
for (;;) {
if (IsURLChar(ch)) {
// A regular url character.
ident.Append(PRUnichar(ch));
} else if (ch == ')') {
// All done
break;
} else if (IsWhitespace(ch)) {
// Whitespace is allowed at the end of the URL
EatWhiteSpace();
// Consume the close paren if we have it; if not we're an invalid URL.
ok = LookAheadOrEOF(')');
break;
} else if (ch == '\\') {
if (!ParseAndAppendEscape(ident, false)) {
ok = false;
Pushback(ch);
break;
int32_t ch = Read();
if (ch < 0) break;
if (ch == '*') {
if (LookAhead('/')) {
return;
}
} else {
// This is an invalid URL spec
ok = false;
Pushback(ch); // push it back so the parser can match tokens and
// then closing parenthesis
break;
}
ch = Read();
if (ch < 0) {
break;
}
}
// If the result of the above scanning is ok then change the token
// type to a useful one.
if (ok) {
aToken.mType = eCSSToken_URL;
}
return true;
mReporter->ReportUnexpectedEOF("PECommentEOF");
}
/**
* Returns whether an escape was successfully parsed; if it was not,
* the backslash needs to be its own symbol token.
*/
bool
nsCSSScanner::ParseAndAppendEscape(nsString& aOutput, bool aInString)
nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
{
int32_t ch = Read();
if (ch < 0) {
@ -717,7 +507,7 @@ bool
nsCSSScanner::GatherIdent(int32_t aChar, nsString& aIdent)
{
if (aChar == '\\') {
if (!ParseAndAppendEscape(aIdent, false)) {
if (!GatherEscape(aIdent, false)) {
return false;
}
} else {
@ -730,12 +520,12 @@ nsCSSScanner::GatherIdent(int32_t aChar, nsString& aIdent)
// See how much we can consume and append in one go
uint32_t n = mOffset;
// Count number of Ident characters that can be processed
while (n < mCount && IsIdent(mReadPointer[n])) {
while (n < mCount && IsIdent(mBuffer[n])) {
++n;
}
// Add to the token what we have so far
if (n > mOffset) {
aIdent.Append(&mReadPointer[mOffset], n - mOffset);
aIdent.Append(&mBuffer[mOffset], n - mOffset);
mOffset = n;
}
}
@ -743,7 +533,7 @@ nsCSSScanner::GatherIdent(int32_t aChar, nsString& aIdent)
aChar = Read();
if (aChar < 0) break;
if (aChar == '\\') {
if (!ParseAndAppendEscape(aIdent, false)) {
if (!GatherEscape(aIdent, false)) {
Pushback(aChar);
break;
}
@ -759,35 +549,7 @@ nsCSSScanner::GatherIdent(int32_t aChar, nsString& aIdent)
}
bool
nsCSSScanner::ParseRef(int32_t aChar, nsCSSToken& aToken)
{
// Fall back for when we don't have name characters following:
aToken.mType = eCSSToken_Symbol;
aToken.mSymbol = aChar;
int32_t ch = Read();
if (ch < 0) {
return true;
}
if (IsIdent(ch) || ch == '\\') {
// First char after the '#' is a valid ident char (or an escape),
// so it makes sense to keep going
nsCSSTokenType type =
StartsIdent(ch, Peek()) ? eCSSToken_ID : eCSSToken_Ref;
aToken.mIdent.SetLength(0);
if (GatherIdent(ch, aToken.mIdent)) {
aToken.mType = type;
return true;
}
}
// No ident chars after the '#'. Just unread |ch| and get out of here.
Pushback(ch);
return true;
}
bool
nsCSSScanner::ParseIdent(int32_t aChar, nsCSSToken& aToken)
nsCSSScanner::ScanIdent(int32_t aChar, nsCSSToken& aToken)
{
nsString& ident = aToken.mIdent;
ident.SetLength(0);
@ -814,7 +576,7 @@ nsCSSScanner::ParseIdent(int32_t aChar, nsCSSToken& aToken)
}
bool
nsCSSScanner::ParseAtKeyword(nsCSSToken& aToken)
nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
{
int32_t ch = Read();
if (StartsIdent(ch, Peek())) {
@ -833,7 +595,35 @@ nsCSSScanner::ParseAtKeyword(nsCSSToken& aToken)
}
bool
nsCSSScanner::ParseNumber(int32_t c, nsCSSToken& aToken)
nsCSSScanner::ScanHash(int32_t aChar, nsCSSToken& aToken)
{
// Fall back for when we don't have name characters following:
aToken.mType = eCSSToken_Symbol;
aToken.mSymbol = aChar;
int32_t ch = Read();
if (ch < 0) {
return true;
}
if (IsIdent(ch) || ch == '\\') {
// First char after the '#' is a valid ident char (or an escape),
// so it makes sense to keep going
nsCSSTokenType type =
StartsIdent(ch, Peek()) ? eCSSToken_ID : eCSSToken_Hash;
aToken.mIdent.SetLength(0);
if (GatherIdent(ch, aToken.mIdent)) {
aToken.mType = type;
return true;
}
}
// No ident chars after the '#'. Just unread |ch| and get out of here.
Pushback(ch);
return true;
}
bool
nsCSSScanner::ScanNumber(int32_t c, nsCSSToken& aToken)
{
NS_PRECONDITION(c == '.' || c == '+' || c == '-' || IsDigit(c),
"Why did we get called?");
@ -867,7 +657,7 @@ nsCSSScanner::ParseNumber(int32_t c, nsCSSToken& aToken)
bool gotDot = (c == '.');
if (!gotDot) {
// Parse the integer part of the mantissa
// Scan the integer part of the mantissa
NS_ASSERTION(IsDigit(c), "Why did we get called?");
do {
intPart = 10*intPart + DecimalDigitValue(c);
@ -879,7 +669,7 @@ nsCSSScanner::ParseNumber(int32_t c, nsCSSToken& aToken)
}
if (gotDot) {
// Parse the fractional part of the mantissa.
// Scan the fractional part of the mantissa.
c = Read();
NS_ASSERTION(IsDigit(c), "How did we get here?");
// Power of ten by which we need to divide our next digit
@ -967,24 +757,7 @@ nsCSSScanner::ParseNumber(int32_t c, nsCSSToken& aToken)
}
bool
nsCSSScanner::SkipCComment()
{
for (;;) {
int32_t ch = Read();
if (ch < 0) break;
if (ch == '*') {
if (LookAhead('/')) {
return true;
}
}
}
mReporter->ReportUnexpectedEOF("PECommentEOF");
return false;
}
bool
nsCSSScanner::ParseString(int32_t aStop, nsCSSToken& aToken)
nsCSSScanner::ScanString(int32_t aStop, nsCSSToken& aToken)
{
aToken.mIdent.SetLength(0);
aToken.mType = eCSSToken_String;
@ -996,7 +769,7 @@ nsCSSScanner::ParseString(int32_t aStop, nsCSSToken& aToken)
uint32_t n = mOffset;
// Count number of characters that can be processed
for (;n < mCount; ++n) {
PRUnichar nextChar = mReadPointer[n];
PRUnichar nextChar = mBuffer[n];
if ((nextChar == aStop) || (nextChar == '\\') ||
(nextChar == '\n') || (nextChar == '\r') || (nextChar == '\f')) {
break;
@ -1004,7 +777,7 @@ nsCSSScanner::ParseString(int32_t aStop, nsCSSToken& aToken)
}
// Add to the token what we have so far
if (n > mOffset) {
aToken.mIdent.Append(&mReadPointer[mOffset], n - mOffset);
aToken.mIdent.Append(&mBuffer[mOffset], n - mOffset);
mOffset = n;
}
}
@ -1018,10 +791,10 @@ nsCSSScanner::ParseString(int32_t aStop, nsCSSToken& aToken)
break;
}
if (ch == '\\') {
if (!ParseAndAppendEscape(aToken.mIdent, true)) {
if (!GatherEscape(aToken.mIdent, true)) {
aToken.mType = eCSSToken_Bad_String;
Pushback(ch);
// For strings, the only case where ParseAndAppendEscape will
// For strings, the only case where GatherEscape will
// return false is when there's a backslash to start an escape
// immediately followed by end-of-stream. In that case, the
// correct tokenization is badstring *followed* by a DELIM for
@ -1052,7 +825,7 @@ nsCSSScanner::ParseString(int32_t aStop, nsCSSToken& aToken)
// are also decoded into mInteger and mInteger2, and mIntegerValid is set.
bool
nsCSSScanner::ParseURange(int32_t aChar, nsCSSToken& aResult)
nsCSSScanner::ScanURange(int32_t aChar, nsCSSToken& aResult)
{
int32_t intro2 = Read();
int32_t ch = Peek();
@ -1069,7 +842,7 @@ nsCSSScanner::ParseURange(int32_t aChar, nsCSSToken& aResult)
if (!IsHexDigit(ch) && ch != '?') {
Pushback(intro2);
Pushback(aChar);
return ParseIdent(aChar, aResult);
return ScanIdent(aChar, aResult);
}
aResult.mIdent.Truncate();
@ -1129,3 +902,223 @@ nsCSSScanner::ParseURange(int32_t aChar, nsCSSToken& aResult)
aResult.mType = eCSSToken_URange;
return true;
}
// Scan the body of a url(...) construct, i.e. the text that follows the
// opening parenthesis. Leading whitespace is skipped first.
//
// Returns false only when the input ends immediately after that
// whitespace. Otherwise returns true with aToken.mType set to
// eCSSToken_URL on success or eCSSToken_Bad_URL on a malformed URL;
// aToken.mIdent holds the URL text gathered so far.
bool
nsCSSScanner::NextURL(nsCSSToken& aToken)
{
SkipWhitespace();
int32_t ch = Read();
if (ch < 0) {
return false;
}
// STRING
if ((ch == '"') || (ch == '\'')) {
// Quoted form: scan it as an ordinary string, then re-label the token.
// The return value is only inspected by the debug-only assertion below.
#ifdef DEBUG
bool ok =
#endif
ScanString(ch, aToken);
NS_ABORT_IF_FALSE(ok, "ScanString should never fail, "
"since there's always something read");
NS_ABORT_IF_FALSE(aToken.mType == eCSSToken_String ||
aToken.mType == eCSSToken_Bad_String,
"unexpected token type");
if (MOZ_LIKELY(aToken.mType == eCSSToken_String)) {
// A well-formed string must be followed (after optional whitespace)
// by ')' or end of input to count as a URL.
SkipWhitespace();
if (LookAheadOrEOF(')')) {
aToken.mType = eCSSToken_URL;
} else {
aToken.mType = eCSSToken_Bad_URL;
}
} else {
// A bad string inside url( ) yields a bad URL.
aToken.mType = eCSSToken_Bad_URL;
}
return true;
}
// Process a url lexical token. A CSS1 url token can contain
// characters beyond identifier characters (e.g. '/', ':', etc.)
// Because of this the normal rules for tokenizing the input don't
// apply very well. To simplify the parser and relax some of the
// requirements on the scanner we parse url's here. If we find a
// malformed URL then we emit a token of type "Bad_URL" so that
// the CSS1 parser can ignore the invalid input. The parser must
// treat a Bad_URL token like a Function token, and process
// tokens until a matching parenthesis.
// Start out pessimistic: the type is upgraded to URL at the end only
// if scanning succeeded. mSymbol is zero because there is no quote
// delimiter for an unquoted URL.
aToken.mType = eCSSToken_Bad_URL;
aToken.mSymbol = PRUnichar(0);
nsString& ident = aToken.mIdent;
ident.SetLength(0);
// start of a non-quoted url (which may be empty)
bool ok = true;
for (;;) {
if (IsURLChar(ch)) {
// A regular url character.
ident.Append(PRUnichar(ch));
} else if (ch == ')') {
// All done
break;
} else if (IsWhitespace(ch)) {
// Whitespace is allowed at the end of the URL
SkipWhitespace();
// Consume the close paren if we have it; if not we're an invalid URL.
ok = LookAheadOrEOF(')');
break;
} else if (ch == '\\') {
// An escape sequence; if it cannot be gathered, push the backslash
// back and give up on this URL.
if (!GatherEscape(ident, false)) {
ok = false;
Pushback(ch);
break;
}
} else {
// This is an invalid URL spec
ok = false;
Pushback(ch); // push it back so the parser can match tokens and
// then closing parenthesis
break;
}
ch = Read();
if (ch < 0) {
// End of input ends the URL; 'ok' is left unchanged.
break;
}
}
// If the result of the above scanning is ok then change the token
// type to a useful one.
if (ok) {
aToken.mType = eCSSToken_URL;
}
return true;
}
// Scan the next token from the input into aToken.
//
// Returns false when the end of input is reached before a token starts,
// true otherwise. If aSkipWS is true, runs of whitespace are skipped
// instead of being returned as eCSSToken_Whitespace tokens. Outside SVG
// mode, /* ... */ comments are skipped and never produce a token. The
// position of the token start (offset, line offset, line number) is
// recorded in mTokenOffset / mTokenLineOffset / mTokenLineNumber.
//
// The order of the checks below matters: earlier categories take
// precedence over later ones for the same leading character.
bool
nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS)
{
for (;;) { // Infinite loop so we can restart after comments.
// Remember where this token begins; refreshed on every restart.
mTokenOffset = mOffset;
mTokenLineOffset = mLineOffset;
mTokenLineNumber = mLineNumber;
int32_t ch = Read();
if (ch < 0) {
return false;
}
// UNICODE-RANGE
if ((ch == 'u' || ch == 'U') && Peek() == '+')
return ScanURange(ch, aToken);
// IDENT
if (StartsIdent(ch, Peek()))
return ScanIdent(ch, aToken);
// AT_KEYWORD
if (ch == '@') {
return ScanAtKeyword(aToken);
}
// NUMBER or DIM
if ((ch == '.') || (ch == '+') || (ch == '-')) {
int32_t nextChar = Peek();
if (IsDigit(nextChar)) {
return ScanNumber(ch, aToken);
}
else if (('.' == nextChar) && ('.' != ch)) {
// A sign followed by '.': this is a number only if a digit follows
// the '.'. Look two characters ahead by reading one and pushing
// it back again.
nextChar = Read();
int32_t followingChar = Peek();
Pushback(nextChar);
if (IsDigit(followingChar))
return ScanNumber(ch, aToken);
}
}
if (IsDigit(ch)) {
return ScanNumber(ch, aToken);
}
// ID
if (ch == '#') {
return ScanHash(ch, aToken);
}
// STRING
if ((ch == '"') || (ch == '\'')) {
return ScanString(ch, aToken);
}
// WS
if (IsWhitespace(ch)) {
// Collapse the whole run of whitespace. Only mType is set on the
// token here; the other fields are left as they were.
SkipWhitespace();
if (!aSkipWS) {
aToken.mType = eCSSToken_Whitespace;
return true;
}
continue; // start again at the beginning
}
// C-style comment; not recognized in SVG mode.
if (ch == '/' && !IsSVGMode()) {
int32_t nextChar = Peek();
if (nextChar == '*') {
Read();
// FIXME: Editor wants comments to be preserved (bug 60290).
SkipComment();
continue; // start again at the beginning
}
}
if (ch == '<') { // consume HTML comment tags
// Match "<!--" one character at a time, pushing back whatever was
// consumed if the full sequence is not present.
if (LookAhead('!')) {
if (LookAhead('-')) {
if (LookAhead('-')) {
aToken.mType = eCSSToken_HTMLComment;
aToken.mIdent.AssignLiteral("<!--");
return true;
}
Pushback('-');
}
Pushback('!');
}
}
if (ch == '-') { // check for HTML comment end
if (LookAhead('-')) {
if (LookAhead('>')) {
aToken.mType = eCSSToken_HTMLComment;
aToken.mIdent.AssignLiteral("-->");
return true;
}
Pushback('-');
}
}
// Match operators: INCLUDES ("~="), DASHMATCH ("|="), BEGINSMATCH ("^="),
// ENDSMATCH ("$=") and CONTAINSMATCH ("*=")
if (( ch == '|' ) || ( ch == '~' ) || ( ch == '^' ) ||
( ch == '$' ) || ( ch == '*' )) {
int32_t nextChar = Read();
if ( nextChar == '=' ) {
if (ch == '~') {
aToken.mType = eCSSToken_Includes;
}
else if (ch == '|') {
aToken.mType = eCSSToken_Dashmatch;
}
else if (ch == '^') {
aToken.mType = eCSSToken_Beginsmatch;
}
else if (ch == '$') {
aToken.mType = eCSSToken_Endsmatch;
}
else if (ch == '*') {
aToken.mType = eCSSToken_Containsmatch;
}
return true;
} else if (nextChar >= 0) {
// Not an operator: put the character back (nothing to put back at EOF)
// and fall through to emit ch as a plain symbol.
Pushback(nextChar);
}
}
// Anything not matched above is a single-character Symbol token.
aToken.mType = eCSSToken_Symbol;
aToken.mSymbol = ch;
return true;
}
}

View File

@ -16,58 +16,88 @@ class ErrorReporter;
}
}
// Token types
// Token types; in close but not perfect correspondence to the token
// categorization in section 4.1.1 of CSS2.1. (The deviations are all
// the fault of css3-selectors, which has requirements that can only be
// met by changing the generic tokenization.) The comment on each line
// illustrates the form of each identifier.
enum nsCSSTokenType {
// A css identifier (e.g. foo)
eCSSToken_Ident, // mIdent
// White space of any kind. No value fields are used. Note that
// comments do *not* count as white space; comments separate tokens
// but are not themselves tokens.
eCSSToken_Whitespace, //
// A css at keyword (e.g. @foo)
eCSSToken_AtKeyword, // mIdent
// Identifier-like tokens. mIdent is the text of the identifier.
// The difference between ID and Hash is: if the text after the #
// would have been a valid Ident if the # hadn't been there, the
// scanner produces an ID token. Otherwise it produces a Hash token.
// (This distinction is required by css3-selectors.)
eCSSToken_Ident, // word
eCSSToken_Function, // word(
eCSSToken_AtKeyword, // @word
eCSSToken_ID, // #word
eCSSToken_Hash, // #0word
// A css number without a percentage or dimension; with percentage;
// without percentage but with a dimension
eCSSToken_Number, // mNumber
eCSSToken_Percentage, // mNumber
eCSSToken_Dimension, // mNumber + mIdent
// Numeric tokens. mNumber is the floating-point value of the
// number, and mHasSign indicates whether there was an explicit sign
// (+ or -) in front of the number. If mIntegerValid is true, the
// number had the lexical form of an integer, and mInteger is its
// integer value. Lexically integer values outside the range of a
// 32-bit signed number are clamped to the maximum values; mNumber
// will indicate a 'truer' value in that case. Percentage tokens
// are always considered not to be integers, even if their numeric
// value is integral (100% => mNumber = 1.0). For Dimension
// tokens, mIdent holds the text of the unit.
eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3
eCSSToken_Dimension, // 24px 8.5in
eCSSToken_Percentage, // 85% 1280.4%
// A css string (e.g. "foo" or 'foo')
eCSSToken_String, // mSymbol + mIdent + mSymbol
// String-like tokens. In all cases, mIdent holds the text
// belonging to the string, and mSymbol holds the delimiter
// character, which may be ', ", or zero (only for unquoted URLs).
// Bad_String and Bad_URL tokens are emitted when the closing
// delimiter or parenthesis was missing.
eCSSToken_String, // 'foo bar' "foo bar"
eCSSToken_Bad_String, // 'foo bar
eCSSToken_URL, // url(foobar) url("foo bar")
eCSSToken_Bad_URL, // url(foo
// Whitespace (e.g. " " or "/* abc */")
eCSSToken_WhiteSpace, // mIdent
// Any one-character symbol. mSymbol holds the character.
eCSSToken_Symbol, // . ; { } ! *
// A css symbol (e.g. ':', ';', '+', etc.)
eCSSToken_Symbol, // mSymbol
// Match operators. These are single tokens rather than pairs of
// Symbol tokens because css3-selectors forbids the presence of
// comments between the two characters. No value fields are used;
// the token type indicates which operator.
eCSSToken_Includes, // ~=
eCSSToken_Dashmatch, // |=
eCSSToken_Beginsmatch, // ^=
eCSSToken_Endsmatch, // $=
eCSSToken_Containsmatch, // *=
// A css1 id (e.g. #foo3)
eCSSToken_ID, // mIdent
// Just like eCSSToken_ID, except the part following the '#' is not
// a valid CSS identifier (eg. starts with a digit, is the empty
// string, etc).
eCSSToken_Ref, // mIdent
// Unicode-range token: currently used only in @font-face.
// The lexical rule for this token includes several forms that are
// semantically invalid. Therefore, mIdent always holds the
// complete original text of the token (so we can print it
// accurately in diagnostics), and mIntegerValid is true iff the
// token is semantically valid. In that case, mInteger holds the
// lowest value included in the range, and mInteger2 holds the
// highest value included in the range.
eCSSToken_URange, // U+007e U+01?? U+2000-206F
eCSSToken_Function, // mIdent
eCSSToken_URL, // mIdent + mSymbol
eCSSToken_Bad_URL, // mIdent + mSymbol
eCSSToken_HTMLComment, // "<!--" or "-->"
eCSSToken_Includes, // "~="
eCSSToken_Dashmatch, // "|="
eCSSToken_Beginsmatch, // "^="
eCSSToken_Endsmatch, // "$="
eCSSToken_Containsmatch, // "*="
eCSSToken_URange, // Low in mInteger, high in mInteger2;
// mIntegerValid is true if the token is a
// valid range; mIdent preserves the textual
// form of the token for error reporting
// An unterminated string, which is always an error.
eCSSToken_Bad_String // mSymbol + mIdent
// HTML comment delimiters, ignored as a unit when they appear at
// the top level of a style sheet, for compatibility with websites
// written for compatibility with pre-CSS browsers. This token type
// subsumes the css2.1 CDO and CDC tokens, which are always treated
// the same by the parser. mIdent holds the text of the token, for
// diagnostics.
eCSSToken_HTMLComment, // <!-- -->
};
// A single token returned from the scanner. mType is always
// meaningful; comments above describe which other fields are
// meaningful for which token types.
struct nsCSSToken {
nsAutoString mIdent NS_OKONHEAP;
float mNumber;
@ -75,22 +105,24 @@ struct nsCSSToken {
int32_t mInteger2;
nsCSSTokenType mType;
PRUnichar mSymbol;
bool mIntegerValid; // for number, dimension, urange
bool mHasSign; // for number, percentage, and dimension
bool mIntegerValid;
bool mHasSign;
nsCSSToken();
nsCSSToken()
: mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
mSymbol('\0'), mIntegerValid(false), mHasSign(false)
{}
bool IsSymbol(PRUnichar aSymbol) {
return bool((eCSSToken_Symbol == mType) && (mSymbol == aSymbol));
bool IsSymbol(PRUnichar aSymbol) const {
return mType == eCSSToken_Symbol && mSymbol == aSymbol;
}
void AppendToString(nsString& aBuffer) const;
};
// CSS Scanner API. Used to tokenize an input stream using the CSS
// forward compatible tokenization rules. This implementation is
// private to this package and is only used internally by the css
// parser.
// nsCSSScanner tokenizes an input stream using the CSS2.1 forward
// compatible tokenization rules. Used internally by nsCSSParser;
// not available for use by other code.
class nsCSSScanner {
public:
// |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
@ -122,17 +154,23 @@ class nsCSSScanner {
// the most recently processed token.
nsDependentSubstring GetCurrentLine() const;
// Get the next token. Return false on EOF. aTokenResult
// is filled in with the data for the token.
bool Next(nsCSSToken& aTokenResult);
// Get the next token. Return false on EOF. aTokenResult is filled
// in with the data for the token. If aSkipWS is true, skip over
// eCSSToken_Whitespace tokens rather than returning them.
bool Next(nsCSSToken& aTokenResult, bool aSkipWS);
// Get the next token that may be a string or unquoted URL
// Get the body of a URL token (everything after the 'url(').
// This is exposed for use by nsCSSParser::ParseMozDocumentRule,
// which, for historical reasons, must make additional function
// tokens behave like url(). Please do not add new uses to the
// parser.
bool NextURL(nsCSSToken& aTokenResult);
// It's really ugly that we have to expose this, but it's the easiest
// way to do :nth-child() parsing sanely. (In particular, in
// :nth-child(2n-1), "2n-1" is a dimension, and we need to push the
// "-1" back so we can read it again as a number.)
// This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
// because "2n-1" is a single DIMENSION token, and "n-1" is a single
// IDENT token, but the :nth() selector syntax wants to interpret
// them the same as "2n -1" and "n -1" respectively. Please do not
// add new uses to the parser.
void Pushback(PRUnichar aChar);
// Starts recording the input stream from the current position.
@ -150,20 +188,21 @@ protected:
int32_t Peek();
bool LookAhead(PRUnichar aChar);
bool LookAheadOrEOF(PRUnichar aChar); // expect either aChar or EOF
void EatWhiteSpace();
bool ParseAndAppendEscape(nsString& aOutput, bool aInString);
bool ParseIdent(int32_t aChar, nsCSSToken& aResult);
bool ParseAtKeyword(nsCSSToken& aResult);
bool ParseNumber(int32_t aChar, nsCSSToken& aResult);
bool ParseRef(int32_t aChar, nsCSSToken& aResult);
bool ParseString(int32_t aChar, nsCSSToken& aResult);
bool ParseURange(int32_t aChar, nsCSSToken& aResult);
bool SkipCComment();
void SkipWhitespace();
void SkipComment();
bool GatherEscape(nsString& aOutput, bool aInString);
bool GatherIdent(int32_t aChar, nsString& aIdent);
const PRUnichar *mReadPointer;
bool ScanIdent(int32_t aChar, nsCSSToken& aResult);
bool ScanAtKeyword(nsCSSToken& aResult);
bool ScanHash(int32_t aChar, nsCSSToken& aResult);
bool ScanNumber(int32_t aChar, nsCSSToken& aResult);
bool ScanString(int32_t aChar, nsCSSToken& aResult);
bool ScanURange(int32_t aChar, nsCSSToken& aResult);
const PRUnichar *mBuffer;
uint32_t mOffset;
uint32_t mCount;