mirror of
https://github.com/encounter/engine.git
synced 2026-03-30 11:09:55 -07:00
Parse comments according to parsing.md
Now we don't even generate comment tokens. The new tokenizer for comments is much simpler.

R=eseidel@chromium.org
Review URL: https://codereview.chromium.org/682893002
This commit is contained in:
@@ -83,12 +83,6 @@ public:
|
||||
return m_data;
|
||||
}
|
||||
|
||||
const String& comment() const
|
||||
{
|
||||
ASSERT(m_type == HTMLToken::Comment);
|
||||
return m_data;
|
||||
}
|
||||
|
||||
explicit AtomicHTMLToken(HTMLToken& token)
|
||||
: m_type(token.type())
|
||||
{
|
||||
@@ -109,7 +103,6 @@ public:
|
||||
break;
|
||||
}
|
||||
case HTMLToken::Character:
|
||||
case HTMLToken::Comment:
|
||||
if (token.isAll8BitData())
|
||||
m_data = String::make8BitFrom16BitSource(token.data());
|
||||
else
|
||||
@@ -141,7 +134,6 @@ public:
|
||||
m_name = AtomicString(token.data());
|
||||
break;
|
||||
case HTMLToken::Character:
|
||||
case HTMLToken::Comment:
|
||||
m_data = token.data();
|
||||
break;
|
||||
}
|
||||
@@ -175,7 +167,7 @@ private:
|
||||
// "name" for StartTag and EndTag
|
||||
AtomicString m_name;
|
||||
|
||||
// "data" for Comment, "characters" for Character
|
||||
// "characters" for Character
|
||||
String m_data;
|
||||
|
||||
// For StartTag and EndTag
|
||||
|
||||
@@ -59,7 +59,6 @@ CompactHTMLToken::CompactHTMLToken(const HTMLToken* token, const TextPosition& t
|
||||
case HTMLToken::EndTag:
|
||||
m_selfClosing = token->selfClosing();
|
||||
// Fall through!
|
||||
case HTMLToken::Comment:
|
||||
case HTMLToken::Character: {
|
||||
m_isAll8BitData = token->isAll8BitData();
|
||||
m_data = attemptStaticStringCreation(token->data(), token->isAll8BitData() ? Force8Bit : Force16Bit);
|
||||
|
||||
@@ -50,7 +50,6 @@ public:
|
||||
Uninitialized,
|
||||
StartTag,
|
||||
EndTag,
|
||||
Comment,
|
||||
Character,
|
||||
EndOfFile,
|
||||
};
|
||||
@@ -114,7 +113,7 @@ public:
|
||||
|
||||
const DataVector& data() const
|
||||
{
|
||||
ASSERT(m_type == Character || m_type == Comment || m_type == StartTag || m_type == EndTag);
|
||||
ASSERT(m_type == Character || m_type == StartTag || m_type == EndTag);
|
||||
return m_data;
|
||||
}
|
||||
|
||||
@@ -298,28 +297,6 @@ public:
|
||||
m_data.appendVector(characters);
|
||||
}
|
||||
|
||||
/* Comment Tokens */
|
||||
|
||||
const DataVector& comment() const
|
||||
{
|
||||
ASSERT(m_type == Comment);
|
||||
return m_data;
|
||||
}
|
||||
|
||||
void beginComment()
|
||||
{
|
||||
ASSERT(m_type == Uninitialized);
|
||||
m_type = Comment;
|
||||
}
|
||||
|
||||
void appendToComment(UChar character)
|
||||
{
|
||||
ASSERT(character);
|
||||
ASSERT(m_type == Comment);
|
||||
m_data.append(character);
|
||||
m_orAllData |= character;
|
||||
}
|
||||
|
||||
private:
|
||||
Type m_type;
|
||||
Attribute::Range m_range; // Always starts at zero.
|
||||
|
||||
@@ -235,21 +235,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
|
||||
|
||||
HTML_BEGIN_STATE(TagOpenState) {
|
||||
if (cc == '!')
|
||||
HTML_ADVANCE_TO(MarkupDeclarationOpenState);
|
||||
HTML_ADVANCE_TO(CommentStart1State);
|
||||
else if (cc == '/')
|
||||
HTML_ADVANCE_TO(EndTagOpenState);
|
||||
HTML_ADVANCE_TO(CloseTagState);
|
||||
else if (isASCIIUpper(cc)) {
|
||||
m_token->beginStartTag(toLowerCase(cc));
|
||||
HTML_ADVANCE_TO(TagNameState);
|
||||
} else if (isASCIILower(cc)) {
|
||||
m_token->beginStartTag(cc);
|
||||
HTML_ADVANCE_TO(TagNameState);
|
||||
} else if (cc == '?') {
|
||||
parseError();
|
||||
// The spec consumes the current character before switching
|
||||
// to the bogus comment state, but it's easier to implement
|
||||
// if we reconsume the current character.
|
||||
HTML_RECONSUME_IN(BogusCommentState);
|
||||
} else {
|
||||
parseError();
|
||||
bufferCharacter('<');
|
||||
@@ -258,7 +252,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(EndTagOpenState) {
|
||||
HTML_BEGIN_STATE(CloseTagState) {
|
||||
if (isASCIIUpper(cc)) {
|
||||
m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
|
||||
m_appropriateEndTagName.clear();
|
||||
@@ -268,16 +262,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
|
||||
m_appropriateEndTagName.clear();
|
||||
HTML_ADVANCE_TO(TagNameState);
|
||||
} else if (cc == '>') {
|
||||
parseError();
|
||||
bufferCharacter('<');
|
||||
bufferCharacter('/');
|
||||
bufferCharacter('>');
|
||||
HTML_ADVANCE_TO(DataState);
|
||||
} else if (cc == kEndOfFileMarker) {
|
||||
parseError();
|
||||
} else {
|
||||
bufferCharacter('<');
|
||||
bufferCharacter('/');
|
||||
HTML_RECONSUME_IN(DataState);
|
||||
} else {
|
||||
parseError();
|
||||
HTML_RECONSUME_IN(BogusCommentState);
|
||||
}
|
||||
}
|
||||
END_STATE()
|
||||
@@ -571,144 +563,54 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(BogusCommentState) {
|
||||
m_token->beginComment();
|
||||
HTML_RECONSUME_IN(ContinueBogusCommentState);
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(ContinueBogusCommentState) {
|
||||
if (cc == '>')
|
||||
return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
||||
else if (cc == kEndOfFileMarker)
|
||||
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
||||
else {
|
||||
m_token->appendToComment(cc);
|
||||
HTML_ADVANCE_TO(ContinueBogusCommentState);
|
||||
}
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
|
||||
HTML_BEGIN_STATE(CommentStart1State) {
|
||||
if (cc == '-') {
|
||||
SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash);
|
||||
if (result == SegmentedString::DidMatch) {
|
||||
source.advanceAndASSERT('-');
|
||||
source.advanceAndASSERT('-');
|
||||
m_token->beginComment();
|
||||
HTML_SWITCH_TO(CommentStartState);
|
||||
} else if (result == SegmentedString::NotEnoughCharacters)
|
||||
return haveBufferedCharacterToken();
|
||||
}
|
||||
parseError();
|
||||
HTML_RECONSUME_IN(BogusCommentState);
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(CommentStartState) {
|
||||
if (cc == '-')
|
||||
HTML_ADVANCE_TO(CommentStartDashState);
|
||||
else if (cc == '>') {
|
||||
parseError();
|
||||
return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
||||
} else if (cc == kEndOfFileMarker) {
|
||||
parseError();
|
||||
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
||||
HTML_ADVANCE_TO(CommentStart2State);
|
||||
} else {
|
||||
m_token->appendToComment(cc);
|
||||
HTML_ADVANCE_TO(CommentState);
|
||||
bufferCharacter('<');
|
||||
bufferCharacter('!');
|
||||
HTML_RECONSUME_IN(DataState);
|
||||
}
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(CommentStartDashState) {
|
||||
if (cc == '-')
|
||||
HTML_ADVANCE_TO(CommentEndState);
|
||||
else if (cc == '>') {
|
||||
parseError();
|
||||
return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
||||
} else if (cc == kEndOfFileMarker) {
|
||||
parseError();
|
||||
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
||||
} else {
|
||||
m_token->appendToComment('-');
|
||||
m_token->appendToComment(cc);
|
||||
HTML_BEGIN_STATE(CommentStart2State) {
|
||||
if (cc == '-') {
|
||||
HTML_ADVANCE_TO(CommentState);
|
||||
} else {
|
||||
bufferCharacter('<');
|
||||
bufferCharacter('!');
|
||||
bufferCharacter('-');
|
||||
HTML_RECONSUME_IN(DataState);
|
||||
}
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(CommentState) {
|
||||
if (cc == '-')
|
||||
HTML_ADVANCE_TO(CommentEndDashState);
|
||||
else if (cc == kEndOfFileMarker) {
|
||||
parseError();
|
||||
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
||||
} else {
|
||||
m_token->appendToComment(cc);
|
||||
HTML_ADVANCE_TO(CommentEnd1State);
|
||||
else
|
||||
HTML_ADVANCE_TO(CommentState);
|
||||
}
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(CommentEndDashState) {
|
||||
HTML_BEGIN_STATE(CommentEnd1State) {
|
||||
if (cc == '-')
|
||||
HTML_ADVANCE_TO(CommentEndState);
|
||||
else if (cc == kEndOfFileMarker) {
|
||||
parseError();
|
||||
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
||||
} else {
|
||||
m_token->appendToComment('-');
|
||||
m_token->appendToComment(cc);
|
||||
HTML_ADVANCE_TO(CommentEnd2State);
|
||||
else
|
||||
HTML_ADVANCE_TO(CommentState);
|
||||
}
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(CommentEndState) {
|
||||
if (cc == '>')
|
||||
return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
||||
else if (cc == '!') {
|
||||
parseError();
|
||||
HTML_ADVANCE_TO(CommentEndBangState);
|
||||
} else if (cc == '-') {
|
||||
parseError();
|
||||
m_token->appendToComment('-');
|
||||
HTML_ADVANCE_TO(CommentEndState);
|
||||
} else if (cc == kEndOfFileMarker) {
|
||||
parseError();
|
||||
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
||||
} else {
|
||||
parseError();
|
||||
m_token->appendToComment('-');
|
||||
m_token->appendToComment('-');
|
||||
m_token->appendToComment(cc);
|
||||
HTML_BEGIN_STATE(CommentEnd2State) {
|
||||
if (cc == '-')
|
||||
HTML_ADVANCE_TO(CommentEnd2State);
|
||||
else if (cc == '>')
|
||||
HTML_ADVANCE_TO(DataState);
|
||||
else
|
||||
HTML_ADVANCE_TO(CommentState);
|
||||
}
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
HTML_BEGIN_STATE(CommentEndBangState) {
|
||||
if (cc == '-') {
|
||||
m_token->appendToComment('-');
|
||||
m_token->appendToComment('-');
|
||||
m_token->appendToComment('!');
|
||||
HTML_ADVANCE_TO(CommentEndDashState);
|
||||
} else if (cc == '>')
|
||||
return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
||||
else if (cc == kEndOfFileMarker) {
|
||||
parseError();
|
||||
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
||||
} else {
|
||||
m_token->appendToComment('-');
|
||||
m_token->appendToComment('-');
|
||||
m_token->appendToComment('!');
|
||||
m_token->appendToComment(cc);
|
||||
HTML_ADVANCE_TO(CommentState);
|
||||
}
|
||||
}
|
||||
END_STATE()
|
||||
|
||||
}
|
||||
|
||||
ASSERT_NOT_REACHED();
|
||||
|
||||
@@ -49,7 +49,7 @@ public:
|
||||
CharacterReferenceInAttributeValueState,
|
||||
RAWTEXTState,
|
||||
TagOpenState,
|
||||
EndTagOpenState,
|
||||
CloseTagState,
|
||||
TagNameState,
|
||||
RAWTEXTLessThanSignState,
|
||||
RAWTEXTEndTagOpenState,
|
||||
@@ -63,18 +63,11 @@ public:
|
||||
AttributeValueUnquotedState,
|
||||
AfterAttributeValueQuotedState,
|
||||
SelfClosingStartTagState,
|
||||
BogusCommentState,
|
||||
// The ContinueBogusCommentState is not in the HTML5 spec, but we use
|
||||
// it internally to keep track of whether we've started the bogus
|
||||
// comment token yet.
|
||||
ContinueBogusCommentState,
|
||||
MarkupDeclarationOpenState,
|
||||
CommentStartState,
|
||||
CommentStartDashState,
|
||||
CommentStart1State,
|
||||
CommentStart2State,
|
||||
CommentState,
|
||||
CommentEndDashState,
|
||||
CommentEndState,
|
||||
CommentEndBangState,
|
||||
CommentEnd1State,
|
||||
CommentEnd2State,
|
||||
};
|
||||
|
||||
// This function returns true if it emits a token. Otherwise, callers
|
||||
|
||||
@@ -128,8 +128,7 @@ void HTMLTreeBuilder::constructTree(AtomicHTMLToken* token)
|
||||
} else if (type == HTMLToken::EndOfFile) {
|
||||
processEndOfFile(token);
|
||||
} else {
|
||||
// We ignore Comments.
|
||||
ASSERT(type == HTMLToken::Comment);
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
m_tree.executeQueuedTasks();
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
< <! <!- <!-> --> -> > </ </>
|
||||
@@ -0,0 +1,15 @@
|
||||
<html>
|
||||
<link rel="import" href="../resources/dump-as-text.html" />
|
||||
<body>
|
||||
<
|
||||
<!
|
||||
<!-
|
||||
<!->
|
||||
<!-->aaa-->
|
||||
-->
|
||||
->
|
||||
>
|
||||
</
|
||||
</>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user