diff --git a/engine/core/html/parser/AtomicHTMLToken.h b/engine/core/html/parser/AtomicHTMLToken.h
index c23c42d04..0ba618499 100644
--- a/engine/core/html/parser/AtomicHTMLToken.h
+++ b/engine/core/html/parser/AtomicHTMLToken.h
@@ -83,12 +83,6 @@ public:
return m_data;
}
- const String& comment() const
- {
- ASSERT(m_type == HTMLToken::Comment);
- return m_data;
- }
-
explicit AtomicHTMLToken(HTMLToken& token)
: m_type(token.type())
{
@@ -109,7 +103,6 @@ public:
break;
}
case HTMLToken::Character:
- case HTMLToken::Comment:
if (token.isAll8BitData())
m_data = String::make8BitFrom16BitSource(token.data());
else
@@ -141,7 +134,6 @@ public:
m_name = AtomicString(token.data());
break;
case HTMLToken::Character:
- case HTMLToken::Comment:
m_data = token.data();
break;
}
@@ -175,7 +167,7 @@ private:
// "name" for StartTag and EndTag
AtomicString m_name;
- // "data" for Comment, "characters" for Character
+ // "characters" for Character
String m_data;
// For StartTag and EndTag
diff --git a/engine/core/html/parser/CompactHTMLToken.cpp b/engine/core/html/parser/CompactHTMLToken.cpp
index 46b4a5740..b4ec1c19b 100644
--- a/engine/core/html/parser/CompactHTMLToken.cpp
+++ b/engine/core/html/parser/CompactHTMLToken.cpp
@@ -59,7 +59,6 @@ CompactHTMLToken::CompactHTMLToken(const HTMLToken* token, const TextPosition& t
case HTMLToken::EndTag:
m_selfClosing = token->selfClosing();
// Fall through!
- case HTMLToken::Comment:
case HTMLToken::Character: {
m_isAll8BitData = token->isAll8BitData();
m_data = attemptStaticStringCreation(token->data(), token->isAll8BitData() ? Force8Bit : Force16Bit);
diff --git a/engine/core/html/parser/HTMLToken.h b/engine/core/html/parser/HTMLToken.h
index 293459342..d716906bb 100644
--- a/engine/core/html/parser/HTMLToken.h
+++ b/engine/core/html/parser/HTMLToken.h
@@ -50,7 +50,6 @@ public:
Uninitialized,
StartTag,
EndTag,
- Comment,
Character,
EndOfFile,
};
@@ -114,7 +113,7 @@ public:
const DataVector& data() const
{
- ASSERT(m_type == Character || m_type == Comment || m_type == StartTag || m_type == EndTag);
+ ASSERT(m_type == Character || m_type == StartTag || m_type == EndTag);
return m_data;
}
@@ -298,28 +297,6 @@ public:
m_data.appendVector(characters);
}
- /* Comment Tokens */
-
- const DataVector& comment() const
- {
- ASSERT(m_type == Comment);
- return m_data;
- }
-
- void beginComment()
- {
- ASSERT(m_type == Uninitialized);
- m_type = Comment;
- }
-
- void appendToComment(UChar character)
- {
- ASSERT(character);
- ASSERT(m_type == Comment);
- m_data.append(character);
- m_orAllData |= character;
- }
-
private:
Type m_type;
Attribute::Range m_range; // Always starts at zero.
diff --git a/engine/core/html/parser/HTMLTokenizer.cpp b/engine/core/html/parser/HTMLTokenizer.cpp
index ca454ad3c..6c4c21099 100644
--- a/engine/core/html/parser/HTMLTokenizer.cpp
+++ b/engine/core/html/parser/HTMLTokenizer.cpp
@@ -235,21 +235,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
HTML_BEGIN_STATE(TagOpenState) {
if (cc == '!')
- HTML_ADVANCE_TO(MarkupDeclarationOpenState);
+ HTML_ADVANCE_TO(CommentStart1State);
else if (cc == '/')
- HTML_ADVANCE_TO(EndTagOpenState);
+ HTML_ADVANCE_TO(CloseTagState);
else if (isASCIIUpper(cc)) {
m_token->beginStartTag(toLowerCase(cc));
HTML_ADVANCE_TO(TagNameState);
} else if (isASCIILower(cc)) {
m_token->beginStartTag(cc);
HTML_ADVANCE_TO(TagNameState);
- } else if (cc == '?') {
- parseError();
- // The spec consumes the current character before switching
- // to the bogus comment state, but it's easier to implement
- // if we reconsume the current character.
- HTML_RECONSUME_IN(BogusCommentState);
} else {
parseError();
bufferCharacter('<');
@@ -258,7 +252,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
}
END_STATE()
- HTML_BEGIN_STATE(EndTagOpenState) {
+ HTML_BEGIN_STATE(CloseTagState) {
if (isASCIIUpper(cc)) {
m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
m_appropriateEndTagName.clear();
@@ -268,16 +262,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
m_appropriateEndTagName.clear();
HTML_ADVANCE_TO(TagNameState);
} else if (cc == '>') {
- parseError();
+ bufferCharacter('<');
+ bufferCharacter('/');
+ bufferCharacter('>');
HTML_ADVANCE_TO(DataState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
+ } else {
bufferCharacter('<');
bufferCharacter('/');
HTML_RECONSUME_IN(DataState);
- } else {
- parseError();
- HTML_RECONSUME_IN(BogusCommentState);
}
}
END_STATE()
@@ -571,144 +563,54 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
}
END_STATE()
- HTML_BEGIN_STATE(BogusCommentState) {
- m_token->beginComment();
- HTML_RECONSUME_IN(ContinueBogusCommentState);
- }
- END_STATE()
-
- HTML_BEGIN_STATE(ContinueBogusCommentState) {
- if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker)
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- else {
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(ContinueBogusCommentState);
- }
- }
- END_STATE()
-
- HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
+ HTML_BEGIN_STATE(CommentStart1State) {
if (cc == '-') {
- SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash);
- if (result == SegmentedString::DidMatch) {
- source.advanceAndASSERT('-');
- source.advanceAndASSERT('-');
- m_token->beginComment();
- HTML_SWITCH_TO(CommentStartState);
- } else if (result == SegmentedString::NotEnoughCharacters)
- return haveBufferedCharacterToken();
- }
- parseError();
- HTML_RECONSUME_IN(BogusCommentState);
- }
- END_STATE()
-
- HTML_BEGIN_STATE(CommentStartState) {
- if (cc == '-')
- HTML_ADVANCE_TO(CommentStartDashState);
- else if (cc == '>') {
- parseError();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ HTML_ADVANCE_TO(CommentStart2State);
} else {
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(CommentState);
+ bufferCharacter('<');
+ bufferCharacter('!');
+ HTML_RECONSUME_IN(DataState);
}
}
END_STATE()
- HTML_BEGIN_STATE(CommentStartDashState) {
- if (cc == '-')
- HTML_ADVANCE_TO(CommentEndState);
- else if (cc == '>') {
- parseError();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment('-');
- m_token->appendToComment(cc);
+ HTML_BEGIN_STATE(CommentStart2State) {
+ if (cc == '-') {
HTML_ADVANCE_TO(CommentState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('!');
+ bufferCharacter('-');
+ HTML_RECONSUME_IN(DataState);
}
}
END_STATE()
HTML_BEGIN_STATE(CommentState) {
if (cc == '-')
- HTML_ADVANCE_TO(CommentEndDashState);
- else if (cc == kEndOfFileMarker) {
- parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(CommentEnd1State);
+ else
HTML_ADVANCE_TO(CommentState);
- }
}
END_STATE()
- HTML_BEGIN_STATE(CommentEndDashState) {
+ HTML_BEGIN_STATE(CommentEnd1State) {
if (cc == '-')
- HTML_ADVANCE_TO(CommentEndState);
- else if (cc == kEndOfFileMarker) {
- parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment('-');
- m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(CommentEnd2State);
+ else
HTML_ADVANCE_TO(CommentState);
- }
}
END_STATE()
- HTML_BEGIN_STATE(CommentEndState) {
- if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == '!') {
- parseError();
- HTML_ADVANCE_TO(CommentEndBangState);
- } else if (cc == '-') {
- parseError();
- m_token->appendToComment('-');
- HTML_ADVANCE_TO(CommentEndState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- parseError();
- m_token->appendToComment('-');
- m_token->appendToComment('-');
- m_token->appendToComment(cc);
+ HTML_BEGIN_STATE(CommentEnd2State) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentEnd2State);
+ else if (cc == '>')
+ HTML_ADVANCE_TO(DataState);
+ else
HTML_ADVANCE_TO(CommentState);
- }
}
END_STATE()
-
- HTML_BEGIN_STATE(CommentEndBangState) {
- if (cc == '-') {
- m_token->appendToComment('-');
- m_token->appendToComment('-');
- m_token->appendToComment('!');
- HTML_ADVANCE_TO(CommentEndDashState);
- } else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker) {
- parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment('-');
- m_token->appendToComment('-');
- m_token->appendToComment('!');
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(CommentState);
- }
- }
- END_STATE()
-
}
ASSERT_NOT_REACHED();
diff --git a/engine/core/html/parser/HTMLTokenizer.h b/engine/core/html/parser/HTMLTokenizer.h
index a22cf50df..bc8c48af0 100644
--- a/engine/core/html/parser/HTMLTokenizer.h
+++ b/engine/core/html/parser/HTMLTokenizer.h
@@ -49,7 +49,7 @@ public:
CharacterReferenceInAttributeValueState,
RAWTEXTState,
TagOpenState,
- EndTagOpenState,
+ CloseTagState,
TagNameState,
RAWTEXTLessThanSignState,
RAWTEXTEndTagOpenState,
@@ -63,18 +63,11 @@ public:
AttributeValueUnquotedState,
AfterAttributeValueQuotedState,
SelfClosingStartTagState,
- BogusCommentState,
- // The ContinueBogusCommentState is not in the HTML5 spec, but we use
- // it internally to keep track of whether we've started the bogus
- // comment token yet.
- ContinueBogusCommentState,
- MarkupDeclarationOpenState,
- CommentStartState,
- CommentStartDashState,
+ CommentStart1State,
+ CommentStart2State,
CommentState,
- CommentEndDashState,
- CommentEndState,
- CommentEndBangState,
+ CommentEnd1State,
+ CommentEnd2State,
};
// This function returns true if it emits a token. Otherwise, callers
diff --git a/engine/core/html/parser/HTMLTreeBuilder.cpp b/engine/core/html/parser/HTMLTreeBuilder.cpp
index 89fc3a854..710a99df3 100644
--- a/engine/core/html/parser/HTMLTreeBuilder.cpp
+++ b/engine/core/html/parser/HTMLTreeBuilder.cpp
@@ -128,8 +128,7 @@ void HTMLTreeBuilder::constructTree(AtomicHTMLToken* token)
} else if (type == HTMLToken::EndOfFile) {
processEndOfFile(token);
} else {
- // We ignore Comments.
- ASSERT(type == HTMLToken::Comment);
+ ASSERT_NOT_REACHED();
}
m_tree.executeQueuedTasks();
diff --git a/tests/parser/comments-expected.txt b/tests/parser/comments-expected.txt
new file mode 100644
index 000000000..8de7aeba2
--- /dev/null
+++ b/tests/parser/comments-expected.txt
@@ -0,0 +1 @@
+< --> -> > >
diff --git a/tests/parser/comments.html b/tests/parser/comments.html
new file mode 100644
index 000000000..777314308
--- /dev/null
+++ b/tests/parser/comments.html
@@ -0,0 +1,15 @@
+
+
+
+<
+
+aaa-->
+-->
+->
+>
+
+>
+
+