/* * Copyright (c) 2005-2007 Henri Sivonen * Copyright (c) 2007-2010 Mozilla Foundation * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla * Foundation, and Opera Software ASA. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ /* * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. * Please edit Tokenizer.java instead and regenerate. 
*/ #define nsHtml5Tokenizer_cpp__ #include "prtypes.h" #include "nsIAtom.h" #include "nsHtml5AtomTable.h" #include "nsString.h" #include "nsINameSpaceManager.h" #include "nsIContent.h" #include "nsIDocument.h" #include "nsTraceRefcnt.h" #include "jArray.h" #include "nsHtml5DocumentMode.h" #include "nsHtml5ArrayCopy.h" #include "nsHtml5NamedCharacters.h" #include "nsHtml5NamedCharactersAccel.h" #include "nsHtml5Atoms.h" #include "nsHtml5ByteReadable.h" #include "nsIUnicodeDecoder.h" #include "nsAHtml5TreeBuilderState.h" #include "nsHtml5Macros.h" #include "nsHtml5TreeBuilder.h" #include "nsHtml5MetaScanner.h" #include "nsHtml5AttributeName.h" #include "nsHtml5ElementName.h" #include "nsHtml5HtmlAttributes.h" #include "nsHtml5StackNode.h" #include "nsHtml5UTF16Buffer.h" #include "nsHtml5StateSnapshot.h" #include "nsHtml5Portability.h" #include "nsHtml5Tokenizer.h" static PRUnichar const TITLE_ARR_DATA[] = { 't', 'i', 't', 'l', 'e' }; static PRUnichar const SCRIPT_ARR_DATA[] = { 's', 'c', 'r', 'i', 'p', 't' }; static PRUnichar const STYLE_ARR_DATA[] = { 's', 't', 'y', 'l', 'e' }; static PRUnichar const PLAINTEXT_ARR_DATA[] = { 'p', 'l', 'a', 'i', 'n', 't', 'e', 'x', 't' }; static PRUnichar const XMP_ARR_DATA[] = { 'x', 'm', 'p' }; static PRUnichar const TEXTAREA_ARR_DATA[] = { 't', 'e', 'x', 't', 'a', 'r', 'e', 'a' }; static PRUnichar const IFRAME_ARR_DATA[] = { 'i', 'f', 'r', 'a', 'm', 'e' }; static PRUnichar const NOEMBED_ARR_DATA[] = { 'n', 'o', 'e', 'm', 'b', 'e', 'd' }; static PRUnichar const NOSCRIPT_ARR_DATA[] = { 'n', 'o', 's', 'c', 'r', 'i', 'p', 't' }; static PRUnichar const NOFRAMES_ARR_DATA[] = { 'n', 'o', 'f', 'r', 'a', 'm', 'e', 's' }; nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler) : tokenHandler(tokenHandler), encodingDeclarationHandler(nsnull), bmpChar(jArray(1)), astralChar(jArray(2)), tagName(nsnull), attributeName(nsnull), doctypeName(nsnull), publicIdentifier(nsnull), systemIdentifier(nsnull), attributes(nsnull) { 
MOZ_COUNT_CTOR(nsHtml5Tokenizer); } void nsHtml5Tokenizer::setInterner(nsHtml5AtomTable* interner) { this->interner = interner; } void nsHtml5Tokenizer::initLocation(nsString* newPublicId, nsString* newSystemId) { this->systemId = newSystemId; this->publicId = newPublicId; } nsHtml5Tokenizer::~nsHtml5Tokenizer() { MOZ_COUNT_DTOR(nsHtml5Tokenizer); bmpChar.release(); astralChar.release(); } void nsHtml5Tokenizer::setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation) { this->stateSave = specialTokenizerState; if (specialTokenizerState == NS_HTML5TOKENIZER_DATA) { return; } jArray asArray = nsHtml5Portability::newCharArrayFromLocal(endTagExpectation); this->endTagExpectation = nsHtml5ElementName::elementNameByBuffer(asArray, 0, asArray.length, interner); asArray.release(); endTagExpectationToArray(); } void nsHtml5Tokenizer::setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsHtml5ElementName* endTagExpectation) { this->stateSave = specialTokenizerState; this->endTagExpectation = endTagExpectation; endTagExpectationToArray(); } void nsHtml5Tokenizer::endTagExpectationToArray() { switch(endTagExpectation->group) { case NS_HTML5TREE_BUILDER_TITLE: { endTagExpectationAsArray = TITLE_ARR; return; } case NS_HTML5TREE_BUILDER_SCRIPT: { endTagExpectationAsArray = SCRIPT_ARR; return; } case NS_HTML5TREE_BUILDER_STYLE: { endTagExpectationAsArray = STYLE_ARR; return; } case NS_HTML5TREE_BUILDER_PLAINTEXT: { endTagExpectationAsArray = PLAINTEXT_ARR; return; } case NS_HTML5TREE_BUILDER_XMP: { endTagExpectationAsArray = XMP_ARR; return; } case NS_HTML5TREE_BUILDER_TEXTAREA: { endTagExpectationAsArray = TEXTAREA_ARR; return; } case NS_HTML5TREE_BUILDER_IFRAME: { endTagExpectationAsArray = IFRAME_ARR; return; } case NS_HTML5TREE_BUILDER_NOEMBED: { endTagExpectationAsArray = NOEMBED_ARR; return; } case NS_HTML5TREE_BUILDER_NOSCRIPT: { endTagExpectationAsArray = NOSCRIPT_ARR; return; } case NS_HTML5TREE_BUILDER_NOFRAMES: { 
endTagExpectationAsArray = NOFRAMES_ARR; return; } default: { return; } } } void nsHtml5Tokenizer::setLineNumber(PRInt32 line) { this->line = line; } nsHtml5HtmlAttributes* nsHtml5Tokenizer::emptyAttributes() { return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES; } void nsHtml5Tokenizer::appendStrBuf(PRUnichar c) { if (strBufLen == strBuf.length) { jArray newBuf = jArray(strBuf.length + NS_HTML5TOKENIZER_BUFFER_GROW_BY); nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length); strBuf.release(); strBuf = newBuf; } strBuf[strBufLen++] = c; } nsString* nsHtml5Tokenizer::strBufToString() { return nsHtml5Portability::newStringFromBuffer(strBuf, 0, strBufLen); } void nsHtml5Tokenizer::strBufToDoctypeName() { doctypeName = nsHtml5Portability::newLocalNameFromBuffer(strBuf, 0, strBufLen, interner); } void nsHtml5Tokenizer::emitStrBuf() { if (strBufLen > 0) { tokenHandler->characters(strBuf, 0, strBufLen); } } void nsHtml5Tokenizer::appendLongStrBuf(PRUnichar c) { if (longStrBufLen == longStrBuf.length) { jArray newBuf = jArray(longStrBufLen + (longStrBufLen >> 1)); nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length); longStrBuf.release(); longStrBuf = newBuf; } longStrBuf[longStrBufLen++] = c; } void nsHtml5Tokenizer::appendLongStrBuf(jArray buffer, PRInt32 offset, PRInt32 length) { PRInt32 reqLen = longStrBufLen + length; if (longStrBuf.length < reqLen) { jArray newBuf = jArray(reqLen + (reqLen >> 1)); nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length); longStrBuf.release(); longStrBuf = newBuf; } nsHtml5ArrayCopy::arraycopy(buffer, offset, longStrBuf, longStrBufLen, length); longStrBufLen = reqLen; } nsString* nsHtml5Tokenizer::longStrBufToString() { return nsHtml5Portability::newStringFromBuffer(longStrBuf, 0, longStrBufLen); } void nsHtml5Tokenizer::emitComment(PRInt32 provisionalHyphens, PRInt32 pos) { tokenHandler->comment(longStrBuf, 0, longStrBufLen - provisionalHyphens); cstart = pos + 1; } void nsHtml5Tokenizer::flushChars(PRUnichar* 
buf, PRInt32 pos) { if (pos > cstart) { tokenHandler->characters(buf, cstart, pos - cstart); } cstart = PR_INT32_MAX; } void nsHtml5Tokenizer::resetAttributes() { attributes = nsnull; } void nsHtml5Tokenizer::strBufToElementNameString() { tagName = nsHtml5ElementName::elementNameByBuffer(strBuf, 0, strBufLen, interner); } PRInt32 nsHtml5Tokenizer::emitCurrentTagToken(PRBool selfClosing, PRInt32 pos) { cstart = pos + 1; stateSave = NS_HTML5TOKENIZER_DATA; nsHtml5HtmlAttributes* attrs = (!attributes ? nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES : attributes); if (endTag) { tokenHandler->endTag(tagName); delete attributes; } else { tokenHandler->startTag(tagName, attrs, selfClosing); } tagName->release(); tagName = nsnull; resetAttributes(); return stateSave; } void nsHtml5Tokenizer::attributeNameComplete() { attributeName = nsHtml5AttributeName::nameByBuffer(strBuf, 0, strBufLen, interner); if (!attributes) { attributes = new nsHtml5HtmlAttributes(0); } if (attributes->contains(attributeName)) { attributeName->release(); attributeName = nsnull; } } void nsHtml5Tokenizer::addAttributeWithoutValue() { if (attributeName) { attributes->addAttribute(attributeName, nsHtml5Portability::newEmptyString()); attributeName = nsnull; } } void nsHtml5Tokenizer::addAttributeWithValue() { if (attributeName) { nsString* val = longStrBufToString(); attributes->addAttribute(attributeName, val); attributeName = nsnull; } } void nsHtml5Tokenizer::start() { initializeWithoutStarting(); tokenHandler->startTokenization(this); } PRBool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) { PRInt32 state = stateSave; PRInt32 returnState = returnStateSave; PRUnichar c = '\0'; shouldSuspend = PR_FALSE; lastCR = PR_FALSE; PRInt32 start = buffer->getStart(); PRInt32 pos = start - 1; switch(state) { case NS_HTML5TOKENIZER_DATA: case NS_HTML5TOKENIZER_RCDATA: case NS_HTML5TOKENIZER_SCRIPT_DATA: case NS_HTML5TOKENIZER_PLAINTEXT: case NS_HTML5TOKENIZER_RAWTEXT: case 
NS_HTML5TOKENIZER_CDATA_SECTION:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
    case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END: {
      // In the states listed above, the character run that flushChars()
      // reports begins at the start of this buffer.
      cstart = start;
      break;
    }
    default: {
      // In every other state no character run is pending: with cstart at
      // the maximum value flushChars() reports nothing.
      cstart = PR_INT32_MAX;
      break;
    }
  }
  pos = stateLoop(state, c, pos, buffer->getBuffer(), PR_FALSE, returnState, buffer->getEnd());
  // When the loop reached the end of the buffer the new start is that end;
  // otherwise the new start is the position after the one the loop returned.
  if (pos == buffer->getEnd()) {
    buffer->setStart(pos);
  } else {
    buffer->setStart(pos + 1);
  }
  return lastCR;
}

/*
 * The tokenizer state machine: an endless loop around a switch on the
 * current state, reading code units from buf with checkChar() after
 * pre-incrementing pos.
 *
 * Parameters, as far as the visible code shows:
 *   state       - the state to start in
 *   c           - the current code unit (re-examined when reconsume is set)
 *   pos         - index of the last code unit read; incremented before reads
 *   buf         - the code units being tokenized
 *   reconsume   - when true, the state handles c again instead of reading
 *   returnState - state to come back to after a character reference
 *   endPos      - index at which reading stops
 *
 * NOTE(review): NS_HTML5_BREAK(label) and NS_HTML5_CONTINUE(label) are
 * defined elsewhere; they presumably jump to "label_end" and "label"
 * respectively -- confirm against nsHtml5Macros.h. Only the beginning of this
 * function is commented here.
 */
PRInt32 nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos) {
  stateloop: for (; ; ) {
    switch(state) {
      case NS_HTML5TOKENIZER_DATA: {
        for (; ; ) {
          if (reconsume) {
            reconsume = PR_FALSE;
          } else {
            // Stop the whole state loop once the end position is reached.
            if (++pos == endPos) {
              NS_HTML5_BREAK(stateloop);
            }
            c = checkChar(buf, pos);
          }
          switch(c) {
            case '&': {
              // Report the pending text, then start a character reference,
              // remembering this state as the one to return to.
              flushChars(buf, pos);
              clearStrBufAndAppend(c);
              setAdditionalAndRememberAmpersandLocation('\0');
              returnState = state;
              state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
              NS_HTML5_CONTINUE(stateloop);
            }
            case '<': {
              // Report the pending text and move on to the tag-open state,
              // whose case follows this one directly.
              flushChars(buf, pos);
              state = NS_HTML5TOKENIZER_TAG_OPEN;
              NS_HTML5_BREAK(dataloop);
            }
            case '\0': {
              emitReplacementCharacter(buf, pos);
              continue;
            }
            case '\r': {
              emitCarriageReturn(buf, pos);
              NS_HTML5_BREAK(stateloop);
            }
            case '\n': {
              silentLineFeed();
              // No break: control falls through to the default case.
            }
            default: {
              continue;
            }
          }
        }
        dataloop_end: ;
        // No break: control falls through to the TAG_OPEN case.
      }
      case NS_HTML5TOKENIZER_TAG_OPEN: {
        for (; ; ) {
          if (++pos == endPos) {
            NS_HTML5_BREAK(stateloop);
          }
          c = checkChar(buf, pos);
          // An ASCII upper-case letter starts a start tag.
          if (c >= 'A' && c <= 'Z') {
            endTag = PR_FALSE;
clearStrBufAndAppend((PRUnichar) (c + 0x20)); state = NS_HTML5TOKENIZER_TAG_NAME; NS_HTML5_BREAK(tagopenloop); } else if (c >= 'a' && c <= 'z') { endTag = PR_FALSE; clearStrBufAndAppend(c); state = NS_HTML5TOKENIZER_TAG_NAME; NS_HTML5_BREAK(tagopenloop); } switch(c) { case '!': { state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN; NS_HTML5_CONTINUE(stateloop); } case '/': { state = NS_HTML5TOKENIZER_CLOSE_TAG_OPEN; NS_HTML5_CONTINUE(stateloop); } case '\?': { clearLongStrBufAndAppend(c); state = NS_HTML5TOKENIZER_BOGUS_COMMENT; NS_HTML5_CONTINUE(stateloop); } case '>': { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2); cstart = pos + 1; state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } default: { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); cstart = pos; state = NS_HTML5TOKENIZER_DATA; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } tagopenloop_end: ; } case NS_HTML5TOKENIZER_TAG_NAME: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); strBufToElementNameString(); state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { strBufToElementNameString(); state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; NS_HTML5_BREAK(tagnameloop); } case '/': { strBufToElementNameString(); state = NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG; NS_HTML5_CONTINUE(stateloop); } case '>': { strBufToElementNameString(); state = emitCurrentTagToken(PR_FALSE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } default: { if (c >= 'A' && c <= 'Z') { c += 0x20; } appendStrBuf(c); continue; } } } tagnameloop_end: ; } case NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '\r': 
{ silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { continue; } case '/': { state = NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG; NS_HTML5_CONTINUE(stateloop); } case '>': { state = emitCurrentTagToken(PR_FALSE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } case '\"': case '\'': case '<': case '=': default: { if (c >= 'A' && c <= 'Z') { c += 0x20; } clearStrBufAndAppend(c); state = NS_HTML5TOKENIZER_ATTRIBUTE_NAME; NS_HTML5_BREAK(beforeattributenameloop); } } } beforeattributenameloop_end: ; } case NS_HTML5TOKENIZER_ATTRIBUTE_NAME: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); attributeNameComplete(); state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { attributeNameComplete(); state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME; NS_HTML5_CONTINUE(stateloop); } case '/': { attributeNameComplete(); addAttributeWithoutValue(); state = NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG; NS_HTML5_CONTINUE(stateloop); } case '=': { attributeNameComplete(); state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE; NS_HTML5_BREAK(attributenameloop); } case '>': { attributeNameComplete(); addAttributeWithoutValue(); state = emitCurrentTagToken(PR_FALSE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } case '\"': case '\'': case '<': default: { if (c >= 'A' && c <= 'Z') { c += 0x20; } appendStrBuf(c); continue; } } } attributenameloop_end: ; } case NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case 
'\f': { continue; } case '\"': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED; NS_HTML5_BREAK(beforeattributevalueloop); } case '&': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } case '\'': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '>': { addAttributeWithoutValue(); state = emitCurrentTagToken(PR_FALSE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } case '<': case '=': case '`': default: { clearLongStrBufAndAppend(c); state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED; NS_HTML5_CONTINUE(stateloop); } } } beforeattributevalueloop_end: ; } case NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '\"': { addAttributeWithValue(); state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED; NS_HTML5_BREAK(attributevaluedoublequotedloop); } case '&': { clearStrBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\"'); returnState = state; state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE; NS_HTML5_CONTINUE(stateloop); } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); continue; } } } attributevaluedoublequotedloop_end: ; } case NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; 
NS_HTML5_CONTINUE(stateloop); } case '/': { state = NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG; NS_HTML5_BREAK(afterattributevaluequotedloop); } case '>': { state = emitCurrentTagToken(PR_FALSE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } default: { state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } afterattributevaluequotedloop_end: ; } case NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG: { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '>': { state = emitCurrentTagToken(PR_TRUE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } default: { state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } case NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '\r': { silentCarriageReturn(); addAttributeWithValue(); state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { addAttributeWithValue(); state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; NS_HTML5_CONTINUE(stateloop); } case '&': { clearStrBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('>'); returnState = state; state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE; NS_HTML5_CONTINUE(stateloop); } case '>': { addAttributeWithValue(); state = emitCurrentTagToken(PR_FALSE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } case '<': case '\"': case '\'': case '=': case '`': default: { appendLongStrBuf(c); continue; } } } } case NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { 
silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { continue; } case '/': { addAttributeWithoutValue(); state = NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG; NS_HTML5_CONTINUE(stateloop); } case '=': { state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE; NS_HTML5_CONTINUE(stateloop); } case '>': { addAttributeWithoutValue(); state = emitCurrentTagToken(PR_FALSE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } case '\"': case '\'': case '<': default: { addAttributeWithoutValue(); if (c >= 'A' && c <= 'Z') { c += 0x20; } clearStrBufAndAppend(c); state = NS_HTML5TOKENIZER_ATTRIBUTE_NAME; NS_HTML5_CONTINUE(stateloop); } } } } case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { clearLongStrBufAndAppend(c); state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN; NS_HTML5_BREAK(markupdeclarationopenloop); } case 'd': case 'D': { clearLongStrBufAndAppend(c); index = 0; state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE; NS_HTML5_CONTINUE(stateloop); } case '[': { if (tokenHandler->cdataSectionAllowed()) { clearLongStrBufAndAppend(c); index = 0; state = NS_HTML5TOKENIZER_CDATA_START; NS_HTML5_CONTINUE(stateloop); } } default: { clearLongStrBuf(); state = NS_HTML5TOKENIZER_BOGUS_COMMENT; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } markupdeclarationopenloop_end: ; } case NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\0': { NS_HTML5_BREAK(stateloop); } case '-': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_COMMENT_START; NS_HTML5_BREAK(markupdeclarationhyphenloop); } default: { state = NS_HTML5TOKENIZER_BOGUS_COMMENT; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } markupdeclarationhyphenloop_end: ; } case 
NS_HTML5TOKENIZER_COMMENT_START: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT_START_DASH; NS_HTML5_CONTINUE(stateloop); } case '>': { emitComment(0, pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\r': { appendLongStrBufCarriageReturn(); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_BREAK(commentstartloop); } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_BREAK(commentstartloop); } } } commentstartloop_end: ; } case NS_HTML5TOKENIZER_COMMENT: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT_END_DASH; NS_HTML5_BREAK(commentloop); } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); continue; } } } commentloop_end: ; } case NS_HTML5TOKENIZER_COMMENT_END_DASH: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT_END; NS_HTML5_BREAK(commentenddashloop); } case '\r': { appendLongStrBufCarriageReturn(); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_CONTINUE(stateloop); } } } commentenddashloop_end: ; } case NS_HTML5TOKENIZER_COMMENT_END: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '>': { emitComment(2, 
pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '-': { adjustDoubleHyphenAndAppendToLongStrBufAndErr(c); continue; } case '\r': { adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn(); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_BREAK(stateloop); } case '\n': { adjustDoubleHyphenAndAppendToLongStrBufLineFeed(); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_CONTINUE(stateloop); } case '!': { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT_END_BANG; NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } default: { adjustDoubleHyphenAndAppendToLongStrBufAndErr(c); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_CONTINUE(stateloop); } } } } case NS_HTML5TOKENIZER_COMMENT_END_SPACE: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '>': { emitComment(0, pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '-': { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT_END_DASH; NS_HTML5_CONTINUE(stateloop); } case ' ': case '\t': case '\f': { appendLongStrBuf(c); continue; } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_CONTINUE(stateloop); } } } } case NS_HTML5TOKENIZER_COMMENT_END_BANG: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '>': { emitComment(3, pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '-': { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT_END_DASH; NS_HTML5_CONTINUE(stateloop); } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_CONTINUE(stateloop); } } } } case 
NS_HTML5TOKENIZER_COMMENT_START_DASH: { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT_END; NS_HTML5_CONTINUE(stateloop); } case '>': { emitComment(1, pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\r': { appendLongStrBufCarriageReturn(); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_COMMENT; NS_HTML5_CONTINUE(stateloop); } } } case NS_HTML5TOKENIZER_CDATA_START: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (index < 6) { if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) { appendLongStrBuf(c); } else { state = NS_HTML5TOKENIZER_BOGUS_COMMENT; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } index++; continue; } else { cstart = pos; state = NS_HTML5TOKENIZER_CDATA_SECTION; reconsume = PR_TRUE; break; } } } case NS_HTML5TOKENIZER_CDATA_SECTION: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case ']': { flushChars(buf, pos); state = NS_HTML5TOKENIZER_CDATA_RSQB; NS_HTML5_BREAK(cdatasectionloop); } case '\0': { emitReplacementCharacter(buf, pos); continue; } case '\r': { emitCarriageReturn(buf, pos); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { continue; } } } cdatasectionloop_end: ; } case NS_HTML5TOKENIZER_CDATA_RSQB: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case ']': { state = NS_HTML5TOKENIZER_CDATA_RSQB_RSQB; NS_HTML5_BREAK(cdatarsqb); } default: { tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); cstart = pos; state = NS_HTML5TOKENIZER_CDATA_SECTION; reconsume = 
PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } cdatarsqb_end: ; } case NS_HTML5TOKENIZER_CDATA_RSQB_RSQB: { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '>': { cstart = pos + 1; state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } default: { tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2); cstart = pos; state = NS_HTML5TOKENIZER_CDATA_SECTION; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } case NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '\'': { addAttributeWithValue(); state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '&': { clearStrBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\''); returnState = state; state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE; NS_HTML5_BREAK(attributevaluesinglequotedloop); } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); continue; } } } attributevaluesinglequotedloop_end: ; } case NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE: { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (c == '\0') { NS_HTML5_BREAK(stateloop); } switch(c) { case ' ': case '\t': case '\n': case '\r': case '\f': case '<': case '&': { emitOrAppendStrBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } case '#': { appendStrBuf('#'); state = NS_HTML5TOKENIZER_CONSUME_NCR; NS_HTML5_CONTINUE(stateloop); } default: { if (c == additional) { emitOrAppendStrBuf(returnState); state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } if (c >= 'a' && c <= 'z') { firstCharKey = 
c - 'a' + 26; } else if (c >= 'A' && c <= 'Z') { firstCharKey = c - 'A'; } else { emitOrAppendStrBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } appendStrBuf(c); state = NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP; } } } case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP: { { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (c == '\0') { NS_HTML5_BREAK(stateloop); } PRInt32 hilo = 0; if (c <= 'z') { const PRInt32* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c]; if (row) { hilo = row[firstCharKey]; } } if (!hilo) { emitOrAppendStrBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } appendStrBuf(c); lo = hilo & 0xFFFF; hi = hilo >> 16; entCol = -1; candidate = -1; strBufMark = 0; state = NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL; } } case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (c == '\0') { NS_HTML5_BREAK(stateloop); } entCol++; for (; ; ) { if (hi < lo) { NS_HTML5_BREAK(outer); } if (entCol == nsHtml5NamedCharacters::NAMES[lo].length) { candidate = lo; strBufMark = strBufLen; lo++; } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length) { NS_HTML5_BREAK(outer); } else if (c > nsHtml5NamedCharacters::NAMES[lo][entCol]) { lo++; } else { NS_HTML5_BREAK(loloop); } } loloop_end: ; for (; ; ) { if (hi < lo) { NS_HTML5_BREAK(outer); } if (entCol == nsHtml5NamedCharacters::NAMES[hi].length) { NS_HTML5_BREAK(hiloop); } if (entCol > nsHtml5NamedCharacters::NAMES[hi].length) { NS_HTML5_BREAK(outer); } else if (c < nsHtml5NamedCharacters::NAMES[hi][entCol]) { hi--; } else { NS_HTML5_BREAK(hiloop); } } hiloop_end: ; if (hi < lo) { NS_HTML5_BREAK(outer); } appendStrBuf(c); continue; } outer_end: 
; if (candidate == -1) { emitOrAppendStrBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } else { jArray candidateArr = nsHtml5NamedCharacters::NAMES[candidate]; if (!candidateArr.length || candidateArr[candidateArr.length - 1] != ';') { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { PRUnichar ch; if (strBufMark == strBufLen) { ch = c; } else { ch = strBuf[strBufMark]; } if (ch == '=' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { appendStrBufToLongStrBuf(); state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } const PRUnichar* val = nsHtml5NamedCharacters::VALUES[candidate]; if ((val[0] & 0xFC00) == 0xD800) { emitOrAppendTwo(val, returnState); } else { emitOrAppendOne(val, returnState); } if (strBufMark < strBufLen) { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { for (PRInt32 i = strBufMark; i < strBufLen; i++) { appendLongStrBuf(strBuf[i]); } } else { tokenHandler->characters(strBuf, strBufMark, strBufLen - strBufMark); } } if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } case NS_HTML5TOKENIZER_CONSUME_NCR: { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); prevValue = -1; value = 0; seenDigits = PR_FALSE; switch(c) { case 'x': case 'X': { appendStrBuf(c); state = NS_HTML5TOKENIZER_HEX_NCR_LOOP; NS_HTML5_CONTINUE(stateloop); } default: { state = NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP; reconsume = PR_TRUE; } } } case NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } if (value < prevValue) { value = 0x110000; } prevValue = value; if (c >= '0' && c <= '9') { seenDigits = PR_TRUE; value *= 10; value += 
c - '0'; continue; } else if (c == ';') { if (seenDigits) { if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos + 1; } state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE; NS_HTML5_BREAK(decimalloop); } else { appendStrBuf(';'); emitOrAppendStrBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos + 1; } state = returnState; NS_HTML5_CONTINUE(stateloop); } } else { if (!seenDigits) { emitOrAppendStrBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } else { if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE; reconsume = PR_TRUE; NS_HTML5_BREAK(decimalloop); } } } decimalloop_end: ; } case NS_HTML5TOKENIZER_HANDLE_NCR_VALUE: { handleNcrValue(returnState); state = returnState; NS_HTML5_CONTINUE(stateloop); } case NS_HTML5TOKENIZER_HEX_NCR_LOOP: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (value < prevValue) { value = 0x110000; } prevValue = value; if (c >= '0' && c <= '9') { seenDigits = PR_TRUE; value *= 16; value += c - '0'; continue; } else if (c >= 'A' && c <= 'F') { seenDigits = PR_TRUE; value *= 16; value += c - 'A' + 10; continue; } else if (c >= 'a' && c <= 'f') { seenDigits = PR_TRUE; value *= 16; value += c - 'a' + 10; continue; } else if (c == ';') { if (seenDigits) { if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos + 1; } state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE; NS_HTML5_CONTINUE(stateloop); } else { appendStrBuf(';'); emitOrAppendStrBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos + 1; } state = returnState; NS_HTML5_CONTINUE(stateloop); } } else { if (!seenDigits) { emitOrAppendStrBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = 
returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } else { if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } } case NS_HTML5TOKENIZER_PLAINTEXT: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '\0': { emitReplacementCharacter(buf, pos); continue; } case '\r': { emitCarriageReturn(buf, pos); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { continue; } } } } case NS_HTML5TOKENIZER_CLOSE_TAG_OPEN: { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '>': { cstart = pos + 1; state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\r': { silentCarriageReturn(); clearLongStrBufAndAppend('\n'); state = NS_HTML5TOKENIZER_BOGUS_COMMENT; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); clearLongStrBufAndAppend('\n'); state = NS_HTML5TOKENIZER_BOGUS_COMMENT; NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } default: { if (c >= 'A' && c <= 'Z') { c += 0x20; } if (c >= 'a' && c <= 'z') { endTag = PR_TRUE; clearStrBufAndAppend(c); state = NS_HTML5TOKENIZER_TAG_NAME; NS_HTML5_CONTINUE(stateloop); } else { clearLongStrBufAndAppend(c); state = NS_HTML5TOKENIZER_BOGUS_COMMENT; NS_HTML5_CONTINUE(stateloop); } } } } case NS_HTML5TOKENIZER_RCDATA: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '&': { flushChars(buf, pos); clearStrBufAndAppend(c); additional = '\0'; returnState = state; state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE; NS_HTML5_CONTINUE(stateloop); } case '<': { flushChars(buf, pos); returnState = state; state = NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN; NS_HTML5_CONTINUE(stateloop); } case '\0': { 
emitReplacementCharacter(buf, pos); continue; } case '\r': { emitCarriageReturn(buf, pos); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { continue; } } } } case NS_HTML5TOKENIZER_RAWTEXT: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '<': { flushChars(buf, pos); returnState = state; state = NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN; NS_HTML5_BREAK(rawtextloop); } case '\0': { emitReplacementCharacter(buf, pos); continue; } case '\r': { emitCarriageReturn(buf, pos); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { continue; } } } rawtextloop_end: ; } case NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '/': { index = 0; clearStrBuf(); state = NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME; NS_HTML5_BREAK(rawtextrcdatalessthansignloop); } default: { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); cstart = pos; state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } rawtextrcdatalessthansignloop_end: ; } case NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (index < endTagExpectationAsArray.length) { PRUnichar e = endTagExpectationAsArray[index]; PRUnichar folded = c; if (c >= 'A' && c <= 'Z') { folded += 0x20; } if (folded != e) { tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); emitStrBuf(); cstart = pos; state = returnState; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } appendStrBuf(c); index++; continue; } else { endTag = PR_TRUE; tagName = endTagExpectation; switch(c) { case '\r': { silentCarriageReturn(); state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { state = 
NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME; NS_HTML5_CONTINUE(stateloop); } case '/': { state = NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG; NS_HTML5_CONTINUE(stateloop); } case '>': { state = emitCurrentTagToken(PR_FALSE, pos); if (shouldSuspend) { NS_HTML5_BREAK(stateloop); } NS_HTML5_CONTINUE(stateloop); } default: { tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); emitStrBuf(); if (c == '\0') { emitReplacementCharacter(buf, pos); } else { cstart = pos; } state = returnState; NS_HTML5_CONTINUE(stateloop); } } } } } case NS_HTML5TOKENIZER_BOGUS_COMMENT: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '>': { emitComment(0, pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '-': { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN; NS_HTML5_BREAK(boguscommentloop); } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); continue; } } } boguscommentloop_end: ; } case NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN: { boguscommenthyphenloop: for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '>': { emitComment(0, pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '-': { appendSecondHyphenToBogusComment(); NS_HTML5_CONTINUE(boguscommenthyphenloop); } case '\r': { appendLongStrBufCarriageReturn(); state = NS_HTML5TOKENIZER_BOGUS_COMMENT; NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); state = NS_HTML5TOKENIZER_BOGUS_COMMENT; NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); state = NS_HTML5TOKENIZER_BOGUS_COMMENT; NS_HTML5_CONTINUE(stateloop); } } } } case NS_HTML5TOKENIZER_SCRIPT_DATA: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { 
if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '<': { flushChars(buf, pos); returnState = state; state = NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN; NS_HTML5_BREAK(scriptdataloop); } case '\0': { emitReplacementCharacter(buf, pos); continue; } case '\r': { emitCarriageReturn(buf, pos); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { continue; } } } scriptdataloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '/': { index = 0; clearStrBuf(); state = NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME; NS_HTML5_CONTINUE(stateloop); } case '!': { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); cstart = pos; state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START; NS_HTML5_BREAK(scriptdatalessthansignloop); } default: { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); cstart = pos; state = NS_HTML5TOKENIZER_SCRIPT_DATA; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } scriptdatalessthansignloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH; NS_HTML5_BREAK(scriptdataescapestartloop); } default: { state = NS_HTML5TOKENIZER_SCRIPT_DATA; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } scriptdataescapestartloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH; NS_HTML5_BREAK(scriptdataescapestartdashloop); } default: { state = NS_HTML5TOKENIZER_SCRIPT_DATA; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } } } scriptdataescapestartdashloop_end: ; } case 
NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { continue; } case '<': { flushChars(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; NS_HTML5_CONTINUE(stateloop); } case '>': { state = NS_HTML5TOKENIZER_SCRIPT_DATA; NS_HTML5_CONTINUE(stateloop); } case '\0': { emitReplacementCharacter(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_BREAK(scriptdataescapeddashdashloop); } case '\r': { emitCarriageReturn(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_BREAK(scriptdataescapeddashdashloop); } } } scriptdataescapeddashdashloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '-': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH; NS_HTML5_BREAK(scriptdataescapedloop); } case '<': { flushChars(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; NS_HTML5_CONTINUE(stateloop); } case '\0': { emitReplacementCharacter(buf, pos); continue; } case '\r': { emitCarriageReturn(buf, pos); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { continue; } } } scriptdataescapedloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH; NS_HTML5_CONTINUE(stateloop); } case '<': { flushChars(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; NS_HTML5_BREAK(scriptdataescapeddashloop); } case '\0': { emitReplacementCharacter(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; 
NS_HTML5_CONTINUE(stateloop); } case '\r': { emitCarriageReturn(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_CONTINUE(stateloop); } } } scriptdataescapeddashloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '/': { index = 0; clearStrBuf(); returnState = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; state = NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME; NS_HTML5_CONTINUE(stateloop); } case 'S': case 's': { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); cstart = pos; index = 1; state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START; NS_HTML5_BREAK(scriptdataescapedlessthanloop); } default: { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); cstart = pos; reconsume = PR_TRUE; state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_CONTINUE(stateloop); } } } scriptdataescapedlessthanloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (index < 6) { PRUnichar folded = c; if (c >= 'A' && c <= 'Z') { folded += 0x20; } if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { reconsume = PR_TRUE; state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_CONTINUE(stateloop); } index++; continue; } switch(c) { case '\r': { emitCarriageReturn(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': case '/': case '>': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_BREAK(scriptdatadoubleescapestartloop); } default: { reconsume = PR_TRUE; state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_CONTINUE(stateloop); } } } scriptdatadoubleescapestartloop_end: ; } case 
NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '-': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH; NS_HTML5_BREAK(scriptdatadoubleescapedloop); } case '<': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; NS_HTML5_CONTINUE(stateloop); } case '\0': { emitReplacementCharacter(buf, pos); continue; } case '\r': { emitCarriageReturn(buf, pos); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { continue; } } } scriptdatadoubleescapedloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH; NS_HTML5_BREAK(scriptdatadoubleescapeddashloop); } case '<': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; NS_HTML5_CONTINUE(stateloop); } case '\0': { emitReplacementCharacter(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_CONTINUE(stateloop); } case '\r': { emitCarriageReturn(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_CONTINUE(stateloop); } } } scriptdatadoubleescapeddashloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '-': { continue; } case '<': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop); } case '>': { state = NS_HTML5TOKENIZER_SCRIPT_DATA; NS_HTML5_CONTINUE(stateloop); } case '\0': { emitReplacementCharacter(buf, pos); state = 
NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_CONTINUE(stateloop); } case '\r': { emitCarriageReturn(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_CONTINUE(stateloop); } } } scriptdatadoubleescapeddashdashloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '/': { index = 0; state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END; NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop); } default: { reconsume = PR_TRUE; state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_CONTINUE(stateloop); } } } scriptdatadoubleescapedlessthanloop_end: ; } case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (index < 6) { PRUnichar folded = c; if (c >= 'A' && c <= 'Z') { folded += 0x20; } if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { reconsume = PR_TRUE; state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_CONTINUE(stateloop); } index++; continue; } switch(c) { case '\r': { emitCarriageReturn(buf, pos); state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': case '/': case '>': { state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED; NS_HTML5_CONTINUE(stateloop); } default: { reconsume = PR_TRUE; state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED; NS_HTML5_CONTINUE(stateloop); } } } } case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (index < 6) { PRUnichar folded = c; if (c >= 'A' && c <= 'Z') { folded += 0x20; } if (folded == nsHtml5Tokenizer::OCTYPE[index]) { appendLongStrBuf(c); } else { state 
= NS_HTML5TOKENIZER_BOGUS_COMMENT; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } index++; continue; } else { state = NS_HTML5TOKENIZER_DOCTYPE; reconsume = PR_TRUE; NS_HTML5_BREAK(markupdeclarationdoctypeloop); } } markupdeclarationdoctypeloop_end: ; } case NS_HTML5TOKENIZER_DOCTYPE: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } initDoctypeFields(); switch(c) { case '\r': { silentCarriageReturn(); state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME; NS_HTML5_BREAK(doctypeloop); } default: { state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME; reconsume = PR_TRUE; NS_HTML5_BREAK(doctypeloop); } } } doctypeloop_end: ; } case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '\r': { silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { continue; } case '>': { forceQuirks = PR_TRUE; emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } default: { if (c >= 'A' && c <= 'Z') { c += 0x20; } clearStrBufAndAppend(c); state = NS_HTML5TOKENIZER_DOCTYPE_NAME; NS_HTML5_BREAK(beforedoctypenameloop); } } } beforedoctypenameloop_end: ; } case NS_HTML5TOKENIZER_DOCTYPE_NAME: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); strBufToDoctypeName(); state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { strBufToDoctypeName(); state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME; NS_HTML5_BREAK(doctypenameloop); } case 
'>': { strBufToDoctypeName(); emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\0': { c = 0xfffd; } default: { if (c >= 'A' && c <= 'Z') { c += 0x0020; } appendStrBuf(c); continue; } } } doctypenameloop_end: ; } case NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { continue; } case '>': { emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case 'p': case 'P': { index = 0; state = NS_HTML5TOKENIZER_DOCTYPE_UBLIC; NS_HTML5_BREAK(afterdoctypenameloop); } case 's': case 'S': { index = 0; state = NS_HTML5TOKENIZER_DOCTYPE_YSTEM; NS_HTML5_CONTINUE(stateloop); } default: { bogusDoctype(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; NS_HTML5_CONTINUE(stateloop); } } } afterdoctypenameloop_end: ; } case NS_HTML5TOKENIZER_DOCTYPE_UBLIC: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (index < 5) { PRUnichar folded = c; if (c >= 'A' && c <= 'Z') { folded += 0x20; } if (folded != nsHtml5Tokenizer::UBLIC[index]) { bogusDoctype(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } index++; continue; } else { state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD; reconsume = PR_TRUE; NS_HTML5_BREAK(doctypeublicloop); } } doctypeublicloop_end: ; } case NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '\r': { silentCarriageReturn(); state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { state = 
NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; NS_HTML5_BREAK(afterdoctypepublickeywordloop); } case '\"': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '\'': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '>': { forceQuirks = PR_TRUE; emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } default: { bogusDoctype(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; NS_HTML5_CONTINUE(stateloop); } } } afterdoctypepublickeywordloop_end: ; } case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { continue; } case '\"': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; NS_HTML5_BREAK(beforedoctypepublicidentifierloop); } case '\'': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '>': { forceQuirks = PR_TRUE; emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } default: { bogusDoctype(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; NS_HTML5_CONTINUE(stateloop); } } } beforedoctypepublicidentifierloop_end: ; } case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\"': { publicIdentifier = longStrBufToString(); state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER; NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop); } case '>': { forceQuirks = PR_TRUE; publicIdentifier = longStrBufToString(); emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\r': 
{ appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); continue; } } } doctypepublicidentifierdoublequotedloop_end: ; } case NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); state = NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { state = NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; NS_HTML5_BREAK(afterdoctypepublicidentifierloop); } case '>': { emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\"': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '\'': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } default: { bogusDoctype(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; NS_HTML5_CONTINUE(stateloop); } } } afterdoctypepublicidentifierloop_end: ; } case NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { continue; } case '>': { emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\"': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop); } case '\'': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } default: { bogusDoctype(); state = 
NS_HTML5TOKENIZER_BOGUS_DOCTYPE; NS_HTML5_CONTINUE(stateloop); } } } betweendoctypepublicandsystemidentifiersloop_end: ; } case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\"': { systemIdentifier = longStrBufToString(); state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER; NS_HTML5_CONTINUE(stateloop); } case '>': { forceQuirks = PR_TRUE; systemIdentifier = longStrBufToString(); emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); continue; } } } } case NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { continue; } case '>': { emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } default: { bogusDoctypeWithoutQuirks(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; NS_HTML5_BREAK(afterdoctypesystemidentifierloop); } } } afterdoctypesystemidentifierloop_end: ; } case NS_HTML5TOKENIZER_BOGUS_DOCTYPE: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '>': { emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\r': { silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } default: { continue; } } } } case NS_HTML5TOKENIZER_DOCTYPE_YSTEM: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); if (index < 5) { PRUnichar folded = c; if (c >= 'A' && c <= 
'Z') { folded += 0x20; } if (folded != nsHtml5Tokenizer::YSTEM[index]) { bogusDoctype(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; reconsume = PR_TRUE; NS_HTML5_CONTINUE(stateloop); } index++; NS_HTML5_CONTINUE(stateloop); } else { state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD; reconsume = PR_TRUE; NS_HTML5_BREAK(doctypeystemloop); } } doctypeystemloop_end: ; } case NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD: { for (; ; ) { if (reconsume) { reconsume = PR_FALSE; } else { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); } switch(c) { case '\r': { silentCarriageReturn(); state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; NS_HTML5_BREAK(afterdoctypesystemkeywordloop); } case '\"': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '\'': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '>': { forceQuirks = PR_TRUE; emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } default: { bogusDoctype(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; NS_HTML5_CONTINUE(stateloop); } } } afterdoctypesystemkeywordloop_end: ; } case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\r': { silentCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { silentLineFeed(); } case ' ': case '\t': case '\f': { continue; } case '\"': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; NS_HTML5_CONTINUE(stateloop); } case '\'': { clearLongStrBuf(); state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; NS_HTML5_BREAK(beforedoctypesystemidentifierloop); } 
case '>': { forceQuirks = PR_TRUE; emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } default: { bogusDoctype(); state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE; NS_HTML5_CONTINUE(stateloop); } } } beforedoctypesystemidentifierloop_end: ; } case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\'': { systemIdentifier = longStrBufToString(); state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER; NS_HTML5_CONTINUE(stateloop); } case '>': { forceQuirks = PR_TRUE; systemIdentifier = longStrBufToString(); emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); continue; } } } } case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { for (; ; ) { if (++pos == endPos) { NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); switch(c) { case '\'': { publicIdentifier = longStrBufToString(); state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER; NS_HTML5_CONTINUE(stateloop); } case '>': { forceQuirks = PR_TRUE; publicIdentifier = longStrBufToString(); emitDoctypeToken(pos); state = NS_HTML5TOKENIZER_DATA; NS_HTML5_CONTINUE(stateloop); } case '\r': { appendLongStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { appendLongStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { appendLongStrBuf(c); continue; } } } } } } stateloop_end: ; flushChars(buf, pos); stateSave = state; returnStateSave = returnState; return pos; } void nsHtml5Tokenizer::initDoctypeFields() { nsHtml5Portability::releaseLocal(doctypeName); doctypeName = nsHtml5Atoms::emptystring; if (systemIdentifier) { nsHtml5Portability::releaseString(systemIdentifier); systemIdentifier = nsnull; } if 
(publicIdentifier) {
    // Hand the previously held public identifier to
    // nsHtml5Portability::releaseString() and clear the field.
    nsHtml5Portability::releaseString(publicIdentifier);
    publicIdentifier = nsnull;
  }
  forceQuirks = PR_FALSE;
}

/**
 * Handles a carriage return read by one of the text-like states.
 *
 * Calls silentCarriageReturn(), flushes through flushChars(buf, pos), and
 * then reports one character taken from nsHtml5Tokenizer::LF (offset 0,
 * length 1) to the token handler.
 *
 * @param buf the buffer currently being tokenized
 * @param pos the position of the carriage return in buf
 */
void nsHtml5Tokenizer::emitCarriageReturn(PRUnichar* buf, PRInt32 pos) {
  silentCarriageReturn();
  flushChars(buf, pos);
  tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1);
  // NOTE(review): PR_INT32_MAX looks like a sentinel meaning "no pending
  // run", so that a later flushChars() emits nothing until a state assigns
  // a real start position -- confirm against flushChars().
  cstart = PR_INT32_MAX;
}

/**
 * Called from the `case '\0'` arms of the state loop.
 *
 * Flushes through flushChars(buf, pos), notifies the token handler via
 * zeroOriginatingReplacementCharacter(), and moves cstart to the position
 * just after pos.
 *
 * @param buf the buffer currently being tokenized
 * @param pos the position of the character being replaced
 */
void nsHtml5Tokenizer::emitReplacementCharacter(PRUnichar* buf, PRInt32 pos) {
  flushChars(buf, pos);
  tokenHandler->zeroOriginatingReplacementCharacter();
  // Presumably cstart marks where the next unflushed run begins, so pos + 1
  // leaves the character at pos out of it -- TODO confirm.
  cstart = pos + 1;
}

/**
 * Stores add in the `additional` field.
 *
 * Despite the name, this implementation records no location; the body only
 * performs the assignment below.
 *
 * @param add the value to store in `additional`
 */
void nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(PRUnichar add) {
  additional = add;
}

/**
 * Marks the doctype being tokenized as forcing quirks
 * (forceQuirks = PR_TRUE). The visible callers switch to
 * NS_HTML5TOKENIZER_BOGUS_DOCTYPE right after calling this.
 */
void nsHtml5Tokenizer::bogusDoctype() {
  forceQuirks = PR_TRUE;
}

/**
 * Counterpart of bogusDoctype() that sets forceQuirks to PR_FALSE instead.
 */
void nsHtml5Tokenizer::bogusDoctypeWithoutQuirks() {
  forceQuirks = PR_FALSE;
}

/**
 * Disposes of the contents of strBuf according to returnState.
 *
 * When returnState has any bit of NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK
 * set, appendStrBufToLongStrBuf() is called; otherwise emitStrBuf() is
 * called.
 *
 * NOTE(review): the mask is defined elsewhere; which return states carry
 * these bits cannot be seen from here.
 *
 * @param returnState the state the tokenizer will return to
 */
void nsHtml5Tokenizer::emitOrAppendStrBuf(PRInt32 returnState) {
  if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
    appendStrBufToLongStrBuf();
  } else {
    emitStrBuf();
  }
}

/**
 * Turns the numeric character reference accumulated in `value` into one or
 * two UTF-16 code units and passes them to emitOrAppendOne() /
 * emitOrAppendTwo() together with returnState.
 *
 * @param returnState the state the tokenizer will return to; forwarded
 *        unchanged to the emitOrAppend* helpers
 */
void nsHtml5Tokenizer::handleNcrValue(PRInt32 returnState) {
  if (value <= 0xFFFF) {
    if (value >= 0x80 && value <= 0x9f) {
      // 0x80..0x9F are looked up in the WINDOWS_1252 table, indexed from
      // 0x80, rather than being emitted as-is.
      PRUnichar* val = nsHtml5NamedCharacters::WINDOWS_1252[value - 0x80];
      emitOrAppendOne(val, returnState);
    } else if (value == 0x0) {
      // A reference to 0 yields REPLACEMENT_CHARACTER.
      emitOrAppendOne(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, returnState);
    } else if ((value & 0xF800) == 0xD800) {
      // For values in 0..0xFFFF this mask matches 0xD800..0xDFFF (the UTF-16
      // surrogate range); those also yield REPLACEMENT_CHARACTER.
      emitOrAppendOne(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, returnState);
    } else {
      // Any other value in this branch is emitted as a single code unit
      // through the one-element bmpChar scratch array.
      PRUnichar ch = (PRUnichar) value;
      bmpChar[0] = ch;
      emitOrAppendOne(bmpChar, returnState);
    }
  } else if (value <= 0x10FFFF) {
    // 0x10000..0x10FFFF: emitted as two code units via astralChar.
    // NOTE(review): NS_HTML5TOKENIZER_LEAD_OFFSET is defined elsewhere;
    // presumably 0xD800 - (0x10000 >> 10), which would make astralChar[0]
    // the lead surrogate -- confirm.
    astralChar[0] = (PRUnichar) (NS_HTML5TOKENIZER_LEAD_OFFSET + (value >> 10));
    // Second unit: 0xDC00 plus the low 10 bits of the value.
    astralChar[1] = (PRUnichar) (0xDC00 + (value & 0x3FF));
    emitOrAppendTwo(astralChar, returnState);
  } else {
    // Above 0x10FFFF. The NCR loops set value to 0x110000 when they see the
    // accumulator decrease, so that case ends up here as well.
    emitOrAppendOne(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, returnState);
  }
}

/**
 * End-of-input handling. Starts from the state and return state that the
 * state loop saved in stateSave / returnStateSave and switches on the saved
 * state.
 */
void nsHtml5Tokenizer::eof() {
  PRInt32 state = stateSave;
  PRInt32 returnState = returnStateSave;
  eofloop: for (; ; ) {
    switch(state) {
      case NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN:
      case
NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_TAG_OPEN: { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN: { tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME: { tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); emitStrBuf(); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_CLOSE_TAG_OPEN: { tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_TAG_NAME: { NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME: case NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED: case NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG: { NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_ATTRIBUTE_NAME: { NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME: case NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE: { NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED: case NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED: case NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED: { NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_BOGUS_COMMENT: { emitComment(0, 0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN: { emitComment(0, 0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN: { clearLongStrBuf(); emitComment(0, 0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN: { emitComment(0, 0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE: { if (index < 6) { emitComment(0, 0); } else { nsHtml5Portability::releaseLocal(doctypeName); doctypeName = nsHtml5Atoms::emptystring; if (systemIdentifier) { nsHtml5Portability::releaseString(systemIdentifier); systemIdentifier = nsnull; } if (publicIdentifier) { 
nsHtml5Portability::releaseString(publicIdentifier); publicIdentifier = nsnull; } forceQuirks = PR_TRUE; emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_COMMENT_START: case NS_HTML5TOKENIZER_COMMENT: case NS_HTML5TOKENIZER_COMMENT_END_SPACE: { emitComment(0, 0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_COMMENT_END: { emitComment(2, 0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_COMMENT_END_DASH: case NS_HTML5TOKENIZER_COMMENT_START_DASH: { emitComment(1, 0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_COMMENT_END_BANG: { emitComment(3, 0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_DOCTYPE: case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME: { forceQuirks = PR_TRUE; emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_DOCTYPE_NAME: { strBufToDoctypeName(); forceQuirks = PR_TRUE; emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_DOCTYPE_UBLIC: case NS_HTML5TOKENIZER_DOCTYPE_YSTEM: case NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME: case NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD: case NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD: case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { forceQuirks = PR_TRUE; emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { forceQuirks = PR_TRUE; publicIdentifier = longStrBufToString(); emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER: case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: case NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { forceQuirks = PR_TRUE; emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { forceQuirks = PR_TRUE; systemIdentifier = longStrBufToString(); emitDoctypeToken(0); 
NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { forceQuirks = PR_TRUE; emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_BOGUS_DOCTYPE: { emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE: { emitOrAppendStrBuf(returnState); state = returnState; continue; } case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP: { emitOrAppendStrBuf(returnState); state = returnState; continue; } case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL: { for (; ; ) { PRUnichar c = '\0'; entCol++; for (; ; ) { if (hi == -1) { NS_HTML5_BREAK(hiloop); } if (entCol == nsHtml5NamedCharacters::NAMES[hi].length) { NS_HTML5_BREAK(hiloop); } if (entCol > nsHtml5NamedCharacters::NAMES[hi].length) { NS_HTML5_BREAK(outer); } else if (c < nsHtml5NamedCharacters::NAMES[hi][entCol]) { hi--; } else { NS_HTML5_BREAK(hiloop); } } hiloop_end: ; for (; ; ) { if (hi < lo) { NS_HTML5_BREAK(outer); } if (entCol == nsHtml5NamedCharacters::NAMES[lo].length) { candidate = lo; strBufMark = strBufLen; lo++; } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length) { NS_HTML5_BREAK(outer); } else if (c > nsHtml5NamedCharacters::NAMES[lo][entCol]) { lo++; } else { NS_HTML5_BREAK(loloop); } } loloop_end: ; if (hi < lo) { NS_HTML5_BREAK(outer); } continue; } outer_end: ; if (candidate == -1) { emitOrAppendStrBuf(returnState); state = returnState; NS_HTML5_CONTINUE(eofloop); } else { jArray candidateArr = nsHtml5NamedCharacters::NAMES[candidate]; if (!candidateArr.length || candidateArr[candidateArr.length - 1] != ';') { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { PRUnichar ch; if (strBufMark == strBufLen) { ch = '\0'; } else { ch = strBuf[strBufMark]; } if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { appendStrBufToLongStrBuf(); state = returnState; NS_HTML5_CONTINUE(eofloop); } } } const PRUnichar* val = nsHtml5NamedCharacters::VALUES[candidate]; if 
((val[0] & 0xFC00) == 0xD800) { emitOrAppendTwo(val, returnState); } else { emitOrAppendOne(val, returnState); } if (strBufMark < strBufLen) { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { for (PRInt32 i = strBufMark; i < strBufLen; i++) { appendLongStrBuf(strBuf[i]); } } else { tokenHandler->characters(strBuf, strBufMark, strBufLen - strBufMark); } } state = returnState; NS_HTML5_CONTINUE(eofloop); } } case NS_HTML5TOKENIZER_CONSUME_NCR: case NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP: case NS_HTML5TOKENIZER_HEX_NCR_LOOP: { if (!seenDigits) { emitOrAppendStrBuf(returnState); state = returnState; continue; } handleNcrValue(returnState); state = returnState; continue; } case NS_HTML5TOKENIZER_DATA: default: { NS_HTML5_BREAK(eofloop); } } } eofloop_end: ; tokenHandler->eof(); return; } void nsHtml5Tokenizer::emitDoctypeToken(PRInt32 pos) { cstart = pos + 1; tokenHandler->doctype(doctypeName, publicIdentifier, systemIdentifier, forceQuirks); nsHtml5Portability::releaseLocal(doctypeName); doctypeName = nsnull; nsHtml5Portability::releaseString(publicIdentifier); publicIdentifier = nsnull; nsHtml5Portability::releaseString(systemIdentifier); systemIdentifier = nsnull; } void nsHtml5Tokenizer::internalEncodingDeclaration(nsString* internalCharset) { if (encodingDeclarationHandler) { encodingDeclarationHandler->internalEncodingDeclaration(internalCharset); } } void nsHtml5Tokenizer::emitOrAppendTwo(const PRUnichar* val, PRInt32 returnState) { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { appendLongStrBuf(val[0]); appendLongStrBuf(val[1]); } else { tokenHandler->characters(val, 0, 2); } } void nsHtml5Tokenizer::emitOrAppendOne(const PRUnichar* val, PRInt32 returnState) { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { appendLongStrBuf(val[0]); } else { tokenHandler->characters(val, 0, 1); } } void nsHtml5Tokenizer::end() { strBuf.release(); strBuf = nsnull; longStrBuf.release(); longStrBuf = nsnull; 
nsHtml5Portability::releaseLocal(doctypeName); doctypeName = nsnull; if (systemIdentifier) { nsHtml5Portability::releaseString(systemIdentifier); systemIdentifier = nsnull; } if (publicIdentifier) { nsHtml5Portability::releaseString(publicIdentifier); publicIdentifier = nsnull; } if (tagName) { tagName->release(); tagName = nsnull; } if (attributeName) { attributeName->release(); attributeName = nsnull; } tokenHandler->endTokenization(); if (attributes) { attributes->clear(0); delete attributes; attributes = nsnull; } } void nsHtml5Tokenizer::requestSuspension() { shouldSuspend = PR_TRUE; } PRBool nsHtml5Tokenizer::isInDataState() { return (stateSave == NS_HTML5TOKENIZER_DATA); } void nsHtml5Tokenizer::resetToDataState() { strBufLen = 0; longStrBufLen = 0; stateSave = NS_HTML5TOKENIZER_DATA; lastCR = PR_FALSE; index = 0; forceQuirks = PR_FALSE; additional = '\0'; entCol = -1; firstCharKey = -1; lo = 0; hi = (nsHtml5NamedCharacters::NAMES.length - 1); candidate = -1; strBufMark = 0; prevValue = -1; value = 0; seenDigits = PR_FALSE; endTag = PR_FALSE; shouldSuspend = PR_FALSE; initDoctypeFields(); if (tagName) { tagName->release(); tagName = nsnull; } if (attributeName) { attributeName->release(); attributeName = nsnull; } if (attributes) { delete attributes; attributes = nsnull; } } void nsHtml5Tokenizer::loadState(nsHtml5Tokenizer* other) { strBufLen = other->strBufLen; if (strBufLen > strBuf.length) { strBuf.release(); strBuf = jArray(strBufLen); } nsHtml5ArrayCopy::arraycopy(other->strBuf, strBuf, strBufLen); longStrBufLen = other->longStrBufLen; if (longStrBufLen > longStrBuf.length) { longStrBuf.release(); longStrBuf = jArray(longStrBufLen); } nsHtml5ArrayCopy::arraycopy(other->longStrBuf, longStrBuf, longStrBufLen); stateSave = other->stateSave; returnStateSave = other->returnStateSave; endTagExpectation = other->endTagExpectation; endTagExpectationAsArray = other->endTagExpectationAsArray; lastCR = other->lastCR; index = other->index; forceQuirks = 
other->forceQuirks; additional = other->additional; entCol = other->entCol; firstCharKey = other->firstCharKey; lo = other->lo; hi = other->hi; candidate = other->candidate; strBufMark = other->strBufMark; prevValue = other->prevValue; value = other->value; seenDigits = other->seenDigits; endTag = other->endTag; shouldSuspend = PR_FALSE; nsHtml5Portability::releaseLocal(doctypeName); if (!other->doctypeName) { doctypeName = nsnull; } else { doctypeName = nsHtml5Portability::newLocalFromLocal(other->doctypeName, interner); } nsHtml5Portability::releaseString(systemIdentifier); if (!other->systemIdentifier) { systemIdentifier = nsnull; } else { systemIdentifier = nsHtml5Portability::newStringFromString(other->systemIdentifier); } nsHtml5Portability::releaseString(publicIdentifier); if (!other->publicIdentifier) { publicIdentifier = nsnull; } else { publicIdentifier = nsHtml5Portability::newStringFromString(other->publicIdentifier); } if (tagName) { tagName->release(); } if (!other->tagName) { tagName = nsnull; } else { tagName = other->tagName->cloneElementName(interner); } if (attributeName) { attributeName->release(); } if (!other->attributeName) { attributeName = nsnull; } else { attributeName = other->attributeName->cloneAttributeName(interner); } if (attributes) { delete attributes; } if (!other->attributes) { attributes = nsnull; } else { attributes = other->attributes->cloneAttributes(interner); } } void nsHtml5Tokenizer::initializeWithoutStarting() { confident = PR_FALSE; strBuf = jArray(64); longStrBuf = jArray(1024); line = 1; resetToDataState(); } void nsHtml5Tokenizer::setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler) { this->encodingDeclarationHandler = encodingDeclarationHandler; } void nsHtml5Tokenizer::initializeStatics() { TITLE_ARR = jArray((PRUnichar*)TITLE_ARR_DATA, 5); SCRIPT_ARR = jArray((PRUnichar*)SCRIPT_ARR_DATA, 6); STYLE_ARR = jArray((PRUnichar*)STYLE_ARR_DATA, 5); PLAINTEXT_ARR = 
jArray((PRUnichar*)PLAINTEXT_ARR_DATA, 9); XMP_ARR = jArray((PRUnichar*)XMP_ARR_DATA, 3); TEXTAREA_ARR = jArray((PRUnichar*)TEXTAREA_ARR_DATA, 8); IFRAME_ARR = jArray((PRUnichar*)IFRAME_ARR_DATA, 6); NOEMBED_ARR = jArray((PRUnichar*)NOEMBED_ARR_DATA, 7); NOSCRIPT_ARR = jArray((PRUnichar*)NOSCRIPT_ARR_DATA, 8); NOFRAMES_ARR = jArray((PRUnichar*)NOFRAMES_ARR_DATA, 8); } void nsHtml5Tokenizer::releaseStatics() { }