Bug 482921 part 4 - Support XML syntax highlighting in the parser core. r=Olli.Pettay.

This commit is contained in:
Henri Sivonen 2011-11-01 13:33:11 +02:00
parent 0429f9c117
commit 216045a5cb
10 changed files with 204 additions and 10 deletions

View File

@ -216,6 +216,10 @@ public class Tokenizer implements Locator {
public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
public static final int PROCESSING_INSTRUCTION = 73;
public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
/**
* Magic value for UTF-16 operations.
*/
@ -505,6 +509,8 @@ public class Tokenizer implements Locator {
private Interner interner;
// CPPONLY: private boolean viewingXmlSource;
// [NOCPP[
protected LocatorImpl ampersandLocation;
@ -531,7 +537,9 @@ public class Tokenizer implements Locator {
* @param tokenHandler
* the handler for receiving tokens
*/
public Tokenizer(TokenHandler tokenHandler) {
public Tokenizer(TokenHandler tokenHandler
// CPPONLY: , boolean viewingXmlSource
) {
this.tokenHandler = tokenHandler;
this.encodingDeclarationHandler = null;
// [NOCPP[
@ -545,6 +553,7 @@ public class Tokenizer implements Locator {
this.publicIdentifier = null;
this.systemIdentifier = null;
this.attributes = null;
// CPPONLY: this.viewingXmlSource = viewingXmlSource;
}
public void setInterner(Interner interner) {
@ -557,6 +566,10 @@ public class Tokenizer implements Locator {
}
// CPPONLY: boolean isViewingXmlSource() {
// CPPONLY: return viewingXmlSource;
// CPPONLY: }
// [NOCPP[
/**
@ -1118,10 +1131,16 @@ public class Tokenizer implements Locator {
* switched to the PCDATA state.
*/
maybeErrAttributesOnEndTag(attrs);
// CPPONLY: if (!viewingXmlSource) {
tokenHandler.endTag(tagName);
// CPPONLY: }
Portability.delete(attributes);
} else {
// CPPONLY: if (viewingXmlSource) {
// CPPONLY: Portability.delete(attributes);
// CPPONLY: } else {
tokenHandler.startTag(tagName, attrs, selfClosing);
// CPPONLY: }
}
tagName.release();
tagName = null;
@ -1534,6 +1553,13 @@ public class Tokenizer implements Locator {
state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
continue stateloop;
case '?':
// CPPONLY: if (viewingXmlSource) {
// CPPONLY: state = transition(state,
// CPPONLY: Tokenizer.PROCESSING_INSTRUCTION,
// CPPONLY: reconsume,
// CPPONLY: pos);
// CPPONLY: continue stateloop;
// CPPONLY: }
/*
* U+003F QUESTION MARK (?) Parse error.
*/
@ -5716,6 +5742,41 @@ public class Tokenizer implements Locator {
continue;
}
}
// XXX reorder point
// PROCESSING_INSTRUCTION state: scans over the content of a processing
// instruction, one character at a time, until a '?' is seen.
case PROCESSING_INSTRUCTION:
processinginstructionloop: for (;;) {
// Out of input: leave the state loop without changing the state.
if (++pos == endPos) {
break stateloop;
}
c = checkChar(buf, pos);
switch (c) {
case '?':
// A '?' may be the first half of the closing "?>"; the
// question-mark state decides what the next character means.
state = transition(
state,
Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
reconsume, pos);
// Leaving the loop here falls through into the
// PROCESSING_INSTRUCTION_QUESTION_MARK case that follows.
break processinginstructionloop;
// continue stateloop;
default:
// Any other character is processing-instruction content.
continue;
}
}
// PROCESSING_INSTRUCTION_QUESTION_MARK state: the previous character was a
// '?' inside a processing instruction.
case PROCESSING_INSTRUCTION_QUESTION_MARK:
// Out of input: leave the state loop without changing the state.
if (++pos == endPos) {
break stateloop;
}
c = checkChar(buf, pos);
switch (c) {
case '>':
// "?>" closes the processing instruction.
state = transition(state, Tokenizer.DATA,
reconsume, pos);
continue stateloop;
case '?':
// Another '?' can itself be the first half of the closing "?>"
// (as in "??>"), so stay in this state. Dropping back to
// PROCESSING_INSTRUCTION here would make the following '>' look
// like content and the processing instruction would never end.
continue stateloop;
default:
// The '?' was ordinary content; resume scanning.
state = transition(state,
Tokenizer.PROCESSING_INSTRUCTION,
reconsume, pos);
continue stateloop;
}
// END HOTSPOT WORKAROUND
}
}

View File

@ -575,6 +575,18 @@ public abstract class TreeBuilder<T> implements TokenHandler,
contextNode = null;
} else {
mode = INITIAL;
// If we are viewing XML source, put a foreign element permanently
// on the stack so that cdataSectionAllowed() returns true.
// CPPONLY: if (tokenizer.isViewingXmlSource()) {
// CPPONLY: T elt = createElement("http://www.w3.org/2000/svg",
// CPPONLY: "svg",
// CPPONLY: tokenizer.emptyAttributes());
// CPPONLY: StackNode<T> node = new StackNode<T>(ElementName.SVG,
// CPPONLY: "svg",
// CPPONLY: elt);
// CPPONLY: currentPtr++;
// CPPONLY: stack[currentPtr] = node;
// CPPONLY: }
}
}
@ -856,6 +868,9 @@ public abstract class TreeBuilder<T> implements TokenHandler,
*/
public final void characters(@Const @NoLength char[] buf, int start, int length)
throws SAXException {
// CPPONLY: if (tokenizer.isViewingXmlSource()) {
// CPPONLY: return;
// CPPONLY: }
if (needToDropLF) {
needToDropLF = false;
if (buf[start] == '\n') {

View File

@ -31,6 +31,7 @@ HTML5_ATOM(noframes, "noframes")
HTML5_ATOM(noscript, "noscript")
HTML5_ATOM(plaintext, "plaintext")
HTML5_ATOM(script, "script")
HTML5_ATOM(svg, "svg")
HTML5_ATOM(table, "table")
HTML5_ATOM(caption, "caption")
HTML5_ATOM(p, "p")
@ -771,7 +772,6 @@ HTML5_ATOM(pre, "pre")
HTML5_ATOM(rem, "rem")
HTML5_ATOM(sub, "sub")
HTML5_ATOM(sec, "sec")
HTML5_ATOM(svg, "svg")
HTML5_ATOM(sum, "sum")
HTML5_ATOM(sin, "sin")
HTML5_ATOM(sep, "sep")

View File

@ -70,6 +70,9 @@ PRUnichar nsHtml5Highlighter::sAttributeValue[] =
PRUnichar nsHtml5Highlighter::sDoctype[] =
{ 'd', 'o', 'c', 't', 'y', 'p', 'e', 0 };
PRUnichar nsHtml5Highlighter::sPi[] =
{ 'p', 'i', 0 };
nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink)
: mState(NS_HTML5TOKENIZER_DATA)
, mCStart(PR_INT32_MAX)
@ -188,6 +191,9 @@ nsHtml5Highlighter::Transition(PRInt32 aState, bool aReconsume, PRInt32 aPos)
case NS_HTML5TOKENIZER_DATA:
FinishTag(); // DATA
break;
case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION:
AddClass(sPi);
break;
}
break;
case NS_HTML5TOKENIZER_TAG_NAME:
@ -456,6 +462,11 @@ nsHtml5Highlighter::Transition(PRInt32 aState, bool aReconsume, PRInt32 aPos)
FinishTag();
}
break;
// A '?' has been seen inside a processing instruction.
// NOTE(review): presumably this switch is keyed on the state being left and
// aState is the state being entered -- confirm against the enclosing switch.
case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK:
// Only a move to DATA calls FinishTag(); any other target state does
// nothing here.
if (aState == NS_HTML5TOKENIZER_DATA) {
FinishTag();
}
break;
default:
break;
}

View File

@ -433,6 +433,11 @@ class nsHtml5Highlighter
* The string "entity"
*/
static PRUnichar sEntity[];
/**
* The string "pi"
*/
static PRUnichar sPi[];
};
#endif // nsHtml5Highlighter_h_

View File

@ -90,7 +90,7 @@ nsHtml5Parser::nsHtml5Parser()
, mLastBuffer(mFirstBuffer)
, mExecutor(new nsHtml5TreeOpExecutor())
, mTreeBuilder(new nsHtml5TreeBuilder(mExecutor, nsnull))
, mTokenizer(new nsHtml5Tokenizer(mTreeBuilder))
, mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, false))
, mRootContextLineNumber(1)
{
mAtomTable.Init(); // we aren't checking for OOM anyway...
@ -494,7 +494,7 @@ nsHtml5Parser::Parse(const nsAString& aSourceBuffer,
mDocWriteSpeculativeTreeBuilder->setScriptingEnabled(
mTreeBuilder->isScriptingEnabled());
mDocWriteSpeculativeTokenizer =
new nsHtml5Tokenizer(mDocWriteSpeculativeTreeBuilder);
new nsHtml5Tokenizer(mDocWriteSpeculativeTreeBuilder, false);
mDocWriteSpeculativeTokenizer->setInterner(&mAtomTable);
mDocWriteSpeculativeTokenizer->start();
}

View File

@ -185,7 +185,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
nsnull : mExecutor->GetStage(),
aMode == NORMAL ?
mExecutor->GetStage() : nsnull))
, mTokenizer(new nsHtml5Tokenizer(mTreeBuilder))
, mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML))
, mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex")
, mOwner(aOwner)
, mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex")

View File

@ -88,7 +88,7 @@ staticJArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOSCRIPT_ARR = { NOSCRIPT_ARR_
static PRUnichar const NOFRAMES_ARR_DATA[] = { 'n', 'o', 'f', 'r', 'a', 'm', 'e', 's' };
staticJArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOFRAMES_ARR = { NOFRAMES_ARR_DATA, NS_ARRAY_LENGTH(NOFRAMES_ARR_DATA) };
nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler)
nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource)
: tokenHandler(tokenHandler),
encodingDeclarationHandler(nsnull),
bmpChar(jArray<PRUnichar,PRInt32>::newJArray(1)),
@ -98,7 +98,8 @@ nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler)
doctypeName(nsnull),
publicIdentifier(nsnull),
systemIdentifier(nsnull),
attributes(nsnull)
attributes(nsnull),
viewingXmlSource(viewingXmlSource)
{
MOZ_COUNT_CTOR(nsHtml5Tokenizer);
}
@ -116,6 +117,12 @@ nsHtml5Tokenizer::initLocation(nsString* newPublicId, nsString* newSystemId)
this->publicId = newPublicId;
}
/**
 * Returns the viewingXmlSource flag held by this tokenizer.
 */
bool
nsHtml5Tokenizer::isViewingXmlSource()
{
return viewingXmlSource;
}
void
nsHtml5Tokenizer::setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation)
{
@ -297,10 +304,16 @@ nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, PRInt32 pos)
nsHtml5HtmlAttributes* attrs = (!attributes ? nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES : attributes);
if (endTag) {
maybeErrAttributesOnEndTag(attrs);
tokenHandler->endTag(tagName);
if (!viewingXmlSource) {
tokenHandler->endTag(tagName);
}
delete attributes;
} else {
tokenHandler->startTag(tagName, attrs, selfClosing);
if (viewingXmlSource) {
delete attributes;
} else {
tokenHandler->startTag(tagName, attrs, selfClosing);
}
}
tagName->release();
tagName = nsnull;
@ -477,6 +490,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
NS_HTML5_CONTINUE(stateloop);
}
case '\?': {
if (viewingXmlSource) {
state = NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION;
NS_HTML5_CONTINUE(stateloop);
}
clearLongStrBufAndAppend(c);
state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
@ -3270,6 +3287,40 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
}
}
}
// PROCESSING_INSTRUCTION state: scans over processing-instruction content
// until a '?' is seen.
case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION: {
for (; ; ) {
// Out of input: leave the state loop without changing the state.
if (++pos == endPos) {
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
switch(c) {
case '\?': {
// Possible first half of the closing "?>".
state = NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK;
NS_HTML5_BREAK(processinginstructionloop);
}
default: {
// Any other character is processing-instruction content.
continue;
}
}
}
// NOTE(review): presumably the jump target of
// NS_HTML5_BREAK(processinginstructionloop); control then falls through
// into the next case -- confirm against the macro definition.
processinginstructionloop_end: ;
}
// PROCESSING_INSTRUCTION_QUESTION_MARK state: the previous character was a
// '?' inside a processing instruction.
case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK: {
// Out of input: leave the state loop without changing the state.
if (++pos == endPos) {
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
switch(c) {
case '>': {
// "?>" closes the processing instruction.
state = NS_HTML5TOKENIZER_DATA;
NS_HTML5_CONTINUE(stateloop);
}
case '\?': {
// Another '?' can itself be the first half of the closing "?>"
// (as in "??>"), so stay in this state. Dropping back to
// PROCESSING_INSTRUCTION here would make the following '>' look like
// content and the processing instruction would never end.
NS_HTML5_CONTINUE(stateloop);
}
default: {
// The '?' was ordinary content; resume scanning.
state = NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION;
NS_HTML5_CONTINUE(stateloop);
}
}
}
}
}
stateloop_end: ;
@ -3353,6 +3404,10 @@ nsHtml5Tokenizer::stateLoopReportTransitions(PRInt32 state, PRUnichar c, PRInt32
NS_HTML5_CONTINUE(stateloop);
}
case '\?': {
if (viewingXmlSource) {
state = mViewSource->Transition(NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION, reconsume, pos);
NS_HTML5_CONTINUE(stateloop);
}
errProcessingInstruction();
clearLongStrBufAndAppend(c);
state = mViewSource->Transition(NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
@ -6156,6 +6211,40 @@ nsHtml5Tokenizer::stateLoopReportTransitions(PRInt32 state, PRUnichar c, PRInt32
}
}
}
// PROCESSING_INSTRUCTION state (transition-reporting variant): scans over
// processing-instruction content until a '?' is seen.
case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION: {
for (; ; ) {
// Out of input: leave the state loop without changing the state.
if (++pos == endPos) {
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
switch(c) {
case '\?': {
// Possible first half of the closing "?>"; the state change is routed
// through mViewSource->Transition().
state = mViewSource->Transition(NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK, reconsume, pos);
NS_HTML5_BREAK(processinginstructionloop);
}
default: {
// Any other character is processing-instruction content.
continue;
}
}
}
// NOTE(review): presumably the jump target of
// NS_HTML5_BREAK(processinginstructionloop); control then falls through
// into the next case -- confirm against the macro definition.
processinginstructionloop_end: ;
}
// PROCESSING_INSTRUCTION_QUESTION_MARK state (transition-reporting variant):
// the previous character was a '?' inside a processing instruction.
case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK: {
// Out of input: leave the state loop without changing the state.
if (++pos == endPos) {
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
switch(c) {
case '>': {
// "?>" closes the processing instruction.
state = mViewSource->Transition(NS_HTML5TOKENIZER_DATA, reconsume, pos);
NS_HTML5_CONTINUE(stateloop);
}
case '\?': {
// Another '?' can itself be the first half of the closing "?>"
// (as in "??>"), so stay in this state. No Transition() call is made
// because the state does not change. Dropping back to
// PROCESSING_INSTRUCTION here would make the following '>' look like
// content and the processing instruction would never end.
NS_HTML5_CONTINUE(stateloop);
}
default: {
// The '?' was ordinary content; resume scanning.
state = mViewSource->Transition(NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION, reconsume, pos);
NS_HTML5_CONTINUE(stateloop);
}
}
}
}
}
stateloop_end: ;

View File

@ -137,10 +137,12 @@ class nsHtml5Tokenizer
private:
PRInt32 line;
nsHtml5AtomTable* interner;
bool viewingXmlSource;
public:
nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler);
nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
void setInterner(nsHtml5AtomTable* interner);
void initLocation(nsString* newPublicId, nsString* newSystemId);
bool isViewingXmlSource();
void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation);
void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsHtml5ElementName* endTagExpectation);
private:
@ -367,6 +369,8 @@ class nsHtml5Tokenizer
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024

View File

@ -111,6 +111,12 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self)
contextNode = nsnull;
} else {
mode = NS_HTML5TREE_BUILDER_INITIAL;
// If we are viewing XML source, put a foreign (SVG) element permanently
// on the stack so that cdataSectionAllowed() returns true.
if (tokenizer->isViewingXmlSource()) {
nsIContent** elt = createElement(kNameSpaceID_SVG, nsHtml5Atoms::svg, tokenizer->emptyAttributes());
nsHtml5StackNode* node = new nsHtml5StackNode(nsHtml5ElementName::ELT_SVG, nsHtml5Atoms::svg, elt);
// Push the node onto the tree builder stack.
currentPtr++;
stack[currentPtr] = node;
}
}
}
@ -176,6 +182,9 @@ nsHtml5TreeBuilder::comment(PRUnichar* buf, PRInt32 start, PRInt32 length)
void
nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 length)
{
// When viewing XML source, return before any of the character handling
// below runs.
if (tokenizer->isViewingXmlSource()) {
return;
}
if (needToDropLF) {
needToDropLF = false;
if (buf[start] == '\n') {