Bug 704058 - Sync HTML parser .java sources in m-c with the htmlparser repo. rs=smaug. DONTBUILD

This commit is contained in:
Henri Sivonen 2011-11-21 16:12:15 +02:00
parent 35eab5e3f7
commit d6f3956f8a
4 changed files with 69 additions and 8 deletions

View File

@ -370,6 +370,8 @@ public final class ElementName
// return "ANNOTATION_XML";
// case TreeBuilder.FOREIGNOBJECT_OR_DESC:
// return "FOREIGNOBJECT_OR_DESC";
// case TreeBuilder.MENUITEM:
// return "MENUITEM";
// }
// return null;
// }
@ -713,6 +715,7 @@ public final class ElementName
public static final ElementName LOWLIMIT = new ElementName("lowlimit", "lowlimit", TreeBuilder.OTHER);
public static final ElementName METADATA = new ElementName("metadata", "metadata", TreeBuilder.OTHER);
public static final ElementName MENCLOSE = new ElementName("menclose", "menclose", TreeBuilder.OTHER);
public static final ElementName MENUITEM = new ElementName("menuitem", "menuitem", TreeBuilder.MENUITEM | SPECIAL);
public static final ElementName MPHANTOM = new ElementName("mphantom", "mphantom", TreeBuilder.OTHER);
public static final ElementName NOFRAMES = new ElementName("noframes", "noframes", TreeBuilder.NOFRAMES | SPECIAL);
public static final ElementName NOSCRIPT = new ElementName("noscript", "noscript", TreeBuilder.NOSCRIPT | SPECIAL);
@ -1105,6 +1108,7 @@ public final class ElementName
LOWLIMIT,
METADATA,
MENCLOSE,
MENUITEM,
MPHANTOM,
NOFRAMES,
NOSCRIPT,
@ -1498,6 +1502,7 @@ public final class ElementName
281683369,
282120228,
282250732,
282498697,
282508942,
283743649,
283787570,

View File

@ -1204,8 +1204,10 @@ public class Tokenizer implements Locator {
attributes.addAttribute(attributeName, "", xmlnsPolicy);
}
} else {
err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
attributes.addAttribute(attributeName, "", xmlnsPolicy);
if (AttributeName.BORDER != attributeName) {
err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
attributes.addAttribute(attributeName, "", xmlnsPolicy);
}
}
} else {
if (AttributeName.SRC == attributeName
@ -6758,9 +6760,7 @@ public class Tokenizer implements Locator {
attributeName = other.attributeName.cloneAttributeName(interner);
}
if (attributes != null) {
Portability.delete(attributes);
}
Portability.delete(attributes);
if (other.attributes == null) {
attributes = null;
} else {

View File

@ -55,6 +55,7 @@ import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import org.xml.sax.ErrorHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
@ -198,6 +199,8 @@ public abstract class TreeBuilder<T> implements TokenHandler,
final static int KEYGEN = 65;
final static int MENUITEM = 66;
// start insertion modes
private static final int INITIAL = 0;
@ -340,7 +343,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
// [NOCPP[
private static final @Local String HTML_LOCAL = "html";
// ]NOCPP]
private int mode = INITIAL;
@ -362,6 +365,8 @@ public abstract class TreeBuilder<T> implements TokenHandler,
private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
private LocatorImpl firstCommentLocation;
// ]NOCPP]
private boolean scriptingEnabled = false;
@ -519,6 +524,21 @@ public abstract class TreeBuilder<T> implements TokenHandler,
errorHandler.warning(spe);
}
/**
 * Reports a warning whose position is taken from an explicitly supplied
 * locator.
 *
 * <p>Does nothing when no error handler is set.
 *
 * @param message
 *            the message
 * @param locator
 *            the locator handed to the {@code SAXParseException} that
 *            carries the warning
 * @throws SAXException
 *             if the error handler's {@code warning} call throws it
 */
final void warn(String message, Locator locator) throws SAXException {
    if (errorHandler != null) {
        errorHandler.warning(new SAXParseException(message, locator));
    }
}
// ]NOCPP]
@SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException {
@ -536,6 +556,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
html4 = false;
idLocations.clear();
wantingComments = wantsComments();
firstCommentLocation = null;
// ]NOCPP]
start(fragment);
charBufferLen = 0;
@ -619,6 +640,11 @@ public abstract class TreeBuilder<T> implements TokenHandler,
false);
} else if (isAlmostStandards(publicIdentifier,
systemIdentifier)) {
// [NOCPP[
if (firstCommentLocation != null) {
warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
}
// ]NOCPP]
errAlmostStandardsDoctype();
documentModeInternal(
DocumentMode.ALMOST_STANDARDS_MODE,
@ -626,6 +652,9 @@ public abstract class TreeBuilder<T> implements TokenHandler,
false);
} else {
// [NOCPP[
if (firstCommentLocation != null) {
warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
}
if ((Portability.literalEqualsString(
"-//W3C//DTD HTML 4.0//EN",
publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString(
@ -672,12 +701,18 @@ public abstract class TreeBuilder<T> implements TokenHandler,
true);
} else if (isAlmostStandards(publicIdentifier,
systemIdentifier)) {
if (firstCommentLocation != null) {
warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
}
err("Almost standards mode doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
documentModeInternal(
DocumentMode.ALMOST_STANDARDS_MODE,
publicIdentifier, systemIdentifier,
true);
} else {
if (firstCommentLocation != null) {
warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
}
if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
@ -702,6 +737,9 @@ public abstract class TreeBuilder<T> implements TokenHandler,
true);
} else if (isAlmostStandards(publicIdentifier,
systemIdentifier)) {
if (firstCommentLocation != null) {
warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
}
if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)
&& systemIdentifier != null) {
if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
@ -715,6 +753,9 @@ public abstract class TreeBuilder<T> implements TokenHandler,
publicIdentifier, systemIdentifier,
true);
} else {
if (firstCommentLocation != null) {
warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
}
err("The doctype was not the HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
documentModeInternal(
DocumentMode.STANDARDS_MODE,
@ -735,6 +776,9 @@ public abstract class TreeBuilder<T> implements TokenHandler,
html4);
} else if (isAlmostStandards(publicIdentifier,
systemIdentifier)) {
if (firstCommentLocation != null) {
warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
}
if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
@ -747,6 +791,9 @@ public abstract class TreeBuilder<T> implements TokenHandler,
publicIdentifier, systemIdentifier,
html4);
} else {
if (firstCommentLocation != null) {
warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", firstCommentLocation);
}
if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
@ -822,6 +869,9 @@ public abstract class TreeBuilder<T> implements TokenHandler,
throws SAXException {
needToDropLF = false;
// [NOCPP[
if (firstCommentLocation == null) {
firstCommentLocation = new LocatorImpl(tokenizer);
}
if (!wantingComments) {
return;
}
@ -2057,6 +2107,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
case AREA_OR_WBR:
reconstructTheActiveFormattingElements();
// FALL THROUGH to PARAM_OR_SOURCE_OR_TRACK
// CPPONLY: case MENUITEM:
case PARAM_OR_SOURCE_OR_TRACK:
appendVoidElementToCurrentMayFoster(
elementName,
@ -3517,6 +3568,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
HtmlAttributes.EMPTY_ATTRIBUTES);
break endtagloop;
case AREA_OR_WBR:
// CPPONLY: case MENUITEM:
case PARAM_OR_SOURCE_OR_TRACK:
case EMBED_OR_IMG:
case IMAGE:
@ -4683,10 +4735,10 @@ public abstract class TreeBuilder<T> implements TokenHandler,
case ALTER_INFOSET:
// fall through
case ALLOW:
warn("Attribute \u201Cxmlns:xlink\u201D with the value \u201Chttp://www.w3org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
warn("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
break;
case FATAL:
fatal("Attribute \u201Cxmlns:xlink\u201D with the value \u201Chttp://www.w3org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
fatal("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
break;
}
}

View File

@ -51,6 +51,8 @@ public final class UTF16Buffer {
*/
private int end;
//[NOCPP[
/**
* Constructor for wrapping an existing UTF-16 code unit array.
*
@ -67,6 +69,8 @@ public final class UTF16Buffer {
this.end = end;
}
// ]NOCPP]
/**
* Returns the start index.
*