Bug 650776 part 1 - Introduce a new HTML source to HTML source sanitizer XPCOM API. r=smaug.

This commit is contained in:
Henri Sivonen 2012-03-19 10:16:20 +02:00
parent 900fd6cc7f
commit 82c137d953
7 changed files with 453 additions and 48 deletions

View File

@ -43,6 +43,10 @@
#include "nsIPrincipal.h"
#include "mozilla/dom/Element.h"
/**
* See nsIParserUtils::sanitize for documentation of the default behavior
* and the configuration options of this sanitizer.
*/
class NS_STACK_CLASS nsTreeSanitizer {
public:
@ -50,10 +54,9 @@ class NS_STACK_CLASS nsTreeSanitizer {
/**
* The constructor.
*
* @param aAllowStyles Whether to allow <style> and style=""
* @param aAllowComments Whether to allow comment nodes
* @param aFlags Flags from nsIParserUtils
*/
nsTreeSanitizer(bool aAllowStyles, bool aAllowComments);
nsTreeSanitizer(PRUint32 aFlags = 0);
static void InitializeStatics();
static void ReleaseStatics();
@ -67,6 +70,14 @@ class NS_STACK_CLASS nsTreeSanitizer {
*/
void Sanitize(nsIContent* aFragment);
/**
* Sanitizes a disconnected (not in a docshell) document freshly obtained
* from a parser. The document must not be embedded in a docshell and must
* not have had a chance to get mutation event listeners attached to it.
* The root element must be <html>.
*/
void Sanitize(nsIDocument* aDocument);
private:
/**
@ -79,6 +90,33 @@ class NS_STACK_CLASS nsTreeSanitizer {
*/
bool mAllowComments;
/**
* Whether HTML <font>, <center>, bgcolor="", etc., are dropped.
*/
bool mDropNonCSSPresentation;
/**
* Whether to remove forms and form controls (excluding fieldset/legend).
*/
bool mDropForms;
/**
* Whether only cid: embeds are allowed.
*/
bool mCidEmbedsOnly;
/**
* Whether to drop <img>, <video>, <audio> and <source> and flatten out SVG.
*/
bool mDropMedia;
/**
* Whether we are sanitizing a full document (as opposed to a fragment).
*/
bool mFullDocument;
void SanitizeChildren(nsINode* aRoot);
/**
* Queries if an element must be replaced with its children.
* @param aNamespace the namespace of the element the question is about
@ -178,10 +216,15 @@ class NS_STACK_CLASS nsTreeSanitizer {
static nsTHashtable<nsISupportsHashKey>* sElementsHTML;
/**
* The whitelist of HTML attributes.
* The whitelist of non-presentational HTML attributes.
*/
static nsTHashtable<nsISupportsHashKey>* sAttributesHTML;
/**
* The whitelist of presentational HTML attributes.
*/
static nsTHashtable<nsISupportsHashKey>* sPresAttributesHTML;
/**
* The whitelist of SVG elements.
*/

View File

@ -55,6 +55,7 @@
#include "nsComponentManagerUtils.h"
#include "nsNullPrincipal.h"
#include "nsContentUtils.h"
#include "nsIParserUtils.h"
using namespace mozilla;
@ -77,6 +78,7 @@ nsIAtom** const kElementsHTML[] = {
&nsGkAtoms::bdo,
&nsGkAtoms::big,
&nsGkAtoms::blockquote,
// body checked specially
&nsGkAtoms::br,
&nsGkAtoms::button,
&nsGkAtoms::canvas,
@ -109,9 +111,11 @@ nsIAtom** const kElementsHTML[] = {
&nsGkAtoms::h4,
&nsGkAtoms::h5,
&nsGkAtoms::h6,
// head checked specially
&nsGkAtoms::header,
&nsGkAtoms::hgroup,
&nsGkAtoms::hr,
// html checked specially
&nsGkAtoms::i,
&nsGkAtoms::img,
&nsGkAtoms::input,
@ -155,6 +159,7 @@ nsIAtom** const kElementsHTML[] = {
&nsGkAtoms::sub,
&nsGkAtoms::summary,
&nsGkAtoms::sup,
// style checked specially
&nsGkAtoms::table,
&nsGkAtoms::tbody,
&nsGkAtoms::td,
@ -163,6 +168,7 @@ nsIAtom** const kElementsHTML[] = {
&nsGkAtoms::th,
&nsGkAtoms::thead,
&nsGkAtoms::time,
// title checked specially
&nsGkAtoms::tr,
#ifdef MOZ_MEDIA
&nsGkAtoms::track,
@ -184,7 +190,6 @@ nsIAtom** const kAttributesHTML[] = {
&nsGkAtoms::acceptcharset,
&nsGkAtoms::accesskey,
&nsGkAtoms::action,
&nsGkAtoms::align,
&nsGkAtoms::alt,
&nsGkAtoms::autocomplete,
&nsGkAtoms::autofocus,
@ -192,27 +197,20 @@ nsIAtom** const kAttributesHTML[] = {
&nsGkAtoms::autoplay,
#endif
&nsGkAtoms::axis,
&nsGkAtoms::background,
&nsGkAtoms::bgcolor,
&nsGkAtoms::border,
&nsGkAtoms::cellpadding,
&nsGkAtoms::cellspacing,
&nsGkAtoms::_char,
&nsGkAtoms::charoff,
&nsGkAtoms::charset,
&nsGkAtoms::checked,
&nsGkAtoms::cite,
&nsGkAtoms::_class,
&nsGkAtoms::clear,
&nsGkAtoms::cols,
&nsGkAtoms::colspan,
&nsGkAtoms::color,
&nsGkAtoms::content,
&nsGkAtoms::contenteditable,
&nsGkAtoms::contextmenu,
#ifdef MOZ_MEDIA
&nsGkAtoms::controls,
#endif
&nsGkAtoms::compact,
&nsGkAtoms::coords,
&nsGkAtoms::datetime,
&nsGkAtoms::dir,
@ -228,7 +226,6 @@ nsIAtom** const kAttributesHTML[] = {
&nsGkAtoms::high,
&nsGkAtoms::href,
&nsGkAtoms::hreflang,
&nsGkAtoms::hspace,
&nsGkAtoms::icon,
&nsGkAtoms::id,
&nsGkAtoms::ismap,
@ -258,7 +255,6 @@ nsIAtom** const kAttributesHTML[] = {
#endif
&nsGkAtoms::name,
&nsGkAtoms::nohref,
&nsGkAtoms::noshade,
&nsGkAtoms::novalidate,
&nsGkAtoms::nowrap,
&nsGkAtoms::open,
@ -268,7 +264,6 @@ nsIAtom** const kAttributesHTML[] = {
#ifdef MOZ_MEDIA
&nsGkAtoms::playbackrate,
#endif
&nsGkAtoms::pointSize,
#ifdef MOZ_MEDIA
&nsGkAtoms::poster,
&nsGkAtoms::preload,
@ -289,7 +284,6 @@ nsIAtom** const kAttributesHTML[] = {
&nsGkAtoms::scope,
&nsGkAtoms::selected,
&nsGkAtoms::shape,
&nsGkAtoms::size,
&nsGkAtoms::span,
&nsGkAtoms::spellcheck,
&nsGkAtoms::src,
@ -301,14 +295,31 @@ nsIAtom** const kAttributesHTML[] = {
&nsGkAtoms::title,
&nsGkAtoms::type,
&nsGkAtoms::usemap,
&nsGkAtoms::valign,
&nsGkAtoms::value,
&nsGkAtoms::vspace,
&nsGkAtoms::width,
&nsGkAtoms::wrap,
nsnull
};
/**
 * Presentational (non-CSS) HTML attributes, kept separate from
 * kAttributesHTML so that they can be dropped as a group.
 * InitializeStatics() loads these into sPresAttributesHTML, and
 * SanitizeAttributes() keeps them on HTML elements only when
 * mDropNonCSSPresentation is false.
 *
 * The list is terminated by nsnull; the loading loop stops at the
 * terminator.
 */
nsIAtom** const kPresAttributesHTML[] = {
&nsGkAtoms::align,
&nsGkAtoms::background,
&nsGkAtoms::bgcolor,
&nsGkAtoms::border,
&nsGkAtoms::cellpadding,
&nsGkAtoms::cellspacing,
&nsGkAtoms::color,
&nsGkAtoms::compact,
&nsGkAtoms::clear,
&nsGkAtoms::hspace,
&nsGkAtoms::noshade,
&nsGkAtoms::pointSize,
&nsGkAtoms::size,
&nsGkAtoms::valign,
&nsGkAtoms::vspace,
nsnull
};
nsIAtom** const kURLAttributesHTML[] = {
&nsGkAtoms::action,
&nsGkAtoms::href,
@ -979,22 +990,36 @@ nsIAtom** const kAttributesMathML[] = {
/**
 * MathML attributes whose values are treated as URLs, terminated by nsnull.
 *
 * When the sanitizer runs in cid:-embeds-only mode, SanitizeURL() removes
 * no-namespace cdgroup, altimg and definitionURL attributes regardless of
 * their scheme.
 */
nsIAtom** const kURLAttributesMathML[] = {
&nsGkAtoms::href,
&nsGkAtoms::src,
&nsGkAtoms::cdgroup_,
&nsGkAtoms::altimg_,
&nsGkAtoms::definitionURL_,
nsnull
};
nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sElementsHTML = nsnull;
nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sAttributesHTML = nsnull;
nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sPresAttributesHTML = nsnull;
nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sElementsSVG = nsnull;
nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sAttributesSVG = nsnull;
nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sElementsMathML = nsnull;
nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sAttributesMathML = nsnull;
nsIPrincipal* nsTreeSanitizer::sNullPrincipal = nsnull;
nsTreeSanitizer::nsTreeSanitizer(bool aAllowStyles, bool aAllowComments)
: mAllowStyles(aAllowStyles)
, mAllowComments(aAllowComments)
nsTreeSanitizer::nsTreeSanitizer(PRUint32 aFlags)
: mAllowStyles(aFlags & nsIParserUtils::SanitizerAllowStyle)
, mAllowComments(aFlags & nsIParserUtils::SanitizerAllowComments)
, mDropNonCSSPresentation(aFlags &
nsIParserUtils::SanitizerDropNonCSSPresentation)
, mDropForms(aFlags & nsIParserUtils::SanitizerDropForms)
, mCidEmbedsOnly(aFlags &
nsIParserUtils::SanitizerCidEmbedsOnly)
, mDropMedia(aFlags & nsIParserUtils::SanitizerDropMedia)
, mFullDocument(false)
{
if (mCidEmbedsOnly) {
// Sanitizing styles for external references is not supported.
mAllowStyles = false;
}
if (!sElementsHTML) {
// Initialize lazily to avoid having to initialize at all if the user
// doesn't paste HTML or load feeds.
@ -1006,9 +1031,31 @@ bool
nsTreeSanitizer::MustFlatten(PRInt32 aNamespace, nsIAtom* aLocal)
{
if (aNamespace == kNameSpaceID_XHTML) {
if (mDropNonCSSPresentation && (nsGkAtoms::font == aLocal ||
nsGkAtoms::center == aLocal)) {
return true;
}
if (mDropForms && (nsGkAtoms::form == aLocal ||
nsGkAtoms::input == aLocal ||
nsGkAtoms::keygen == aLocal ||
nsGkAtoms::option == aLocal ||
nsGkAtoms::optgroup == aLocal)) {
return true;
}
if (mFullDocument && (nsGkAtoms::title == aLocal ||
nsGkAtoms::html == aLocal ||
nsGkAtoms::head == aLocal ||
nsGkAtoms::body == aLocal)) {
return false;
}
return !sElementsHTML->GetEntry(aLocal);
}
if (aNamespace == kNameSpaceID_SVG) {
if (mCidEmbedsOnly || mDropMedia) {
// Sanitizing CSS-based URL references inside SVG presentational
// attributes is not supported, so flattening for cid: embed case.
return true;
}
return !sElementsSVG->GetEntry(aLocal);
}
if (aNamespace == kNameSpaceID_MathML) {
@ -1042,11 +1089,30 @@ nsTreeSanitizer::MustPrune(PRInt32 aNamespace,
return true;
}
if (aNamespace == kNameSpaceID_XHTML) {
if (nsGkAtoms::title == aLocal) {
if (nsGkAtoms::title == aLocal && !mFullDocument) {
// emulate the quirks of the old parser
return true;
}
if ((nsGkAtoms::meta == aLocal || nsGkAtoms::link == aLocal) &&
if (mDropForms && (nsGkAtoms::select == aLocal ||
nsGkAtoms::button == aLocal ||
nsGkAtoms::datalist == aLocal)) {
return true;
}
if (mDropMedia && (nsGkAtoms::img == aLocal ||
nsGkAtoms::video == aLocal ||
nsGkAtoms::audio == aLocal ||
nsGkAtoms::source == aLocal)) {
return true;
}
if (nsGkAtoms::meta == aLocal &&
(aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::charset) ||
aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv))) {
// Throw away charset declarations even if they also have microdata
// which they can't validly have.
return true;
}
if (((!mFullDocument && nsGkAtoms::meta == aLocal) ||
nsGkAtoms::link == aLocal) &&
!(aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::itemprop) ||
aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::itemscope))) {
// emulate old behavior for non-Microdata <meta> and <link> presumably
@ -1207,17 +1273,27 @@ nsTreeSanitizer::SanitizeAttributes(mozilla::dom::Element* aElement,
// the loop again.
--ac;
i = ac; // i will be decremented immediately thanks to the for loop
continue;
}
// else fall through to see if there's another reason to drop this
// attribute (in particular if the attribute is background="" on an
// HTML element)
}
if (!mDropNonCSSPresentation &&
(aAllowed == sAttributesHTML) && // element is HTML
sPresAttributesHTML->GetEntry(attrLocal)) {
continue;
}
if (aAllowed->GetEntry(attrLocal) &&
!(attrLocal == nsGkAtoms::rel &&
aElement->IsHTML(nsGkAtoms::link)) &&
!(attrLocal == nsGkAtoms::name &&
aElement->IsHTML(nsGkAtoms::meta))) {
!((attrLocal == nsGkAtoms::rel &&
aElement->IsHTML(nsGkAtoms::link)) ||
(!mFullDocument &&
attrLocal == nsGkAtoms::name &&
aElement->IsHTML(nsGkAtoms::meta)))) {
// name="" and rel="" are whitelisted, but treat them as blacklisted
// for <meta name> and <link rel> to avoid document-wide metadata
// or styling overrides with non-conforming <meta name itemprop> or
// for <meta name> (fragment case) and <link rel> (all cases) to avoid
// document-wide metadata or styling overrides with non-conforming
// <meta name itemprop> or
// <link rel itemprop>
continue;
}
@ -1302,6 +1378,23 @@ nsTreeSanitizer::SanitizeURL(mozilla::dom::Element* aElement,
if (NS_SUCCEEDED(rv)) {
rv = secMan->CheckLoadURIWithPrincipal(sNullPrincipal, attrURI, flags);
}
if (mCidEmbedsOnly &&
NS_SUCCEEDED(rv) &&
kNameSpaceID_None == aNamespace) {
if (nsGkAtoms::src == aLocalName || nsGkAtoms::background == aLocalName) {
bool isCid;
attrURI->SchemeIs("cid", &isCid);
if (!isCid) {
rv = NS_ERROR_FAILURE;
}
} else if (nsGkAtoms::cdgroup_ == aLocalName ||
nsGkAtoms::altimg_ == aLocalName ||
nsGkAtoms::definitionURL_ == aLocalName) {
// Gecko doesn't fetch these now and shouldn't in the future, but
// in case someone goofs with these in the future, let's drop them.
rv = NS_ERROR_FAILURE;
}
}
if (NS_FAILED(rv)) {
aElement->UnsetAttr(aNamespace, aLocalName, false);
return true;
@ -1310,7 +1403,8 @@ nsTreeSanitizer::SanitizeURL(mozilla::dom::Element* aElement,
}
void
nsTreeSanitizer::Sanitize(nsIContent* aFragment) {
nsTreeSanitizer::Sanitize(nsIContent* aFragment)
{
// If you want to relax these preconditions, be sure to check the code in
// here that notifies / does not notify or that fires mutation events if
// in tree.
@ -1318,7 +1412,31 @@ nsTreeSanitizer::Sanitize(nsIContent* aFragment) {
"Argument was not DOM fragment.");
NS_PRECONDITION(!aFragment->IsInDoc(), "The fragment is in doc?");
nsIContent* node = aFragment->GetFirstChild();
mFullDocument = false;
SanitizeChildren(aFragment);
}
/**
 * Sanitizes a whole document in place.
 *
 * Marks the sanitizer as operating on a full document (mFullDocument), which
 * changes how MustFlatten/MustPrune/SanitizeAttributes treat <html>, <head>,
 * <body>, <title> and <meta name>, then walks the children of aDocument.
 *
 * @param aDocument the document to sanitize; it must not be in a docshell
 *                  and its root element must be <html>
 */
void
nsTreeSanitizer::Sanitize(nsIDocument* aDocument)
{
  // If you want to relax these preconditions, be sure to check the code in
  // here that notifies / does not notify or that fires mutation events if
  // in tree.
#ifdef DEBUG
  nsCOMPtr<nsISupports> container = aDocument->GetContainer();
  NS_PRECONDITION(!container, "The document is in a shell.");
  nsRefPtr<mozilla::dom::Element> root = aDocument->GetRootElement();
  // Check for a missing root before dereferencing it so that a document
  // without a root element trips the assertion instead of crashing.
  NS_PRECONDITION(root && root->IsHTML(nsGkAtoms::html), "Not HTML root.");
#endif
  mFullDocument = true;
  SanitizeChildren(aDocument);
}
void
nsTreeSanitizer::SanitizeChildren(nsINode* aRoot)
{
nsIContent* node = aRoot->GetFirstChild();
while (node) {
if (node->IsElement()) {
mozilla::dom::Element* elt = node->AsElement();
@ -1327,7 +1445,7 @@ nsTreeSanitizer::Sanitize(nsIContent* aFragment) {
PRInt32 ns = nodeInfo->NamespaceID();
if (MustPrune(ns, localName, elt)) {
nsIContent* next = node->GetNextNonChildNode(aFragment);
nsIContent* next = node->GetNextNonChildNode(aRoot);
node->GetParent()->RemoveChild(node);
node = next;
continue;
@ -1344,7 +1462,7 @@ nsTreeSanitizer::Sanitize(nsIContent* aFragment) {
nsCOMPtr<nsIURI> baseURI = node->GetBaseURI();
if (SanitizeStyleSheet(styleText,
sanitizedStyle,
aFragment->OwnerDoc(),
aRoot->OwnerDoc(),
baseURI)) {
nsContentUtils::SetNodeTextContent(node, sanitizedStyle, true);
} else {
@ -1366,11 +1484,11 @@ nsTreeSanitizer::Sanitize(nsIContent* aFragment) {
mAllowStyles,
false);
}
node = node->GetNextNonChildNode(aFragment);
node = node->GetNextNonChildNode(aRoot);
continue;
}
if (MustFlatten(ns, localName)) {
nsIContent* next = node->GetNextNode(aFragment);
nsIContent* next = node->GetNextNode(aRoot);
nsIContent* parent = node->GetParent();
nsCOMPtr<nsIContent> child; // Must keep the child alive during move
nsresult rv;
@ -1393,7 +1511,8 @@ nsTreeSanitizer::Sanitize(nsIContent* aFragment) {
sAttributesHTML,
(nsIAtom***)kURLAttributesHTML,
false, mAllowStyles,
(nsGkAtoms::img == localName));
(nsGkAtoms::img == localName) &&
!mCidEmbedsOnly);
} else if (ns == kNameSpaceID_SVG) {
SanitizeAttributes(elt,
sAttributesSVG,
@ -1409,13 +1528,13 @@ nsTreeSanitizer::Sanitize(nsIContent* aFragment) {
false,
false);
}
node = node->GetNextNode(aFragment);
node = node->GetNextNode(aRoot);
continue;
}
NS_ASSERTION(!node->GetFirstChild(), "How come non-element node had kids?");
nsIContent* next = node->GetNextNonChildNode(aFragment);
nsIContent* next = node->GetNextNonChildNode(aRoot);
if (!mAllowComments && node->IsNodeOfType(nsINode::eCOMMENT)) {
node->GetParent()->RemoveChild(node);
node->GetNodeParent()->RemoveChild(node);
}
node = next;
}
@ -1438,6 +1557,12 @@ nsTreeSanitizer::InitializeStatics()
sAttributesHTML->PutEntry(*kAttributesHTML[i]);
}
sPresAttributesHTML = new nsTHashtable<nsISupportsHashKey> ();
sPresAttributesHTML->Init(ArrayLength(kPresAttributesHTML));
for (PRUint32 i = 0; kPresAttributesHTML[i]; i++) {
sPresAttributesHTML->PutEntry(*kPresAttributesHTML[i]);
}
sElementsSVG = new nsTHashtable<nsISupportsHashKey> ();
sElementsSVG->Init(ArrayLength(kElementsSVG));
for (PRUint32 i = 0; kElementsSVG[i]; i++) {
@ -1476,6 +1601,9 @@ nsTreeSanitizer::ReleaseStatics()
delete sAttributesHTML;
sAttributesHTML = nsnull;
delete sPresAttributesHTML;
sPresAttributesHTML = nsnull;
delete sElementsSVG;
sElementsSVG = nsnull;

View File

@ -71,6 +71,7 @@ _CHROME_FILES = \
test_bug574596.html \
test_bug683852.xul \
test_bug599295.html \
test_bug650776.html \
test_bug650784.html \
$(NULL)

View File

@ -0,0 +1,110 @@
<!DOCTYPE HTML>
<html>
<!--
https://bugzilla.mozilla.org/show_bug.cgi?id=650776
-->
<head>
<meta charset="utf-8">
<title>Test for Bug 650776</title>
<script type="application/javascript" src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js"></script>
<link rel="stylesheet" type="text/css" href="chrome://mochikit/content/tests/SimpleTest/test.css"/>
</head>
<body>
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650776">Mozilla Bug 650776</a>
<p id="display"></p>
<div id="content" style="display: none">
</div>
<pre id="test">
<script type="application/javascript">
/** Test for Bug 650776 **/
var u = Components.interfaces.nsIParserUtils;
var s = Components.classes["@mozilla.org/parserutils;1"]
.getService(u);
// Basic sanity
is(s.sanitize("foo", 0), "<html><head></head><body>foo</body></html>", "Wrong sanitizer result 1");
// Scripts get removed
is(s.sanitize("<script>\u003c/script>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 2");
// Event handlers get removed
is(s.sanitize("<a onclick='boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 3");
// By default, styles are removed
is(s.sanitize("<style>p { color: red; }</style><p style='background-color: blue;'></p>", 0), "<html><head></head><body><p></p></body></html>", "Wrong sanitizer result 4");
// Can allow styles
is(s.sanitize("<style>p { color: red; }</style><p style='background-color: blue;'></p>", u.SanitizerAllowStyle), '<html><head><style>p { color: red; }</style></head><body><p style="background-color: blue;"></p></body></html>', "Wrong sanitizer result 5");
// -moz-binding gets dropped when styles allowed; however, reconstructing the p { ... } part seems broken!
todo_is(s.sanitize("<style>p { color: red; -moz-binding: url(foo); }</style><p style='background-color: blue; -moz-binding: url(foo);'></p>", u.SanitizerAllowStyle), '<html><head><style>p { color: red; }</style></head><body><p style="background-color: blue;"></p></body></html>', "Wrong sanitizer result 6");
// Various cid: embeds only cases
is(s.sanitize("<img src='foo.html'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><img></body></html>', "Wrong sanitizer result 7");
is(s.sanitize("<img src='cid:foo'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><img src="cid:foo"></body></html>', "Wrong sanitizer result 8");
is(s.sanitize("<img src='data:image/png,'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><img></body></html>', "Wrong sanitizer result 9");
is(s.sanitize("<img src='http://mochi.test/'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><img></body></html>', "Wrong sanitizer result 10");
is(s.sanitize("<a href='http://mochi.test/'></a>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><a href="http://mochi.test/"></a></body></html>', "Wrong sanitizer result 11");
is(s.sanitize("<body background='http://mochi.test/'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body></body></html>', "Wrong sanitizer result 12");
is(s.sanitize("<body background='cid:foo'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body background="cid:foo"></body></html>', "Wrong sanitizer result 13");
is(s.sanitize("<svg></svg>", u.SanitizerCidEmbedsOnly), '<html><head></head><body></body></html>', "Wrong sanitizer result 14");
is(s.sanitize("<math definitionURL='cid:foo' altimg='cid:foo'></math>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><math></math></body></html>', "Wrong sanitizer result 14");
is(s.sanitize("<video><source src='http://mochi.test/'></video>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><video controls="controls"><source></video></body></html>', "Wrong sanitizer result 15");
is(s.sanitize("<style></style>", u.SanitizerAllowStyle | u.SanitizerCidEmbedsOnly), '<html><head></head><body></body></html>', "Wrong sanitizer result 16");
// Dangerous links
is(s.sanitize("<a href='javascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 17");
is(s.sanitize("<a href='JavaScript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 18");
is(s.sanitize("<a href=' javascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 19");
is(s.sanitize("<a href='\njavascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 20");
is(s.sanitize("<a href='\fjavascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 21");
is(s.sanitize("<a href='\u00A0javascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 22");
is(s.sanitize("<a href='foo.html'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 23");
// Comments
is(s.sanitize("<!-- foo -->", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 24");
is(s.sanitize("<!-- foo -->", u.SanitizerAllowComments), "<!-- foo -->\n<html><head></head><body></body></html>", "Wrong sanitizer result 25");
// noscript
is(s.sanitize("<body><noscript><p class=bar>foo</p></noscript>", 0), '<html><head></head><body><noscript><p class="bar">foo</p></noscript></body></html>', "Wrong sanitizer result 26");
// dangerous elements
is(s.sanitize("<iframe></iframe>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 27");
is(s.sanitize("<object></object>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 28");
is(s.sanitize("<embed>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 29");
// presentationalism
is(s.sanitize("<font></font>", 0), "<html><head></head><body><font></font></body></html>", "Wrong sanitizer result 30");
is(s.sanitize("<center></center>", 0), "<html><head></head><body><center></center></body></html>", "Wrong sanitizer result 31");
is(s.sanitize("<div align=center></div>", 0), '<html><head></head><body><div align="center"></div></body></html>', "Wrong sanitizer result 32");
is(s.sanitize("<table><tr><td bgcolor=#FFFFFF>", 0), '<html><head></head><body><table><tbody><tr><td bgcolor="#FFFFFF"></td></tr></tbody></table></body></html>', "Wrong sanitizer result 33");
is(s.sanitize("<font></font>", u.SanitizerDropNonCSSPresentation), "<html><head></head><body></body></html>", "Wrong sanitizer result 34");
is(s.sanitize("<center></center>", u.SanitizerDropNonCSSPresentation), "<html><head></head><body></body></html>", "Wrong sanitizer result 35");
is(s.sanitize("<div align=center></div>", u.SanitizerDropNonCSSPresentation), '<html><head></head><body><div></div></body></html>', "Wrong sanitizer result 36");
is(s.sanitize("<table><tr><td bgcolor=#FFFFFF>", u.SanitizerDropNonCSSPresentation), '<html><head></head><body><table><tbody><tr><td></td></tr></tbody></table></body></html>', "Wrong sanitizer result 37");
// metadata
is(s.sanitize("<meta charset=utf-7>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 38");
is(s.sanitize("<meta http-equiv=content-type content='text/html; charset=utf-7'>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 39");
is(s.sanitize("<meta itemprop=foo content=bar>", 0), '<html><head><meta itemprop="foo" content="bar"></head><body></body></html>', "Wrong sanitizer result 40");
is(s.sanitize("<link rel=whatever href=http://mochi.test/ >", 0), '<html><head></head><body></body></html>', "Wrong sanitizer result 41");
is(s.sanitize("<link itemprop=foo href=http://mochi.test/ >", 0), '<html><head><link itemprop="foo" href="http://mochi.test/"></head><body></body></html>', "Wrong sanitizer result 42");
is(s.sanitize("<link rel=stylesheet itemprop=foo href=http://mochi.test/ >", 0), '<html><head><link itemprop="foo" href="http://mochi.test/"></head><body></body></html>', "Wrong sanitizer result 43");
is(s.sanitize("<meta name=foo content=bar>", 0), '<html><head><meta name="foo" content="bar"></head><body></body></html>', "Wrong sanitizer result 44");
// forms
is(s.sanitize("<form></form>", 0), '<html><head></head><body><form></form></body></html>', "Wrong sanitizer result 45");
is(s.sanitize("<fieldset><legend></legend></fieldset>", 0), '<html><head></head><body><fieldset><legend></legend></fieldset></body></html>', "Wrong sanitizer result 46");
is(s.sanitize("<input>", 0), '<html><head></head><body><input></body></html>', "Wrong sanitizer result 47");
is(s.sanitize("<button>foo</button>", 0), '<html><head></head><body><button>foo</button></body></html>', "Wrong sanitizer result 48");
is(s.sanitize("<select><optgroup><option>foo</option></optgroup></select></button>", 0), '<html><head></head><body><select><optgroup><option>foo</option></optgroup></select></body></html>', "Wrong sanitizer result 49");
is(s.sanitize("<form></form>", u.SanitizerDropForms), '<html><head></head><body></body></html>', "Wrong sanitizer result 50");
is(s.sanitize("<fieldset><legend></legend></fieldset>", u.SanitizerDropForms), '<html><head></head><body><fieldset><legend></legend></fieldset></body></html>', "Wrong sanitizer result 51");
is(s.sanitize("<input>", u.SanitizerDropForms), '<html><head></head><body></body></html>', "Wrong sanitizer result 52");
is(s.sanitize("<button>foo</button>", u.SanitizerDropForms), '<html><head></head><body></body></html>', "Wrong sanitizer result 53");
is(s.sanitize("<select><optgroup><option>foo</option></optgroup></select></button>", u.SanitizerDropForms), '<html><head></head><body></body></html>', "Wrong sanitizer result 54");
// doctype
is(s.sanitize("<!DOCTYPE html>", 0), '<!DOCTYPE html>\n<html><head></head><body></body></html>', "Wrong sanitizer result 55");
// title
is(s.sanitize("<title></title>", 0), '<html><head><title></title></head><body></body></html>', "Wrong sanitizer result 56");
// Drop media
is(s.sanitize("<img>", u.SanitizerDropMedia), '<html><head></head><body></body></html>', "Wrong sanitizer result 57");
is(s.sanitize("<svg>foo</svg>", u.SanitizerDropMedia), '<html><head></head><body>foo</body></html>', "Wrong sanitizer result 58");
is(s.sanitize("<video><source></video>", u.SanitizerDropMedia), '<html><head></head><body></body></html>', "Wrong sanitizer result 59");
is(s.sanitize("<audio><source></audio>", u.SanitizerDropMedia), '<html><head></head><body></body></html>', "Wrong sanitizer result 60");
</script>
</pre>
</body>
</html>

View File

@ -126,6 +126,7 @@
#include "nsIDocShellTreeItem.h"
#include "nsContentUtils.h"
#include "mozilla/Preferences.h"
#include "nsIParserUtils.h"
using namespace mozilla;
@ -2370,7 +2371,9 @@ nsresult nsHTMLEditor::ParseFragment(const nsAString & aFragStr,
false,
true);
if (!aTrustedInput) {
nsTreeSanitizer sanitizer(!!aContextLocalName, !aContextLocalName);
nsTreeSanitizer sanitizer(aContextLocalName ?
nsIParserUtils::SanitizerAllowStyle :
nsIParserUtils::SanitizerAllowComments);
sanitizer.Sanitize(fragment);
}
*outNode = do_QueryInterface(frag);

View File

@ -6,11 +6,89 @@
/**
* Non-Web HTML parser functionality to Firefox extensions and XULRunner apps.
* Don't use this from within Gecko--use nsContentUtils directly instead.
* Don't use this from within Gecko--use nsContentUtils, nsTreeSanitizer, etc.
* directly instead.
*/
[scriptable, uuid(290f49bb-0619-4bda-8006-ab31bec7231a)]
interface nsIParserUtils : nsISupports
{
/**
* Flag for sanitizer: Allow comment nodes.
*/
const unsigned long SanitizerAllowComments = (1 << 0);
/**
* Flag for sanitizer: Allow <style> and style="" (with contents sanitized
* in case of -moz-binding). Note! If -moz-binding is absent, properties
* that might be XSS risks in other Web engines are preserved!
*/
const unsigned long SanitizerAllowStyle = (1 << 1);
/**
* Flag for sanitizer: Only allow cid: URLs for embedded content.
*
* At present, sanitizing CSS backgrounds, etc., is not supported, so setting
* this together with SanitizerAllowStyle doesn't make sense.
*
* At present, sanitizing CSS syntax in SVG presentational attributes is not
* supported, so this option flattens out SVG.
*/
const unsigned long SanitizerCidEmbedsOnly = (1 << 2);
/**
* Flag for sanitizer: Drop non-CSS presentational HTML elements and
* attributes, such as <font>, <center> and bgcolor="".
*/
const unsigned long SanitizerDropNonCSSPresentation = (1 << 3);
/**
* Flag for sanitizer: Drop forms and form controls (excluding
* fieldset/legend).
*/
const unsigned long SanitizerDropForms = (1 << 4);
/**
* Flag for sanitizer: Drop <img>, <video>, <audio> and <source> and flatten
* out SVG.
*/
const unsigned long SanitizerDropMedia = (1 << 5);
/**
* Parses a string into an HTML document, sanitizes the document and
* returns the result serialized to a string.
*
* The sanitizer is designed to protect against XSS when sanitized content
* is inserted into a different-origin context without an iframe-equivalent
* sandboxing mechanism.
*
* By default, the sanitizer doesn't try to avoid leaking information that
* the content was viewed to third parties. That is, by default, e.g.
* <img src> pointing to an HTTP server potentially controlled by a third
* party is not removed. To avoid ambient information leakage upon loading
* the sanitized content, use the SanitizerCidEmbedsOnly flag. In that
* case, <a href> links (and similar) to other content are preserved, so an
* explicit user action (following a link) after the content has been loaded
* can still leak information.
*
* By default, non-dangerous non-CSS presentational HTML elements and
* attributes or forms are not removed. To remove these, use
* SanitizerDropNonCSSPresentation and/or SanitizerDropForms.
*
* By default, comments and CSS are removed. To preserve comments, use
* SanitizerAllowComments. To preserve <style> and style="", use
* SanitizerAllowStyle. -moz-binding is removed from <style> and style="" if
* present. In this case, properties that Gecko doesn't recognize can get
* removed as a side effect. Note! If SanitizerAllowStyle is specified and
* -moz-binding is not present in <style> or style="", the sanitized content
* may still be XSS dangerous if loaded into a non-Gecko Web engine!
*
* @param src the HTML source to parse (C++ callers are allowed but not
* required to use the same string for the return value.)
* @param flags sanitization option flags defined above
*/
AString sanitize(in AString src, in unsigned long flags);
/**
* Convert HTML to plain text.
*

View File

@ -78,10 +78,10 @@ static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
NS_IMETHODIMP
nsParserUtils::ConvertToPlainText(const nsAString & aFromStr,
nsParserUtils::ConvertToPlainText(const nsAString& aFromStr,
PRUint32 aFlags,
PRUint32 aWrapCol,
nsAString & aToStr)
nsAString& aToStr)
{
return nsContentUtils::ConvertToPlainText(aFromStr,
aToStr,
@ -90,8 +90,8 @@ nsParserUtils::ConvertToPlainText(const nsAString & aFromStr,
}
NS_IMETHODIMP
nsParserUtils::Unescape(const nsAString & aFromStr,
nsAString & aToStr)
nsParserUtils::Unescape(const nsAString& aFromStr,
nsAString& aToStr)
{
return nsContentUtils::ConvertToPlainText(aFromStr,
aToStr,
@ -100,11 +100,53 @@ nsParserUtils::Unescape(const nsAString & aFromStr,
0);
}
/**
 * Parses aFromStr as a full HTML document, sanitizes the resulting tree with
 * nsTreeSanitizer and serializes it back to HTML source.
 *
 * The document is created with an about:blank URI and a null principal and
 * is never placed in a docshell.
 *
 * @param aFromStr the HTML source to parse
 * @param aFlags   sanitizer flags from nsIParserUtils
 * @param aToStr   receives the serialized, sanitized document
 * @return NS_OK on success, otherwise the failure code of whichever step
 *         (URI/principal/document creation, parsing, encoding) failed
 */
NS_IMETHODIMP
nsParserUtils::Sanitize(const nsAString& aFromStr,
                        PRUint32 aFlags,
                        nsAString& aToStr)
{
  nsCOMPtr<nsIURI> uri;
  nsresult rv = NS_NewURI(getter_AddRefs(uri), "about:blank");
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIPrincipal> principal =
    do_CreateInstance("@mozilla.org/nullprincipal;1", &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIDOMDocument> domDocument;
  rv = nsContentUtils::CreateDocument(EmptyString(),
                                      EmptyString(),
                                      nsnull,
                                      uri,
                                      uri,
                                      principal,
                                      nsnull,
                                      DocumentFlavorHTML,
                                      getter_AddRefs(domDocument));
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument);
  // The parser and the sanitizer both dereference the document, so bail out
  // rather than crash if the QI unexpectedly fails.
  NS_ENSURE_TRUE(document, NS_ERROR_UNEXPECTED);

  rv = nsContentUtils::ParseDocumentHTML(aFromStr, document, false);
  NS_ENSURE_SUCCESS(rv, rv);

  nsTreeSanitizer sanitizer(aFlags);
  sanitizer.Sanitize(document);

  nsCOMPtr<nsIDocumentEncoder> encoder =
    do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html", &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  // Propagate initialization failures instead of encoding with a
  // half-initialized encoder.
  rv = encoder->NativeInit(document,
                           NS_LITERAL_STRING("text/html"),
                           nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration |
                           nsIDocumentEncoder::OutputNoScriptContent |
                           nsIDocumentEncoder::OutputEncodeBasicEntities |
                           nsIDocumentEncoder::OutputLFLineBreak |
                           nsIDocumentEncoder::OutputRaw);
  NS_ENSURE_SUCCESS(rv, rv);

  return encoder->EncodeToString(aToStr);
}
// The feed version of nsContentUtils::CreateContextualFragment. It
// creates a fragment, but doesn't go to all the effort to preserve
// context like innerHTML does, because feed DOMs shouldn't have that.
NS_IMETHODIMP
nsParserUtils::ParseFragment(const nsAString &aFragment,
nsParserUtils::ParseFragment(const nsAString& aFragment,
bool aIsXML,
nsIURI* aBaseURI,
nsIDOMElement* aContextElement,
@ -197,7 +239,7 @@ nsParserUtils::ParseFragment(const nsAString &aFragment,
}
}
if (fragment) {
nsTreeSanitizer sanitizer(false, false);
nsTreeSanitizer sanitizer;
sanitizer.Sanitize(fragment);
}
}