mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
697 lines
19 KiB
C++
697 lines
19 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set ts=2 sw=2 et tw=80: */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is mozilla.org HTML Sanitizer code.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Ben Bucksch <mozilla@bucksch.org>.
|
|
* Portions created by the Initial Developer are Copyright (C) 2002
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Netscape
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
|
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
/*
|
|
* A serializer and content sink that removes potentially insecure or
|
|
* otherwise dangerous or offending HTML (eg for display of HTML
|
|
* e-mail attachments or something).
|
|
*/
|
|
|
|
/* I used nsPlaintextSerializer as base for this class. I don't understand
|
|
all of the functions in the beginning. Possible that I fail to do
|
|
something or do something useless.
|
|
I am not proud about the implementation here at all.
|
|
Feel free to fix it :-).
|
|
*/
|
|
|
|
#include "mozSanitizingSerializer.h"
|
|
#include "nsIServiceManager.h"
|
|
#include "nsIDOMText.h"
|
|
#include "nsIDOMElement.h"
|
|
#include "nsTextFragment.h"
|
|
#include "nsContentUtils.h"
|
|
#include "nsReadableUtils.h"
|
|
#include "plstr.h"
|
|
#include "nsIProperties.h"
|
|
#include "nsUnicharUtils.h"
|
|
#include "nsIURI.h"
|
|
#include "nsNetUtil.h"
|
|
#include "nsEscape.h"
|
|
|
|
//#define DEBUG_BenB
|
|
|
|
static inline PRUnichar* escape(const nsString& source)
|
|
{
|
|
return nsEscapeHTML2(source.get(), source.Length());
|
|
}
|
|
|
|
/* XXX: |printf|s in some error conditions. They are intended as information
|
|
for the user, because they complain about malformed pref values.
|
|
Not sure, if popping up dialog boxes is the right thing for such code
|
|
(and if so, how to do it).
|
|
*/
|
|
|
|
#define TEXT_REMOVED "<Text removed>"
|
|
#define TEXT_BREAKER "|"
|
|
|
|
nsresult NS_NewSanitizingHTMLSerializer(nsIContentSerializer** aSerializer)
|
|
{
|
|
mozSanitizingHTMLSerializer* it = new mozSanitizingHTMLSerializer();
|
|
if (!it) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
NS_ADDREF(it);
|
|
*aSerializer = it;
|
|
return NS_OK;
|
|
}
|
|
|
|
mozSanitizingHTMLSerializer::mozSanitizingHTMLSerializer()
|
|
: mSkipLevel(0),
|
|
mAllowedTags(30) // Just some initial buffer size
|
|
{
|
|
mOutputString = nsnull;
|
|
}
|
|
|
|
mozSanitizingHTMLSerializer::~mozSanitizingHTMLSerializer()
|
|
{
|
|
#ifdef DEBUG_BenB
|
|
printf("Output:\n%s\n", NS_LossyConvertUTF16toASCII(*mOutputString).get());
|
|
#endif
|
|
mAllowedTags.Enumerate(ReleaseProperties);
|
|
}
|
|
|
|
//<copy from="xpcom/ds/nsProperties.cpp">
|
|
PRBool
|
|
mozSanitizingHTMLSerializer::ReleaseProperties(nsHashKey* key, void* data,
|
|
void* closure)
|
|
{
|
|
nsIProperties* prop = (nsIProperties*)data;
|
|
NS_IF_RELEASE(prop);
|
|
return PR_TRUE;
|
|
}
|
|
//</copy>
|
|
|
|
NS_IMPL_ISUPPORTS4(mozSanitizingHTMLSerializer,
|
|
nsIContentSerializer,
|
|
nsIContentSink,
|
|
nsIHTMLContentSink,
|
|
mozISanitizingHTMLSerializer)
|
|
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::Init(PRUint32 aFlags, PRUint32 dummy,
|
|
const char* aCharSet, PRBool aIsCopying,
|
|
PRBool aIsWholeDocument)
|
|
{
|
|
NS_ENSURE_TRUE(nsContentUtils::GetParserService(), NS_ERROR_UNEXPECTED);
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::Initialize(nsAString* aOutString,
|
|
PRUint32 aFlags,
|
|
const nsAString& allowedTags)
|
|
{
|
|
nsresult rv = Init(aFlags, 0, nsnull, PR_FALSE, PR_FALSE);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
// XXX This is wrong. It violates XPCOM string ownership rules.
|
|
// We're only getting away with this because instances of this
|
|
// class are restricted to single function scope.
|
|
// (Comment copied from nsPlaintextSerializer)
|
|
mOutputString = aOutString;
|
|
|
|
ParsePrefs(allowedTags);
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
// This is not used within the class, but maybe called from somewhere else?
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::Flush(nsAString& aStr)
|
|
{
|
|
#ifdef DEBUG_BenB
|
|
printf("Flush: -%s-", NS_LossyConvertUTF16toASCII(aStr).get());
|
|
#endif
|
|
Write(aStr);
|
|
return NS_OK;
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
|
|
nsAString& aStr)
|
|
{
|
|
return NS_OK;
|
|
}
|
|
|
|
void
|
|
mozSanitizingHTMLSerializer::Write(const nsAString& aString)
|
|
{
|
|
mOutputString->Append(aString);
|
|
}
|
|
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::IsEnabled(PRInt32 aTag, PRBool* aReturn)
|
|
{
|
|
*aReturn = PR_FALSE;
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* Returns true, if the id represents a container
|
|
*/
|
|
PRBool
|
|
mozSanitizingHTMLSerializer::IsContainer(PRInt32 aId)
|
|
{
|
|
PRBool isContainer = PR_FALSE;
|
|
|
|
nsIParserService* parserService = nsContentUtils::GetParserService();
|
|
if (parserService) {
|
|
parserService->IsContainer(aId, isContainer);
|
|
}
|
|
|
|
return isContainer;
|
|
}
|
|
|
|
|
|
/* XXX I don't really know, what these functions do, but they seem to be
|
|
needed ;-). Mostly copied from nsPlaintextSerializer. */
|
|
/* akk says:
|
|
"I wonder if the sanitizing class could inherit from nsHTMLSerializer,
|
|
so that at least these methods that none of us understand only have to be
|
|
written once?" */
|
|
|
|
// static
|
|
PRInt32
|
|
mozSanitizingHTMLSerializer::GetIdForContent(nsIContent* aContent)
|
|
{
|
|
if (!aContent->IsHTML()) {
|
|
return eHTMLTag_unknown;
|
|
}
|
|
|
|
nsIParserService* parserService = nsContentUtils::GetParserService();
|
|
|
|
return parserService ? parserService->HTMLAtomTagToId(aContent->Tag()) :
|
|
eHTMLTag_unknown;
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::AppendText(nsIDOMText* aText,
|
|
PRInt32 aStartOffset,
|
|
PRInt32 aEndOffset,
|
|
nsAString& aStr)
|
|
{
|
|
nsresult rv = NS_OK;
|
|
|
|
mOutputString = &aStr;
|
|
|
|
nsAutoString linebuffer;
|
|
rv = DoAddLeaf(eHTMLTag_text, linebuffer);
|
|
|
|
return rv;
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::AppendElementStart(nsIDOMElement *aElement,
|
|
nsIDOMElement *aOriginalElement,
|
|
nsAString& aStr)
|
|
{
|
|
NS_ENSURE_ARG(aElement);
|
|
|
|
mContent = do_QueryInterface(aElement);
|
|
NS_ENSURE_TRUE(mContent, NS_ERROR_FAILURE);
|
|
|
|
mOutputString = &aStr;
|
|
|
|
PRInt32 id = GetIdForContent(mContent);
|
|
|
|
PRBool isContainer = IsContainer(id);
|
|
|
|
nsresult rv;
|
|
if (isContainer) {
|
|
rv = DoOpenContainer(id);
|
|
}
|
|
else {
|
|
rv = DoAddLeaf(id, EmptyString());
|
|
}
|
|
|
|
mContent = 0;
|
|
mOutputString = nsnull;
|
|
|
|
return rv;
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::AppendElementEnd(nsIDOMElement *aElement,
|
|
nsAString& aStr)
|
|
{
|
|
NS_ENSURE_ARG(aElement);
|
|
|
|
mContent = do_QueryInterface(aElement);
|
|
NS_ENSURE_TRUE(mContent, NS_ERROR_FAILURE);
|
|
|
|
mOutputString = &aStr;
|
|
|
|
nsresult rv = NS_OK;
|
|
PRInt32 id = GetIdForContent(mContent);
|
|
|
|
PRBool isContainer = IsContainer(id);
|
|
|
|
if (isContainer) {
|
|
rv = DoCloseContainer(id);
|
|
}
|
|
|
|
mContent = 0;
|
|
mOutputString = nsnull;
|
|
|
|
return rv;
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::OpenContainer(const nsIParserNode& aNode)
|
|
{
|
|
PRInt32 type = aNode.GetNodeType();
|
|
|
|
mParserNode = const_cast<nsIParserNode *>(&aNode);
|
|
return DoOpenContainer(type);
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::CloseContainer(const nsHTMLTag aTag)
|
|
{
|
|
return DoCloseContainer(aTag);
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::AddLeaf(const nsIParserNode& aNode)
|
|
{
|
|
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
|
|
const nsAString& text = aNode.GetText();
|
|
|
|
mParserNode = const_cast<nsIParserNode*>(&aNode);
|
|
return DoAddLeaf(type, text);
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::AddDocTypeDecl(const nsIParserNode& aNode)
|
|
{
|
|
return NS_OK;
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::SetDocumentCharset(nsACString& aCharset)
|
|
{
|
|
// No idea, if this works - it isn't invoked by |TestOutput|.
|
|
Write(NS_LITERAL_STRING("\n<meta http-equiv=\"Context-Type\" content=\"text/html; charset=")
|
|
/* Danger: breaking the line within the string literal, like
|
|
"foo"\n"bar", breaks win32! */
|
|
+ nsAdoptingString(escape(NS_ConvertASCIItoUTF16(aCharset)))
|
|
+ NS_LITERAL_STRING("\">\n"));
|
|
return NS_OK;
|
|
}
|
|
|
|
NS_IMETHODIMP
|
|
mozSanitizingHTMLSerializer::OpenHead()
|
|
{
|
|
// XXX We don't have a parser node here, is it okay to ignore this?
|
|
// return OpenContainer(aNode);
|
|
return NS_OK;
|
|
}
|
|
|
|
// Here comes the actual code...
|
|
|
|
nsresult
|
|
mozSanitizingHTMLSerializer::DoOpenContainer(PRInt32 aTag)
|
|
{
|
|
eHTMLTags type = (eHTMLTags)aTag;
|
|
|
|
if (mSkipLevel == 0 && IsAllowedTag(type))
|
|
{
|
|
nsIParserService* parserService = nsContentUtils::GetParserService();
|
|
if (!parserService)
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
const PRUnichar* tag_name = parserService->HTMLIdToStringTag(aTag);
|
|
NS_ENSURE_TRUE(tag_name, NS_ERROR_INVALID_POINTER);
|
|
|
|
Write(NS_LITERAL_STRING("<") + nsDependentString(tag_name));
|
|
|
|
// Attributes
|
|
if (mParserNode)
|
|
{
|
|
PRInt32 count = mParserNode->GetAttributeCount();
|
|
for (PRInt32 i = 0; i < count; i++)
|
|
{
|
|
const nsAString& key = mParserNode->GetKeyAt(i);
|
|
if(IsAllowedAttribute(type, key))
|
|
{
|
|
// Ensure basic sanity of value
|
|
nsAutoString value(mParserNode->GetValueAt(i));
|
|
// SanitizeAttrValue() modifies |value|
|
|
if (NS_SUCCEEDED(SanitizeAttrValue(type, key, value)))
|
|
{
|
|
// Write out
|
|
Write(NS_LITERAL_STRING(" "));
|
|
Write(key); // I get an infinive loop with | + key + | !!!
|
|
Write(NS_LITERAL_STRING("=\"") + value + NS_LITERAL_STRING("\""));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Write(NS_LITERAL_STRING(">"));
|
|
}
|
|
else if (mSkipLevel != 0 || type == eHTMLTag_script || type == eHTMLTag_style)
|
|
++mSkipLevel;
|
|
else
|
|
Write(NS_LITERAL_STRING(" "));
|
|
|
|
return NS_OK;
|
|
|
|
}
|
|
|
|
nsresult
|
|
mozSanitizingHTMLSerializer::DoCloseContainer(PRInt32 aTag)
|
|
{
|
|
eHTMLTags type = (eHTMLTags)aTag;
|
|
|
|
if (mSkipLevel == 0 && IsAllowedTag(type)) {
|
|
nsIParserService* parserService = nsContentUtils::GetParserService();
|
|
if (!parserService)
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
const PRUnichar* tag_name = parserService->HTMLIdToStringTag(aTag);
|
|
NS_ENSURE_TRUE(tag_name, NS_ERROR_INVALID_POINTER);
|
|
|
|
Write(NS_LITERAL_STRING("</") + nsDependentString(tag_name)
|
|
+ NS_LITERAL_STRING(">"));
|
|
}
|
|
else if (mSkipLevel == 0)
|
|
Write(NS_LITERAL_STRING(" "));
|
|
else
|
|
--mSkipLevel;
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult
|
|
mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
|
|
const nsAString& aText)
|
|
{
|
|
if (mSkipLevel != 0)
|
|
return NS_OK;
|
|
|
|
eHTMLTags type = (eHTMLTags)aTag;
|
|
|
|
nsresult rv = NS_OK;
|
|
|
|
if (type == eHTMLTag_whitespace ||
|
|
type == eHTMLTag_newline)
|
|
{
|
|
Write(aText); // sure to be safe?
|
|
}
|
|
else if (type == eHTMLTag_text)
|
|
{
|
|
nsAutoString text(aText);
|
|
if(NS_SUCCEEDED(SanitizeTextNode(text)))
|
|
Write(text);
|
|
else
|
|
Write(NS_LITERAL_STRING(TEXT_REMOVED)); // Does not happen (yet)
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
}
|
|
else if (type == eHTMLTag_entity)
|
|
{
|
|
Write(NS_LITERAL_STRING("&"));
|
|
Write(aText); // sure to be safe?
|
|
// using + operator here might give an infinitive loop, see above.
|
|
// not adding ";", because Gecko delivers that as part of |aText| (freaky)
|
|
}
|
|
else
|
|
{
|
|
DoOpenContainer(type);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
|
|
/**
|
|
Similar to SanitizeAttrValue.
|
|
*/
|
|
nsresult
|
|
mozSanitizingHTMLSerializer::SanitizeTextNode(nsString& aText /*inout*/)
|
|
{
|
|
aText.Adopt(escape(aText));
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
Ensures basic sanity of attribute value.
|
|
This function also (tries to :-( ) makes sure, that no
|
|
unwanted / dangerous URLs appear in the document
|
|
(like javascript: and data:).
|
|
|
|
Pass the value as |aValue| arg. It will be modified in-place.
|
|
|
|
If the value is not allowed at all, we return with NS_ERROR_ILLEGAL_VALUE.
|
|
In that case, do not use the |aValue|, but output nothing.
|
|
*/
|
|
nsresult
|
|
mozSanitizingHTMLSerializer::SanitizeAttrValue(nsHTMLTag aTag,
|
|
const nsAString& anAttrName,
|
|
nsString& aValue /*inout*/)
|
|
{
|
|
/* First, cut the attribute to 1000 chars.
|
|
Attributes with values longer than 1000 chars seem bogus,
|
|
considering that we don't support any JS. The longest attributes
|
|
I can think of are URLs, and URLs with 1000 chars are likely to be
|
|
bogus, too. */
|
|
aValue = Substring(aValue, 0, 1000);
|
|
//aValue.Truncate(1000); //-- this cuts half of the document !!?!!
|
|
|
|
aValue.Adopt(escape(aValue));
|
|
|
|
/* Check some known bad stuff. Add more!
|
|
I don't care too much, if it happens to trigger in some innocent cases
|
|
(like <img alt="Statistical data: Mortage rates and newspapers">) -
|
|
security first. */
|
|
if (aValue.Find("javascript:") != kNotFound ||
|
|
aValue.Find("data:") != kNotFound ||
|
|
aValue.Find("base64") != kNotFound)
|
|
return NS_ERROR_ILLEGAL_VALUE;
|
|
|
|
// Check img src scheme
|
|
if (aTag == eHTMLTag_img &&
|
|
anAttrName.LowerCaseEqualsLiteral("src"))
|
|
{
|
|
nsresult rv;
|
|
nsCOMPtr<nsIIOService> ioService = do_GetIOService(&rv);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
nsCAutoString scheme;
|
|
rv = ioService->ExtractScheme(NS_LossyConvertUTF16toASCII(aValue), scheme);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
if (!scheme.Equals("cid", nsCaseInsensitiveCStringComparator()))
|
|
return NS_ERROR_ILLEGAL_VALUE;
|
|
}
|
|
|
|
#ifdef DEBUG_BenB
|
|
printf("attribute value for %s: -%s-\n",
|
|
NS_LossyConvertUTF16toASCII(anAttrName).get(),
|
|
NS_LossyConvertUTF16toASCII(aValue).get());
|
|
#endif
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
*/
|
|
PRBool
|
|
mozSanitizingHTMLSerializer::IsAllowedTag(nsHTMLTag aTag)
|
|
{
|
|
|
|
nsPRUint32Key tag_key(aTag);
|
|
#ifdef DEBUG_BenB
|
|
printf("IsAllowedTag %d: %s\n",
|
|
aTag,
|
|
mAllowedTags.Exists(&tag_key)?"yes":"no");
|
|
#endif
|
|
return mAllowedTags.Exists(&tag_key);
|
|
}
|
|
|
|
|
|
/**
|
|
*/
|
|
PRBool
|
|
mozSanitizingHTMLSerializer::IsAllowedAttribute(nsHTMLTag aTag,
|
|
const nsAString& anAttributeName)
|
|
{
|
|
#ifdef DEBUG_BenB
|
|
printf("IsAllowedAttribute %d, -%s-\n",
|
|
aTag,
|
|
NS_LossyConvertUTF16toASCII(anAttributeName).get());
|
|
#endif
|
|
nsresult rv;
|
|
|
|
nsPRUint32Key tag_key(aTag);
|
|
nsIProperties* attr_bag = (nsIProperties*)mAllowedTags.Get(&tag_key);
|
|
NS_ENSURE_TRUE(attr_bag, PR_FALSE);
|
|
|
|
PRBool allowed;
|
|
nsAutoString attr(anAttributeName);
|
|
ToLowerCase(attr);
|
|
rv = attr_bag->Has(NS_LossyConvertUTF16toASCII(attr).get(),
|
|
&allowed);
|
|
if (NS_FAILED(rv))
|
|
return PR_FALSE;
|
|
|
|
#ifdef DEBUG_BenB
|
|
printf(" Allowed: %s\n", allowed?"yes":"no");
|
|
#endif
|
|
return allowed;
|
|
}
|
|
|
|
|
|
/**
|
|
aPref is a long string, which holds an exhaustive list of allowed tags
|
|
and attributes. All other tags and attributes will be removed.
|
|
|
|
aPref has the format
|
|
"html head body ul ol li a(href,name,title) img(src,alt,title) #text"
|
|
i.e.
|
|
- tags are separated by whitespace
|
|
- the attribute list follows the tag directly in brackets
|
|
- the attributes are separated by commas.
|
|
|
|
There is no way to express further restrictions, like "no text inside the
|
|
<head> element". This is so to considerably reduce the complexity of the
|
|
pref and this implementation.
|
|
|
|
Update: Akk told me that I might be able to use DTD classes. Later(TM)...
|
|
*/
|
|
nsresult
|
|
mozSanitizingHTMLSerializer::ParsePrefs(const nsAString& aPref)
|
|
{
|
|
char* pref = ToNewCString(aPref);
|
|
char* tags_lasts;
|
|
for (char* iTag = PL_strtok_r(pref, " ", &tags_lasts);
|
|
iTag;
|
|
iTag = PL_strtok_r(NULL, " ", &tags_lasts))
|
|
{
|
|
ParseTagPref(nsCAutoString(iTag));
|
|
}
|
|
delete[] pref;
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
Parses e.g. "a(href,title)" (but not several tags at once).
|
|
*/
|
|
nsresult
|
|
mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
|
|
{
|
|
nsIParserService* parserService = nsContentUtils::GetParserService();
|
|
if (!parserService)
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
|
|
// Parsing tag
|
|
PRInt32 bracket = tagpref.FindChar('(');
|
|
if (bracket == 0)
|
|
{
|
|
printf(" malformed pref: %s\n", tagpref.get());
|
|
return NS_ERROR_CANNOT_CONVERT_DATA;
|
|
}
|
|
|
|
nsAutoString tag;
|
|
CopyUTF8toUTF16(StringHead(tagpref, bracket), tag);
|
|
|
|
// Create key
|
|
PRInt32 tag_id = parserService->HTMLStringTagToId(tag);
|
|
if (tag_id == eHTMLTag_userdefined)
|
|
{
|
|
printf(" unknown tag <%s>, won't add.\n",
|
|
NS_ConvertUTF16toUTF8(tag).get());
|
|
return NS_ERROR_CANNOT_CONVERT_DATA;
|
|
}
|
|
nsPRUint32Key tag_key(tag_id);
|
|
|
|
if (mAllowedTags.Exists(&tag_key))
|
|
{
|
|
printf(" duplicate tag: %s\n", NS_ConvertUTF16toUTF8(tag).get());
|
|
return NS_ERROR_CANNOT_CONVERT_DATA;
|
|
}
|
|
if (bracket == kNotFound)
|
|
/* There are no attributes in the pref. So, allow none; only the tag
|
|
itself */
|
|
{
|
|
mAllowedTags.Put(&tag_key, 0);
|
|
}
|
|
else
|
|
{
|
|
// Attributes
|
|
|
|
// where is the macro for non-fatal errors in opt builds?
|
|
if(tagpref[tagpref.Length() - 1] != ')' ||
|
|
tagpref.Length() < PRUint32(bracket) + 3)
|
|
{
|
|
printf(" malformed pref: %s\n", tagpref.get());
|
|
return NS_ERROR_CANNOT_CONVERT_DATA;
|
|
}
|
|
nsCOMPtr<nsIProperties> attr_bag =
|
|
do_CreateInstance(NS_PROPERTIES_CONTRACTID);
|
|
NS_ENSURE_TRUE(attr_bag, NS_ERROR_INVALID_POINTER);
|
|
nsCAutoString attrList;
|
|
attrList.Append(Substring(tagpref,
|
|
bracket + 1,
|
|
tagpref.Length() - 2 - bracket));
|
|
char* attrs_lasts;
|
|
for (char* iAttr = PL_strtok_r(attrList.BeginWriting(),
|
|
",", &attrs_lasts);
|
|
iAttr;
|
|
iAttr = PL_strtok_r(NULL, ",", &attrs_lasts))
|
|
{
|
|
attr_bag->Set(iAttr, 0);
|
|
}
|
|
|
|
nsIProperties* attr_bag_raw = attr_bag;
|
|
NS_ADDREF(attr_bag_raw);
|
|
mAllowedTags.Put(&tag_key, attr_bag_raw);
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
/*
|
|
might be useful:
|
|
htmlparser/public/nsHTMLTokens.h for tag categories
|
|
*/
|