Bug 539887 - Make document.written meta charset affect the charset of document.opened documents in the HTML5 parser. r=bnewman.

--HG--
extra : rebase_source : 71434258f5a6f330e1e0583826fbb415fb110b0b
This commit is contained in:
Henri Sivonen 2010-03-09 14:39:32 +02:00
parent e730b4c834
commit 13fbcec474
28 changed files with 169 additions and 28 deletions

View File

@ -0,0 +1,50 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is HTML Parser C++ Translator code.
*
* The Initial Developer of the Original Code is
* Mozilla Foundation.
* Portions created by the Initial Developer are Copyright (C) 2010
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Henri Sivonen <hsivonen@iki.fi>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsAHtml5EncodingDeclarationHandler_h_
#define nsAHtml5EncodingDeclarationHandler_h_
/**
 * Abstract callback interface through which an encoding declaration is
 * reported. The tokenizer stores a pointer of this type, so any class that
 * implements internalEncodingDeclaration() can be installed as the handler.
 */
class nsAHtml5EncodingDeclarationHandler
{
  public:

    /**
     * Reports an encoding declaration to the handler.
     *
     * @param aEncoding the declared encoding name
     */
    virtual void internalEncodingDeclaration(nsString* aEncoding) = 0;

    /**
     * Virtual so that implementations can be destroyed through a pointer
     * to this interface.
     */
    virtual ~nsAHtml5EncodingDeclarationHandler() {}
};
#endif /* nsAHtml5EncodingDeclarationHandler_h_ */

View File

@ -43,6 +43,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -44,6 +44,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;

View File

@ -43,6 +43,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -44,6 +44,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;

View File

@ -44,6 +44,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -45,6 +45,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;

View File

@ -44,6 +44,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -45,6 +45,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;

View File

@ -60,6 +60,9 @@
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5Parser.h"
#include "nsHtml5AtomTable.h"
#include "nsICharsetAlias.h"
static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
NS_INTERFACE_TABLE_HEAD(nsHtml5Parser)
NS_INTERFACE_TABLE2(nsHtml5Parser, nsIParser, nsISupportsWeakReference)
@ -139,7 +142,8 @@ nsHtml5Parser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSo
"Document charset set too late.");
NS_PRECONDITION(mStreamParser, "Tried to set charset on a script-only parser.");
mStreamParser->SetDocumentCharset(aCharset, aCharsetSource);
mExecutor->SetDocumentCharset((nsACString&)aCharset);
mExecutor->SetDocumentCharsetAndSource((nsACString&)aCharset, aCharsetSource);
mCharsetSource = aCharsetSource; // used for the document.open() case only
}
NS_IMETHODIMP_(void)
@ -266,7 +270,9 @@ nsHtml5Parser::Parse(const nsAString& aSourceBuffer,
if (!mExecutor->HasStarted()) {
NS_ASSERTION(!mStreamParser,
"Had stream parser but document.write started life cycle.");
// This is the first document.write() on a document.open()ed document
mExecutor->SetParser(this);
mTokenizer->setEncodingDeclarationHandler(this);
mTreeBuilder->setScriptingEnabled(mExecutor->IsScriptEnabled());
mTokenizer->start();
mExecutor->Start();
@ -688,3 +694,35 @@ nsHtml5Parser::ContinueAfterFailedCharsetSwitch()
"Tried to continue after failed charset switch without a stream parser");
mStreamParser->ContinueAfterFailedCharsetSwitch();
}
/**
 * Handles an encoding declaration reported while parsing script-written
 * content. The declared name is converted to UTF-8, resolved to its
 * preferred form through the charset alias service, and then handed to the
 * tree builder together with kCharsetFromMetaTag as the charset source.
 *
 * Note: This handler is only installed when parsing a document.open()ed doc.
 * See bug 539887 and bug 255820.
 *
 * @param aEncoding the declared encoding name (UTF-16)
 */
void
nsHtml5Parser::internalEncodingDeclaration(nsString* aEncoding)
{
  // A source at or above kCharsetFromMetaTag corresponds to "confident" in
  // the HTML5 spec; in that case the declaration is ignored.
  if (mCharsetSource >= kCharsetFromMetaTag) {
    return;
  }

  nsresult status = NS_OK;
  nsCOMPtr<nsICharsetAlias> aliasService(do_GetService(kCharsetAliasCID, &status));
  if (NS_FAILED(status)) {
    NS_NOTREACHED("Charset alias service not available.");
    return;
  }

  nsCAutoString declaredName;
  CopyUTF16toUTF8(*aEncoding, declaredName);
  // XXX check HTML5 non-IANA aliases here

  nsCAutoString preferredName;
  status = aliasService->GetPreferred(declaredName, preferredName);
  if (NS_FAILED(status)) {
    // the encoding name is bogus
    return;
  }

  // Remember the new source so that later declarations are ignored by the
  // check at the top of this method.
  mCharsetSource = kCharsetFromMetaTag;
  mTreeBuilder->SetDocumentCharset(preferredName, kCharsetFromMetaTag);
}

View File

@ -62,9 +62,11 @@
#include "nsHtml5StreamParser.h"
#include "nsHtml5AtomTable.h"
#include "nsWeakReference.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5Parser : public nsIParser,
public nsSupportsWeakReference
public nsSupportsWeakReference,
public nsAHtml5EncodingDeclarationHandler
{
public:
NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW
@ -277,6 +279,12 @@ class nsHtml5Parser : public nsIParser,
/* End nsIParser */
// nsAHtml5EncodingDeclarationHandler
/**
* Tree builder uses this to report a late <meta charset>
*/
virtual void internalEncodingDeclaration(nsString* aEncoding);
// Not from an external interface
// Non-inherited methods
@ -319,6 +327,13 @@ class nsHtml5Parser : public nsIParser,
// State variables
/**
* The charset source. This variable is used for script-created parsers
* only. When parsing from the stream, this variable can have a bogus
* value.
*/
PRInt32 mCharsetSource;
/**
* Whether the last character tokenized was a carriage return (for CRLF)
*/

View File

@ -44,6 +44,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;

View File

@ -44,6 +44,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -45,6 +45,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;

View File

@ -43,6 +43,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -44,6 +44,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;

View File

@ -40,6 +40,7 @@
#include "nsHtml5StreamParser.h"
#include "nsICharsetConverterManager.h"
#include "nsICharsetAlias.h"
#include "nsServiceManagerUtils.h"
#include "nsEncoderDecoderUtils.h"
#include "nsContentUtils.h"
@ -214,7 +215,7 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
if (aConf == eBestAnswer || aConf == eSureAnswer) {
mCharset.Assign(aCharset);
mCharsetSource = kCharsetFromAutoDetection;
mTreeBuilder->SetDocumentCharset(mCharset);
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
}
return NS_OK;
}
@ -233,7 +234,7 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
mCharset.Assign("windows-1252"); // lower case is the raw form
mCharsetSource = kCharsetFromWeakDocTypeDefault;
rv = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
mTreeBuilder->SetDocumentCharset(mCharset);
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
}
NS_ENSURE_SUCCESS(rv, rv);
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
@ -271,7 +272,7 @@ nsHtml5StreamParser::SetupDecodingFromBom(const char* aCharsetName, const char*
NS_ENSURE_SUCCESS(rv, rv);
mCharset.Assign(aCharsetName);
mCharsetSource = kCharsetFromByteOrderMark;
mTreeBuilder->SetDocumentCharset(mCharset);
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mSniffingBuffer = nsnull;
mMetaScanner = nsnull;
mBomState = BOM_SNIFFING_OVER;
@ -309,7 +310,7 @@ nsHtml5StreamParser::FinalizeSniffing(const PRUint8* aFromSegment, // can be nul
// Hopefully this case is never needed, but dealing with it anyway
mCharset.Assign("windows-1252");
mCharsetSource = kCharsetFromWeakDocTypeDefault;
mTreeBuilder->SetDocumentCharset(mCharset);
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
}
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
@ -404,7 +405,7 @@ nsHtml5StreamParser::SniffStreamBytes(const PRUint8* aFromSegment,
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
// meta scan successful
mCharsetSource = kCharsetFromMetaPrescan;
mTreeBuilder->SetDocumentCharset(mCharset);
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mMetaScanner = nsnull;
return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
@ -417,7 +418,7 @@ nsHtml5StreamParser::SniffStreamBytes(const PRUint8* aFromSegment,
if (mUnicodeDecoder) {
// meta scan successful
mCharsetSource = kCharsetFromMetaPrescan;
mTreeBuilder->SetDocumentCharset(mCharset);
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mMetaScanner = nsnull;
return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}

View File

@ -48,12 +48,12 @@
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsIInputStream.h"
#include "nsICharsetAlias.h"
#include "mozilla/Mutex.h"
#include "nsHtml5AtomTable.h"
#include "nsHtml5Speculation.h"
#include "nsITimer.h"
#include "nsICharsetDetector.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5Parser;
@ -103,7 +103,9 @@ enum eHtml5StreamState {
};
class nsHtml5StreamParser : public nsIStreamListener,
public nsICharsetDetectionObserver {
public nsICharsetDetectionObserver,
public nsAHtml5EncodingDeclarationHandler
{
friend class nsHtml5RequestStopper;
friend class nsHtml5DataAvailable;
@ -133,11 +135,11 @@ class nsHtml5StreamParser : public nsIStreamListener,
*/
NS_IMETHOD Notify(const char* aCharset, nsDetectionConfident aConf);
// EncodingDeclarationHandler
// nsAHtml5EncodingDeclarationHandler
/**
* Tree builder uses this to report a late <meta charset>
*/
void internalEncodingDeclaration(nsString* aEncoding);
virtual void internalEncodingDeclaration(nsString* aEncoding);
// Not from an external interface

View File

@ -46,6 +46,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
@ -3976,7 +3977,7 @@ nsHtml5Tokenizer::initializeWithoutStarting()
}
void
nsHtml5Tokenizer::setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler)
nsHtml5Tokenizer::setEncodingDeclarationHandler(nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler)
{
this->encodingDeclarationHandler = encodingDeclarationHandler;
}

View File

@ -47,6 +47,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
@ -85,7 +86,7 @@ class nsHtml5Tokenizer
static jArray<PRUnichar,PRInt32> NOFRAMES_ARR;
protected:
nsHtml5TreeBuilder* tokenHandler;
nsHtml5StreamParser* encodingDeclarationHandler;
nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler;
PRBool lastCR;
PRInt32 stateSave;
private:
@ -265,7 +266,7 @@ class nsHtml5Tokenizer
void resetToDataState();
void loadState(nsHtml5Tokenizer* other);
void initializeWithoutStarting();
void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
void setEncodingDeclarationHandler(nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler);
static void initializeStatics();
static void releaseStatics();
};

View File

@ -646,11 +646,12 @@ nsHtml5TreeBuilder::Flush()
}
void
nsHtml5TreeBuilder::SetDocumentCharset(nsACString& aCharset)
nsHtml5TreeBuilder::SetDocumentCharset(nsACString& aCharset,
PRInt32 aCharsetSource)
{
nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
NS_ASSERTION(treeOp, "Tree op allocation failed.");
treeOp->Init(eTreeOpSetDocumentCharset, aCharset);
treeOp->Init(eTreeOpSetDocumentCharset, aCharset, aCharsetSource);
}
void

View File

@ -85,7 +85,7 @@
PRBool Flush();
void SetDocumentCharset(nsACString& aCharset);
void SetDocumentCharset(nsACString& aCharset, PRInt32 aCharsetSource);
void StreamEnded();

View File

@ -201,9 +201,13 @@ nsHtml5TreeOpExecutor::FlushPendingNotifications(mozFlushType aType)
{
}
NS_IMETHODIMP
nsHtml5TreeOpExecutor::SetDocumentCharset(nsACString& aCharset)
void
nsHtml5TreeOpExecutor::SetDocumentCharsetAndSource(nsACString& aCharset, PRInt32 aCharsetSource)
{
if (mDocument) {
mDocument->SetDocumentCharacterSetSource(aCharsetSource);
mDocument->SetDocumentCharacterSet(aCharset);
}
if (mDocShell) {
// the following logic to get muCV is copied from
// nsHTMLDocument::StartDocumentLoad
@ -220,7 +224,9 @@ nsHtml5TreeOpExecutor::SetDocumentCharset(nsACString& aCharset)
// parent and parentContentViewer
nsCOMPtr<nsIDocShellTreeItem> docShellAsItem =
do_QueryInterface(mDocShell);
NS_ENSURE_TRUE(docShellAsItem, NS_ERROR_FAILURE);
if (!docShellAsItem) {
return;
}
nsCOMPtr<nsIDocShellTreeItem> parentAsItem;
docShellAsItem->GetSameTypeParent(getter_AddRefs(parentAsItem));
nsCOMPtr<nsIDocShell> parent(do_QueryInterface(parentAsItem));
@ -237,10 +243,6 @@ nsHtml5TreeOpExecutor::SetDocumentCharset(nsACString& aCharset)
muCV->SetPrevDocCharacterSet(aCharset);
}
}
if (mDocument) {
mDocument->SetDocumentCharacterSet(aCharset);
}
return NS_OK;
}
nsISupports*

View File

@ -166,9 +166,12 @@ class nsHtml5TreeOpExecutor : public nsContentSink,
virtual void FlushPendingNotifications(mozFlushType aType);
/**
* Sets mCharset
* Don't call. For interface compat only.
*/
NS_IMETHOD SetDocumentCharset(nsACString& aCharset);
// Stub for the single-argument interface method. It is not expected to be
// called: it flags the call with NS_NOTREACHED and returns
// NS_ERROR_NOT_IMPLEMENTED without touching any state.
NS_IMETHOD SetDocumentCharset(nsACString& aCharset) {
NS_NOTREACHED("No one should call this.");
return NS_ERROR_NOT_IMPLEMENTED;
}
/**
* Returns the document.
@ -209,6 +212,10 @@ class nsHtml5TreeOpExecutor : public nsContentSink,
mNodeInfoManager = aManager;
}
// Not from interface
void SetDocumentCharsetAndSource(nsACString& aCharset, PRInt32 aCharsetSource);
/**
 * Stores the given stream parser pointer in mStreamParser.
 *
 * @param aStreamParser the stream parser to remember
 */
void SetStreamParser(nsHtml5StreamParser* aStreamParser) {
mStreamParser = aStreamParser;
}

View File

@ -579,8 +579,9 @@ nsHtml5TreeOperation::Perform(nsHtml5TreeOpExecutor* aBuilder,
}
case eTreeOpSetDocumentCharset: {
char* str = mOne.charPtr;
PRInt32 charsetSource = mInt;
nsDependentCString dependentString(str);
aBuilder->SetDocumentCharset(dependentString);
aBuilder->SetDocumentCharsetAndSource(dependentString, charsetSource);
return rv;
}
case eTreeOpNeedsCharsetSwitchTo: {

View File

@ -138,6 +138,13 @@ class nsHtml5TreeOperation {
mOne.node = aNode;
mTwo.node = aParent;
}
/**
 * Initializes the operation from an opcode, a string and an integer.
 *
 * @param aOpCode the operation code, forwarded to Init(aOpCode, aString)
 * @param aString the string payload, forwarded to Init(aOpCode, aString)
 * @param aInt32 the integer payload, stored in mInt
 */
inline void Init(eHtml5TreeOperation aOpCode,
const nsACString& aString,
PRInt32 aInt32) {
// Delegate the opcode/string setup to the two-argument overload first,
// then record the integer payload.
Init(aOpCode, aString);
mInt = aInt32;
}
inline void Init(eHtml5TreeOperation aOpCode,
nsIContent** aNode,

View File

@ -43,6 +43,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -44,6 +44,7 @@
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;