/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set sw=2 ts=2 et tw=79: */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Pierre Phaneuf * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. 
* * ***** END LICENSE BLOCK ***** */ #include "nsIAtom.h" #include "nsParser.h" #include "nsString.h" #include "nsCRT.h" #include "nsScanner.h" #include "plstr.h" #include "nsIStringStream.h" #include "nsIChannel.h" #include "nsICachingChannel.h" #include "nsICacheEntryDescriptor.h" #include "nsICharsetAlias.h" #include "nsICharsetConverterManager.h" #include "nsIInputStream.h" #include "CNavDTD.h" #include "prenv.h" #include "prlock.h" #include "prcvar.h" #include "nsAutoLock.h" #include "nsParserCIID.h" #include "nsReadableUtils.h" #include "nsCOMPtr.h" #include "nsExpatDriver.h" #include "nsIServiceManager.h" #include "nsICategoryManager.h" #include "nsISupportsPrimitives.h" #include "nsIFragmentContentSink.h" #include "nsStreamUtils.h" #include "nsHTMLTokenizer.h" #include "nsIDocument.h" #include "nsNetUtil.h" #include "nsScriptLoader.h" #include "nsDataHashtable.h" #include "nsIThreadPool.h" #include "nsXPCOMCIDInternal.h" #include "nsICSSStyleSheet.h" #include "nsICSSLoaderObserver.h" #include "nsICSSLoader.h" #ifdef MOZ_VIEW_SOURCE #include "nsViewSourceHTML.h" #endif #define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002 #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008 #define NS_PARSER_FLAG_CAN_INTERRUPT 0x00000010 #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040 static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID); static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID); //------------------------------------------------------------------- nsCOMArray *nsParser::sParserDataListeners; //-------------- Begin ParseContinue Event Definition ------------------------ /* The parser can be explicitly interrupted by passing a return value of NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause the parser to stop processing and allow the application to return to the event loop. 
The data which was left at the time of interruption will be processed the
next time OnDataAvailable is called. If the parser has received its final
chunk of data then OnDataAvailable will no longer be called by the networking
module, so the parser will schedule an nsParserContinueEvent which will call
the parser to process the remaining data after returning to the event loop.
If the parser is interrupted while processing the remaining data it will
schedule another nsParserContinueEvent. The processing of data followed by
scheduling of the continue events will proceed until either:

  1) All of the remaining data can be processed without interrupting
  2) The parser has been cancelled.

This capability is currently used in CNavDTD and nsHTMLContentSink. The
nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
processed and when each token is processed. The nsHTMLContentSink records the
time when the chunk has started processing and will return
NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
threshold called max tokenizing processing time. This allows the content sink
to limit how much data is processed in a single chunk which in turn gates how
much time is spent away from the event loop. Processing smaller chunks of data
also reduces the time spent in subsequent reflows.

This capability is most apparent when loading large documents. If the maximum
token processing time is set small enough the application will remain
responsive during document load.

A side-effect of this capability is that document load is not complete when
the last chunk of data is passed to OnDataAvailable since the parser may have
been interrupted when the last chunk of data arrived. The document is complete
when all of the document has been tokenized and there aren't any pending
nsParserContinueEvents. This can cause problems if the application assumes
that it can monitor the load requests to determine when the document load has
been completed.
This is what happens in Mozilla. The document is considered completely loaded when all of the load requests have been satisfied. To delay the document load until all of the parsing has been completed the nsHTMLContentSink adds a dummy parser load request which is not removed until the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call DidBuildModel until the final chunk of data has been passed to the parser through the OnDataAvailable and there aren't any pending nsParserContineEvents. Currently the parser is ignores requests to be interrupted during the processing of script. This is because a document.write followed by JavaScript calls to manipulate the DOM may fail if the parser was interrupted during the document.write. For more details @see bugzilla bug 76722 */ class nsParserContinueEvent : public nsRunnable { public: nsRefPtr mParser; nsParserContinueEvent(nsParser* aParser) : mParser(aParser) {} NS_IMETHOD Run() { mParser->HandleParserContinueEvent(this); return NS_OK; } }; //-------------- End ParseContinue Event Definition ------------------------ template class Holder { public: typedef void (*Reaper)(Type *); Holder(Reaper aReaper) : mHoldee(nsnull), mReaper(aReaper) { } ~Holder() { if (mHoldee) { mReaper(mHoldee); } } Type *get() { return mHoldee; } const Holder &operator =(Type *aHoldee) { if (mHoldee && aHoldee != mHoldee) { mReaper(mHoldee); } mHoldee = aHoldee; return *this; } private: Type *mHoldee; Reaper mReaper; }; class nsSpeculativeScriptThread : public nsIRunnable { public: nsSpeculativeScriptThread() : mLock(nsAutoLock::DestroyLock), mCVar(PR_DestroyCondVar), mKeepParsing(PR_FALSE), mCurrentlyParsing(PR_FALSE), mNumURIs(0), mNumConsumed(0), mContext(nsnull), mTerminated(PR_FALSE) { } ~nsSpeculativeScriptThread() { NS_ASSERTION(NS_IsMainThread() || !mDocument, "Destroying the document on the wrong thread"); } NS_DECL_ISUPPORTS NS_DECL_NSIRUNNABLE nsresult StartParsing(nsParser *aParser); void StopParsing(PRBool 
aFromDocWrite); enum PrefetchType { NONE, SCRIPT, STYLESHEET, IMAGE }; struct PrefetchEntry { PrefetchType type; nsString uri; nsString charset; nsString elementType; }; nsIDocument *GetDocument() { NS_ASSERTION(NS_IsMainThread(), "Potential threadsafety hazard"); return mDocument; } PRBool Parsing() { return mCurrentlyParsing; } CParserContext *Context() { return mContext; } typedef nsDataHashtable PreloadedType; PreloadedType& GetPreloadedURIs() { return mPreloadedURIs; } void Terminate() { mTerminated = PR_TRUE; StopParsing(PR_FALSE); } PRBool Terminated() { return mTerminated; } private: void ProcessToken(CToken *aToken); void AddToPrefetchList(const nsAString &src, const nsAString &charset, const nsAString &elementType, PrefetchType type); // These members are only accessed on the speculatively parsing thread. nsTokenAllocator mTokenAllocator; // The following members are shared across the main thread and the // speculatively parsing thread. Holder mLock; Holder mCVar; volatile PRBool mKeepParsing; volatile PRBool mCurrentlyParsing; nsRefPtr mTokenizer; nsAutoPtr mScanner; enum { kBatchPrefetchURIs = 5 }; nsAutoTArray mURIs; PRUint16 mNumURIs; // Number of characters consumed by the last speculative parse. PRUint32 mNumConsumed; // These members are only accessed on the main thread. 
nsCOMPtr mDocument; CParserContext *mContext; PreloadedType mPreloadedURIs; PRBool mTerminated; }; /** * Used if we need to pass an nsICSSLoaderObserver as parameter, * but don't really need its services */ class nsDummyCSSLoaderObserver : public nsICSSLoaderObserver { public: NS_IMETHOD StyleSheetLoaded(nsICSSStyleSheet* aSheet, PRBool aWasAlternate, nsresult aStatus) { return NS_OK; } NS_DECL_ISUPPORTS }; NS_IMPL_ISUPPORTS1(nsDummyCSSLoaderObserver, nsICSSLoaderObserver) class nsPreloadURIs : public nsIRunnable { public: nsPreloadURIs(nsAutoTArray &aURIs, nsSpeculativeScriptThread *aScriptThread) : mURIs(aURIs), mScriptThread(aScriptThread) { } NS_DECL_ISUPPORTS NS_DECL_NSIRUNNABLE static void PreloadURIs(const nsAutoTArray &aURIs, nsSpeculativeScriptThread *aScriptThread); private: nsAutoTArray mURIs; nsRefPtr mScriptThread; }; NS_IMPL_THREADSAFE_ISUPPORTS1(nsPreloadURIs, nsIRunnable) NS_IMETHODIMP nsPreloadURIs::Run() { PreloadURIs(mURIs, mScriptThread); return NS_OK; } void nsPreloadURIs::PreloadURIs(const nsAutoTArray &aURIs, nsSpeculativeScriptThread *aScriptThread) { NS_ASSERTION(NS_IsMainThread(), "Touching non-threadsafe objects off thread"); if (aScriptThread->Terminated()) { return; } nsIDocument *doc = aScriptThread->GetDocument(); NS_ASSERTION(doc, "We shouldn't have started preloading without a document"); // Note: Per the code in the HTML content sink, we should be keeping track // of each as it comes. However, because we do our speculative // parsing off the main thread, this is hard to emulate. For now, just load // the URIs using the document's base URI at the potential cost of being // wrong and having to re-load a given relative URI later. 
nsIURI *base = doc->GetBaseURI(); const nsCString &charset = doc->GetDocumentCharacterSet(); nsSpeculativeScriptThread::PreloadedType &alreadyPreloaded = aScriptThread->GetPreloadedURIs(); for (PRUint32 i = 0, e = aURIs.Length(); i < e; ++i) { const nsSpeculativeScriptThread::PrefetchEntry &pe = aURIs[i]; nsCOMPtr uri; nsresult rv = NS_NewURI(getter_AddRefs(uri), pe.uri, charset.get(), base); if (NS_FAILED(rv)) { NS_WARNING("Failed to create a URI"); continue; } nsCAutoString spec; uri->GetSpec(spec); PRBool answer; if (alreadyPreloaded.Get(spec, &answer)) { // Already preloaded. Don't preload again. continue; } alreadyPreloaded.Put(spec, PR_TRUE); switch (pe.type) { case nsSpeculativeScriptThread::SCRIPT: doc->ScriptLoader()->PreloadURI(uri, pe.charset, pe.elementType); break; case nsSpeculativeScriptThread::IMAGE: doc->PreLoadImage(uri); break; case nsSpeculativeScriptThread::STYLESHEET: nsCOMPtr obs = new nsDummyCSSLoaderObserver(); doc->CSSLoader()->LoadSheet(uri, doc->NodePrincipal(), obs); break; } } } NS_IMPL_THREADSAFE_ISUPPORTS1(nsSpeculativeScriptThread, nsIRunnable) NS_IMETHODIMP nsSpeculativeScriptThread::Run() { NS_ASSERTION(!NS_IsMainThread(), "Speculative parsing on the main thread?"); mNumConsumed = 0; mTokenizer->WillTokenize(PR_FALSE, &mTokenAllocator); while (mKeepParsing) { PRBool flushTokens = PR_FALSE; nsresult rv = mTokenizer->ConsumeToken(*mScanner, flushTokens); if (NS_FAILED(rv)) { break; } mNumConsumed += mScanner->Mark(); // TODO Don't pop the tokens. 
CToken *token; while (mKeepParsing && (token = mTokenizer->PopToken())) { ProcessToken(token); } } mTokenizer->DidTokenize(PR_FALSE); { nsAutoLock al(mLock.get()); mCurrentlyParsing = PR_FALSE; PR_NotifyCondVar(mCVar.get()); } return NS_OK; } nsresult nsSpeculativeScriptThread::StartParsing(nsParser *aParser) { NS_ASSERTION(NS_IsMainThread(), "Called on the wrong thread"); NS_ASSERTION(!mCurrentlyParsing, "Bad race happening"); if (!aParser->ThreadPool()) { return NS_OK; } nsIContentSink *sink = aParser->GetContentSink(); if (!sink) { return NS_OK; } nsCOMPtr doc = do_QueryInterface(sink->GetTarget()); if (!doc) { return NS_OK; } nsAutoString toScan; CParserContext *context = aParser->PeekContext(); if (!mLock.get()) { mLock = nsAutoLock::NewLock("nsSpeculativeScriptThread::mLock"); if (!mLock.get()) { return NS_ERROR_OUT_OF_MEMORY; } mCVar = PR_NewCondVar(mLock.get()); if (!mCVar.get()) { return NS_ERROR_OUT_OF_MEMORY; } if (!mPreloadedURIs.Init(15)) { return NS_ERROR_OUT_OF_MEMORY; } mTokenizer = new nsHTMLTokenizer(context->mDTDMode, context->mDocType, context->mParserCommand, 0); if (!mTokenizer) { return NS_ERROR_OUT_OF_MEMORY; } mTokenizer->CopyState(context->mTokenizer); context->mScanner->CopyUnusedData(toScan); if (toScan.IsEmpty()) { return NS_OK; } } else if (context == mContext) { // Don't parse the same part of the document twice. nsScannerIterator end; context->mScanner->EndReading(end); nsScannerIterator start; context->mScanner->CurrentPosition(start); if (mNumConsumed > context->mNumConsumed) { // We consumed more the last time we tried speculatively parsing than we // did the last time we actually parsed. PRUint32 distance = Distance(start, end); start.advance(PR_MIN(mNumConsumed - context->mNumConsumed, distance)); } if (start == end) { // We're at the end of this context's buffer, nothing else to do. return NS_OK; } CopyUnicodeTo(start, end, toScan); } else { // Grab all of the context. 
context->mScanner->CopyUnusedData(toScan); if (toScan.IsEmpty()) { // Nothing to parse, don't do anything. return NS_OK; } } nsCAutoString charset; PRInt32 source; aParser->GetDocumentCharset(charset, source); mScanner = new nsScanner(toScan, charset, source); if (!mScanner) { return NS_ERROR_OUT_OF_MEMORY; } mScanner->SetIncremental(PR_TRUE); mDocument.swap(doc); mKeepParsing = PR_TRUE; mCurrentlyParsing = PR_TRUE; mContext = context; return aParser->ThreadPool()->Dispatch(this, NS_DISPATCH_NORMAL); } void nsSpeculativeScriptThread::StopParsing(PRBool /*aFromDocWrite*/) { NS_ASSERTION(NS_IsMainThread(), "Can't stop parsing from another thread"); if (!mLock.get()) { // If we bailed early out of StartParsing, don't do anything. return; } { nsAutoLock al(mLock.get()); mKeepParsing = PR_FALSE; if (mCurrentlyParsing) { PR_WaitCondVar(mCVar.get(), PR_INTERVAL_NO_TIMEOUT); NS_ASSERTION(!mCurrentlyParsing, "Didn't actually stop parsing?"); } } // The thread is now idle. if (mTerminated) { // If we're terminated, then we need to ensure that we release our document // and tokenizer here on the main thread so that our last reference to them // isn't our alter-ego rescheduled on another thread. mDocument = nsnull; mTokenizer = nsnull; mScanner = nsnull; } else if (mNumURIs) { // Note: Don't do this if we're terminated. nsPreloadURIs::PreloadURIs(mURIs, this); mNumURIs = 0; mURIs.Clear(); } // Note: Currently, we pop the tokens off (see the comment in Run) so this // isn't a problem. If and when we actually use the tokens created // off-thread, we'll need to use aFromDocWrite for real. } void nsSpeculativeScriptThread::ProcessToken(CToken *aToken) { // Only called on the speculative script thread. 
CHTMLToken *token = static_cast(aToken); switch (static_cast(token->GetTokenType())) { case eToken_start: { CStartToken *start = static_cast(aToken); nsHTMLTag tag = static_cast(start->GetTypeID()); PRInt16 attrs = start->GetAttributeCount(); PRInt16 i = 0; nsAutoString src; nsAutoString elementType; nsAutoString charset; nsAutoString href; nsAutoString rel; PrefetchType ptype = NONE; switch (tag) { case eHTMLTag_link: ptype = STYLESHEET; break; case eHTMLTag_img: ptype = IMAGE; break; case eHTMLTag_script: ptype = SCRIPT; break; default: break; } // We currently handle the following element/attribute combos : // // // -- Ref: Bug# 22485 -- // Also remember to update the marked position. mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS; mParserContext->mNumConsumed += mParserContext->mScanner->Mark(); break; } } DidTokenize(aIsFinalChunk); MOZ_TIMER_STOP(mTokenizeTime); if (killSink) { mSink = nsnull; } } else { result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER; } return result; }

/**
 * This is the tail-end of the code sandwich for the
 * tokenization process. It gets called once tokenization
 * has completed for each phase.
 *
 * @param aIsFinalChunk forwarded unchanged to the tokenizer's DidTokenize
 * @return PR_TRUE when there is no parser context or the tokenizer's
 *         DidTokenize succeeded; PR_FALSE when the tokenizer could not be
 *         obtained or its DidTokenize failed
 */
PRBool
nsParser::DidTokenize(PRBool aIsFinalChunk)
{
  if (!mParserContext) {
    return PR_TRUE;
  }

  // Initialized so the pointer never holds an indeterminate value, even
  // though it is only used after GetTokenizer has reported success.
  nsITokenizer* theTokenizer = nsnull;
  nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
  NS_ENSURE_SUCCESS(rv, PR_FALSE);

  rv = theTokenizer->DidTokenize(aIsFinalChunk);
  return NS_SUCCEEDED(rv);
}

/**
 * Get the channel associated with this parser
 *
 * @param aChannel out param that will contain the result
 * @return NS_OK if successful; NS_ERROR_NOT_AVAILABLE when there is no
 *         parser context or request; otherwise the result of querying the
 *         request for nsIChannel
 */
NS_IMETHODIMP
nsParser::GetChannel(nsIChannel** aChannel)
{
  nsresult result = NS_ERROR_NOT_AVAILABLE;
  if (mParserContext && mParserContext->mRequest) {
    result = CallQueryInterface(mParserContext->mRequest, aChannel);
  }
  return result;
}

/**
 * Get the DTD associated with this parser
 *
 * @param aDTD out param; receives an AddRef'd mDTD when a parser context
 *             exists, and null otherwise
 * @return always NS_OK
 */
NS_IMETHODIMP
nsParser::GetDTD(nsIDTD** aDTD)
{
  // This method always reports success, so the out param must always be
  // written; previously it was left untouched when there was no context.
  *aDTD = nsnull;
  if (mParserContext) {
    NS_IF_ADDREF(*aDTD = mDTD);
  }
  return NS_OK;
}