From 4e3fba1c96a066cf14beb2df69c43fbfbd04f83e Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Tue, 16 Oct 2012 10:42:54 +0300 Subject: [PATCH] Bug 599320 - Refuse encodings that are not rough ASCII supersets as hints from parent, previous doc or cache. r=smaug. --- content/html/document/src/nsHTMLDocument.cpp | 133 +- content/html/document/src/nsHTMLDocument.h | 8 +- parser/html/nsHtml5MetaScannerCppSupplement.h | 5 +- parser/html/nsHtml5StreamParser.cpp | 5 +- .../tests/reftest/bug599320-1-ref.html | 17 + .../htmlparser/tests/reftest/bug599320-1.html | Bin 0 -> 616 bytes .../tests/reftest/frame599320-1-ref.html | 15 + .../tests/reftest/frame599320-1.html | 1086 +++++++++++++++++ parser/htmlparser/tests/reftest/reftest.list | 1 + 9 files changed, 1204 insertions(+), 66 deletions(-) create mode 100644 parser/htmlparser/tests/reftest/bug599320-1-ref.html create mode 100644 parser/htmlparser/tests/reftest/bug599320-1.html create mode 100644 parser/htmlparser/tests/reftest/frame599320-1-ref.html create mode 100644 parser/htmlparser/tests/reftest/frame599320-1.html diff --git a/content/html/document/src/nsHTMLDocument.cpp b/content/html/document/src/nsHTMLDocument.cpp index fe75ac29e40..8324f9b8312 100644 --- a/content/html/document/src/nsHTMLDocument.cpp +++ b/content/html/document/src/nsHTMLDocument.cpp @@ -313,11 +313,7 @@ nsHTMLDocument::CreateShell(nsPresContext* aContext, aInstancePtrResult); } -// The following Try*Charset will return false only if the charset source -// should be considered (ie. aCharsetSource < thisCharsetSource) but we failed -// to get the charset from this source. - -bool +void nsHTMLDocument::TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV, int32_t& aCharsetSource, nsACString& aCharset) { @@ -331,17 +327,17 @@ nsHTMLDocument::TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV, aMarkupDV->SetHintCharacterSetSource((int32_t)(kCharsetUninitialized)); if(requestCharsetSource <= aCharsetSource) - return true; + return; - if(NS_SUCCEEDED(rv)) { + if(NS_SUCCEEDED(rv) && IsAsciiCompatible(requestCharset)) { aCharsetSource = requestCharsetSource; aCharset = requestCharset; - return true; + return; } } } - return false; + return; } @@ -361,6 +357,8 @@ nsHTMLDocument::TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV, rv = aMarkupDV->GetForceCharacterSet(forceCharsetFromDocShell); } + // Not making the IsAsciiCompatible() check here to allow the user to + // force UTF-16 from the menu. if(NS_SUCCEEDED(rv) && !forceCharsetFromDocShell.IsEmpty()) { aCharset = forceCharsetFromDocShell; //TODO: we should define appropriate constant for force charset @@ -392,7 +390,12 @@ nsHTMLDocument::TryCacheCharset(nsICachingChannel* aCachingChannel, nsCString cachedCharset; rv = aCachingChannel->GetCacheTokenCachedCharset(cachedCharset); - if (NS_SUCCEEDED(rv) && !cachedCharset.IsEmpty()) + // Check IsAsciiCompatible() even in the cache case, because the value + // might be stale and in the case of a stale charset that is not a rough + // ASCII superset, the parser has no way to recover. + if (NS_SUCCEEDED(rv) && + !cachedCharset.IsEmpty() && + IsAsciiCompatible(cachedCharset)) { aCharset = cachedCharset; aCharsetSource = kCharsetFromCache; @@ -417,69 +420,87 @@ CheckSameOrigin(nsINode* aNode1, nsINode* aNode2) } bool +nsHTMLDocument::IsAsciiCompatible(const nsACString& aPreferredName) +{ + return !(aPreferredName.LowerCaseEqualsLiteral("utf-16") || + aPreferredName.LowerCaseEqualsLiteral("utf-16be") || + aPreferredName.LowerCaseEqualsLiteral("utf-16le") || + aPreferredName.LowerCaseEqualsLiteral("utf-7") || + aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7")); +} + +void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell, nsIDocument* aParentDocument, int32_t& aCharsetSource, nsACString& aCharset) { - if (aDocShell) { - int32_t source; - nsCOMPtr csAtom; - int32_t parentSource; - aDocShell->GetParentCharsetSource(&parentSource); - if (kCharsetFromParentForced <= parentSource) - source = kCharsetFromParentForced; - else if (kCharsetFromHintPrevDoc == parentSource) { - // Make sure that's OK - if (!aParentDocument || !CheckSameOrigin(this, aParentDocument)) { - return false; - } - - // if parent is posted doc, set this prevent autodections - // I'm not sure this makes much sense... but whatever. - source = kCharsetFromHintPrevDoc; - } - else if (kCharsetFromCache <= parentSource) { - // Make sure that's OK - if (!aParentDocument || !CheckSameOrigin(this, aParentDocument)) { - return false; - } - - source = kCharsetFromParentFrame; - } - else - return false; - - if (source < aCharsetSource) - return true; - - aDocShell->GetParentCharset(getter_AddRefs(csAtom)); - if (csAtom) { - csAtom->ToUTF8String(aCharset); - aCharsetSource = source; - return true; - } + if (!aDocShell) { + return; } - return false; + int32_t source; + nsCOMPtr csAtom; + int32_t parentSource; + nsAutoCString parentCharset; + aDocShell->GetParentCharset(getter_AddRefs(csAtom)); + if (!csAtom) { + return; + } + aDocShell->GetParentCharsetSource(&parentSource); + csAtom->ToUTF8String(parentCharset); + if (kCharsetFromParentForced <= parentSource) { + source = kCharsetFromParentForced; + } else if (kCharsetFromHintPrevDoc == parentSource) { + // Make sure that's OK + if (!aParentDocument || + !CheckSameOrigin(this, aParentDocument) || + !IsAsciiCompatible(parentCharset)) { + return; + } + + // if parent is posted doc, set this prevent autodetections + // I'm not sure this makes much sense... but whatever. + source = kCharsetFromHintPrevDoc; + } else if (kCharsetFromCache <= parentSource) { + // Make sure that's OK + if (!aParentDocument || + !CheckSameOrigin(this, aParentDocument) || + !IsAsciiCompatible(parentCharset)) { + return; + } + + source = kCharsetFromParentFrame; + } else { + return; + } + + if (source < aCharsetSource) { + return; + } + + aCharset.Assign(parentCharset); + aCharsetSource = source; } -bool +void nsHTMLDocument::UseWeakDocTypeDefault(int32_t& aCharsetSource, nsACString& aCharset) { if (kCharsetFromWeakDocTypeDefault <= aCharsetSource) - return true; - // fallback value in case docshell return error - aCharset.AssignLiteral("ISO-8859-1"); + return; const nsAdoptingCString& defCharset = Preferences::GetLocalizedCString("intl.charset.default"); - if (!defCharset.IsEmpty()) { + // Don't let the user break things by setting intl.charset.default to + // not a rough ASCII superset + if (!defCharset.IsEmpty() && IsAsciiCompatible(defCharset)) { aCharset = defCharset; - aCharsetSource = kCharsetFromWeakDocTypeDefault; + } else { + aCharset.AssignLiteral("ISO-8859-1"); } - return true; + aCharsetSource = kCharsetFromWeakDocTypeDefault; + return; } bool @@ -494,6 +515,8 @@ nsHTMLDocument::TryDefaultCharset( nsIMarkupDocumentViewer* aMarkupDV, if (aMarkupDV) { nsresult rv = aMarkupDV->GetDefaultCharacterSet(defaultCharsetFromDocShell); + // Not making the IsAsciiCompatible() check here to allow the user to + // force UTF-16 from the menu. if(NS_SUCCEEDED(rv)) { aCharset = defaultCharsetFromDocShell; diff --git a/content/html/document/src/nsHTMLDocument.h b/content/html/document/src/nsHTMLDocument.h index a2e58e7c8e0..17d97ffcae3 100644 --- a/content/html/document/src/nsHTMLDocument.h +++ b/content/html/document/src/nsHTMLDocument.h @@ -218,7 +218,9 @@ protected: static uint32_t gWyciwygSessionCnt; - static bool TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV, + static bool IsAsciiCompatible(const nsACString& aPreferredName); + + static void TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV, int32_t& aCharsetSource, nsACString& aCharset); static bool TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV, @@ -229,10 +231,10 @@ protected: int32_t& aCharsetSource, nsACString& aCharset); // aParentDocument could be null. - bool TryParentCharset(nsIDocShell* aDocShell, + void TryParentCharset(nsIDocShell* aDocShell, nsIDocument* aParentDocument, int32_t& charsetSource, nsACString& aCharset); - static bool UseWeakDocTypeDefault(int32_t& aCharsetSource, + static void UseWeakDocTypeDefault(int32_t& aCharsetSource, nsACString& aCharset); static bool TryDefaultCharset(nsIMarkupDocumentViewer* aMarkupDV, int32_t& aCharsetSource, diff --git a/parser/html/nsHtml5MetaScannerCppSupplement.h b/parser/html/nsHtml5MetaScannerCppSupplement.h index c6b6376432e..402e0216ccf 100644 --- a/parser/html/nsHtml5MetaScannerCppSupplement.h +++ b/parser/html/nsHtml5MetaScannerCppSupplement.h @@ -56,10 +56,7 @@ nsHtml5MetaScanner::tryCharset(nsString* charset) preferred.LowerCaseEqualsLiteral("utf-16be") || preferred.LowerCaseEqualsLiteral("utf-16le") || preferred.LowerCaseEqualsLiteral("utf-7") || - preferred.LowerCaseEqualsLiteral("jis_x0212-1990") || - preferred.LowerCaseEqualsLiteral("x-jis0208") || - preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") || - preferred.LowerCaseEqualsLiteral("x-user-defined")) { + preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) { return false; } res = convManager->GetUnicodeDecoderRaw(preferred.get(), getter_AddRefs(mUnicodeDecoder)); diff --git a/parser/html/nsHtml5StreamParser.cpp b/parser/html/nsHtml5StreamParser.cpp index c8671bcdde5..1ac7f733bcf 100644 --- a/parser/html/nsHtml5StreamParser.cpp +++ b/parser/html/nsHtml5StreamParser.cpp @@ -1213,10 +1213,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding) preferred.LowerCaseEqualsLiteral("utf-16be") || preferred.LowerCaseEqualsLiteral("utf-16le") || preferred.LowerCaseEqualsLiteral("utf-7") || - preferred.LowerCaseEqualsLiteral("jis_x0212-1990") || - preferred.LowerCaseEqualsLiteral("x-jis0208") || - preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") || - preferred.LowerCaseEqualsLiteral("x-user-defined")) { + preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) { // Not a rough ASCII superset mTreeBuilder->MaybeComplainAboutCharset("EncMetaNonRoughSuperset", true, diff --git a/parser/htmlparser/tests/reftest/bug599320-1-ref.html b/parser/htmlparser/tests/reftest/bug599320-1-ref.html new file mode 100644 index 00000000000..bb48fe5d256 --- /dev/null +++ b/parser/htmlparser/tests/reftest/bug599320-1-ref.html @@ -0,0 +1,17 @@ + + + + + +UTF-16 doc + + +

UTF-16 doc

+ +

Euro sign: €

+

iframe:

+ + + + + diff --git a/parser/htmlparser/tests/reftest/bug599320-1.html b/parser/htmlparser/tests/reftest/bug599320-1.html new file mode 100644 index 0000000000000000000000000000000000000000..590e9126c3fa1abdde37eca08dd8708c670da3b2 GIT binary patch literal 616 zcmZ{iOHYG96ot>azk;$>DZUb$!ba;>7rJn%FfAl~1<=O-_wY*W3a=#ofnV@7&5${PS;MdjH99M@Ehw`bywrEN z!M94LYAOze9rYSmp)ccio|WsFP28iD>KgqLWWg2^Un5{_FA2R3r+EWe>a(9T#rjXH zLfy7&Iy + + + + +Non-UTF-16 doc + + +

Non-UTF-16 doc

+ +

Euro sign: €

+ + + + diff --git a/parser/htmlparser/tests/reftest/frame599320-1.html b/parser/htmlparser/tests/reftest/frame599320-1.html new file mode 100644 index 00000000000..cb2956bfe82 --- /dev/null +++ b/parser/htmlparser/tests/reftest/frame599320-1.html @@ -0,0 +1,1086 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Non-UTF-16 doc + + +

Non-UTF-16 doc

+ +

Euro sign: €

+ + + + diff --git a/parser/htmlparser/tests/reftest/reftest.list b/parser/htmlparser/tests/reftest/reftest.list index cd378313cbf..d4015f890bd 100644 --- a/parser/htmlparser/tests/reftest/reftest.list +++ b/parser/htmlparser/tests/reftest/reftest.list @@ -5,6 +5,7 @@ == bug582788-1.html bug582788-1-ref.html == bug582940-1.html bug582940-1-ref.html == bug592656-1.html bug592656-1-ref.html +== bug599320-1.html bug599320-1-ref.html == bug608373-1.html bug608373-1-ref.html fails-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia) == view-source:bug482921-1.html bug482921-1-ref.html # bug 703201 == view-source:bug482921-2.xhtml bug482921-2-ref.html