Bug 844461 - Perform alias resolution on the fallback encoding pref value. r=smontagu.

This commit is contained in:
Henri Sivonen 2013-03-04 20:09:11 +02:00
parent 421f07e516
commit ad623cb811
10 changed files with 54 additions and 35 deletions

View File

@ -62,7 +62,7 @@ function xhrDoc(idx) {
// inputEncoding expected for that document.
var tests = [
[ frameDoc("one"), "ISO-8859-1", "ISO-8859-1" ],
[ frameDoc("one"), "windows-1252", "windows-1252" ],
[ frameDoc("two"), "UTF-8", "UTF-8" ],
[ frameDoc("three"), "windows-1252", "windows-1252" ],
[ frameDoc("four"), "UTF-8", "UTF-8" ],

View File

@ -82,6 +82,7 @@
//AHMED 12-2
#include "nsBidiUtils.h"
#include "mozilla/dom/EncodingUtils.h"
#include "nsIEditingSession.h"
#include "nsIEditor.h"
#include "nsNodeInfoManager.h"
@ -328,7 +329,7 @@ nsHTMLDocument::TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV,
if(requestCharsetSource <= aCharsetSource)
return;
if(NS_SUCCEEDED(rv) && IsAsciiCompatible(requestCharset)) {
if(NS_SUCCEEDED(rv) && EncodingUtils::IsAsciiCompatible(requestCharset)) {
aCharsetSource = requestCharsetSource;
aCharset = requestCharset;
@ -352,7 +353,7 @@ nsHTMLDocument::TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV,
return;
// mCharacterSet not updated yet for channel, so check aCharset, too.
if (WillIgnoreCharsetOverride() || !IsAsciiCompatible(aCharset)) {
if (WillIgnoreCharsetOverride() || !EncodingUtils::IsAsciiCompatible(aCharset)) {
return;
}
@ -364,7 +365,7 @@ nsHTMLDocument::TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV,
if(NS_SUCCEEDED(rv) &&
!forceCharsetFromDocShell.IsEmpty() &&
IsAsciiCompatible(forceCharsetFromDocShell)) {
EncodingUtils::IsAsciiCompatible(forceCharsetFromDocShell)) {
aCharset = forceCharsetFromDocShell;
aCharsetSource = kCharsetFromUserForced;
return;
@ -377,7 +378,7 @@ nsHTMLDocument::TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV,
if (csAtom) {
nsAutoCString charset;
csAtom->ToUTF8String(charset);
if (!IsAsciiCompatible(charset)) {
if (!EncodingUtils::IsAsciiCompatible(charset)) {
return;
}
aCharset = charset;
@ -400,12 +401,12 @@ nsHTMLDocument::TryCacheCharset(nsICachingChannel* aCachingChannel,
nsCString cachedCharset;
rv = aCachingChannel->GetCacheTokenCachedCharset(cachedCharset);
// Check IsAsciiCompatible() even in the cache case, because the value
// Check EncodingUtils::IsAsciiCompatible() even in the cache case, because the value
// might be stale and in the case of a stale charset that is not a rough
// ASCII superset, the parser has no way to recover.
if (NS_SUCCEEDED(rv) &&
!cachedCharset.IsEmpty() &&
IsAsciiCompatible(cachedCharset))
EncodingUtils::IsAsciiCompatible(cachedCharset))
{
aCharset = cachedCharset;
aCharsetSource = kCharsetFromCache;
@ -425,16 +426,6 @@ CheckSameOrigin(nsINode* aNode1, nsINode* aNode2)
equal;
}
// Returns false when LowerCaseEqualsLiteral matches aPreferredName against
// one of "utf-16", "utf-16be", "utf-16le", "utf-7" or
// "x-imap4-modified-utf7"; returns true for every other name.
bool
nsHTMLDocument::IsAsciiCompatible(const nsACString& aPreferredName)
{
return !(aPreferredName.LowerCaseEqualsLiteral("utf-16") ||
aPreferredName.LowerCaseEqualsLiteral("utf-16be") ||
aPreferredName.LowerCaseEqualsLiteral("utf-16le") ||
aPreferredName.LowerCaseEqualsLiteral("utf-7") ||
aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7"));
}
void
nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
nsIDocument* aParentDocument,
@ -460,8 +451,8 @@ nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
if (kCharsetFromParentForced == parentSource ||
kCharsetFromUserForced == parentSource) {
if (WillIgnoreCharsetOverride() ||
!IsAsciiCompatible(aCharset) || // if channel said UTF-16
!IsAsciiCompatible(parentCharset)) {
!EncodingUtils::IsAsciiCompatible(aCharset) || // if channel said UTF-16
!EncodingUtils::IsAsciiCompatible(parentCharset)) {
return;
}
aCharset.Assign(parentCharset);
@ -477,7 +468,7 @@ nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
// Make sure that's OK
if (!aParentDocument ||
!CheckSameOrigin(this, aParentDocument) ||
!IsAsciiCompatible(parentCharset)) {
!EncodingUtils::IsAsciiCompatible(parentCharset)) {
return;
}
@ -496,7 +487,7 @@ nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
// Make sure that's OK
if (!aParentDocument ||
!CheckSameOrigin(this, aParentDocument) ||
!IsAsciiCompatible(parentCharset)) {
!EncodingUtils::IsAsciiCompatible(parentCharset)) {
return;
}
@ -517,10 +508,12 @@ nsHTMLDocument::TryWeakDocTypeDefault(int32_t& aCharsetSource,
// Don't let the user break things by setting intl.charset.default to
// not a rough ASCII superset
if (!defCharset.IsEmpty() && IsAsciiCompatible(defCharset)) {
aCharset = defCharset;
nsAutoCString canonical;
if (EncodingUtils::FindEncodingForLabel(defCharset, canonical) &&
EncodingUtils::IsAsciiCompatible(canonical)) {
aCharset = canonical;
} else {
aCharset.AssignLiteral("ISO-8859-1");
aCharset.AssignLiteral("windows-1252");
}
aCharsetSource = kCharsetFromWeakDocTypeDefault;
return;
@ -538,7 +531,7 @@ nsHTMLDocument::TryDefaultCharset( nsIMarkupDocumentViewer* aMarkupDV,
if (aMarkupDV) {
nsresult rv =
aMarkupDV->GetDefaultCharacterSet(defaultCharsetFromDocShell);
if(NS_SUCCEEDED(rv) && IsAsciiCompatible(defaultCharsetFromDocShell)) {
if(NS_SUCCEEDED(rv) && EncodingUtils::IsAsciiCompatible(defaultCharsetFromDocShell)) {
aCharset = defaultCharsetFromDocShell;
aCharsetSource = kCharsetFromUserDefault;
}
@ -3808,7 +3801,7 @@ nsHTMLDocument::WillIgnoreCharsetOverride()
if (mCharacterSetSource == kCharsetFromByteOrderMark) {
return true;
}
if (!IsAsciiCompatible(mCharacterSet)) {
if (!EncodingUtils::IsAsciiCompatible(mCharacterSet)) {
return true;
}
nsCOMPtr<nsIWyciwygChannel> wyciwyg = do_QueryInterface(mChannel);

View File

@ -297,8 +297,6 @@ protected:
static uint32_t gWyciwygSessionCnt;
static bool IsAsciiCompatible(const nsACString& aPreferredName);
static void TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV,
int32_t& aCharsetSource,
nsACString& aCharset);

View File

@ -68,5 +68,15 @@ EncodingUtils::FindEncodingForLabel(const nsACString& aLabel,
labelsEncodings, ArrayLength(labelsEncodings), label, aOutEncoding));
}
// Tells callers whether the named encoding may be treated as a rough ASCII
// superset. Only the names listed below are rejected; any other name is
// reported as compatible.
bool
EncodingUtils::IsAsciiCompatible(const nsACString& aPreferredName)
{
  // The UTF-16 family is rejected.
  if (aPreferredName.LowerCaseEqualsLiteral("utf-16") ||
      aPreferredName.LowerCaseEqualsLiteral("utf-16be") ||
      aPreferredName.LowerCaseEqualsLiteral("utf-16le")) {
    return false;
  }

  // The UTF-7 variants are rejected as well.
  if (aPreferredName.LowerCaseEqualsLiteral("utf-7") ||
      aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) {
    return false;
  }

  return true;
}
} // namespace dom
} // namespace mozilla

View File

@ -70,6 +70,15 @@ public:
aString.Trim(" \t\n\f\r");
}
/**
* Check whether the encoding is ASCII-compatible in the sense that Basic Latin
* encodes to ASCII bytes. (The reverse may not be true!)
*
* The implementation reports every name as compatible except "utf-16",
* "utf-16be", "utf-16le", "utf-7" and "x-imap4-modified-utf7", which are
* compared using LowerCaseEqualsLiteral.
*
* @param aPreferredName a preferred encoding label
* @return whether the encoding is ASCII-compatible
*/
static bool IsAsciiCompatible(const nsACString& aPreferredName);
private:
EncodingUtils() MOZ_DELETE;
};

View File

@ -50,8 +50,11 @@ function InitDetectorTests()
.getComplexValue("intl.charset.default",
Ci.nsIPrefLocalizedString)
.data;
if (gExpectedCharset == "ISO-8859-1") {
gExpectedCharset = "windows-1252";
}
} catch (e) {
gExpectedCharset = "ISO-8859-8";
gExpectedCharset = "windows-1252";
}
}

View File

@ -23,7 +23,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=638318
/** Test for Bug 638318 **/
/* Note! This test uses the chardet test harness but doesn't test chardet! */
CharsetDetectionTests("bug638318_text.html",
"ISO-8859-1",
"windows-1252",
new Array(""));
</script>
</pre>

View File

@ -22,7 +22,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=811363
<script class="testbody" type="text/javascript">
/** Test for Bug 811363 **/
CharsetDetectionTests("bug811363-invalid-1.text",
"ISO-8859-1",
"windows-1252",
new Array("ja_parallel_state_machine",
"zh_parallel_state_machine",
"zhtw_parallel_state_machine",

View File

@ -22,7 +22,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=811363
<script class="testbody" type="text/javascript">
/** Test for Bug 811363 **/
CharsetDetectionTests("bug811363-invalid-5.text",
"ISO-8859-1",
"windows-1252",
new Array("ja_parallel_state_machine",
"zh_parallel_state_machine",
"zhtw_parallel_state_machine",

View File

@ -39,6 +39,7 @@
#include "nsContentUtils.h"
#include "nsLayoutStylesheetCache.h"
#include "mozilla/Preferences.h"
#include "mozilla/dom/EncodingUtils.h"
#include "nsIDeviceContextSpec.h"
#include "nsViewManager.h"
@ -155,6 +156,7 @@ static const char sPrintOptionsContractID[] = "@mozilla.org/gfx/printset
#include "jsfriendapi.h"
using namespace mozilla;
using namespace mozilla::dom;
#ifdef DEBUG
@ -2970,10 +2972,14 @@ nsDocumentViewer::GetDefaultCharacterSet(nsACString& aDefaultCharacterSet)
const nsAdoptingCString& defCharset =
Preferences::GetLocalizedCString("intl.charset.default");
if (!defCharset.IsEmpty()) {
mDefaultCharacterSet = defCharset;
// Don't let the user break things by setting intl.charset.default to
// not a rough ASCII superset
nsAutoCString canonical;
if (EncodingUtils::FindEncodingForLabel(defCharset, canonical) &&
EncodingUtils::IsAsciiCompatible(canonical)) {
mDefaultCharacterSet = canonical;
} else {
mDefaultCharacterSet.AssignLiteral("ISO-8859-1");
mDefaultCharacterSet.AssignLiteral("windows-1252");
}
}
aDefaultCharacterSet = mDefaultCharacterSet;