Bug 213517 part 1 - Interpret x-user-defined as windows-1252 in <meta> for compatibility with other browsers. r=smontagu.

--HG--
rename : parser/htmlparser/tests/mochitest/file_bug672453_meta_utf16.html => parser/htmlparser/tests/mochitest/file_bug672453_meta_userdefined.html
This commit is contained in:
Henri Sivonen 2014-01-02 09:18:19 +02:00
parent f6672247c2
commit f0fad33dd2
7 changed files with 56 additions and 4 deletions

View File

@ -14,6 +14,7 @@ EncMetaUnsupported=An unsupported character encoding was declared for the HTML d
EncProtocolUnsupported=An unsupported character encoding was declared on the transfer protocol level. The declaration was ignored.
EncBomlessUtf16=Detected UTF-16-encoded Basic Latin-only text without a byte order mark and without a transfer protocol-level declaration. Encoding this content in UTF-16 is inefficient and the character encoding should have been declared in any case.
EncMetaUtf16=A meta tag was used to declare the character encoding as UTF-16. This was interpreted as an UTF-8 declaration instead.
EncMetaUserDefined=A meta tag was used to declare the character encoding as x-user-defined. This was interpreted as a windows-1252 declaration instead for compatibility with intentionally mis-encoded legacy fonts. This site should migrate to Unicode.
# The bulk of the messages below are derived from
# http://hg.mozilla.org/projects/htmlparser/file/1f633cef7de7/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java

View File

@ -35,6 +35,11 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
mCharset.Assign("UTF-8");
return true;
}
if (encoding.EqualsLiteral("x-user-defined")) {
// Match WebKit/Blink: treat an x-user-defined declaration as windows-1252 (compatibility hack for Indian and Armenian legacy sites)
mCharset.Assign("windows-1252");
return true;
}
mCharset.Assign(encoding);
return true;
}

View File

@ -1203,8 +1203,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
return false;
}
if (newEncoding.EqualsLiteral("UTF-16") ||
newEncoding.EqualsLiteral("UTF-16BE") ||
if (newEncoding.EqualsLiteral("UTF-16BE") ||
newEncoding.EqualsLiteral("UTF-16LE")) {
mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16",
true,
@ -1212,6 +1211,14 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
newEncoding.Assign("UTF-8");
}
if (newEncoding.EqualsLiteral("x-user-defined")) {
// Match WebKit/Blink: report the x-user-defined declaration and treat it as windows-1252 (compatibility hack for Indian and Armenian legacy sites)
mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined",
true,
mTokenizer->getLineNumber());
newEncoding.Assign("windows-1252");
}
if (newEncoding.Equals(mCharset)) {
if (mCharsetSource < kCharsetFromMetaPrescan) {
if (mInitialEncodingWasFromParentFrame) {

View File

@ -0,0 +1 @@
<meta charset="x-user-defined">

View File

@ -25,6 +25,7 @@ support-files =
file_bug672453_meta_unsupported.html
file_bug672453_meta_utf16.html
file_bug672453_not_declared.html
file_bug672453_meta_userdefined.html
file_bug716579-16.html
file_bug716579-16.html^headers^
file_bug716579-16.xhtml
@ -92,6 +93,7 @@ support-files =
[test_bug102699.html]
[test_bug174351.html]
[test_bug213517.html]
[test_bug339350.xhtml]
[test_bug358797.html]
[test_bug396568.html]

View File

@ -0,0 +1,30 @@
<!DOCTYPE HTML>
<html>
<!--
https://bugzilla.mozilla.org/show_bug.cgi?id=213517
-->
<head>
<meta charset="x-user-defined">
<title>Test for Bug 213517</title>
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
<script type="application/javascript">
/**
 * Test for Bug 213517
 *
 * This document declares its encoding as x-user-defined in the meta charset
 * element above. The single check below asserts that document.characterSet
 * reports "windows-1252" instead of the declared label.
 */
is(document.characterSet, "windows-1252", "x-user-defined in <meta> should have gotten mapped to windows-1252");
</script>
</head>
<body>
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=213517">Mozilla Bug 213517</a>
<p id="display"></p>
<div id="content" style="display: none">
</div>
<pre id="test">
</pre>
</body>
</html>

View File

@ -25,7 +25,8 @@ var tests = [
"file_bug672453_http_unsupported.html",
"file_bug672453_bomless_utf16.html",
"file_bug672453_meta_utf16.html",
"file_bug672453_meta_non_superset.html"
"file_bug672453_meta_non_superset.html",
"file_bug672453_meta_userdefined.html",
];
var expectedErrors = [
@ -68,7 +69,12 @@ var expectedErrors = [
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_non_superset.html",
lineNumber: 1,
isWarning: false,
isException: false }
isException: false },
{ errorMessage: "A meta tag was used to declare the character encoding as x-user-defined. This was interpreted as a windows-1252 declaration instead for compatibility with intentionally mis-encoded legacy fonts. This site should migrate to Unicode.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_userdefined.html",
lineNumber: 1,
isWarning: false,
isException: false },
];
SimpleTest.waitForExplicitFinish();