Bug 863728 - Implement the replacement encoding. r=emk.

2024-09-13 09:24:08 -07:00 · 2013-11-25 10:06:56 +02:00 · 2013-11-25 10:06:56 +02:00 · d0c0e04f69
commit d0c0e04f69
parent 15eb0861a8
36 changed files with 264 additions and 157 deletions
--- a/content/base/src/EventSource.cpp
+++ b/content/base/src/EventSource.cpp
@ -268,7 +268,8 @@ EventSource::Init(nsISupports* aOwner,
    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

-  rv = convManager->GetUnicodeDecoder("UTF-8", getter_AddRefs(mUnicodeDecoder));
+  rv = convManager->GetUnicodeDecoderRaw("UTF-8",
+                                         getter_AddRefs(mUnicodeDecoder));
  NS_ENSURE_SUCCESS(rv, rv);

  // the constructor should throw a SYNTAX_ERROR only if it fails resolving the
--- a/content/base/src/nsDOMFileReader.cpp
+++ b/content/base/src/nsDOMFileReader.cpp
@ -540,7 +540,8 @@ nsDOMFileReader::ConvertStream(const char *aFileData,
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
-  rv = charsetConverter->GetUnicodeDecoder(aCharset, getter_AddRefs(unicodeDecoder));
+  rv = charsetConverter->GetUnicodeDecoderRaw(aCharset,
+                                              getter_AddRefs(unicodeDecoder));
  NS_ENSURE_SUCCESS(rv, rv);

  int32_t destLength;
--- a/content/base/src/nsDocument.cpp
+++ b/content/base/src/nsDocument.cpp
@ -3289,16 +3289,12 @@ nsDocument::GetBaseTarget(nsAString &aBaseTarget)
 void
 nsDocument::SetDocumentCharacterSet(const nsACString& aCharSetID)
 {
+  // XXX it would be a good idea to assert the sanity of the argument,
+  // but before we figure out what to do about non-Encoding Standard
+  // encodings in the charset menu and in mailnews, assertions are futile.
  if (!mCharacterSet.Equals(aCharSetID)) {
    mCharacterSet = aCharSetID;

-#ifdef DEBUG
-    nsAutoCString canonicalName;
-    nsCharsetAlias::GetPreferred(aCharSetID, canonicalName);
-    NS_ASSERTION(canonicalName.Equals(aCharSetID),
-                 "charset name must be canonical");
-#endif
-
    int32_t n = mCharSetObservers.Length();

    for (int32_t i = 0; i < n; i++) {
--- a/content/base/src/nsScriptLoader.cpp
+++ b/content/base/src/nsScriptLoader.cpp
@ -48,6 +48,7 @@
 #include "nsSandboxFlags.h"
 #include "nsContentTypeParser.h"
 #include "nsINetworkSeer.h"
+#include "mozilla/dom/EncodingUtils.h"

 #include "mozilla/CORSMode.h"
 #include "mozilla/Attributes.h"
@ -1197,15 +1198,15 @@ nsScriptLoader::ConvertToUTF16(nsIChannel* aChannel, const uint8_t* aData,
  if (!unicodeDecoder &&
      aChannel &&
      NS_SUCCEEDED(aChannel->GetContentCharset(charset)) &&
-      !charset.IsEmpty()) {
-    charsetConv->GetUnicodeDecoder(charset.get(),
-                                   getter_AddRefs(unicodeDecoder));
+      EncodingUtils::FindEncodingForLabel(charset, charset)) {
+    charsetConv->GetUnicodeDecoderRaw(charset.get(),
+                                      getter_AddRefs(unicodeDecoder));
  }

-  if (!unicodeDecoder && !aHintCharset.IsEmpty()) {
-    CopyUTF16toUTF8(aHintCharset, charset);
-    charsetConv->GetUnicodeDecoder(charset.get(),
-                                   getter_AddRefs(unicodeDecoder));
+  if (!unicodeDecoder &&
+      EncodingUtils::FindEncodingForLabel(aHintCharset, charset)) {
+    charsetConv->GetUnicodeDecoderRaw(charset.get(),
+                                      getter_AddRefs(unicodeDecoder));
  }

  if (!unicodeDecoder && aDocument) {
--- a/dom/encoding/EncodingUtils.cpp
+++ b/dom/encoding/EncodingUtils.cpp
@ -38,6 +38,7 @@ EncodingUtils::IsAsciiCompatible(const nsACString& aPreferredName)
  return !(aPreferredName.LowerCaseEqualsLiteral("utf-16") ||
           aPreferredName.LowerCaseEqualsLiteral("utf-16be") ||
           aPreferredName.LowerCaseEqualsLiteral("utf-16le") ||
+           aPreferredName.LowerCaseEqualsLiteral("replacement") ||
           aPreferredName.LowerCaseEqualsLiteral("utf-7") ||
           aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7"));
 }
--- a/dom/encoding/TextDecoder.cpp
+++ b/dom/encoding/TextDecoder.cpp
@ -21,8 +21,9 @@ TextDecoder::Init(const nsAString& aEncoding, const bool aFatal,
  EncodingUtils::TrimSpaceCharacters(label);

  // Let encoding be the result of getting an encoding from label.
-  // If encoding is failure, throw a TypeError.
-  if (!EncodingUtils::FindEncodingForLabel(label, mEncoding)) {
+  // If encoding is failure or replacement, throw a TypeError.
+  if (!EncodingUtils::FindEncodingForLabel(label, mEncoding) ||
+      mEncoding.EqualsLiteral("replacement")) {
    aRv.ThrowTypeError(MSG_ENCODING_NOT_SUPPORTED, &label);
    return;
  }
--- a/dom/encoding/labelsencodings.properties
+++ b/dom/encoding/labelsencodings.properties
@ -215,8 +215,10 @@ ks_c_5601-1989=EUC-KR
 ksc5601=EUC-KR
 ksc_5601=EUC-KR
 windows-949=EUC-KR
-csiso2022kr=ISO-2022-KR
-iso-2022-kr=ISO-2022-KR
+csiso2022kr=replacement
+iso-2022-kr=replacement
+iso-2022-cn=replacement
+iso-2022-cn-ext=replacement
 utf-16=UTF-16LE
 utf-16le=UTF-16LE
 utf-16be=UTF-16BE
--- a/dom/encoding/test/reftest/bug863728-1-ref.html
+++ b/dom/encoding/test/reftest/bug863728-1-ref.html
@ -0,0 +1 @@
+<meta charset=utf-8><EFBFBD>
--- a/dom/encoding/test/reftest/bug863728-1.html
+++ b/dom/encoding/test/reftest/bug863728-1.html
@ -0,0 +1 @@
+<meta charset=iso-2022-cn>
--- a/dom/encoding/test/reftest/bug863728-2-ref.html
+++ b/dom/encoding/test/reftest/bug863728-2-ref.html
@ -0,0 +1,3 @@
+<!DOCTYPE html>
+<meta charset=utf-8>
+<iframe src="data:text/html;charset=utf-8,<2C><iframe src='data:text/html;charset=utf-8,PASS'></iframe>" width=400 height=200></iframe>
--- a/dom/encoding/test/reftest/bug863728-2.html
+++ b/dom/encoding/test/reftest/bug863728-2.html
@ -0,0 +1,17 @@
+<!DOCTYPE html>
+<html class=reftest-wait>
+<meta charset=utf-8>
+<script>
+function runTest() { // Adds a nested "PASS" iframe to the first child frame, then clears the root's class once it loads.
+  var r = document.documentElement; // root element of this (outer) test page
+  var d = window[0].document; // document of the first child frame
+  var i = d.createElement("iframe");
+  i.src = "data:text/html,PASS";
+  i.onload = function() {
+    r.removeAttribute("class"); // drops the class attribute from the outer page's root element
+  }
+  d.body.appendChild(i); // insert the new iframe into the child frame's body
+}
+</script>
+<body onload="runTest();">
+<iframe src="bug863728-1.html" width=400 height=200></iframe>
--- a/dom/encoding/test/reftest/bug863728-3-ref.html
+++ b/dom/encoding/test/reftest/bug863728-3-ref.html
--- a/dom/encoding/test/reftest/bug863728-3.html
+++ b/dom/encoding/test/reftest/bug863728-3.html
@ -0,0 +1,5 @@
+<link rel=stylesheet href="data:text/css;charset=iso-2022-kr,html { background-color: red }">
+<link rel=stylesheet href="data:text/css,html { background-color: red }" charset="iso-2022-cn-ext">
+<link rel=stylesheet href='data:text/css,@charset "csiso2022kr"; html { background-color: red }'>
+<script src="data:text/javascript;charset=iso-2022-kr,document.write('FAIL');"></script>
+<script src="data:text/javascript,document.write('FAIL');" charset="iso-2022-kr"></script>
--- a/dom/encoding/test/reftest/reftest.list
+++ b/dom/encoding/test/reftest/reftest.list
@ -0,0 +1,3 @@
+== bug863728-1.html bug863728-1-ref.html
+== bug863728-2.html bug863728-2-ref.html
+== bug863728-3.html bug863728-3-ref.html
--- a/dom/encoding/test/test_TextDecoder.js
+++ b/dom/encoding/test/test_TextDecoder.js
@ -349,11 +349,10 @@ function testDecoderGetEncoding()
    {encoding: "iso-2022-jp", labels: ["csiso2022jp", "iso-2022-jp"]},
    {encoding: "shift_jis", labels: ["csshiftjis", "ms_kanji", "shift-jis", "shift_jis", "sjis", "windows-31j", "x-sjis"]},
    {encoding: "euc-kr", labels: ["cseuckr", "csksc56011987", "euc-kr", "iso-ir-149", "korean", "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601", "windows-949"]},
-    {encoding: "iso-2022-kr", labels: ["csiso2022kr", "iso-2022-kr"]},
    {encoding: "utf-16le", labels: ["utf-16", "utf-16le"]},
    {encoding: "utf-16be", labels: ["utf-16be"]},
    {encoding: "x-user-defined", labels: ["x-user-defined"]},
-    {error: "TypeError", labels: ["x-windows-949", "\u0130SO-8859-1"]},
+    {error: "TypeError", labels: ["x-windows-949", "\u0130SO-8859-1", "csiso2022kr", "iso-2022-kr", "iso-2022-cn", "iso-2022-cn-ext", "replacement"]},
  ];

  for (var le of labelEncodings) {
--- a/dom/encoding/test/test_stringencoding.html
+++ b/dom/encoding/test/test_stringencoding.html
@ -24,7 +24,6 @@ setup({explicit_timeout: true});
 <script type="text/javascript" src="unit/test_iso-2022-jp.js"></script>
 <script type="text/javascript" src="unit/test_shift_jis.js"></script>
 <script type="text/javascript" src="unit/test_euc-kr.js"></script>
-<script type="text/javascript" src="unit/test_iso-2022-kr.js"></script>

 </body>
 </html>
--- a/dom/encoding/test/unit/mochitest.ini
+++ b/dom/encoding/test/unit/mochitest.ini
@ -6,6 +6,5 @@
 [test_gbk.js]
 [test_hz-gb-2312.js]
 [test_iso-2022-jp.js]
-[test_iso-2022-kr.js]
 [test_shift_jis.js]
 [test_singlebytes.js]
--- a/dom/encoding/test/unit/test_iso-2022-kr.js
+++ b/dom/encoding/test/unit/test_iso-2022-kr.js
--- a/dom/encoding/test/unit/test_singlebytes.js
+++ b/dom/encoding/test/unit/test_singlebytes.js
@ -297,7 +297,7 @@ test(

 test(
  function () {
-    var encodings = ["utf-8", "ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "iso-2022-kr", "x-user-defined"];
+    var encodings = ["utf-8", "ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"];

    encodings.forEach(function (encoding) {
      var string = '', bytes = [];
@ -308,8 +308,6 @@ test(
          continue;
        if (encoding === "iso-2022-jp" && i === 0x1B)
          continue;
-        if (encoding === "iso-2022-kr" && (i === 0x0E || i === 0x0F || i === 0x1B))
-          continue;

        string += String.fromCharCode(i);
        bytes.push(i);
@ -344,7 +342,7 @@ test(

    var utf_encodings = ["utf-8", "utf-16le", "utf-16be"];

-    var legacy_encodings = ["ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "iso-2022-kr", "x-user-defined"];
+    var legacy_encodings = ["ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"];

    utf_encodings.forEach(function(encoding) {
      assert_equals(TextDecoder(encoding).encoding, encoding);
--- a/dom/encoding/test/unit/xpcshell.ini
+++ b/dom/encoding/test/unit/xpcshell.ini
@ -8,6 +8,5 @@ tail =
 [test_gbk.js]
 [test_hz-gb-2312.js]
 [test_iso-2022-jp.js]
-[test_iso-2022-kr.js]
 [test_shift_jis.js]
 [test_singlebytes.js]
--- a/extensions/universalchardet/src/base/LangThaiModel.cpp
+++ b/extensions/universalchardet/src/base/LangThaiModel.cpp
@ -185,5 +185,5 @@ const SequenceModel TIS620ThaiModel =
  ThaiLangModel,
  (float)0.926386,
  false,
-  "TIS-620"
+  "windows-874"
 };
--- a/extensions/universalchardet/tests/test_bug488426.html
+++ b/extensions/universalchardet/tests/test_bug488426.html
@ -22,7 +22,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=488426
 <script class="testbody" type="text/javascript">
 /** Test for Bug 488426 **/
 CharsetDetectionTests("bug488426_text.html",
-		      "TIS-620",
+		      "windows-874",
 		      new Array("universal_charset_detector"));
 </script>
 </pre>
--- a/intl/uconv/src/charsetData.properties
+++ b/intl/uconv/src/charsetData.properties
@ -87,6 +87,7 @@ x-mac-hebrew.notForBrowser              = true
 x-imap4-modified-utf7.notForBrowser     = true
 utf-7.notForBrowser                     = true
 ibm864.notForBrowser                    = true
+replacement.notForBrowser               = true

 x-mac-arabic.isInternal                 = true
 x-mac-farsi.isInternal                  = true
@ -95,6 +96,7 @@ x-imap4-modified-utf7.isInternal        = true
 utf-7.isInternal                        = true
 t.61-8bit.isInternal                    = true
 ibm864.isInternal                       = true
+replacement.isInternal                  = true

 t.61-8bit.notForOutgoing                = true
 utf-7.notForOutgoing                    = true
@ -110,7 +112,7 @@ iso-8859-8-e.notForOutgoing             = true
 iso-8859-8.notForOutgoing               = true
 iso-2022-kr.notForOutgoing              = true
 x-johab.notForOutgoing                  = true
-
+replacement.notForOutgoing              = true

 // XXX : there are some entries only necessary for Gtk/Xlib builds
 // to map  XLFD registry-encoding pairs to langGroups. they can be
@ -119,6 +121,7 @@ x-johab.notForOutgoing                  = true
 // XXX : todo: move to something based on BCP 47 (RFC 5646);
 // these should primarily specify script (and sometimes region),
 // but NOT language.
+// See also https://bugzilla.mozilla.org/show_bug.cgi?id=756022
 // e.g. x-western      -> *-Latn-155 (Western Europe)
 //      x-central-euro -> *-Latn-151 (Eastern Europe)
 //      x-baltic       -> *-Latn-154 (Northern Europe)
@ -194,6 +197,7 @@ utf-16be.LangGroup                 = x-unicode
 utf-16le.LangGroup                 = x-unicode
 utf-7.LangGroup                    = x-unicode
 x-imap4-modified-utf7.LangGroup    = x-unicode
+replacement.LangGroup              = x-unicode
 viscii.LangGroup                   = x-western
 x-viet-tcvn5712.LangGroup          = x-western
 x-viet-vps.LangGroup               = x-western
@ -244,3 +248,4 @@ euc-kr.isMultibyte          = true
 x-johab.isMultibyte         = true
 utf-7.isMultibyte           = true
 utf-8.isMultibyte           = true
+replacement.isMultibyte     = true
--- a/intl/uconv/src/moz.build
+++ b/intl/uconv/src/moz.build
@ -11,6 +11,7 @@ UNIFIED_SOURCES += [
    'nsCP1252ToUnicode.cpp',
    'nsISO88591ToUnicode.cpp',
    'nsMacRomanToUnicode.cpp',
+    'nsReplacementToUnicode.cpp',
    'nsScriptableUConv.cpp',
    'nsTextToSubURI.cpp',
    'nsUConvModule.cpp',
--- a/intl/uconv/src/nsReplacementToUnicode.cpp
+++ b/intl/uconv/src/nsReplacementToUnicode.cpp
@ -0,0 +1,56 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsReplacementToUnicode.h"
+
+nsReplacementToUnicode::nsReplacementToUnicode() // Constructs the decoder in its initial "no input seen yet" state.
+ : mSeenByte(false)
+{
+}
+
+NS_IMETHODIMP // Handles the first non-empty input by emitting one U+FFFD (or an error); later calls emit nothing until Reset().
+nsReplacementToUnicode::Convert(const char* aSrc, // input bytes; their content is never read here
+                                int32_t* aSrcLength, // in: input length; overwritten on the error and no-room paths below
+                                PRUnichar* aDest, // receives the single U+FFFD when one is written
+                                int32_t* aDestLength) // in: checked for zero before writing; out: code units written
+{
+  if (mSeenByte || !(*aSrcLength)) { // input was already handled earlier, or there is no input now
+    *aDestLength = 0;
+    return NS_PARTIAL_MORE_INPUT;
+  }
+  if (mErrBehavior == kOnError_Signal) { // report the input as illegal instead of emitting U+FFFD
+    mSeenByte = true; // later calls take the no-output branch above
+    *aSrcLength = 0;
+    *aDestLength = 0;
+    return NS_ERROR_ILLEGAL_INPUT;
+  }
+  if (!(*aDestLength)) { // destination has no room for the replacement character
+    *aSrcLength = -1; // NOTE(review): presumably signals that no input was consumed -- confirm against the decoder interface contract
+    return NS_PARTIAL_MORE_OUTPUT; // mSeenByte is still false, so a later call can emit U+FFFD
+  }
+  mSeenByte = true;
+  *aDest = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
+  *aDestLength = 1;
+  return NS_PARTIAL_MORE_INPUT; // *aSrcLength is left unchanged on this path
+}
+
+NS_IMETHODIMP // Reports the output size: 1 if no input has been seen yet and the input is non-empty, otherwise 0.
+nsReplacementToUnicode::GetMaxLength(const char* aSrc, // not read
+                          int32_t aSrcLength,
+                          int32_t* aDestLength) // out: set to 1 or 0
+{
+  if (!mSeenByte && aSrcLength > 0) { // first non-empty input: one code unit of output
+    *aDestLength = 1;
+  } else { // input already seen, or no input supplied
+    *aDestLength = 0;
+  }
+  return NS_EXACT_LENGTH;
+}
+
+NS_IMETHODIMP // Returns the decoder to its initial state.
+nsReplacementToUnicode::Reset()
+{
+  mSeenByte = false; // the next non-empty Convert() call is treated as the first input again
+  return NS_OK;
+}
--- a/intl/uconv/src/nsReplacementToUnicode.h
+++ b/intl/uconv/src/nsReplacementToUnicode.h
@ -0,0 +1,37 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsReplacementToUnicode_h_
+#define nsReplacementToUnicode_h_
+
+#include "nsUCSupport.h"
+
+#define NS_REPLACEMENTTOUNICODE_CID \
+  { 0xd24b24da, 0xc607, 0x489a, \
+    { 0xb5, 0xf0, 0x67, 0x91, 0xf4, 0x45, 0x45, 0x6d } }
+
+#define NS_REPLACEMENTTOUNICODE_CONTRACTID \
+  "@mozilla.org/intl/unicode/decoder;1?charset=replacement"
+
+class nsReplacementToUnicode : public nsBasicDecoderSupport // Decoder class for the "replacement" charset.
+{
+public:
+  nsReplacementToUnicode();
+
+  NS_IMETHOD Convert(const char* aSrc, // decode entry point; takes source and destination buffers with in/out lengths
+                     int32_t* aSrcLength,
+                     PRUnichar* aDest,
+                     int32_t* aDestLength);
+
+  NS_IMETHOD GetMaxLength(const char* aSrc, // reports the needed output length through aDestLength
+                          int32_t aSrcLength,
+                          int32_t* aDestLength);
+
+  NS_IMETHOD Reset(); // restores the initial decoder state
+
+private:
+  bool mSeenByte; // whether non-empty input has already been handled since construction or the last Reset()
+};
+
+#endif // nsReplacementToUnicode_h_
--- a/intl/uconv/src/nsUConvModule.cpp
+++ b/intl/uconv/src/nsUConvModule.cpp
@ -21,6 +21,7 @@
 #include "nsISO88591ToUnicode.h"
 #include "nsCP1252ToUnicode.h"
 #include "nsMacRomanToUnicode.h"
+#include "nsReplacementToUnicode.h"
 #include "nsUTF8ToUnicode.h"
 #include "nsUnicodeToISO88591.h"
 #include "nsUnicodeToCP1252.h"
@ -219,6 +220,7 @@ NS_UCONV_REG_UNREG("ISO-8859-1", NS_ISO88591TOUNICODE_CID, NS_UNICODETOISO88591_
 NS_UCONV_REG_UNREG("windows-1252", NS_CP1252TOUNICODE_CID, NS_UNICODETOCP1252_CID)
 NS_UCONV_REG_UNREG("macintosh", NS_MACROMANTOUNICODE_CID, NS_UNICODETOMACROMAN_CID)
 NS_UCONV_REG_UNREG("UTF-8", NS_UTF8TOUNICODE_CID, NS_UNICODETOUTF8_CID)
+NS_UCONV_REG_UNREG("replacement", NS_REPLACEMENTTOUNICODE_CID, NS_UNICODETOUTF8_CID)

  // ucvlatin
 NS_UCONV_REG_UNREG("us-ascii", NS_ASCIITOUNICODE_CID, NS_UNICODETOASCII_CID)
@ -332,6 +334,7 @@ NS_CONVERTER_REGISTRY_END

 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF8)
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF8ToUnicode)
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsReplacementToUnicode)

 // ucvlatin
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF7ToUnicode)
@ -506,6 +509,7 @@ NS_DEFINE_NAMED_CID(NS_ISO88591TOUNICODE_CID);
 NS_DEFINE_NAMED_CID(NS_CP1252TOUNICODE_CID);
 NS_DEFINE_NAMED_CID(NS_MACROMANTOUNICODE_CID);
 NS_DEFINE_NAMED_CID(NS_UTF8TOUNICODE_CID);
+NS_DEFINE_NAMED_CID(NS_REPLACEMENTTOUNICODE_CID);
 NS_DEFINE_NAMED_CID(NS_UNICODETOISO88591_CID);
 NS_DEFINE_NAMED_CID(NS_UNICODETOCP1252_CID);
 NS_DEFINE_NAMED_CID(NS_UNICODETOMACROMAN_CID);
@ -690,6 +694,7 @@ static const mozilla::Module::CIDEntry kUConvCIDs[] = {
  { &kNS_ISO88591TOUNICODE_CID, false, nullptr, nsISO88591ToUnicodeConstructor },
  { &kNS_CP1252TOUNICODE_CID, false, nullptr, nsCP1252ToUnicodeConstructor },
  { &kNS_MACROMANTOUNICODE_CID, false, nullptr, nsMacRomanToUnicodeConstructor },
+  { &kNS_REPLACEMENTTOUNICODE_CID, false, nullptr, nsReplacementToUnicodeConstructor },
  { &kNS_UTF8TOUNICODE_CID, false, nullptr, nsUTF8ToUnicodeConstructor },
  { &kNS_UNICODETOISO88591_CID, false, nullptr, nsUnicodeToISO88591Constructor },
  { &kNS_UNICODETOCP1252_CID, false, nullptr, nsUnicodeToCP1252Constructor },
@ -877,6 +882,7 @@ static const mozilla::Module::ContractIDEntry kUConvContracts[] = {
  { NS_ISO88591TOUNICODE_CONTRACTID, &kNS_ISO88591TOUNICODE_CID },
  { NS_CP1252TOUNICODE_CONTRACTID, &kNS_CP1252TOUNICODE_CID },
  { NS_MACROMANTOUNICODE_CONTRACTID, &kNS_MACROMANTOUNICODE_CID },
+  { NS_REPLACEMENTTOUNICODE_CONTRACTID, &kNS_REPLACEMENTTOUNICODE_CID },
  { NS_UTF8TOUNICODE_CONTRACTID, &kNS_UTF8TOUNICODE_CID },
  { NS_UNICODETOISO88591_CONTRACTID, &kNS_UNICODETOISO88591_CID },
  { NS_UNICODETOCP1252_CONTRACTID, &kNS_UNICODETOCP1252_CID },
--- a/intl/uconv/tests/test_long_doc.html
+++ b/intl/uconv/tests/test_long_doc.html
@ -34,7 +34,10 @@ while (decoderList.hasMore()) {

    // Skip UTF-16 variants. (Other non-ASCII compatible encodings will be
    // ignored anyway because of bug 601429.)
-    if (decoder.substring(0, 6)  == "UTF-16")
+    if (decoder.substring(0, 6)  == "UTF-16" || 
+        decoder == "replacement" ||
+        decoder == "ISO-2022-KR" ||
+        decoder == "ISO-2022-CN")
 	continue;

    data = encodeURI(testContent);
--- a/intl/uconv/tests/unit/test_bug718500.js
+++ b/intl/uconv/tests/unit/test_bug718500.js
@ -99,6 +99,7 @@ var encoderList = [
  "gbk",
  "HZ-GB-2312",
  "gb18030",
+  "replacement",
 ];

 var decoderList = [
@ -183,6 +184,7 @@ var decoderList = [
  "gb18030",
  "ISO-2022-KR",
  "ISO-2022-CN",
+  "replacement",
 ];

 function verifyList(aEnumerator, aList)
--- a/layout/base/tests/test_bug399284.html
+++ b/layout/base/tests/test_bug399284.html
@ -32,6 +32,10 @@ while (decoderList.hasMore()) {
        data = encodeUTF16BE(testContent);
    else if (decoder == "UTF-16" || decoder == "UTF-16LE")
        data = encodeUTF16LE(testContent);
+    else if (decoder == "replacement" || 
+             decoder == "ISO-2022-KR" || 
+             decoder == "ISO-2022-CN")
+        continue;
    else
        data = encodeURI(testContent);
    var dataURI = "data:text/html;charset=" + decoder + "," + data;
--- a/layout/reftests/reftest.list
+++ b/layout/reftests/reftest.list
@ -343,3 +343,6 @@ skip-if(B2G) include box-sizing/reftest.list

 # invalidation - only run on B2G
 skip-if(!B2G) include invalidation/reftest.list
+
+# encodings
+include ../../dom/encoding/test/reftest/reftest.list
--- a/netwerk/base/src/nsUnicharStreamLoader.cpp
+++ b/netwerk/base/src/nsUnicharStreamLoader.cpp
@ -181,7 +181,16 @@ nsUnicharStreamLoader::DetermineCharset()
    do_GetService(kCharsetConverterManagerCID, &rv);
  if (NS_FAILED(rv)) return rv;

-  rv = ccm->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mDecoder));
+  // Sadly, nsIUnicharStreamLoader is exposed to extensions, so we can't
+  // assume mozilla::css::Loader to be the only caller. Since legacy
+  // charset alias code doesn't know about the replacement encoding,
+  // special-case it here, but let other stuff go through legacy alias
+  // resolution for now.
+  if (mCharset.EqualsLiteral("replacement")) {
+    rv = ccm->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mDecoder));
+  } else {
+    rv = ccm->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mDecoder));
+  }
  if (NS_FAILED(rv)) return rv;

  // Process the data into mBuffer
--- a/parser/html/nsHtml5MetaScannerCppSupplement.h
+++ b/parser/html/nsHtml5MetaScannerCppSupplement.h
@ -2,8 +2,6 @@
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include "nsICharsetConverterManager.h"
-#include "nsServiceManagerUtils.h"
 #include "nsEncoderDecoderUtils.h"
 #include "nsTraceRefcnt.h"

@ -12,15 +10,12 @@
 using mozilla::dom::EncodingUtils;

 void
-nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsIUnicodeDecoder** decoder, nsACString& charset)
+nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsACString& charset)
 {
  readable = bytes;
  stateLoop(stateSave);
  readable = nullptr;
-  if (mUnicodeDecoder) {
-    mUnicodeDecoder.forget(decoder);
-    charset.Assign(mCharset);
-  }
+  charset.Assign(mCharset);
 }

 bool
@ -29,47 +24,17 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
  // This code needs to stay in sync with
  // nsHtml5StreamParser::internalEncodingDeclaration. Unfortunately, the
  // trickery with member fields here leads to some copy-paste reuse. :-(
-  nsresult res = NS_OK;
-  nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
-  if (NS_FAILED(res)) {
-    NS_ERROR("Could not get CharsetConverterManager service.");
-    return false;
-  }
+  nsAutoCString label;
+  CopyUTF16toUTF8(*charset, label);
  nsAutoCString encoding;
-  CopyUTF16toUTF8(*charset, encoding);
-  encoding.Trim(" \t\r\n\f");
-  if (encoding.LowerCaseEqualsLiteral("utf-16") ||
-      encoding.LowerCaseEqualsLiteral("utf-16be") ||
-      encoding.LowerCaseEqualsLiteral("utf-16le")) {
+  if (!EncodingUtils::FindEncodingForLabel(label, encoding)) {
+    return false;
+  }
+  if (encoding.EqualsLiteral("UTF-16BE") ||
+      encoding.EqualsLiteral("UTF-16LE")) {
    mCharset.Assign("UTF-8");
-    res = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
-    if (NS_FAILED(res)) {
-      NS_ERROR("Could not get decoder for UTF-8.");
-      return false;
-    }
-    return true;
-  }
-  nsAutoCString preferred;
-  if (!EncodingUtils::FindEncodingForLabel(encoding, preferred)) {
-    return false;
-  }
-  if (preferred.LowerCaseEqualsLiteral("utf-16") ||
-      preferred.LowerCaseEqualsLiteral("utf-16be") ||
-      preferred.LowerCaseEqualsLiteral("utf-16le") ||
-      preferred.LowerCaseEqualsLiteral("utf-7") ||
-      preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) {
-    return false;
-  }
-  res = convManager->GetUnicodeDecoderRaw(preferred.get(), getter_AddRefs(mUnicodeDecoder));
-  if (res == NS_ERROR_UCONV_NOCONV) {
-    return false;
-  } else if (NS_FAILED(res)) {
-    NS_ERROR("Getting an encoding decoder failed in a bad way.");
-    mUnicodeDecoder = nullptr;
-    return false;
-  } else {
-    NS_ASSERTION(mUnicodeDecoder, "Getter nsresult and object don't match.");
-    mCharset.Assign(preferred);
    return true;
  }
+  mCharset.Assign(encoding);
+  return true;
 }
--- a/parser/html/nsHtml5MetaScannerHSupplement.h
+++ b/parser/html/nsHtml5MetaScannerHSupplement.h
@ -3,10 +3,9 @@
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 private:
-  nsCOMPtr<nsIUnicodeDecoder>  mUnicodeDecoder;
  nsCString mCharset;
  inline int32_t read() {
    return readable->read();
  }
 public:
-  void sniff(nsHtml5ByteReadable* bytes, nsIUnicodeDecoder** decoder, nsACString& charset);
+  void sniff(nsHtml5ByteReadable* bytes, nsACString& charset);
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@ -238,8 +238,16 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  if (aConf == eBestAnswer || aConf == eSureAnswer) {
    mFeedChardet = false; // just in case
+    nsAutoCString encoding;
+    if (!EncodingUtils::FindEncodingForLabel(nsDependentCString(aCharset),
+                                             encoding)) {
+      return NS_OK;
+    }
+    if (encoding.EqualsLiteral("replacement")) {
+      return NS_OK;
+    }
    if (HasDecoder()) {
-      if (mCharset.Equals(aCharset)) {
+      if (mCharset.Equals(encoding)) {
        NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
            "Why are we running chardet at all?");
        mCharsetSource = kCharsetFromAutoDetection;
@ -247,8 +255,7 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
      } else {
        // We've already committed to a decoder. Request a reload from the
        // docshell.
-        nsAutoCString charset(aCharset);
-        mTreeBuilder->NeedsCharsetSwitchTo(charset,
+        mTreeBuilder->NeedsCharsetSwitchTo(encoding,
                                           kCharsetFromAutoDetection,
                                           0);
        FlushTreeOpsAndDisarmTimer();
@ -257,7 +264,7 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
    } else {
      // Got a confident answer from the sniffing buffer. That code will
      // take care of setting up the decoder.
-      mCharset.Assign(aCharset);
+      mCharset.Assign(encoding);
      mCharsetSource = kCharsetFromAutoDetection;
      mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
    }
@ -299,7 +306,8 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
  nsresult rv = NS_OK;
  nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);
-  rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
+  rv = convManager->GetUnicodeDecoderRaw(mCharset.get(),
+                                         getter_AddRefs(mUnicodeDecoder));
  if (rv == NS_ERROR_UCONV_NOCONV) {
    mCharset.AssignLiteral("windows-1252"); // lower case is the raw form
    mCharsetSource = kCharsetFromFallback;
@ -307,16 +315,6 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
  }
  NS_ENSURE_SUCCESS(rv, rv);
-  return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
-}
-
-nsresult
-nsHtml5StreamParser::WriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment, // can be null
-                                                          uint32_t aCount,
-                                                          uint32_t* aWriteCount)
-{
-  NS_ASSERTION(IsParserThread(), "Wrong thread!");
-  nsresult rv = NS_OK;
  if (mSniffingBuffer) {
    uint32_t writeCount;
    rv = WriteStreamBytes(mSniffingBuffer, mSniffingLength, &writeCount);
@ -710,29 +708,22 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
  // if we get here, there either was no BOM or the BOM sniffing isn't complete
  // yet
  
+  MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark,
+             "Should not come here if BOM was found.");
+  MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent,
+             "kCharsetFromOtherComponent is for XSLT.");
+
  if (mBomState == BOM_SNIFFING_OVER &&
-    mCharsetSource >= kCharsetFromChannel) {
-    // There was no BOM and the charset came from channel or higher. mCharset
-    // still contains the charset from the channel or higher as set by an
+    mCharsetSource == kCharsetFromChannel) {
+    // There was no BOM and the charset came from channel. mCharset
+    // still contains the charset from the channel as set by an
    // earlier call to SetDocumentCharset(), since we didn't find a BOM and
-    // overwrite mCharset.
-    nsCOMPtr<nsICharsetConverterManager> convManager =
-      do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID);
-    convManager->GetUnicodeDecoder(mCharset.get(),
-                                   getter_AddRefs(mUnicodeDecoder));
-    if (mUnicodeDecoder) {
-      mFeedChardet = false;
-      mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
-      mMetaScanner = nullptr;
-      return WriteSniffingBufferAndCurrentSegment(aFromSegment,
-                                                  aCount,
-                                                  aWriteCount);
-    } else {
-      // nsHTMLDocument is supposed to make sure this does not happen. Let's
-      // deal with this anyway, since who knows how kCharsetFromOtherComponent
-      // is used.
-      mCharsetSource = kCharsetFromFallback;
-    }
+    // overwrite mCharset. (Note that if the user has overridden the charset,
+    // we don't come here but check <meta> for XSS-dangerous charsets first.)
+    mFeedChardet = false;
+    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+    return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
+      aCount, aWriteCount);
  }

  if (!mMetaScanner && (mMode == NORMAL ||
@ -748,17 +739,31 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
    if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
      nsHtml5ByteReadable readable(aFromSegment, aFromSegment +
          countToSniffingLimit);
-      mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
-      if (mUnicodeDecoder) {
-        // meta scan successful
+      nsAutoCString encoding;
+      mMetaScanner->sniff(&readable, encoding);
+      if (!encoding.IsEmpty()) {
+        // meta scan successful; honor overrides unless meta is XSS-dangerous
+        if ((mCharsetSource == kCharsetFromParentForced ||
+             mCharsetSource == kCharsetFromUserForced) &&
+            EncodingUtils::IsAsciiCompatible(encoding)) {
+          // Honor override
+          return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
+            aFromSegment, aCount, aWriteCount);
+        }
+        mCharset.Assign(encoding);
        mCharsetSource = kCharsetFromMetaPrescan;
        mFeedChardet = false;
        mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
-        mMetaScanner = nullptr;
-        return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount,
-            aWriteCount);
+        return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
+          aFromSegment, aCount, aWriteCount);
      }
    }
+    if (mCharsetSource == kCharsetFromParentForced ||
+        mCharsetSource == kCharsetFromUserForced) {
+      // meta not found, honor override
+      return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
+        aFromSegment, aCount, aWriteCount);
+    }
    return FinalizeSniffing(aFromSegment, aCount, aWriteCount,
        countToSniffingLimit);
  }
@ -766,16 +771,23 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
  // not the last buffer
  if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
    nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
-    mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
-    if (mUnicodeDecoder) {
-      // meta scan successful
+    nsAutoCString encoding;
+    mMetaScanner->sniff(&readable, encoding);
+    if (!encoding.IsEmpty()) {
+      // meta scan successful; honor overrides unless meta is XSS-dangerous
+      if ((mCharsetSource == kCharsetFromParentForced ||
+           mCharsetSource == kCharsetFromUserForced) &&
+          EncodingUtils::IsAsciiCompatible(encoding)) {
+        // Honor override
+        return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
+            aCount, aWriteCount);
+      }
+      mCharset.Assign(encoding);
      mCharsetSource = kCharsetFromMetaPrescan;
      mFeedChardet = false;
      mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
-      mMetaScanner = nullptr;
-      return WriteSniffingBufferAndCurrentSegment(aFromSegment, 
-                                                  aCount,
-                                                  aWriteCount);
+      return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
+        aCount, aWriteCount);
    }
  }

@ -975,9 +987,11 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
  mFeedChardet = false;

  // Instantiate the converter here to avoid BOM sniffing.
-  nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
+  nsCOMPtr<nsICharsetConverterManager> convManager =
+    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);
-  rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
+  rv = convManager->GetUnicodeDecoderRaw(mCharset.get(),
+                                         getter_AddRefs(mUnicodeDecoder));
  // if we failed to get a decoder, there will be fallback, so don't propagate
  //  the error.
  if (NS_FAILED(rv)) {
--- a/parser/html/nsHtml5StreamParser.h
+++ b/parser/html/nsHtml5StreamParser.h
@ -322,21 +322,6 @@ class nsHtml5StreamParser : public nsIStreamListener,
                                                                  uint32_t aCount,
                                                                  uint32_t* aWriteCount);

-    /**
-     * Write the sniffing buffer into the Unicode decoder followed by the
-     * current network buffer.
-     *
-     * @param aFromSegment The current network buffer or null if the sniffing
-     *                     buffer is being flushed due to network stream ending.
-     * @param aCount       The number of bytes in aFromSegment (ignored if
-     *                     aFromSegment is null)
-     * @param aWriteCount  Return value for how many bytes got read from the
-     *                     buffer.
-     */
-    nsresult WriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment,
-                                                  uint32_t aCount,
-                                                  uint32_t* aWriteCount);
-
    /**
     * Initialize the Unicode decoder, mark the BOM as the source and
     * drop the sniffer.