From 7e6d22b4290303f6678a9e406b6a2b767a9a8893 Mon Sep 17 00:00:00 2001
From: Masatoshi Kimura <VYV03354@nifty.ne.jp>
Date: Mon, 10 Dec 2012 09:10:28 -0500
Subject: [PATCH] Bug 638379 - Part 1: Implement kOnError_Recover in the UTF-8
 decoder. r=smontagu

---
 intl/locale/public/nsCharsetAlias.h  |  2 ++
 intl/uconv/src/nsScriptableUConv.cpp | 48 +++++++++++++++++---------
 intl/uconv/src/nsUTF8ToUnicode.cpp   | 50 +++++++++++++++++++---------
 3 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/intl/locale/public/nsCharsetAlias.h b/intl/locale/public/nsCharsetAlias.h
index 94b03762954..17419043b69 100644
--- a/intl/locale/public/nsCharsetAlias.h
+++ b/intl/locale/public/nsCharsetAlias.h
@@ -10,10 +10,12 @@
 #include "nsStringGlue.h"
 
 class nsCharsetConverterManager;
+class nsScriptableUnicodeConverter;
 
 class nsCharsetAlias
 {
    friend class nsCharsetConverterManager;
+   friend class nsScriptableUnicodeConverter;
    static nsresult GetPreferredInternal(const nsACString& aAlias, nsACString& aResult);
 public:
    static nsresult GetPreferred(const nsACString& aAlias, nsACString& aResult);
diff --git a/intl/uconv/src/nsScriptableUConv.cpp b/intl/uconv/src/nsScriptableUConv.cpp
index 0a03f67fa40..1dcc0c12878 100644
--- a/intl/uconv/src/nsScriptableUConv.cpp
+++ b/intl/uconv/src/nsScriptableUConv.cpp
@@ -13,6 +13,7 @@
 #include "nsIStringStream.h"
 #include "nsCRT.h"
 #include "nsComponentManagerUtils.h"
+#include "nsCharsetAlias.h"
 
 static int32_t          gInstanceCount = 0;
 
@@ -257,22 +258,39 @@ nsScriptableUnicodeConverter::InitConverter()
   mEncoder = nullptr;
 
   nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
+  if (NS_FAILED(rv) || !ccm) {
+    return rv;
+  }
 
-  if (NS_SUCCEEDED(rv) && ccm) {
-    // get charset atom due to getting unicode converter
-    
-    // get an unicode converter
-    rv = ccm->GetUnicodeEncoder(mCharset.get(), getter_AddRefs(mEncoder));
-    if(NS_SUCCEEDED(rv)) {
-      rv = mEncoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (PRUnichar)'?');
-      if(NS_SUCCEEDED(rv)) {
-        rv = mIsInternal ?
-          ccm->GetUnicodeDecoderInternal(mCharset.get(),
-                                         getter_AddRefs(mDecoder)) :
-          ccm->GetUnicodeDecoder(mCharset.get(),
-                                 getter_AddRefs(mDecoder));
-      }
-    }
+  // Get a Unicode encoder.
+  rv = ccm->GetUnicodeEncoder(mCharset.get(), getter_AddRefs(mEncoder));
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  rv = mEncoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (PRUnichar)'?');
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  nsAutoCString charset;
+  rv = mIsInternal ? nsCharsetAlias::GetPreferredInternal(mCharset, charset)
+                   : nsCharsetAlias::GetPreferred(mCharset, charset);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  rv = ccm->GetUnicodeDecoderRaw(charset.get(), getter_AddRefs(mDecoder));
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  // The UTF-8 decoder used to throw regardless of the error behavior.
+  // Simulate the old behavior for compatibility with legacy callers
+  // (including add-ons). If callers want control over the behavior,
+  // they should switch to TextDecoder.
+  if (charset.EqualsLiteral("UTF-8")) {
+    mDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
   }
 
   return rv ;
diff --git a/intl/uconv/src/nsUTF8ToUnicode.cpp b/intl/uconv/src/nsUTF8ToUnicode.cpp
index fc76259c26b..cdac0547cc3 100644
--- a/intl/uconv/src/nsUTF8ToUnicode.cpp
+++ b/intl/uconv/src/nsUTF8ToUnicode.cpp
@@ -188,12 +188,11 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
 
   out = aDest;
   if (mState == 0xFF) {
-    // Emit supplementary character left over from previous iteration. If the
-    // buffer size is insufficient, treat it as an illegal character.
+    // Emit the supplementary character left over from the previous iteration.
+    // It is the caller's responsibility to provide a sufficiently large buffer.
     if (aDestLen < 2) {
-      NS_ERROR("Output buffer insufficient to hold supplementary character");
-      mState = 0;
-      return NS_ERROR_ILLEGAL_INPUT;
+      *aSrcLength = *aDestLength = 0;
+      return NS_OK_UDEC_MOREOUTPUT;
     }
     out = EmitSurrogatePair(mUcs4, out);
     mUcs4 = 0;
@@ -225,8 +224,12 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
         mBytes = 1;
       } else if (c < 0xC2) {  // C0/C1
         // Overlong 2 octet sequence
-        res = NS_ERROR_ILLEGAL_INPUT;
-        break;
+        if (mErrBehavior == kOnError_Signal) {
+          res = NS_ERROR_ILLEGAL_INPUT;
+          break;
+        }
+        *out++ = UCS2_REPLACEMENT_CHAR;
+        mFirst = false;
       } else if (c < 0xE0) {  // C2..DF
         // First octet of 2 octet sequence
         mUcs4 = c;
@@ -248,12 +251,16 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
       } else {  // F5..FF
         /* Current octet is neither in the US-ASCII range nor a legal first
          * octet of a multi-octet sequence.
-         *
-         * Return an error condition. Caller is responsible for flushing and
-         * refilling the buffer and resetting state.
          */
-        res = NS_ERROR_ILLEGAL_INPUT;
-        break;
+        if (mErrBehavior == kOnError_Signal) {
+          /* Return an error condition. Caller is responsible for flushing and
+           * refilling the buffer and resetting state.
+           */
+          res = NS_ERROR_ILLEGAL_INPUT;
+          break;
+        }
+        *out++ = UCS2_REPLACEMENT_CHAR;
+        mFirst = false;
       }
     } else {
       // When mState is non-zero, we expect a continuation of the multi-octet
@@ -270,8 +277,14 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
                               mUcs4 == 0x100000 && c > 0x8F)) {  // F4 90..BF
             // illegal sequences or sequences converted into illegal ranges.
             in--;
-            res = NS_ERROR_ILLEGAL_INPUT;
-            break;
+            if (mErrBehavior == kOnError_Signal) {
+              res = NS_ERROR_ILLEGAL_INPUT;
+              break;
+            }
+            *out++ = UCS2_REPLACEMENT_CHAR;
+            mState = 0;
+            mFirst = false;
+            continue;
           }
         }
 
@@ -315,8 +328,13 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
          * for flushing and refilling the buffer and resetting state.
          */
         in--;
-        res = NS_ERROR_ILLEGAL_INPUT;
-        break;
+        if (mErrBehavior == kOnError_Signal) {
+          res = NS_ERROR_ILLEGAL_INPUT;
+          break;
+        }
+        *out++ = UCS2_REPLACEMENT_CHAR;
+        mState = 0;
+        mFirst = false;
       }
     }
   }