Bug 638379 - Part 1: Implement kOnError_Recover to the UTF-8 decoder. r=smontagu

This commit is contained in:
Masatoshi Kimura 2012-12-10 09:10:28 -05:00
parent 89a4119635
commit 7e6d22b429
3 changed files with 69 additions and 31 deletions

View File

@ -10,10 +10,12 @@
#include "nsStringGlue.h"
class nsCharsetConverterManager;
class nsScriptableUnicodeConverter;
class nsCharsetAlias
{
friend class nsCharsetConverterManager;
friend class nsScriptableUnicodeConverter;
static nsresult GetPreferredInternal(const nsACString& aAlias, nsACString& aResult);
public:
static nsresult GetPreferred(const nsACString& aAlias, nsACString& aResult);

View File

@ -13,6 +13,7 @@
#include "nsIStringStream.h"
#include "nsCRT.h"
#include "nsComponentManagerUtils.h"
#include "nsCharsetAlias.h"
static int32_t gInstanceCount = 0;
@ -257,22 +258,39 @@ nsScriptableUnicodeConverter::InitConverter()
mEncoder = nullptr;
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
if (NS_FAILED(rv) || !ccm) {
return rv;
}
if (NS_SUCCEEDED(rv) && ccm) {
// get charset atom due to getting unicode converter
// get an unicode converter
rv = ccm->GetUnicodeEncoder(mCharset.get(), getter_AddRefs(mEncoder));
if(NS_SUCCEEDED(rv)) {
rv = mEncoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (PRUnichar)'?');
if(NS_SUCCEEDED(rv)) {
rv = mIsInternal ?
ccm->GetUnicodeDecoderInternal(mCharset.get(),
getter_AddRefs(mDecoder)) :
ccm->GetUnicodeDecoder(mCharset.get(),
getter_AddRefs(mDecoder));
}
}
// get a Unicode encoder
rv = ccm->GetUnicodeEncoder(mCharset.get(), getter_AddRefs(mEncoder));
if (NS_FAILED(rv)) {
return rv;
}
rv = mEncoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (PRUnichar)'?');
if (NS_FAILED(rv)) {
return rv;
}
nsAutoCString charset;
rv = mIsInternal ? nsCharsetAlias::GetPreferredInternal(mCharset, charset)
: nsCharsetAlias::GetPreferred(mCharset, charset);
if (NS_FAILED(rv)) {
return rv;
}
rv = ccm->GetUnicodeDecoderRaw(charset.get(), getter_AddRefs(mDecoder));
if (NS_FAILED(rv)) {
return rv;
}
// The UTF-8 decoder used to throw regardless of the error behavior.
// Simulating the old behavior for compatibility with legacy callers
// (including addons). If callers want control over the behavior,
// they should switch to TextDecoder.
if (charset.EqualsLiteral("UTF-8")) {
mDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
}
return rv ;

View File

@ -188,12 +188,11 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
out = aDest;
if (mState == 0xFF) {
// Emit supplementary character left over from previous iteration. If the
// buffer size is insufficient, treat it as an illegal character.
// Emit supplementary character left over from previous iteration. It is
// the caller's responsibility to provide a sufficiently large buffer.
if (aDestLen < 2) {
NS_ERROR("Output buffer insufficient to hold supplementary character");
mState = 0;
return NS_ERROR_ILLEGAL_INPUT;
*aSrcLength = *aDestLength = 0;
return NS_OK_UDEC_MOREOUTPUT;
}
out = EmitSurrogatePair(mUcs4, out);
mUcs4 = 0;
@ -225,8 +224,12 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
mBytes = 1;
} else if (c < 0xC2) { // C0/C1
// Overlong 2 octet sequence
res = NS_ERROR_ILLEGAL_INPUT;
break;
if (mErrBehavior == kOnError_Signal) {
res = NS_ERROR_ILLEGAL_INPUT;
break;
}
*out++ = UCS2_REPLACEMENT_CHAR;
mFirst = false;
} else if (c < 0xE0) { // C2..DF
// First octet of 2 octet sequence
mUcs4 = c;
@ -248,12 +251,16 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
} else { // F5..FF
/* Current octet is neither in the US-ASCII range nor a legal first
* octet of a multi-octet sequence.
*
* Return an error condition. Caller is responsible for flushing and
* refilling the buffer and resetting state.
*/
res = NS_ERROR_ILLEGAL_INPUT;
break;
if (mErrBehavior == kOnError_Signal) {
/* Return an error condition. Caller is responsible for flushing and
* refilling the buffer and resetting state.
*/
res = NS_ERROR_ILLEGAL_INPUT;
break;
}
*out++ = UCS2_REPLACEMENT_CHAR;
mFirst = false;
}
} else {
// When mState is non-zero, we expect a continuation of the multi-octet
@ -270,8 +277,14 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
mUcs4 == 0x100000 && c > 0x8F)) { // F4 90..BF
// illegal sequences or sequences converted into illegal ranges.
in--;
res = NS_ERROR_ILLEGAL_INPUT;
break;
if (mErrBehavior == kOnError_Signal) {
res = NS_ERROR_ILLEGAL_INPUT;
break;
}
*out++ = UCS2_REPLACEMENT_CHAR;
mState = 0;
mFirst = false;
continue;
}
}
@ -315,8 +328,13 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
* for flushing and refilling the buffer and resetting state.
*/
in--;
res = NS_ERROR_ILLEGAL_INPUT;
break;
if (mErrBehavior == kOnError_Signal) {
res = NS_ERROR_ILLEGAL_INPUT;
break;
}
*out++ = UCS2_REPLACEMENT_CHAR;
mState = 0;
mFirst = false;
}
}
}