mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 638379 - Part 1: Implement kOnError_Recover to the UTF-8 decoder. r=smontagu
This commit is contained in:
parent
89a4119635
commit
7e6d22b429
@ -10,10 +10,12 @@
|
||||
#include "nsStringGlue.h"
|
||||
|
||||
class nsCharsetConverterManager;
|
||||
class nsScriptableUnicodeConverter;
|
||||
|
||||
class nsCharsetAlias
|
||||
{
|
||||
friend class nsCharsetConverterManager;
|
||||
friend class nsScriptableUnicodeConverter;
|
||||
static nsresult GetPreferredInternal(const nsACString& aAlias, nsACString& aResult);
|
||||
public:
|
||||
static nsresult GetPreferred(const nsACString& aAlias, nsACString& aResult);
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "nsIStringStream.h"
|
||||
#include "nsCRT.h"
|
||||
#include "nsComponentManagerUtils.h"
|
||||
#include "nsCharsetAlias.h"
|
||||
|
||||
static int32_t gInstanceCount = 0;
|
||||
|
||||
@ -257,22 +258,39 @@ nsScriptableUnicodeConverter::InitConverter()
|
||||
mEncoder = nullptr;
|
||||
|
||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
|
||||
if (NS_FAILED(rv) || !ccm) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
if (NS_SUCCEEDED(rv) && ccm) {
|
||||
// get charset atom due to getting unicode converter
|
||||
|
||||
// get a unicode converter
|
||||
rv = ccm->GetUnicodeEncoder(mCharset.get(), getter_AddRefs(mEncoder));
|
||||
if(NS_SUCCEEDED(rv)) {
|
||||
rv = mEncoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (PRUnichar)'?');
|
||||
if(NS_SUCCEEDED(rv)) {
|
||||
rv = mIsInternal ?
|
||||
ccm->GetUnicodeDecoderInternal(mCharset.get(),
|
||||
getter_AddRefs(mDecoder)) :
|
||||
ccm->GetUnicodeDecoder(mCharset.get(),
|
||||
getter_AddRefs(mDecoder));
|
||||
}
|
||||
}
|
||||
// get a unicode converter
|
||||
rv = ccm->GetUnicodeEncoder(mCharset.get(), getter_AddRefs(mEncoder));
|
||||
if (NS_FAILED(rv)) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
rv = mEncoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (PRUnichar)'?');
|
||||
if (NS_FAILED(rv)) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
nsAutoCString charset;
|
||||
rv = mIsInternal ? nsCharsetAlias::GetPreferredInternal(mCharset, charset)
|
||||
: nsCharsetAlias::GetPreferred(mCharset, charset);
|
||||
if (NS_FAILED(rv)) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
rv = ccm->GetUnicodeDecoderRaw(charset.get(), getter_AddRefs(mDecoder));
|
||||
if (NS_FAILED(rv)) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
// The UTF-8 decoder used to throw regardless of the error behavior.
|
||||
// Simulating the old behavior for compatibility with legacy callers
|
||||
// (including addons). If callers want control over the behavior,
|
||||
// they should switch to TextDecoder.
|
||||
if (charset.EqualsLiteral("UTF-8")) {
|
||||
mDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
|
||||
}
|
||||
|
||||
return rv ;
|
||||
|
@ -188,12 +188,11 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
|
||||
|
||||
out = aDest;
|
||||
if (mState == 0xFF) {
|
||||
// Emit supplementary character left over from previous iteration. If the
|
||||
// buffer size is insufficient, treat it as an illegal character.
|
||||
// Emit supplementary character left over from previous iteration. It is
|
||||
// the caller's responsibility to provide a sufficiently large buffer.
|
||||
if (aDestLen < 2) {
|
||||
NS_ERROR("Output buffer insufficient to hold supplementary character");
|
||||
mState = 0;
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
*aSrcLength = *aDestLength = 0;
|
||||
return NS_OK_UDEC_MOREOUTPUT;
|
||||
}
|
||||
out = EmitSurrogatePair(mUcs4, out);
|
||||
mUcs4 = 0;
|
||||
@ -225,8 +224,12 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
|
||||
mBytes = 1;
|
||||
} else if (c < 0xC2) { // C0/C1
|
||||
// Overlong 2 octet sequence
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
if (mErrBehavior == kOnError_Signal) {
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
*out++ = UCS2_REPLACEMENT_CHAR;
|
||||
mFirst = false;
|
||||
} else if (c < 0xE0) { // C2..DF
|
||||
// First octet of 2 octet sequence
|
||||
mUcs4 = c;
|
||||
@ -248,12 +251,16 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
|
||||
} else { // F5..FF
|
||||
/* Current octet is neither in the US-ASCII range nor a legal first
|
||||
* octet of a multi-octet sequence.
|
||||
*
|
||||
* Return an error condition. Caller is responsible for flushing and
|
||||
* refilling the buffer and resetting state.
|
||||
*/
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
if (mErrBehavior == kOnError_Signal) {
|
||||
/* Return an error condition. Caller is responsible for flushing and
|
||||
* refilling the buffer and resetting state.
|
||||
*/
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
*out++ = UCS2_REPLACEMENT_CHAR;
|
||||
mFirst = false;
|
||||
}
|
||||
} else {
|
||||
// When mState is non-zero, we expect a continuation of the multi-octet
|
||||
@ -270,8 +277,14 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
|
||||
mUcs4 == 0x100000 && c > 0x8F)) { // F4 90..BF
|
||||
// illegal sequences or sequences converted into illegal ranges.
|
||||
in--;
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
if (mErrBehavior == kOnError_Signal) {
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
*out++ = UCS2_REPLACEMENT_CHAR;
|
||||
mState = 0;
|
||||
mFirst = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@ -315,8 +328,13 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
|
||||
* for flushing and refilling the buffer and resetting state.
|
||||
*/
|
||||
in--;
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
if (mErrBehavior == kOnError_Signal) {
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
*out++ = UCS2_REPLACEMENT_CHAR;
|
||||
mState = 0;
|
||||
mFirst = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user