Bug 1214619 - Remove nsISaveAsCharset as much as possible without breaking extensions in popular use. r=emk.

This commit is contained in:
Henri Sivonen 2015-10-22 11:18:45 +03:00
parent b87989f7a7
commit 08f918f297
3 changed files with 46 additions and 402 deletions

View File

@ -11,7 +11,10 @@
#define NS_SAVEASCHARSET_CONTRACTID "@mozilla.org/intl/saveascharset;1"
%}
[scriptable, uuid(33B87F70-7A9C-11d3-915C-006008A6EDF6)]
/**
* DO NOT USE! For compat with legacy extension code only.
*/
[scriptable, uuid(b3b8124f-0abb-460e-88ac-3cf1a0134b2d)]
interface nsISaveAsCharset : nsISupports
{
// attributes
@ -19,17 +22,17 @@ interface nsISaveAsCharset : nsISupports
const unsigned long mask_Entity = 0x00000300; // mask for entity (2bits)
const unsigned long mask_CharsetFallback = 0x00000400; // mask for charset fallback (1bit)
const unsigned long attr_FallbackNone = 0; // no fall back for unconverted chars (skipped)
const unsigned long attr_FallbackQuestionMark = 1; // unconverted chars are replaced by '?'
const unsigned long attr_FallbackEscapeU = 2; // unconverted chars are escaped as \uxxxx
const unsigned long attr_FallbackDecimalNCR = 3; // unconverted chars are replaced by decimal NCR
const unsigned long attr_FallbackHexNCR = 4; // unconverted chars are replaced by hex NCR
const unsigned long attr_FallbackNone = 0; // IGNORED
const unsigned long attr_FallbackQuestionMark = 1; // IGNORED
const unsigned long attr_FallbackEscapeU = 2; // IGNORED
const unsigned long attr_FallbackDecimalNCR = 3; // IGNORED
const unsigned long attr_FallbackHexNCR = 4; // IGNORED
const unsigned long attr_EntityNone = 0; // generate no Named Entity
const unsigned long attr_EntityBeforeCharsetConv = 0x00000100; // generate Named Entity before charset conversion
const unsigned long attr_EntityAfterCharsetConv = 0x00000200; // generate Named Entity after charset conversion
const unsigned long attr_EntityNone = 0; // IGNORED
const unsigned long attr_EntityBeforeCharsetConv = 0x00000100; // IGNORED
const unsigned long attr_EntityAfterCharsetConv = 0x00000200; // IGNORED
const unsigned long attr_CharsetFallback = 0x00000400; // fallback to other charset and restart the conversion
const unsigned long attr_CharsetFallback = 0x00000400; // IGNORED
// default attribute for plain text
@ -39,14 +42,9 @@ interface nsISaveAsCharset : nsISupports
// generate entity before charset conversion, use decimal NCR
const unsigned long attr_htmlTextDefault = attr_FallbackDecimalNCR + attr_EntityBeforeCharsetConv;
readonly attribute string charset; // charset used for the conversion
readonly attribute AUTF8String charset; // charset used for the conversion
// set up charset, attribute and entity version
// see nsIEntityConverter.idl for possible value of entityVersion (entityNone for plain text).
void Init(in string charset, in unsigned long attr, in unsigned long entityVersion);
void Init(in AUTF8String charset, in unsigned long ignored, in unsigned long alsoIgnored);
// convert UCS-2 html to target charset
// may return the result code of the unicode converter (NS_ERROR_UENC_NOMAPPING)
// if the attribute does not specify any fall back (e.g. attrPlainTextDefault)
string Convert(in wstring inString);
ACString Convert(in AString inString);
};

View File

@ -4,16 +4,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "prmem.h"
#include "mozilla/Snprintf.h"
#include "nsSaveAsCharset.h"
#include "nsWhitespaceTokenizer.h"
#include "nsIUnicodeEncoder.h"
#include "mozilla/dom/EncodingUtils.h"
#include "nsComponentManagerUtils.h"
using mozilla::dom::EncodingUtils;
//
// nsISupports methods
@ -25,9 +17,6 @@ NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset)
//
nsSaveAsCharset::nsSaveAsCharset()
{
mAttribute = attr_htmlTextDefault;
mEntityVersion = 0;
mCharsetListIndex = -1;
}
nsSaveAsCharset::~nsSaveAsCharset()
@ -35,340 +24,33 @@ nsSaveAsCharset::~nsSaveAsCharset()
}
NS_IMETHODIMP
nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion)
nsSaveAsCharset::Init(const nsACString& aCharset, uint32_t aIgnored, uint32_t aAlsoIgnored)
{
nsresult rv = NS_OK;
mAttribute = attr;
mEntityVersion = entityVersion;
rv = SetupCharsetList(charset);
NS_ENSURE_SUCCESS(rv, rv);
// set up unicode encoder
rv = SetupUnicodeEncoder(GetNextCharset());
NS_ENSURE_SUCCESS(rv, rv);
// set up entity converter
if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter)
mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
return rv;
nsAutoCString encoding;
if (!mozilla::dom::EncodingUtils::FindEncodingForLabelNoReplacement(aCharset, encoding)) {
return NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR;
}
mEncoder = new nsNCRFallbackEncoderWrapper(encoding);
mCharset.Assign(encoding);
return NS_OK;
}
NS_IMETHODIMP
nsSaveAsCharset::Convert(const char16_t *inString, char **_retval)
nsSaveAsCharset::Convert(const nsAString& aIn, nsACString& aOut)
{
NS_ENSURE_ARG_POINTER(_retval);
NS_ENSURE_ARG_POINTER(inString);
if (0 == *inString)
return NS_ERROR_ILLEGAL_VALUE;
nsresult rv = NS_OK;
NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);
*_retval = nullptr;
// make sure to start from the first charset in the list
if (mCharsetListIndex > 0) {
mCharsetListIndex = -1;
rv = SetupUnicodeEncoder(GetNextCharset());
NS_ENSURE_SUCCESS(rv, rv);
if (!mEncoder) {
return NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR;
}
do {
// fallback to the next charset in the list if the last conversion failed by an unmapped character
if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
const char * charset = GetNextCharset();
if (!charset)
break;
rv = SetupUnicodeEncoder(charset);
NS_ENSURE_SUCCESS(rv, rv);
PR_FREEIF(*_retval);
}
if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
char16_t *entity = nullptr;
// do the entity conversion first
rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
if(NS_SUCCEEDED(rv)) {
rv = DoCharsetConversion(entity, _retval);
free(entity);
}
}
else
rv = DoCharsetConversion(inString, _retval);
} while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);
return rv;
if (!mEncoder->Encode(aIn, aOut)) {
return NS_ERROR_OUT_OF_MEMORY;
}
return NS_OK;
}
NS_IMETHODIMP
nsSaveAsCharset::GetCharset(char * *aCharset)
nsSaveAsCharset::GetCharset(nsACString& aCharset)
{
NS_ENSURE_ARG(aCharset);
NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);
const char* charset = mCharsetList[mCharsetListIndex].get();
if (!charset) {
*aCharset = nullptr;
NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
return NS_ERROR_FAILURE;
}
*aCharset = strdup(charset);
return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}
/////////////////////////////////////////////////////////////////////////////////////////
#define RESERVE_FALLBACK_BYTES 512
// do the fallback, reallocate the buffer if necessary
// need to pass destination buffer info (size, current position and estimation of rest of the conversion)
// Appends the fallback representation of |character| (as produced by
// DoConversionFallBack) to the output buffer at *currentPos.
//
// outString       in/out: the PR_Malloc/PR_Realloc'd destination buffer; may
//                 be replaced by a larger block.
// bufferLength    in/out: usable length of *outString, not counting the one
//                 extra byte reserved for the terminating NUL.
// currentPos      in/out: write position; advanced past the copied text.
// estimatedLength space the caller still expects to need for the rest of the
//                 conversion.
//
// The copied text is not NUL-terminated here; the caller writes the
// terminator. On allocation failure the buffer is released, *outString is set
// to nullptr, *bufferLength to 0 and NS_ERROR_OUT_OF_MEMORY is returned.
// Otherwise the result of DoConversionFallBack is returned.
NS_IMETHODIMP
nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength,
                                int32_t *currentPos, int32_t estimatedLength)
{
  NS_ENSURE_ARG_POINTER(outString);
  NS_ENSURE_ARG_POINTER(bufferLength);
  NS_ENSURE_ARG_POINTER(currentPos);

  char fallbackStr[256];
  nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
  if (NS_FAILED(rv)) {
    return rv;
  }

  int32_t tempLen = (int32_t) strlen(fallbackStr);

  // reallocate if the buffer is not large enough
  if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
    int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES;
    // + 1 is for the terminating NUL, don't add that to bufferLength
    char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1);
    if (!temp) {
      // A failed reallocation leaves the original block allocated. Release it
      // here: the caller only sees the nulled pointer and cannot free it.
      PR_FREEIF(*outString);
      *outString = nullptr;
      *bufferLength = 0;
      return NS_ERROR_OUT_OF_MEMORY;
    }
    // adjust length/pointer after realloc
    *bufferLength += addLength;
    *outString = temp;
  }

  memcpy((*outString + *currentPos), fallbackStr, tempLen);
  *currentPos += tempLen;
  return rv;
}
// Encodes the NUL-terminated UTF-16 string |inString| with mEncoder into a
// newly PR_Malloc'd, NUL-terminated buffer returned through |outString|.
//
// Each time the encoder reports NS_ERROR_UENC_NOMAPPING the encoder is
// finished, and, unless ATTR_NO_FALLBACK(mAttribute) holds, HandleFallBack()
// appends a replacement for the unmapped character before encoding resumes.
//
// Returns:
//   - a failure code with *outString left null (the buffer is freed) when the
//     encoder, the fallback or an allocation fails;
//   - NS_ERROR_UENC_NOMAPPING with *outString set when the conversion
//     completed but at least one character was unmappable;
//   - otherwise the result of the final mEncoder->Finish() call.
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString)
{
NS_ENSURE_ARG_POINTER(outString);
*outString = nullptr;
nsresult rv;
int32_t inStringLength = NS_strlen(inString); // original input string length
int32_t bufferLength; // allocated buffer length
int32_t srcLength = inStringLength;
int32_t dstLength;
// pos1 indexes the source (UTF-16 units), pos2 indexes the destination (bytes)
int32_t pos1, pos2;
nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING
// estimate and allocate the target buffer (reserve extra memory for fallback)
rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
if (NS_FAILED(rv)) return rv;
bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback
// + 1 is for the terminating NUL -- we don't add that to bufferLength so that
// we can always write dstPtr[pos2] = '\0' even when the encoder filled the
// buffer.
char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
if (!dstPtr) {
return NS_ERROR_OUT_OF_MEMORY;
}
for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
// convert from unicode
dstLength = bufferLength - pos2;
NS_ASSERTION(dstLength >= 0, "out of bounds write");
rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);
// always advance by at least one source unit, even if the encoder reports
// that it consumed none, so the loop makes progress
pos1 += srcLength ? srcLength : 1;
pos2 += dstLength;
dstPtr[pos2] = '\0';
// break: this is usually the case (no error) OR unrecoverable error
if (NS_ERROR_UENC_NOMAPPING != rv) break;
// remember this happened and reset the result
saveResult = rv;
rv = NS_OK;
// finish encoder, give it a chance to write extra data like escape sequences
dstLength = bufferLength - pos2;
rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
if (NS_SUCCEEDED(rv)) {
pos2 += dstLength;
dstPtr[pos2] = '\0';
}
// NOTE(review): srcLength is computed before the surrogate branch below may
// increment pos1, so after a surrogate pair it looks one unit larger than the
// remaining input on the next Convert() call -- confirm.
srcLength = inStringLength - pos1;
// do the fallback
if (!ATTR_NO_FALLBACK(mAttribute)) {
uint32_t unMappedChar;
// inString[pos1-1] is treated as the unmapped unit; if it is a high
// surrogate followed by a low surrogate, combine the pair into one code
// point and skip the low surrogate as well
if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) &&
inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
pos1++;
} else {
unMappedChar = inString[pos1-1];
}
// estimate the space still needed for the rest of the input so that
// HandleFallBack can decide whether to grow the buffer
rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
if (NS_FAILED(rv))
break;
rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
if (NS_FAILED(rv))
break;
dstPtr[pos2] = '\0';
}
}
if (NS_SUCCEEDED(rv)) {
// finish encoder, give it a chance to write extra data like escape sequences
dstLength = bufferLength - pos2;
rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
if (NS_SUCCEEDED(rv)) {
pos2 += dstLength;
dstPtr[pos2] = '\0';
}
}
if (NS_FAILED(rv)) {
PR_FREEIF(dstPtr);
return rv;
}
*outString = dstPtr; // set the result string
// set error code so that the caller can do own fall back
if (NS_ERROR_UENC_NOMAPPING == saveResult) {
rv = NS_ERROR_UENC_NOMAPPING;
}
return rv;
}
// Writes the NUL-terminated fallback text for the unmappable code point
// |inUCS4| into |outString|, a caller-supplied buffer of |bufferLength| bytes.
//
// The text is chosen from mAttribute:
//   - nothing (empty string) when ATTR_NO_FALLBACK(mAttribute) holds;
//   - a named entity from mEntityConverter when the entity mode is
//     attr_EntityAfterCharsetConv and the converter succeeds;
//   - otherwise '?', a \u escape, a decimal NCR, a hex NCR or nothing,
//     according to MASK_FALLBACK(mAttribute).
//
// Returns NS_ERROR_OUT_OF_MEMORY when the converter yields no entity or one
// that does not fit in the buffer, NS_ERROR_FAILURE when the buffer is too
// small for '?' or snprintf produces no output, and NS_ERROR_ILLEGAL_VALUE for
// an unknown fallback mode.
NS_IMETHODIMP
nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength)
{
  NS_ENSURE_ARG_POINTER(outString);
  *outString = '\0';

  nsresult rv = NS_OK;

  if (ATTR_NO_FALLBACK(mAttribute)) {
    return NS_OK;
  }

  if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
    char *entity = nullptr;
    rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity);
    if (NS_SUCCEEDED(rv)) {
      if (!entity) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      // PL_strcpy also writes the terminating NUL, so the entity must be
      // strictly shorter than the buffer. Free it on the error path too.
      if ((int32_t)strlen(entity) >= bufferLength) {
        free(entity);
        return NS_ERROR_OUT_OF_MEMORY;
      }
      PL_strcpy(outString, entity);
      free(entity);
      return rv;
    }
    // The entity converter failed: fall through to the generic fallback.
  }

  switch (MASK_FALLBACK(mAttribute)) {
  case attr_FallbackQuestionMark:
    // needs room for '?' plus the terminating NUL
    if (bufferLength >= 2) {
      *outString++ = '?';
      *outString = '\0';
      rv = NS_OK;
    } else {
      rv = NS_ERROR_FAILURE;
    }
    break;
  case attr_FallbackEscapeU:
    // six hex digits when any of bits 16-23 is set, four otherwise
    if (inUCS4 & 0xff0000)
      rv = (snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
    else
      rv = (snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackDecimalNCR:
    rv = (snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackHexNCR:
    rv = (snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackNone:
    rv = NS_OK;
    break;
  default:
    rv = NS_ERROR_ILLEGAL_VALUE;
    break;
  }

  return rv;
}
nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
{
NS_ENSURE_ARG(charset);
nsDependentCString label(charset);
if (label.EqualsLiteral("replacement")) {
// Internal caller. "replacement" doesn't survive another label resolution.
mEncoder = EncodingUtils::EncoderForEncoding(label);
return NS_OK;
}
nsAutoCString encoding;
if (!EncodingUtils::FindEncodingForLabelNoReplacement(label,
encoding)) {
return NS_ERROR_UCONV_NOCONV;
}
mEncoder = EncodingUtils::EncoderForEncoding(encoding);
aCharset.Assign(mCharset);
return NS_OK;
}
// Fills mCharsetList from |charsetList|: the string is split on whitespace and
// every token is further split on ','. When mCharsetListIndex is non-negative
// the existing list is cleared and the index reset to -1 first.
// Returns NS_ERROR_INVALID_ARG for an empty string.
nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
{
  NS_ENSURE_ARG(charsetList);

  NS_ASSERTION(charsetList[0], "charsetList should not be empty");
  if ('\0' == charsetList[0]) {
    return NS_ERROR_INVALID_ARG;
  }

  // drop the previously stored list
  if (!(mCharsetListIndex < 0)) {
    mCharsetList.Clear();
    mCharsetListIndex = -1;
  }

  for (nsCWhitespaceTokenizer tokenizer((nsDependentCString(charsetList)));
       tokenizer.hasMoreTokens(); ) {
    ParseString(tokenizer.nextToken(), ',', mCharsetList);
  }

  return NS_OK;
}
// Advances mCharsetListIndex by one and returns the charset name stored at the
// new position, or nullptr (leaving the index untouched) when the list has no
// further entry.
const char * nsSaveAsCharset::GetNextCharset()
{
  const int32_t nextIndex = mCharsetListIndex + 1;
  if (nextIndex >= int32_t(mCharsetList.Length())) {
    return nullptr;
  }

  mCharsetListIndex = nextIndex;
  return mCharsetList[nextIndex].get();
}

View File

@ -3,71 +3,35 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsSaveAsCharset_h__
#define nsSaveAsCharset_h__
#ifndef nsSaveAsCharset_h_
#define nsSaveAsCharset_h_
#include "nsStringFwd.h"
#include "nsTArray.h"
#include "nsISaveAsCharset.h"
#include "nsCOMPtr.h"
#define MASK_FALLBACK(a) (nsISaveAsCharset::mask_Fallback & (a))
#define MASK_ENTITY(a) (nsISaveAsCharset::mask_Entity & (a))
#define MASK_CHARSET_FALLBACK(a) (nsISaveAsCharset::mask_CharsetFallback & (a))
#define ATTR_NO_FALLBACK(a) (nsISaveAsCharset::attr_FallbackNone == MASK_FALLBACK(a) && \
nsISaveAsCharset::attr_EntityAfterCharsetConv != MASK_ENTITY(a))
class nsIUnicodeEncoder;
class nsIEntityConverter;
#include "nsAutoPtr.h"
#include "nsNCRFallbackEncoderWrapper.h"
#include "nsString.h"
class nsSaveAsCharset : public nsISaveAsCharset
{
public:
//
// implementation methods
//
nsSaveAsCharset();
//
// nsISupports
//
NS_DECL_ISUPPORTS
NS_DECL_ISUPPORTS
//
// nsISaveAsCharset
//
NS_IMETHOD Init(const char *charset, uint32_t attr, uint32_t entityVersion) override;
NS_IMETHOD Init(const nsACString& aCharset, uint32_t aIgnored, uint32_t aAlsoIgnored) override;
NS_IMETHOD Convert(const char16_t *inString, char **_retval) override;
// Encodes aIn into aOut with the encoder created by Init().
NS_IMETHOD Convert(const nsAString& aIn, nsACString& aOut) override;
NS_IMETHODIMP GetCharset(char * *aCharset) override;
NS_IMETHODIMP GetCharset(nsACString& aCharset) override;
protected:
private:
virtual ~nsSaveAsCharset();
NS_IMETHOD DoCharsetConversion(const char16_t *inString, char **outString);
NS_IMETHOD DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength);
// do the fallback, reallocate the buffer if necessary
// need to pass destination buffer info (size, current position and estimation of rest of the conversion)
NS_IMETHOD HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength,
int32_t *currentPos, int32_t estimatedLength);
nsresult SetupUnicodeEncoder(const char* charset);
nsresult SetupCharsetList(const char *charsetList);
const char * GetNextCharset();
uint32_t mAttribute; // conversion attribute
uint32_t mEntityVersion; // see nsIEntityConverter
nsCOMPtr<nsIUnicodeEncoder> mEncoder; // encoder (convert from unicode)
nsCOMPtr<nsIEntityConverter> mEntityConverter;
nsTArray<nsCString> mCharsetList;
int32_t mCharsetListIndex;
nsAutoPtr<nsNCRFallbackEncoderWrapper> mEncoder;
nsCString mCharset;
};
#endif