diff --git a/intl/unicharutil/nsISaveAsCharset.idl b/intl/unicharutil/nsISaveAsCharset.idl index e9571a1b668..c32960177d1 100644 --- a/intl/unicharutil/nsISaveAsCharset.idl +++ b/intl/unicharutil/nsISaveAsCharset.idl @@ -11,7 +11,10 @@ #define NS_SAVEASCHARSET_CONTRACTID "@mozilla.org/intl/saveascharset;1" %} -[scriptable, uuid(33B87F70-7A9C-11d3-915C-006008A6EDF6)] +/** + * DO NOT USE! For compat with legacy extension code only. + */ +[scriptable, uuid(b3b8124f-0abb-460e-88ac-3cf1a0134b2d)] interface nsISaveAsCharset : nsISupports { // attributes @@ -19,17 +22,17 @@ interface nsISaveAsCharset : nsISupports const unsigned long mask_Entity = 0x00000300; // mask for entity (2bits) const unsigned long mask_CharsetFallback = 0x00000400; // mask for charset fallback (1bit) - const unsigned long attr_FallbackNone = 0; // no fall back for unconverted chars (skipped) - const unsigned long attr_FallbackQuestionMark = 1; // unconverted chars are replaced by '?' - const unsigned long attr_FallbackEscapeU = 2; // unconverted chars are escaped as \uxxxx - const unsigned long attr_FallbackDecimalNCR = 3; // unconverted chars are replaced by decimal NCR - const unsigned long attr_FallbackHexNCR = 4; // unconverted chars are replaced by hex NCR + const unsigned long attr_FallbackNone = 0; // IGNORED + const unsigned long attr_FallbackQuestionMark = 1; // IGNORED + const unsigned long attr_FallbackEscapeU = 2; // IGNORED + const unsigned long attr_FallbackDecimalNCR = 3; // IGNORED + const unsigned long attr_FallbackHexNCR = 4; // IGNORED - const unsigned long attr_EntityNone = 0; // generate no Named Entity - const unsigned long attr_EntityBeforeCharsetConv = 0x00000100; // generate Named Entity before charset conversion - const unsigned long attr_EntityAfterCharsetConv = 0x00000200; // generate Named Entity after charset conversion + const unsigned long attr_EntityNone = 0; // IGNORED + const unsigned long attr_EntityBeforeCharsetConv = 0x00000100; // IGNORED + const unsigned 
long attr_EntityAfterCharsetConv = 0x00000200; // IGNORED - const unsigned long attr_CharsetFallback = 0x00000400; // fallback to other charset and restart the convesion + const unsigned long attr_CharsetFallback = 0x00000400; // IGNORED // default attribute for plain text @@ -39,14 +42,9 @@ interface nsISaveAsCharset : nsISupports // generate entity before charset conversion, use decimal NCR const unsigned long attr_htmlTextDefault = attr_FallbackDecimalNCR + attr_EntityBeforeCharsetConv; - readonly attribute string charset; // charset used for the conversion + readonly attribute AUTF8String charset; // charset used for the conversion - // set up charset, attribute and entity version - // see nsIEntityConverter.idl for possible value of entityVersion (entityNone for plain text). - void Init(in string charset, in unsigned long attr, in unsigned long entityVersion); + void Init(in AUTF8String charset, in unsigned long ignored, in unsigned long alsoIgnored); - // convert UCS-2 html to target charset - // may return the result code of the unicode converter (NS_ERROR_UENC_NOMAPPING) - // if the attribute does not specify any fall back (e.g. attrPlainTextDefault) - string Convert(in wstring inString); + ACString Convert(in AString inString); }; diff --git a/intl/unicharutil/nsSaveAsCharset.cpp b/intl/unicharutil/nsSaveAsCharset.cpp index 659bbfcfdb2..2b05c967731 100644 --- a/intl/unicharutil/nsSaveAsCharset.cpp +++ b/intl/unicharutil/nsSaveAsCharset.cpp @@ -4,16 +4,8 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "prmem.h" -#include "mozilla/Snprintf.h" #include "nsSaveAsCharset.h" -#include "nsWhitespaceTokenizer.h" -#include "nsIUnicodeEncoder.h" #include "mozilla/dom/EncodingUtils.h" -#include "nsComponentManagerUtils.h" - -using mozilla::dom::EncodingUtils; // // nsISupports methods @@ -25,9 +17,6 @@ NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset) // nsSaveAsCharset::nsSaveAsCharset() { - mAttribute = attr_htmlTextDefault; - mEntityVersion = 0; - mCharsetListIndex = -1; } nsSaveAsCharset::~nsSaveAsCharset() @@ -35,340 +24,33 @@ nsSaveAsCharset::~nsSaveAsCharset() } NS_IMETHODIMP -nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion) +nsSaveAsCharset::Init(const nsACString& aCharset, uint32_t aIgnored, uint32_t aAlsoIgnored) { - nsresult rv = NS_OK; - - mAttribute = attr; - mEntityVersion = entityVersion; - - rv = SetupCharsetList(charset); - NS_ENSURE_SUCCESS(rv, rv); - - // set up unicode encoder - rv = SetupUnicodeEncoder(GetNextCharset()); - NS_ENSURE_SUCCESS(rv, rv); - - // set up entity converter - if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter) - mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv); - - return rv; + nsAutoCString encoding; + if (!mozilla::dom::EncodingUtils::FindEncodingForLabelNoReplacement(aCharset, encoding)) { + return NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR; + } + mEncoder = new nsNCRFallbackEncoderWrapper(encoding); + mCharset.Assign(encoding); + return NS_OK; } NS_IMETHODIMP -nsSaveAsCharset::Convert(const char16_t *inString, char **_retval) +nsSaveAsCharset::Convert(const nsAString& aIn, nsACString& aOut) { - NS_ENSURE_ARG_POINTER(_retval); - NS_ENSURE_ARG_POINTER(inString); - if (0 == *inString) - return NS_ERROR_ILLEGAL_VALUE; - nsresult rv = NS_OK; - - NS_ASSERTION(mEncoder, "need to call Init() before Convert()"); - NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE); - - *_retval = nullptr; - - // make sure to start from the first charset in the list - if 
(mCharsetListIndex > 0) { - mCharsetListIndex = -1; - rv = SetupUnicodeEncoder(GetNextCharset()); - NS_ENSURE_SUCCESS(rv, rv); + if (!mEncoder) { + return NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR; } - do { - // fallback to the next charset in the list if the last conversion failed by an unmapped character - if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) { - const char * charset = GetNextCharset(); - if (!charset) - break; - rv = SetupUnicodeEncoder(charset); - NS_ENSURE_SUCCESS(rv, rv); - PR_FREEIF(*_retval); - } - - if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) { - NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()"); - NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE); - char16_t *entity = nullptr; - // do the entity conversion first - rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity); - if(NS_SUCCEEDED(rv)) { - rv = DoCharsetConversion(entity, _retval); - free(entity); - } - } - else - rv = DoCharsetConversion(inString, _retval); - - } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv); - - return rv; + if (!mEncoder->Encode(aIn, aOut)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; } NS_IMETHODIMP -nsSaveAsCharset::GetCharset(char * *aCharset) +nsSaveAsCharset::GetCharset(nsACString& aCharset) { - NS_ENSURE_ARG(aCharset); - NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first"); - NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE); - - const char* charset = mCharsetList[mCharsetListIndex].get(); - if (!charset) { - *aCharset = nullptr; - NS_ASSERTION(charset, "make sure to call Init() with non empty charset list"); - return NS_ERROR_FAILURE; - } - - *aCharset = strdup(charset); - return (*aCharset) ? 
NS_OK : NS_ERROR_OUT_OF_MEMORY; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -#define RESERVE_FALLBACK_BYTES 512 - -// do the fallback, reallocate the buffer if necessary -// need to pass destination buffer info (size, current position and estimation of rest of the conversion) -NS_IMETHODIMP -nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength, - int32_t *currentPos, int32_t estimatedLength) -{ - NS_ENSURE_ARG_POINTER(outString); - NS_ENSURE_ARG_POINTER(bufferLength); - NS_ENSURE_ARG_POINTER(currentPos); - - char fallbackStr[256]; - nsresult rv = DoConversionFallBack(character, fallbackStr, 256); - if (NS_SUCCEEDED(rv)) { - int32_t tempLen = (int32_t) strlen(fallbackStr); - - // reallocate if the buffer is not large enough - if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) { - int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES; - // + 1 is for the terminating NUL, don't add that to bufferLength - char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1); - if (temp) { - // adjust length/pointer after realloc - *bufferLength += addLength; - *outString = temp; - } else { - *outString = nullptr; - *bufferLength = 0; - return NS_ERROR_OUT_OF_MEMORY; - } - } - memcpy((*outString + *currentPos), fallbackStr, tempLen); - *currentPos += tempLen; - } - return rv; -} - -NS_IMETHODIMP -nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString) -{ - NS_ENSURE_ARG_POINTER(outString); - - *outString = nullptr; - - nsresult rv; - int32_t inStringLength = NS_strlen(inString); // original input string length - int32_t bufferLength; // allocated buffer length - int32_t srcLength = inStringLength; - int32_t dstLength; - int32_t pos1, pos2; - nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING - - // estimate and allocate the target buffer (reserve extra memory for fallback) - rv = mEncoder->GetMaxLength(inString, 
inStringLength, &dstLength); - if (NS_FAILED(rv)) return rv; - - bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback - // + 1 is for the terminating NUL -- we don't add that to bufferLength so that - // we can always write dstPtr[pos2] = '\0' even when the encoder filled the - // buffer. - char *dstPtr = (char *) PR_Malloc(bufferLength + 1); - if (!dstPtr) { - return NS_ERROR_OUT_OF_MEMORY; - } - - for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) { - // convert from unicode - dstLength = bufferLength - pos2; - NS_ASSERTION(dstLength >= 0, "out of bounds write"); - rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength); - - pos1 += srcLength ? srcLength : 1; - pos2 += dstLength; - dstPtr[pos2] = '\0'; - - // break: this is usually the case (no error) OR unrecoverable error - if (NS_ERROR_UENC_NOMAPPING != rv) break; - - // remember this happened and reset the result - saveResult = rv; - rv = NS_OK; - - // finish encoder, give it a chance to write extra data like escape sequences - dstLength = bufferLength - pos2; - rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); - if (NS_SUCCEEDED(rv)) { - pos2 += dstLength; - dstPtr[pos2] = '\0'; - } - - srcLength = inStringLength - pos1; - - // do the fallback - if (!ATTR_NO_FALLBACK(mAttribute)) { - uint32_t unMappedChar; - if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && - inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) { - unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]); - pos1++; - } else { - unMappedChar = inString[pos1-1]; - } - - rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength); - if (NS_FAILED(rv)) - break; - - rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength); - if (NS_FAILED(rv)) - break; - dstPtr[pos2] = '\0'; - } - } - - if (NS_SUCCEEDED(rv)) { - // finish encoder, give it a chance to write extra data like escape sequences - dstLength = bufferLength - pos2; - rv = 
mEncoder->Finish(&dstPtr[pos2], &dstLength); - if (NS_SUCCEEDED(rv)) { - pos2 += dstLength; - dstPtr[pos2] = '\0'; - } - } - - if (NS_FAILED(rv)) { - PR_FREEIF(dstPtr); - return rv; - } - - *outString = dstPtr; // set the result string - - // set error code so that the caller can do own fall back - if (NS_ERROR_UENC_NOMAPPING == saveResult) { - rv = NS_ERROR_UENC_NOMAPPING; - } - - return rv; -} - -NS_IMETHODIMP -nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength) -{ - NS_ENSURE_ARG_POINTER(outString); - - *outString = '\0'; - - nsresult rv = NS_OK; - - if (ATTR_NO_FALLBACK(mAttribute)) { - return NS_OK; - } - if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) { - char *entity = nullptr; - rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity); - if (NS_SUCCEEDED(rv)) { - if (!entity || (int32_t)strlen(entity) > bufferLength) { - return NS_ERROR_OUT_OF_MEMORY; - } - PL_strcpy(outString, entity); - free(entity); - return rv; - } - } - - switch (MASK_FALLBACK(mAttribute)) { - case attr_FallbackQuestionMark: - if(bufferLength>=2) { - *outString++='?'; - *outString='\0'; - rv = NS_OK; - } else { - rv = NS_ERROR_FAILURE; - } - break; - case attr_FallbackEscapeU: - if (inUCS4 & 0xff0000) - rv = (snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; - else - rv = (snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; - break; - case attr_FallbackDecimalNCR: - rv = (snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; - break; - case attr_FallbackHexNCR: - rv = (snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? 
NS_OK : NS_ERROR_FAILURE; - break; - case attr_FallbackNone: - rv = NS_OK; - break; - default: - rv = NS_ERROR_ILLEGAL_VALUE; - break; - } - - return rv; -} - -nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset) -{ - NS_ENSURE_ARG(charset); - nsDependentCString label(charset); - if (label.EqualsLiteral("replacement")) { - // Internal caller. "replacement" doesn't survive another label resolution. - mEncoder = EncodingUtils::EncoderForEncoding(label); - return NS_OK; - } - nsAutoCString encoding; - if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, - encoding)) { - return NS_ERROR_UCONV_NOCONV; - } - mEncoder = EncodingUtils::EncoderForEncoding(encoding); + aCharset.Assign(mCharset); return NS_OK; } - -nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList) -{ - NS_ENSURE_ARG(charsetList); - - NS_ASSERTION(charsetList[0], "charsetList should not be empty"); - if (!charsetList[0]) - return NS_ERROR_INVALID_ARG; - - if (mCharsetListIndex >= 0) { - mCharsetList.Clear(); - mCharsetListIndex = -1; - } - - nsCWhitespaceTokenizer tokenizer((nsDependentCString(charsetList))); - while (tokenizer.hasMoreTokens()) { - ParseString(tokenizer.nextToken(), ',', mCharsetList); - } - - return NS_OK; -} - -const char * nsSaveAsCharset::GetNextCharset() -{ - if ((mCharsetListIndex + 1) >= int32_t(mCharsetList.Length())) - return nullptr; - - // bump the index and return the next charset - return mCharsetList[++mCharsetListIndex].get(); -} diff --git a/intl/unicharutil/nsSaveAsCharset.h b/intl/unicharutil/nsSaveAsCharset.h index be9cbc46499..63c9e842dbe 100644 --- a/intl/unicharutil/nsSaveAsCharset.h +++ b/intl/unicharutil/nsSaveAsCharset.h @@ -3,71 +3,35 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#ifndef nsSaveAsCharset_h__ -#define nsSaveAsCharset_h__ +#ifndef nsSaveAsCharset_h_ +#define nsSaveAsCharset_h_ #include "nsStringFwd.h" -#include "nsTArray.h" #include "nsISaveAsCharset.h" -#include "nsCOMPtr.h" - -#define MASK_FALLBACK(a) (nsISaveAsCharset::mask_Fallback & (a)) -#define MASK_ENTITY(a) (nsISaveAsCharset::mask_Entity & (a)) -#define MASK_CHARSET_FALLBACK(a) (nsISaveAsCharset::mask_CharsetFallback & (a)) -#define ATTR_NO_FALLBACK(a) (nsISaveAsCharset::attr_FallbackNone == MASK_FALLBACK(a) && \ - nsISaveAsCharset::attr_EntityAfterCharsetConv != MASK_ENTITY(a)) - -class nsIUnicodeEncoder; -class nsIEntityConverter; +#include "nsAutoPtr.h" +#include "nsNCRFallbackEncoderWrapper.h" +#include "nsString.h" class nsSaveAsCharset : public nsISaveAsCharset { public: - - // - // implementation methods - // + nsSaveAsCharset(); - // - // nsISupports - // - NS_DECL_ISUPPORTS + NS_DECL_ISUPPORTS - // - // nsIEntityConverter - // - NS_IMETHOD Init(const char *charset, uint32_t attr, uint32_t entityVersion) override; + NS_IMETHOD Init(const nsACString& aCharset, uint32_t aIgnored, uint32_t aAlsoIgnored) override; - NS_IMETHOD Convert(const char16_t *inString, char **_retval) override; + NS_IMETHOD Convert(const nsAString& ain, nsACString& aOut) override; - NS_IMETHODIMP GetCharset(char * *aCharset) override; + NS_IMETHODIMP GetCharset(nsACString& aCharset) override; -protected: +private: virtual ~nsSaveAsCharset(); - NS_IMETHOD DoCharsetConversion(const char16_t *inString, char **outString); - - NS_IMETHOD DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength); - - // do the fallback, reallocate the buffer if necessary - // need to pass destination buffer info (size, current position and estimation of rest of the conversion) - NS_IMETHOD HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength, - int32_t *currentPos, int32_t estimatedLength); - - nsresult SetupUnicodeEncoder(const char* charset); - - nsresult 
SetupCharsetList(const char *charsetList); - - const char * GetNextCharset(); - - uint32_t mAttribute; // conversion attribute - uint32_t mEntityVersion; // see nsIEntityConverter - nsCOMPtr<nsIUnicodeEncoder> mEncoder; // encoder (convert from unicode) - nsCOMPtr<nsIEntityConverter> mEntityConverter; - nsTArray<nsCString> mCharsetList; - int32_t mCharsetListIndex; + nsAutoPtr<nsNCRFallbackEncoderWrapper> mEncoder; + nsCString mCharset; }; #endif