gecko/dom/encoding/TextDecoder.cpp

264 lines
7.4 KiB
C++
Raw Normal View History

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/dom/TextDecoder.h"
#include "mozilla/dom/EncodingUtils.h"
#include "nsContentUtils.h"
#include "nsICharsetConverterManager.h"
#include "nsServiceManagerUtils.h"
namespace mozilla {
namespace dom {
// U+FFFD (REPLACEMENT CHARACTER). Appended to the decoded output by Decode()
// and HandleBOM() in place of input that could not be decoded when the
// decoder is not in fatal mode.
static const PRUnichar kReplacementChar = static_cast<PRUnichar>(0xFFFD);
/**
 * Initializes the decoder from an encoding label and the constructor options.
 *
 * @param aEncoding  Encoding label supplied by the caller; surrounding space
 *                   characters are trimmed before it is looked up.
 * @param aFatal     Constructor options; only the |fatal| member is read.
 * @param aRv        Receives NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR when the
 *                   label does not map to an encoding, or whatever
 *                   CreateDecoder() reports.
 */
void
TextDecoder::Init(const nsAString& aEncoding,
                  const TextDecoderOptions& aFatal,
                  ErrorResult& aRv)
{
  nsAutoString label(aEncoding);
  EncodingUtils::TrimSpaceCharacters(label);

  // If the constructor is called with an options argument,
  // and the fatal property of the dictionary is set,
  // set the internal fatal flag of the decoder object.
  // This has to happen before the "utf-16" early return below: that path
  // creates its decoder later from HandleBOM(), and both HandleBOM() and
  // CreateDecoder() consult mFatal.
  mFatal = aFatal.fatal;

  // If label is a case-insensitive match for "utf-16"
  // then set the internal useBOM flag.
  if (label.LowerCaseEqualsLiteral("utf-16")) {
    mUseBOM = true;
    mIsUTF16Family = true;
    mEncoding = "utf-16le";
    // If BOM is used, we can't determine the converter yet.
    return;
  }

  // Run the steps to get an encoding from Encoding.
  if (!EncodingUtils::FindEncodingForLabel(label, mEncoding)) {
    // If the steps result in failure,
    // throw an "EncodingError" exception and terminate these steps.
    aRv.Throw(NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
    return;
  }

  mIsUTF16Family = !strcmp(mEncoding, "utf-16le") ||
                   !strcmp(mEncoding, "utf-16be");

  CreateDecoder(aRv);
}
/**
 * Obtains a Unicode decoder for mEncoding from the charset converter manager
 * service and stores it in mDecoder. When mFatal is set, the new decoder is
 * switched to the kOnError_Signal input error behavior.
 *
 * @param aRv  Receives NS_ERROR_UNEXPECTED if the service is unavailable or
 *             no decoder was handed back.
 */
void
TextDecoder::CreateDecoder(ErrorResult& aRv)
{
  nsCOMPtr<nsICharsetConverterManager> converterManager =
    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID);

  if (converterManager) {
    converterManager->GetUnicodeDecoder(mEncoding, getter_AddRefs(mDecoder));
    if (mDecoder) {
      if (mFatal) {
        mDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
      }
      return;
    }
  }

  // Either the service or the decoder could not be obtained.
  aRv.Throw(NS_ERROR_UNEXPECTED);
}
/**
 * Resets the underlying decoder. When aResetOffset is true, mOffset (the
 * number of leading bytes HandleBOM() has buffered so far) is also put back
 * to 0.
 */
void
TextDecoder::ResetDecoder(bool aResetOffset)
{
  mDecoder->Reset();

  if (!aResetOffset) {
    return;
  }
  mOffset = 0;
}
/**
 * Decodes the bytes of aView into aOutDecodedString.
 *
 * @param aView              Bytes to decode; a null view is treated as an
 *                           empty byte sequence.
 * @param aOptions           Per-call options; |stream| decides whether the
 *                           decoder state is kept for a following call.
 * @param aOutDecodedString  Truncated first, then filled with the result.
 * @param aRv                Receives the GetMaxLength() failure code,
 *                           NS_ERROR_OUT_OF_MEMORY, or
 *                           NS_ERROR_DOM_ENCODING_DECODE_ERR.
 */
void
TextDecoder::Decode(const ArrayBufferView* aView,
                    const TextDecodeOptions& aOptions,
                    nsAString& aOutDecodedString,
                    ErrorResult& aRv)
{
  // If view is not specified, let view be a Uint8Array of length 0.
  const char* cursor;
  uint32_t remaining;
  if (aView) {
    cursor = reinterpret_cast<const char*>(aView->Data());
    remaining = aView->Length();
  } else {
    cursor = EmptyCString().BeginReading();
    remaining = EmptyCString().Length();
  }

  aOutDecodedString.Truncate();

  // For the UTF-16 family the first two bytes are routed through HandleBOM(),
  // which advances cursor/remaining past whatever it consumed.
  const bool needsBOMHandling = mIsUTF16Family && mOffset < 2;
  if (needsBOMHandling) {
    HandleBOM(cursor, remaining, aOptions, aOutDecodedString, aRv);
    if (aRv.Failed()) {
      return;
    }
    if (mOffset < 2) {
      // The two leading bytes are still incomplete; nothing more to do.
      return;
    }
  }

  // Run or resume the decoder algorithm of the decoder object's encoder.
  int32_t capacity;
  nsresult rv = mDecoder->GetMaxLength(cursor, remaining, &capacity);
  if (NS_FAILED(rv)) {
    aRv.Throw(rv);
    return;
  }

  // Need a fallible allocator because the caller may be a content
  // and the content can specify the length of the string.
  static const fallible_t fallible = fallible_t();
  nsAutoArrayPtr<PRUnichar> buf(new (fallible) PRUnichar[capacity + 1]);
  if (!buf) {
    aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
    return;
  }

  while (true) {
    int32_t consumed = remaining;
    int32_t produced = capacity;
    rv = mDecoder->Convert(cursor, &consumed, buf, &produced);

    // Convert will convert the input partially even if the status
    // indicates a failure, so whatever was produced is always kept.
    buf[produced] = 0;
    aOutDecodedString.Append(buf, produced);

    const bool recoverable = !mFatal && rv == NS_ERROR_ILLEGAL_INPUT;
    if (!recoverable) {
      break;
    }

    // Emit a decode error manually because some decoders
    // do not support kOnError_Recover (bug 638379)
    if (consumed == -1) {
      ResetDecoder();
      continue;
    }
    // Step over the consumed bytes plus the offending one, and mark the spot.
    cursor += consumed + 1;
    remaining -= consumed + 1;
    aOutDecodedString.Append(kReplacementChar);
  }

  // If the internal streaming flag of the decoder object is not set,
  // then reset the encoding algorithm state to the default values
  if (!aOptions.stream) {
    ResetDecoder();

    const bool inputTruncated = (rv == NS_OK_UDEC_MOREINPUT);
    if (inputTruncated && mFatal) {
      aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
    } else if (inputTruncated) {
      // Need to emit a decode error manually
      // to simulate the EOF handling of the Encoding spec.
      aOutDecodedString.Append(kReplacementChar);
    }
  }

  if (NS_FAILED(rv)) {
    aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
  }
}
void
TextDecoder::HandleBOM(const char*& aData, uint32_t& aLength,
const TextDecodeOptions& aOptions,
nsAString& aOutString, ErrorResult& aRv)
{
if (aLength < 2u - mOffset) {
if (aOptions.stream) {
memcpy(mInitialBytes + mOffset, aData, aLength);
mOffset += aLength;
} else if (mFatal) {
aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
} else {
aOutString.Append(kReplacementChar);
}
return;
}
memcpy(mInitialBytes + mOffset, aData, 2 - mOffset);
// copied data will be fed later.
aData += 2 - mOffset;
aLength -= 2 - mOffset;
mOffset = 2;
const char* encoding = "";
if (!EncodingUtils::IdentifyDataOffset(mInitialBytes, 2, encoding) ||
strcmp(encoding, mEncoding)) {
// If the stream doesn't start with BOM or the BOM doesn't match the
// encoding, feed a BOM to workaround decoder's bug (bug 634541).
if (!mUseBOM) {
FeedBytes(!strcmp(mEncoding, "utf-16le") ? "\xFF\xFE" : "\xFE\xFF");
}
}
if (mUseBOM) {
// Select a decoder corresponding to the BOM.
if (!*encoding) {
encoding = "utf-16le";
}
// If the endian has not been changed, reuse the decoder.
if (mDecoder && !strcmp(encoding, mEncoding)) {
ResetDecoder(false);
} else {
mEncoding = encoding;
CreateDecoder(aRv);
}
}
FeedBytes(mInitialBytes, &aOutString);
}
void
TextDecoder::FeedBytes(const char* aBytes, nsAString* aOutString)
{
PRUnichar buf[3];
int32_t srcLen = mOffset;
int32_t dstLen = mozilla::ArrayLength(buf);
DebugOnly<nsresult> rv =
mDecoder->Convert(aBytes, &srcLen, buf, &dstLen);
MOZ_ASSERT(NS_SUCCEEDED(rv));
MOZ_ASSERT(srcLen == mOffset);
if (aOutString) {
aOutString->Assign(buf, dstLen);
}
}
/**
 * Writes the publicly visible encoding name into aEncoding, hiding the
 * internal converter names that are used as workarounds.
 */
void
TextDecoder::GetEncoding(nsAString& aEncoding)
{
  // Our utf-16 converter does not comply with the Encoding Standard, so the
  // utf-16le converter stands in for the label "utf-16". That substitution
  // must not leak through the public API: report "utf-16" whenever the BOM
  // mode is on or the internal name is "utf-16le".
  const bool reportAsUTF16 = mUseBOM || strcmp(mEncoding, "utf-16le") == 0;

  if (reportAsUTF16) {
    aEncoding.AssignLiteral("utf-16");
  } else if (strcmp(mEncoding, "x-windows-949") == 0) {
    // Likewise "x-windows-949" is the internal name used for the "euc-kr"
    // family, so "euc-kr" is what gets reported.
    aEncoding.AssignLiteral("euc-kr");
  } else {
    aEncoding.AssignASCII(mEncoding);
  }
}
// Macro-generated XPCOM boilerplate for TextDecoder.
// NOTE(review): the exact expansions live in the macro headers, which are not
// visible in this file; the descriptions below follow the macro names.

// AddRef/Release for a cycle-collected class.
NS_IMPL_CYCLE_COLLECTING_ADDREF(TextDecoder)
NS_IMPL_CYCLE_COLLECTING_RELEASE(TextDecoder)
// Interface map: nsISupports is the only interface entry listed.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(TextDecoder)
NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END
// Cycle-collection implementation (wrapper-cache variant) with one member
// named: mGlobal.
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(TextDecoder, mGlobal)
} // dom
} // mozilla