Bug 764234 - Implement StringEncoding API. r=dougt,smontagu

This commit is contained in:
Masatoshi Kimura 2012-09-28 11:19:18 +01:00
parent fdba5f54e7
commit 4a9395eb5d
24 changed files with 1995 additions and 0 deletions

View File

@ -128,6 +128,7 @@ static NS_DEFINE_CID(kXTFServiceCID, NS_XTFSERVICE_CID);
#include "nsISVGChildFrame.h"
#include "nsRenderingContext.h"
#include "gfxSVGGlyphs.h"
#include "mozilla/dom/EncodingUtils.h"
#ifdef IBMBIDI
#include "nsIBidiKeyboard.h"
@ -1527,6 +1528,7 @@ nsContentUtils::Shutdown()
NS_IF_RELEASE(sSameOriginChecker);
EncodingUtils::Shutdown();
nsTextEditorState::ShutDown();
}

View File

@ -52,6 +52,7 @@ PARALLEL_DIRS += \
contacts \
alarm \
devicestorage \
encoding \
file \
media \
messages \

View File

@ -35,6 +35,11 @@ DOM4_MSG_DEF(DataCloneError, "The object could not be cloned.", NS_ERROR_DOM_DAT
DOM4_MSG_DEF(TypeError, "The method parameter is missing or invalid.", NS_ERROR_TYPE_ERR)
DOM4_MSG_DEF(RangeError, "The method parameter is out of valid range.", NS_ERROR_RANGE_ERR)
/* StringEncoding API errors from http://wiki.whatwg.org/wiki/StringEncoding */
DOM4_MSG_DEF(EncodingError, "The given encoding is not supported.", NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR)
DOM4_MSG_DEF(EncodingError, "The encoding must be utf-8, utf-16, or utf-16be.", NS_ERROR_DOM_ENCODING_NOT_UTF_ERR)
DOM4_MSG_DEF(EncodingError, "The decoder failed to convert.", NS_ERROR_DOM_ENCODING_DECODE_ERR)
/* SVG DOM errors from http://www.w3.org/TR/SVG11/svgdom.html */
DOM4_MSG_DEF(TypeError, "Unknown or invalid type", NS_ERROR_DOM_SVG_WRONG_TYPE_ERR)

View File

@ -39,6 +39,7 @@ enum DOM4ErrorTypeCodeMap {
TimeoutError = nsIDOMDOMException::TIMEOUT_ERR,
InvalidNodeTypeError = nsIDOMDOMException::INVALID_NODE_TYPE_ERR,
DataCloneError = nsIDOMDOMException::DATA_CLONE_ERR,
EncodingError = 0,
/* XXX Should be JavaScript native errors */
TypeError = 0,

View File

@ -334,6 +334,15 @@ DOMInterfaces = {
'resultNotAddRefed': [ 'getItem' ]
}],
'TextEncoder': {
'headerFile': 'mozilla/dom/TextEncoder.h',
'implicitJSContext': [ 'encode' ],
},
'TextDecoder': {
'headerFile': 'mozilla/dom/TextDecoder.h',
},
'WebGLRenderingContext': {
'nativeType': 'mozilla::WebGLContext',
'headerFile': 'WebGLContext.h',

View File

@ -5,6 +5,7 @@
DOM_SRCDIRS = \
dom/base \
dom/battery \
dom/encoding \
dom/file \
dom/power \
dom/media \

View File

@ -0,0 +1,339 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/dom/EncodingUtils.h"
#include "nsAutoPtr.h"
namespace mozilla {
namespace dom {
// Shared EncodingUtils instance. It is created on first use by
// EncodingUtils::GetOrCreate() and released by EncodingUtils::Shutdown().
EncodingUtils* gEncodings = nullptr;
/**
 * One row of the label table: a lower-case label and the internal name of
 * the converter that is used for it.
 */
struct LabelEncoding
{
  const char* mLabel;
  const char* mEncoding;
};

/**
 * Label -> internal encoding name table, grouped by encoding.
 *
 * Notes on the internal names used here:
 *  - the whole "euc-kr" family is served by the "x-windows-949" converter
 *    (TextDecoder::GetEncoding() maps the name back to "euc-kr"), so every
 *    label of that family must map to "x-windows-949";
 *  - the "utf-16" label is served by the "utf-16le" converter.
 */
static const LabelEncoding labelsEncodings[] = {
  {"unicode-1-1-utf-8", "utf-8"},
  {"utf-8", "utf-8"},
  {"utf8", "utf-8"},
  {"cp864", "ibm864"},
  {"csibm864", "ibm864"},
  {"ibm-864", "ibm864"},
  {"ibm864", "ibm864"},
  {"866", "ibm866"},
  {"cp866", "ibm866"},
  {"csibm866", "ibm866"},
  {"ibm866", "ibm866"},
  {"csisolatin2", "iso-8859-2"},
  {"iso-8859-2", "iso-8859-2"},
  {"iso-ir-101", "iso-8859-2"},
  {"iso8859-2", "iso-8859-2"},
  {"iso88592", "iso-8859-2"},
  {"iso_8859-2", "iso-8859-2"},
  {"iso_8859-2:1987", "iso-8859-2"},
  {"l2", "iso-8859-2"},
  {"latin2", "iso-8859-2"},
  {"csisolatin3", "iso-8859-3"},
  {"iso-8859-3", "iso-8859-3"},
  {"iso-ir-109", "iso-8859-3"},
  {"iso8859-3", "iso-8859-3"},
  {"iso88593", "iso-8859-3"},
  {"iso_8859-3", "iso-8859-3"},
  {"iso_8859-3:1988", "iso-8859-3"},
  {"l3", "iso-8859-3"},
  {"latin3", "iso-8859-3"},
  {"csisolatin4", "iso-8859-4"},
  {"iso-8859-4", "iso-8859-4"},
  {"iso-ir-110", "iso-8859-4"},
  {"iso8859-4", "iso-8859-4"},
  {"iso88594", "iso-8859-4"},
  {"iso_8859-4", "iso-8859-4"},
  {"iso_8859-4:1988", "iso-8859-4"},
  {"l4", "iso-8859-4"},
  {"latin4", "iso-8859-4"},
  {"csisolatincyrillic", "iso-8859-5"},
  {"cyrillic", "iso-8859-5"},
  {"iso-8859-5", "iso-8859-5"},
  {"iso-ir-144", "iso-8859-5"},
  {"iso8859-5", "iso-8859-5"},
  {"iso88595", "iso-8859-5"},
  {"iso_8859-5", "iso-8859-5"},
  {"iso_8859-5:1988", "iso-8859-5"},
  {"arabic", "iso-8859-6"},
  {"asmo-708", "iso-8859-6"},
  {"csiso88596e", "iso-8859-6"},
  {"csiso88596i", "iso-8859-6"},
  {"csisolatinarabic", "iso-8859-6"},
  {"ecma-114", "iso-8859-6"},
  {"iso-8859-6", "iso-8859-6"},
  {"iso-8859-6-e", "iso-8859-6"},
  {"iso-8859-6-i", "iso-8859-6"},
  {"iso-ir-127", "iso-8859-6"},
  {"iso8859-6", "iso-8859-6"},
  {"iso88596", "iso-8859-6"},
  {"iso_8859-6", "iso-8859-6"},
  {"iso_8859-6:1987", "iso-8859-6"},
  {"csisolatingreek", "iso-8859-7"},
  {"ecma-118", "iso-8859-7"},
  {"elot_928", "iso-8859-7"},
  {"greek", "iso-8859-7"},
  {"greek8", "iso-8859-7"},
  {"iso-8859-7", "iso-8859-7"},
  {"iso-ir-126", "iso-8859-7"},
  {"iso8859-7", "iso-8859-7"},
  {"iso88597", "iso-8859-7"},
  {"iso_8859-7", "iso-8859-7"},
  {"iso_8859-7:1987", "iso-8859-7"},
  {"sun_eu_greek", "iso-8859-7"},
  {"csiso88598e", "iso-8859-8"},
  {"csiso88598i", "iso-8859-8"},
  {"csisolatinhebrew", "iso-8859-8"},
  {"hebrew", "iso-8859-8"},
  {"iso-8859-8", "iso-8859-8"},
  {"iso-8859-8-e", "iso-8859-8"},
  {"iso-8859-8-i", "iso-8859-8"},
  {"iso-ir-138", "iso-8859-8"},
  {"iso8859-8", "iso-8859-8"},
  {"iso88598", "iso-8859-8"},
  {"iso_8859-8", "iso-8859-8"},
  {"iso_8859-8:1988", "iso-8859-8"},
  {"logical", "iso-8859-8"},
  {"visual", "iso-8859-8"},
  {"csisolatin6", "iso-8859-10"},
  {"iso-8859-10", "iso-8859-10"},
  {"iso-ir-157", "iso-8859-10"},
  {"iso8859-10", "iso-8859-10"},
  {"iso885910", "iso-8859-10"},
  {"l6", "iso-8859-10"},
  {"latin6", "iso-8859-10"},
  {"iso-8859-13", "iso-8859-13"},
  {"iso8859-13", "iso-8859-13"},
  {"iso885913", "iso-8859-13"},
  {"iso-8859-14", "iso-8859-14"},
  {"iso8859-14", "iso-8859-14"},
  {"iso885914", "iso-8859-14"},
  {"csisolatin9", "iso-8859-15"},
  {"iso-8859-15", "iso-8859-15"},
  {"iso8859-15", "iso-8859-15"},
  {"iso885915", "iso-8859-15"},
  {"iso_8859-15", "iso-8859-15"},
  {"l9", "iso-8859-15"},
  {"iso-8859-16", "iso-8859-16"},
  {"cskoi8r", "koi8-r"},
  {"koi", "koi8-r"},
  {"koi8", "koi8-r"},
  {"koi8-r", "koi8-r"},
  {"koi8_r", "koi8-r"},
  {"koi8-u", "koi8-u"},
  {"csmacintosh", "macintosh"},
  {"mac", "macintosh"},
  {"macintosh", "macintosh"},
  {"x-mac-roman", "macintosh"},
  {"dos-874", "windows-874"},
  {"iso-8859-11", "windows-874"},
  {"iso8859-11", "windows-874"},
  {"iso885911", "windows-874"},
  {"tis-620", "windows-874"},
  {"windows-874", "windows-874"},
  {"cp1250", "windows-1250"},
  {"windows-1250", "windows-1250"},
  {"x-cp1250", "windows-1250"},
  {"cp1251", "windows-1251"},
  {"windows-1251", "windows-1251"},
  {"x-cp1251", "windows-1251"},
  {"ansi_x3.4-1968", "windows-1252"},
  {"ascii", "windows-1252"},
  {"cp1252", "windows-1252"},
  {"cp819", "windows-1252"},
  {"csisolatin1", "windows-1252"},
  {"ibm819", "windows-1252"},
  {"iso-8859-1", "windows-1252"},
  {"iso-ir-100", "windows-1252"},
  {"iso8859-1", "windows-1252"},
  {"iso88591", "windows-1252"},
  {"iso_8859-1", "windows-1252"},
  {"iso_8859-1:1987", "windows-1252"},
  {"l1", "windows-1252"},
  {"latin1", "windows-1252"},
  {"us-ascii", "windows-1252"},
  {"windows-1252", "windows-1252"},
  {"x-cp1252", "windows-1252"},
  {"cp1253", "windows-1253"},
  {"windows-1253", "windows-1253"},
  {"x-cp1253", "windows-1253"},
  {"cp1254", "windows-1254"},
  {"csisolatin5", "windows-1254"},
  {"iso-8859-9", "windows-1254"},
  {"iso-ir-148", "windows-1254"},
  {"iso8859-9", "windows-1254"},
  {"iso88599", "windows-1254"},
  {"iso_8859-9", "windows-1254"},
  {"iso_8859-9:1989", "windows-1254"},
  {"l5", "windows-1254"},
  {"latin5", "windows-1254"},
  {"windows-1254", "windows-1254"},
  {"x-cp1254", "windows-1254"},
  {"cp1255", "windows-1255"},
  {"windows-1255", "windows-1255"},
  {"x-cp1255", "windows-1255"},
  {"cp1256", "windows-1256"},
  {"windows-1256", "windows-1256"},
  {"x-cp1256", "windows-1256"},
  {"cp1257", "windows-1257"},
  {"windows-1257", "windows-1257"},
  {"x-cp1257", "windows-1257"},
  {"cp1258", "windows-1258"},
  {"windows-1258", "windows-1258"},
  {"x-cp1258", "windows-1258"},
  {"x-mac-cyrillic", "x-mac-cyrillic"},
  {"x-mac-ukrainian", "x-mac-cyrillic"},
  {"chinese", "gbk"},
  {"csgb2312", "gbk"},
  {"csiso58gb231280", "gbk"},
  {"gb2312", "gbk"},
  {"gb_2312", "gbk"},
  {"gb_2312-80", "gbk"},
  {"gbk", "gbk"},
  {"iso-ir-58", "gbk"},
  {"x-gbk", "gbk"},
  {"gb18030", "gb18030"},
  {"hz-gb-2312", "hz-gb-2312"},
  {"big5", "big5"},
  {"big5-hkscs", "big5"},
  {"cn-big5", "big5"},
  {"csbig5", "big5"},
  {"x-x-big5", "big5"},
  {"cseucpkdfmtjapanese", "euc-jp"},
  {"euc-jp", "euc-jp"},
  {"x-euc-jp", "euc-jp"},
  {"csiso2022jp", "iso-2022-jp"},
  {"iso-2022-jp", "iso-2022-jp"},
  {"csshiftjis", "shift-jis"},
  {"ms_kanji", "shift-jis"},
  {"shift-jis", "shift-jis"},
  {"shift_jis", "shift-jis"},
  {"sjis", "shift-jis"},
  {"windows-31j", "shift-jis"},
  {"x-sjis", "shift-jis"},
  // Fixed: "cseuckr" used to map to "euc-kr", unlike the rest of its family.
  {"cseuckr", "x-windows-949"},
  {"csksc56011987", "x-windows-949"},
  {"euc-kr", "x-windows-949"},
  {"iso-ir-149", "x-windows-949"},
  {"korean", "x-windows-949"},
  {"ks_c_5601-1987", "x-windows-949"},
  {"ks_c_5601-1989", "x-windows-949"},
  {"ksc5601", "x-windows-949"},
  {"ksc_5601", "x-windows-949"},
  {"windows-949", "x-windows-949"},
  {"csiso2022kr", "iso-2022-kr"},
  {"iso-2022-kr", "iso-2022-kr"},
  {"utf-16", "utf-16le"},
  {"utf-16le", "utf-16le"},
  {"utf-16be", "utf-16be"}
};
/**
 * Builds the label lookup table from the static labelsEncodings array.
 * Must only run while no shared instance exists.
 */
EncodingUtils::EncodingUtils()
{
  MOZ_ASSERT(!gEncodings);

  // Size the hashtable for the full label list up front.
  const uint32_t entryCount = ArrayLength(labelsEncodings);
  mLabelsEncodings.Init(entryCount);

  // Keys are stored as UTF-16 because lookups come in as nsAString.
  const LabelEncoding* const end = labelsEncodings + entryCount;
  for (const LabelEncoding* entry = labelsEncodings; entry != end; ++entry) {
    mLabelsEncodings.Put(NS_ConvertASCIItoUTF16(entry->mLabel),
                         entry->mEncoding);
  }
}
// Destructor. Asserts that the object being destroyed is the shared
// instance that gEncodings points to.
EncodingUtils::~EncodingUtils()
{
MOZ_ASSERT(gEncodings && gEncodings == this);
}
// Drops the reference held through gEncodings, if there is one.
void
EncodingUtils::Shutdown()
{
NS_IF_RELEASE(gEncodings);
}
/**
 * Returns the shared EncodingUtils instance, creating it on first use.
 *
 * @return an owning reference for the caller; gEncodings keeps its own
 *         separate reference until Shutdown().
 */
already_AddRefed<EncodingUtils>
EncodingUtils::GetOrCreate()
{
  if (!gEncodings) {
    // First use: gEncodings must still be null while the constructor runs
    // (it asserts that), so assign only after construction.
    EncodingUtils* created = new EncodingUtils();
    NS_ADDREF(created);
    gEncodings = created;
  }

  // Second reference: the one handed to the caller.
  nsRefPtr<EncodingUtils> result(gEncodings);
  return result.forget();
}
/**
 * Looks for a byte order mark at the start of aData.
 *
 * @param aData   bytes to inspect.
 * @param aLength number of readable bytes in aData.
 * @param aRetval set to "utf-16le", "utf-16be" or "utf-8" when the matching
 *                BOM is found, otherwise set to "".
 * @return the BOM length in bytes (2 or 3), or 0 when no BOM was found.
 */
uint32_t
EncodingUtils::IdentifyDataOffset(const char* aData,
                                  const uint32_t aLength,
                                  const char*& aRetval)
{
  // Default result: no BOM recognised.
  aRetval = "";

  // The UTF-16 marks are two bytes long.
  if (aLength >= 2) {
    const char first = aData[0];
    const char second = aData[1];
    if (first == '\xFF' && second == '\xFE') {
      aRetval = "utf-16le";
      return 2;
    }
    if (first == '\xFE' && second == '\xFF') {
      aRetval = "utf-16be";
      return 2;
    }
  }

  // The UTF-8 mark needs three bytes.
  if (aLength >= 3 &&
      aData[0] == '\xEF' && aData[1] == '\xBB' && aData[2] == '\xBF') {
    aRetval = "utf-8";
    return 3;
  }

  return 0;
}
bool
EncodingUtils::FindEncodingForLabel(const nsAString& aLabel,
const char*& aOutEncoding)
{
nsRefPtr<EncodingUtils> self = EncodingUtils::GetOrCreate();
MOZ_ASSERT(self);
// Save aLabel first because it may be the same as aOutEncoding.
nsString label(aLabel);
// Truncating to clear aOutEncoding in case of failure.
aOutEncoding = EmptyCString().get();
EncodingUtils::TrimSpaceCharacters(label);
if (label.IsEmpty()) {
return false;
}
ToLowerCase(label);
const char* encoding = self->mLabelsEncodings.Get(label);
if (!encoding) {
return false;
}
aOutEncoding = encoding;
return true;
}
} // namespace dom
} // namespace mozilla

View File

@ -0,0 +1,76 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef mozilla_dom_encodingutils_h_
#define mozilla_dom_encodingutils_h_
#include "nsDataHashtable.h"
#include "nsString.h"
namespace mozilla {
namespace dom {
/**
 * Static helpers shared by TextEncoder and TextDecoder: BOM sniffing,
 * label-to-encoding lookup and label trimming. The label table lives in a
 * reference-counted instance obtained through GetOrCreate().
 */
class EncodingUtils
{
public:
NS_INLINE_DECL_REFCOUNTING(EncodingUtils)
/**
* Implements decode algorithm's step 1 & 2 from Encoding spec.
* http://dvcs.w3.org/hg/encoding/raw-file/tip/Overview.html#decode
*
* @param aData, incoming byte stream of data.
* @param aLength, incoming byte stream length.
* @param aRetval, outgoing encoding corresponding to valid data
* byte order mark; set to an empty string when no
* byte order mark is found.
* @return offset after the BOM bytes in byte stream
* where the actual data starts (0 when there is no BOM).
*/
static uint32_t IdentifyDataOffset(const char* aData,
const uint32_t aLength,
const char*& aRetval);
/**
* Implements get an encoding algorithm from Encoding spec.
* http://dvcs.w3.org/hg/encoding/raw-file/tip/Overview.html#concept-encoding-get
* Given a label, this function returns the corresponding encoding or
* false.
*
* @param aLabel, incoming label describing charset to be decoded.
* @param aOutEncoding, returning corresponding encoding for label;
* set to an empty string on failure.
* @return false if no encoding was found for label.
* true if valid encoding found.
*/
static bool FindEncodingForLabel(const nsAString& aLabel,
const char*& aOutEncoding);
/**
* Remove any leading and trailing space characters, following the
* definition of space characters from Encoding spec.
* http://dvcs.w3.org/hg/encoding/raw-file/tip/Overview.html#terminology
* Note that nsAString::StripWhitespace() doesn't exactly match the
* definition. It also removes all matching chars in the string,
* not just leading and trailing.
*
* @param aString, string to be trimmed.
*/
static void TrimSpaceCharacters(nsString& aString)
{
aString.Trim(" \t\n\f\r");
}
/* Called to free up Encoding instance. */
static void Shutdown();
protected:
// Maps a lower-case label to the internal encoding name.
nsDataHashtable<nsStringHashKey, const char *> mLabelsEncodings;
EncodingUtils();
virtual ~EncodingUtils();
// Returns the shared instance, creating it on first use.
static already_AddRefed<EncodingUtils> GetOrCreate();
};
} // dom
} // mozilla
#endif // mozilla_dom_encodingutils_h_

38
dom/encoding/Makefile.in Normal file
View File

@ -0,0 +1,38 @@
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Build rules for dom/encoding: compiles EncodingUtils, TextDecoder and
# TextEncoder into the domencoding_s library and exports their headers
# under mozilla/dom.
DEPTH = @DEPTH@
topsrcdir = @top_srcdir@
srcdir = @srcdir@
VPATH = @srcdir@
FAIL_ON_WARNINGS := 1
include $(DEPTH)/config/autoconf.mk
MODULE = dom
LIBRARY_NAME = domencoding_s
LIBXUL_LIBRARY = 1
FORCE_STATIC_LIB = 1
# Sub-directories built along with this one.
DIRS = \
test \
$(NULL)
# Headers exported as mozilla/dom/<name>.h.
EXPORTS_NAMESPACES = mozilla/dom
EXPORTS_mozilla/dom = \
EncodingUtils.h \
TextDecoder.h \
TextEncoder.h \
$(NULL)
# Sources compiled into the library.
CPPSRCS = \
EncodingUtils.cpp \
TextDecoder.cpp \
TextEncoder.cpp \
$(NULL)
include $(topsrcdir)/dom/dom-config.mk
include $(topsrcdir)/config/config.mk
include $(topsrcdir)/config/rules.mk

View File

@ -0,0 +1,263 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/dom/TextDecoder.h"
#include "mozilla/dom/EncodingUtils.h"
#include "nsContentUtils.h"
#include "nsICharsetConverterManager.h"
#include "nsServiceManagerUtils.h"
namespace mozilla {
namespace dom {
// U+FFFD REPLACEMENT CHARACTER, appended to the output in place of input
// that could not be decoded when the decoder is not in fatal mode.
static const PRUnichar kReplacementChar = static_cast<PRUnichar>(0xFFFD);
/**
 * Resolves the encoding label and prepares the decoder.
 *
 * For the label "utf-16" the converter is chosen later, once the BOM has
 * been seen (see HandleBOM); for every other label it is created here.
 *
 * @param aEncoding label of the encoding to decode from.
 * @param aFatal    constructor options; only the `fatal` member is read.
 * @param aRv       set to NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR for an
 *                  unknown label, or to whatever CreateDecoder() reports.
 */
void
TextDecoder::Init(const nsAString& aEncoding,
                  const TextDecoderOptions& aFatal,
                  ErrorResult& aRv)
{
  nsAutoString label(aEncoding);
  EncodingUtils::TrimSpaceCharacters(label);

  // If the constructor is called with an options argument,
  // and the fatal property of the dictionary is set,
  // set the internal fatal flag of the decoder object.
  // This must happen before the "utf-16" early return below; otherwise the
  // fatal option would be silently dropped for that label, because the
  // decoder created later in HandleBOM() reads mFatal.
  mFatal = aFatal.fatal;

  // If label is a case-insensitive match for "utf-16"
  // then set the internal useBOM flag.
  if (label.LowerCaseEqualsLiteral("utf-16")) {
    mUseBOM = true;
    mIsUTF16Family = true;
    mEncoding = "utf-16le";
    // If BOM is used, we can't determine the converter yet.
    return;
  }

  // Run the steps to get an encoding from Encoding.
  if (!EncodingUtils::FindEncodingForLabel(label, mEncoding)) {
    // If the steps result in failure,
    // throw a "EncodingError" exception and terminate these steps.
    aRv.Throw(NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
    return;
  }

  mIsUTF16Family = !strcmp(mEncoding, "utf-16le") ||
                   !strcmp(mEncoding, "utf-16be");

  CreateDecoder(aRv);
}
/**
 * Obtains a converter for mEncoding from the charset converter manager and
 * stores it in mDecoder. In fatal mode the converter is switched to signal
 * input errors.
 *
 * @param aRv set to NS_ERROR_UNEXPECTED when the manager service or the
 *            converter is unavailable.
 */
void
TextDecoder::CreateDecoder(ErrorResult& aRv)
{
  nsCOMPtr<nsICharsetConverterManager> manager =
    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID);
  if (manager) {
    // Success is judged by the resulting pointer, not the return code.
    manager->GetUnicodeDecoder(mEncoding, getter_AddRefs(mDecoder));
  }

  if (!manager || !mDecoder) {
    aRv.Throw(NS_ERROR_UNEXPECTED);
    return;
  }

  if (mFatal) {
    mDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
  }
}
/**
 * Resets the converter state.
 *
 * @param aResetOffset when true, also forget how many initial bytes have
 *                     been consumed for BOM handling (mOffset).
 */
void
TextDecoder::ResetDecoder(bool aResetOffset)
{
  mDecoder->Reset();
  if (!aResetOffset) {
    // Caller wants to keep the BOM bookkeeping as it is.
    return;
  }
  mOffset = 0;
}
// Decodes the bytes of aView (or an empty input when aView is null) into
// aOutDecodedString.
//
// For the UTF-16 family the first two bytes are routed through HandleBOM()
// before normal conversion starts. In non-fatal mode, illegal input is
// replaced by U+FFFD and conversion continues after the offending byte; in
// fatal mode the first failure ends the call with
// NS_ERROR_DOM_ENCODING_DECODE_ERR. Unless aOptions.stream is set, the
// decoder is reset at the end and an incomplete trailing sequence is
// treated as a decode error.
void
TextDecoder::Decode(const ArrayBufferView* aView,
const TextDecodeOptions& aOptions,
nsAString& aOutDecodedString,
ErrorResult& aRv)
{
const char* data;
uint32_t length;
// If view is not specified, let view be a Uint8Array of length 0.
if (!aView) {
data = EmptyCString().BeginReading();
length = EmptyCString().Length();
} else {
data = reinterpret_cast<const char*>(aView->Data());
length = aView->Length();
}
aOutDecodedString.Truncate();
// mOffset < 2 means the two leading bytes have not been fully seen yet.
if (mIsUTF16Family && mOffset < 2) {
HandleBOM(data, length, aOptions, aOutDecodedString, aRv);
// Stop on error, or when there were not enough bytes to decide yet.
if (aRv.Failed() || mOffset < 2) {
return;
}
}
// Run or resume the decoder algorithm of the decoder object's encoder.
int32_t outLen;
nsresult rv = mDecoder->GetMaxLength(data, length, &outLen);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
return;
}
// Need a fallible allocator because the caller may be a content
// and the content can specify the length of the string.
static const fallible_t fallible = fallible_t();
// One extra unit for the terminating zero written after each Convert().
nsAutoArrayPtr<PRUnichar> buf(new (fallible) PRUnichar[outLen + 1]);
if (!buf) {
aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
return;
}
for (;;) {
int32_t srcLen = length;
int32_t dstLen = outLen;
rv = mDecoder->Convert(data, &srcLen, buf, &dstLen);
// Convert will convert the input partially even if the status
// indicates a failure.
buf[dstLen] = 0;
aOutDecodedString.Append(buf, dstLen);
// Only non-fatal illegal input is retried; anything else ends the loop.
if (mFatal || rv != NS_ERROR_ILLEGAL_INPUT) {
break;
}
// Emit a decode error manually because some decoders
// do not support kOnError_Recover (bug 638379)
if (srcLen == -1) {
ResetDecoder();
} else {
// Skip past the byte that was rejected and substitute U+FFFD for it.
data += srcLen + 1;
length -= srcLen + 1;
aOutDecodedString.Append(kReplacementChar);
}
}
// If the internal streaming flag of the decoder object is not set,
// then reset the encoding algorithm state to the default values
if (!aOptions.stream) {
ResetDecoder();
if (rv == NS_OK_UDEC_MOREINPUT) {
if (mFatal) {
aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
} else {
// Need to emit a decode error manually
// to simulate the EOF handling of the Encoding spec.
aOutDecodedString.Append(kReplacementChar);
}
}
}
if (NS_FAILED(rv)) {
aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
}
}
void
TextDecoder::HandleBOM(const char*& aData, uint32_t& aLength,
const TextDecodeOptions& aOptions,
nsAString& aOutString, ErrorResult& aRv)
{
if (aLength < 2u - mOffset) {
if (aOptions.stream) {
memcpy(mInitialBytes + mOffset, aData, aLength);
mOffset += aLength;
} else if (mFatal) {
aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
} else {
aOutString.Append(kReplacementChar);
}
return;
}
memcpy(mInitialBytes + mOffset, aData, 2 - mOffset);
// copied data will be fed later.
aData += 2 - mOffset;
aLength -= 2 - mOffset;
mOffset = 2;
const char* encoding = "";
if (!EncodingUtils::IdentifyDataOffset(mInitialBytes, 2, encoding) ||
strcmp(encoding, mEncoding)) {
// If the stream doesn't start with BOM or the BOM doesn't match the
// encoding, feed a BOM to workaround decoder's bug (bug 634541).
if (!mUseBOM) {
FeedBytes(!strcmp(mEncoding, "utf-16le") ? "\xFF\xFE" : "\xFE\xFF");
}
}
if (mUseBOM) {
// Select a decoder corresponding to the BOM.
if (!*encoding) {
encoding = "utf-16le";
}
// If the endian has not been changed, reuse the decoder.
if (mDecoder && !strcmp(encoding, mEncoding)) {
ResetDecoder(false);
} else {
mEncoding = encoding;
CreateDecoder(aRv);
}
}
FeedBytes(mInitialBytes, &aOutString);
}
void
TextDecoder::FeedBytes(const char* aBytes, nsAString* aOutString)
{
PRUnichar buf[3];
int32_t srcLen = mOffset;
int32_t dstLen = mozilla::ArrayLength(buf);
DebugOnly<nsresult> rv =
mDecoder->Convert(aBytes, &srcLen, buf, &dstLen);
MOZ_ASSERT(NS_SUCCEEDED(rv));
MOZ_ASSERT(srcLen == mOffset);
if (aOutString) {
aOutString->Assign(buf, dstLen);
}
}
/**
 * Reports the public name of the encoding in use.
 *
 * Two internal converter names are not exposed as-is:
 *  - "utf-16le" (and any decoder created for the label "utf-16") is
 *    reported as "utf-16", because the utf-16le converter stands in for
 *    the "utf-16" label;
 *  - "x-windows-949" is reported as "euc-kr", because that converter is
 *    used for the whole "euc-kr" family.
 *
 * @param aEncoding receives the encoding name.
 */
void
TextDecoder::GetEncoding(nsAString& aEncoding)
{
  const bool reportAsUtf16 = mUseBOM || !strcmp(mEncoding, "utf-16le");

  if (reportAsUtf16) {
    aEncoding.AssignLiteral("utf-16");
  } else if (!strcmp(mEncoding, "x-windows-949")) {
    aEncoding.AssignLiteral("euc-kr");
  } else {
    aEncoding.AssignASCII(mEncoding);
  }
}
// XPCOM boilerplate for TextDecoder: cycle-collecting AddRef/Release, an
// interface map that exposes nsISupports, and the wrapper-cache cycle
// collection implementation with mGlobal as its one listed member.
NS_IMPL_CYCLE_COLLECTING_ADDREF(TextDecoder)
NS_IMPL_CYCLE_COLLECTING_RELEASE(TextDecoder)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(TextDecoder)
NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(TextDecoder, mGlobal)
} // dom
} // mozilla

131
dom/encoding/TextDecoder.h Normal file
View File

@ -0,0 +1,131 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef mozilla_dom_textdecoder_h_
#define mozilla_dom_textdecoder_h_
#include "jsapi.h"
#include "mozilla/dom/BindingUtils.h"
#include "mozilla/dom/TextDecoderBinding.h"
#include "mozilla/dom/TypedArray.h"
#include "mozilla/ErrorResult.h"
#include "nsIUnicodeDecoder.h"
#include "nsString.h"
#include "nsCOMPtr.h"
#include "nsCycleCollectionParticipant.h"
namespace mozilla {
namespace dom {
/**
 * DOM object that decodes a byte stream into UTF-16 text, with optional
 * streaming across several Decode() calls.
 */
class TextDecoder : public nsISupports, public nsWrapperCache
{
public:
  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(TextDecoder)

  // The WebIDL constructor.
  static already_AddRefed<TextDecoder>
  Constructor(nsISupports* aGlobal,
              const nsAString& aEncoding,
              const TextDecoderOptions& aFatal,
              ErrorResult& aRv)
  {
    nsRefPtr<TextDecoder> txtDecoder = new TextDecoder(aGlobal);
    txtDecoder->Init(aEncoding, aFatal, aRv);
    if (aRv.Failed()) {
      return nullptr;
    }
    return txtDecoder.forget();
  }

  // mEncoding starts as an empty string so that GetEncoding() never reads
  // an indeterminate pointer on an object whose Init() has not run.
  TextDecoder(nsISupports* aGlobal)
    : mEncoding("")
    , mGlobal(aGlobal)
    , mFatal(false), mUseBOM(false), mOffset(0), mIsUTF16Family(false)
  {
    MOZ_ASSERT(aGlobal);
    SetIsDOMBinding();
  }

  virtual
  ~TextDecoder()
  {}

  virtual JSObject*
  WrapObject(JSContext* aCx, JSObject* aScope, bool* aTriedToWrap)
  {
    return TextDecoderBinding::Wrap(aCx, aScope, this, aTriedToWrap);
  }

  nsISupports*
  GetParentObject()
  {
    return mGlobal;
  }

  /**
   * Return the encoding name.
   *
   * @param aEncoding, current encoding.
   */
  void GetEncoding(nsAString& aEncoding);

  /**
   * Decodes incoming byte stream of characters in charset indicated by
   * encoding.
   *
   * The encoding algorithm state is reset if aOptions.stream is not set.
   *
   * If the fatal flag is set then a decoding error will throw EncodingError.
   * Else the decoder will return a decoded string with replacement
   * character(s) for unidentified character(s).
   *
   * @param aView, incoming byte stream of characters to be decoded
   *               to UTF-16 code points. May be null, which is treated as
   *               empty input.
   * @param aOptions, indicates if streaming or not.
   * @param aOutDecodedString, decoded string of UTF-16 code points.
   * @param aRv, error result.
   */
  void Decode(const ArrayBufferView* aView,
              const TextDecodeOptions& aOptions,
              nsAString& aOutDecodedString,
              ErrorResult& aRv);

private:
  // Internal name of the converter in use (see GetEncoding for the names
  // that are translated before being exposed).
  const char* mEncoding;
  nsCOMPtr<nsIUnicodeDecoder> mDecoder;
  nsCOMPtr<nsISupports> mGlobal;
  // When set, decode errors throw instead of producing U+FFFD.
  bool mFatal;
  // Set when the decoder was created for the label "utf-16"; the converter
  // is then chosen from the BOM.
  bool mUseBOM;
  // Number of leading bytes collected in mInitialBytes so far.
  uint8_t mOffset;
  char mInitialBytes[3];
  bool mIsUTF16Family;

  /**
   * Validates provided encoding and throws an exception if invalid encoding.
   * NOTE(review): no default is applied here; any "utf-8" default for a
   * missing argument presumably comes from the WebIDL binding - confirm.
   *
   * @param aEncoding Encoding label (case insensitive).
   * @param aFatal    constructor options; the `fatal` member indicates
   *                  whether to throw an 'EncodingError' exception on
   *                  decode errors or not.
   * @param aRv       EncodingError exception on an unsupported label.
   */
  void Init(const nsAString& aEncoding,
            const TextDecoderOptions& aFatal,
            ErrorResult& aRv);

  // Internal helper functions.
  void CreateDecoder(ErrorResult& aRv);
  void ResetDecoder(bool aResetOffset = true);
  void HandleBOM(const char*& aData, uint32_t& aLength,
                 const TextDecodeOptions& aOptions,
                 nsAString& aOutString, ErrorResult& aRv);
  void FeedBytes(const char* aBytes, nsAString* aOutString = nullptr);
};
} // dom
} // mozilla
#endif // mozilla_dom_textdecoder_h_

View File

@ -0,0 +1,136 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/dom/TextEncoder.h"
#include "mozilla/dom/EncodingUtils.h"
#include "nsContentUtils.h"
#include "nsICharsetConverterManager.h"
#include "nsServiceManagerUtils.h"
namespace mozilla {
namespace dom {
void
TextEncoder::Init(const Optional<nsAString>& aEncoding,
ErrorResult& aRv)
{
// If the constructor is called with no arguments, let label be the "utf-8".
// Otherwise, let label be the value of the encoding argument.
nsAutoString label;
if (!aEncoding.WasPassed()) {
label.AssignLiteral("utf-8");
} else {
label.Assign(aEncoding.Value());
EncodingUtils::TrimSpaceCharacters(label);
}
// Run the steps to get an encoding from Encoding.
if (!EncodingUtils::FindEncodingForLabel(label, mEncoding)) {
// If the steps result in failure,
// throw an "EncodingError" exception and terminate these steps.
aRv.Throw(NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
return;
}
// Otherwise, if the Name of the returned encoding is not one of
// "utf-8", "utf-16", or "utf-16be" throw an "EncodingError" exception
// and terminate these steps.
if (PL_strcasecmp(mEncoding, "utf-8") &&
PL_strcasecmp(mEncoding, "utf-16le") &&
PL_strcasecmp(mEncoding, "utf-16be")) {
aRv.Throw(NS_ERROR_DOM_ENCODING_NOT_UTF_ERR);
return;
}
// Create an encoder object for mEncoding.
nsCOMPtr<nsICharsetConverterManager> ccm =
do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID);
if (!ccm) {
aRv.Throw(NS_ERROR_UNEXPECTED);
return;
}
ccm->GetUnicodeEncoder(mEncoding, getter_AddRefs(mEncoder));
if (!mEncoder) {
aRv.Throw(NS_ERROR_UNEXPECTED);
return;
}
}
/**
 * Encodes aString with the encoder chosen in Init() and returns the bytes
 * as a Uint8Array.
 *
 * @param aCx      JS context used to create the result array.
 * @param aString  UTF-16 code units to encode.
 * @param aOptions when `stream` is not set, the encoder is finished (and
 *                 thereby reset) after this call.
 * @param aRv      set when the encoder reports a failure or when the
 *                 output buffer or result array cannot be allocated.
 * @return the Uint8Array holding the encoded bytes, or null on failure.
 */
JSObject*
TextEncoder::Encode(JSContext* aCx,
                    const nsAString& aString,
                    const TextEncodeOptions& aOptions,
                    ErrorResult& aRv)
{
  // Run the steps of the encoding algorithm.
  // PromiseFlatString() returns a temporary that may own a flattened copy
  // of the string. Bind it to a const reference so that the buffer stays
  // alive for the whole function instead of dying at the end of the
  // statement that calls get().
  const nsPromiseFlatString& flatString = PromiseFlatString(aString);
  const PRUnichar* data = flatString.get();
  int32_t srcLen = aString.Length();
  int32_t maxLen;
  nsresult rv = mEncoder->GetMaxLength(data, srcLen, &maxLen);
  if (NS_FAILED(rv)) {
    aRv.Throw(rv);
    return nullptr;
  }

  // Need a fallible allocator because the caller may be a content
  // and the content can specify the length of the string.
  static const fallible_t fallible = fallible_t();
  // One extra byte for the terminating zero written below.
  nsAutoArrayPtr<char> buf(new (fallible) char[maxLen + 1]);
  if (!buf) {
    aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
    return nullptr;
  }

  int32_t dstLen = maxLen;
  rv = mEncoder->Convert(data, &srcLen, buf, &dstLen);

  // If the internal streaming flag is not set, then reset
  // the encoding algorithm state to the default values for encoding.
  if (!aOptions.stream) {
    int32_t finishLen = maxLen - dstLen;
    rv = mEncoder->Finish(buf + dstLen, &finishLen);
    if (NS_SUCCEEDED(rv)) {
      dstLen += finishLen;
    }
  }

  JSObject* outView = nullptr;
  if (NS_SUCCEEDED(rv)) {
    buf[dstLen] = '\0';
    outView = Uint8Array::Create(aCx, this, dstLen,
                                 reinterpret_cast<uint8_t*>(buf.get()));
    if (!outView) {
      // Do not hand a null result back without reporting why.
      aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
      return nullptr;
    }
  }

  if (NS_FAILED(rv)) {
    aRv.Throw(rv);
  }
  return outView;
}
/**
 * Reports the public name of the encoding in use.
 *
 * The utf-16le converter stands in for the "utf-16" label, so the internal
 * name "utf-16le" is reported as "utf-16"; every other name is returned
 * unchanged.
 *
 * @param aEncoding receives the encoding name.
 */
void
TextEncoder::GetEncoding(nsAString& aEncoding)
{
  const bool isInternalUtf16 = !strcmp(mEncoding, "utf-16le");
  if (isInternalUtf16) {
    aEncoding.AssignLiteral("utf-16");
  } else {
    aEncoding.AssignASCII(mEncoding);
  }
}
// XPCOM boilerplate for TextEncoder: cycle-collecting AddRef/Release, an
// interface map that exposes nsISupports, and the wrapper-cache cycle
// collection implementation with mGlobal as its one listed member.
NS_IMPL_CYCLE_COLLECTING_ADDREF(TextEncoder)
NS_IMPL_CYCLE_COLLECTING_RELEASE(TextEncoder)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(TextEncoder)
NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(TextEncoder, mGlobal)
} // dom
} // mozilla

108
dom/encoding/TextEncoder.h Normal file
View File

@ -0,0 +1,108 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef mozilla_dom_textencoder_h_
#define mozilla_dom_textencoder_h_
#include "jsapi.h"
#include "mozilla/dom/BindingUtils.h"
#include "mozilla/dom/TextEncoderBinding.h"
#include "mozilla/dom/TypedArray.h"
#include "mozilla/ErrorResult.h"
#include "nsIUnicodeEncoder.h"
#include "nsString.h"
#include "nsCOMPtr.h"
#include "nsCycleCollectionParticipant.h"
namespace mozilla {
namespace dom {
/**
 * DOM object that encodes UTF-16 text into bytes of a UTF encoding and
 * returns them as a Uint8Array.
 */
class TextEncoder : public nsISupports, public nsWrapperCache
{
public:
  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(TextEncoder)

  // The WebIDL constructor.
  static already_AddRefed<TextEncoder>
  Constructor(nsISupports* aGlobal,
              const Optional<nsAString>& aEncoding,
              ErrorResult& aRv)
  {
    nsRefPtr<TextEncoder> txtEncoder = new TextEncoder(aGlobal);
    txtEncoder->Init(aEncoding, aRv);
    if (aRv.Failed()) {
      return nullptr;
    }
    return txtEncoder.forget();
  }

  // mEncoding starts as an empty string so that GetEncoding() never reads
  // an indeterminate pointer on an object whose Init() has not run.
  TextEncoder(nsISupports* aGlobal)
    : mEncoding("")
    , mGlobal(aGlobal)
  {
    MOZ_ASSERT(aGlobal);
    SetIsDOMBinding();
  }

  virtual
  ~TextEncoder()
  {}

  virtual JSObject*
  WrapObject(JSContext* aCx, JSObject* aScope, bool* aTriedToWrap)
  {
    return TextEncoderBinding::Wrap(aCx, aScope, this, aTriedToWrap);
  }

  nsISupports*
  GetParentObject()
  {
    return mGlobal;
  }

  /**
   * Return the encoding name.
   *
   * @param aEncoding, current encoding.
   */
  void GetEncoding(nsAString& aEncoding);

  /**
   * Encodes incoming utf-16 code units/ DOM string to the requested encoding.
   *
   * @param aCx       Javascript context.
   * @param aString   utf-16 code units to be encoded.
   * @param aOptions  Streaming option. Initialised by default to false.
   *                  If the streaming option is false, then the encoding
   *                  algorithm state will get reset. If set to true then
   *                  the previous encoding is reused/continued.
   * @param aRv       error result.
   * @return JSObject* The Uint8Array wrapped in a JS object.
   */
  JSObject* Encode(JSContext* aCx,
                   const nsAString& aString,
                   const TextEncodeOptions& aOptions,
                   ErrorResult& aRv);

private:
  // Internal name of the encoding in use ("utf-16" is held as "utf-16le").
  const char* mEncoding;
  nsCOMPtr<nsIUnicodeEncoder> mEncoder;
  nsCOMPtr<nsISupports> mGlobal;

  /**
   * Validates provided encoding and throws an exception if invalid encoding.
   * If no encoding is provided then mEncoding is default initialised to "utf-8".
   *
   * @param aEncoding    Optional encoding (case insensitive) provided.
   *                     (valid values are "utf-8", "utf-16", "utf-16be")
   *                     Default value is "utf-8" if no encoding is provided.
   * @param aRv          EncodingError exception on an invalid encoding.
   */
  void Init(const Optional<nsAString>& aEncoding,
            ErrorResult& aRv);
};
} // dom
} // mozilla
#endif // mozilla_dom_textencoder_h_

View File

@ -0,0 +1,22 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Lists the mochitest files for the TextEncoder/TextDecoder tests.
DEPTH = @DEPTH@
topsrcdir = @top_srcdir@
srcdir = @srcdir@
VPATH = @srcdir@
relativesrcdir = @relativesrcdir@
include $(DEPTH)/config/autoconf.mk
# HTML test pages and the JavaScript files that hold the test functions.
MOCHITEST_FILES = \
test_BOMEncoding.js \
test_TextDecoder.html \
test_TextDecoder.js \
test_TextEncoder.html \
test_TextEncoder.js \
$(NULL)
include $(topsrcdir)/config/rules.mk

View File

@ -0,0 +1,133 @@
/*
* test_BOMEncoding.js
* bug 764234 tests
*/
/**
 * Registers the BOM-related decoder tests with the test harness, in order.
 */
function runTextDecoderBOMEnoding()
{
  // Each entry pairs a test function with the name it is reported under.
  var cases = [
    [testDecodeValidBOMUTF16, "testDecodeValidBOMUTF16"],
    [testBOMEncodingUTF8, "testBOMEncodingUTF8"],
    [testMoreBOMEncoding, "testMoreBOMEncoding"]
  ];
  cases.forEach(function (entry) {
    test(entry[0], entry[1]);
  });
}
// Checks that UTF-16BE input which starts with the matching byte order mark
// (0xFE 0xFF) decodes to the expected Cyrillic sentence when the decoder is
// created for "utf-16be".
function testDecodeValidBOMUTF16() {
var expectedString = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\"";
// Testing UTF-16BE
// The first two bytes are the BOM; the rest are big-endian code units.
var data = [0xFE, 0xFF, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x00, 0x2C, 0x00, 0x20, 0x04, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x00, 0x2D, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x00, 0x2E, 0x00, 0x22];
testBOMCharset({encoding: "utf-16be", data: data, expected: expectedString,
msg: "decoder valid UTF-16BE test."});
}
/**
 * Checks byte-order-mark handling for the "utf-8" label (plus one case with an
 * empty label): input without a BOM, input with the UTF-8 BOM (EF BB BF), and
 * input prefixed with a foreign two-byte mark (FF FE) in fatal and non-fatal
 * mode.
 */
function testBOMEncodingUTF8() {
  var asciiBytes = [0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27];
  var asciiText = " !\"#$%&'";
  var utf8Mark = [0xEF, 0xBB, 0xBF];
  var foreignMark = [0xFF, 0xFE];

  // Valid label and byte stream, no byte order mark in the input.
  testBOMCharset({encoding: "utf-8", data: asciiBytes, expected: asciiText,
                  msg: "utf-8 encoding."});

  // Valid label with a matching UTF-8 byte order mark: the expected text
  // does not contain the mark.
  testBOMCharset({encoding: "utf-8", data: utf8Mark.concat(asciiBytes),
                  expected: asciiText,
                  msg: "valid utf-8 encoding provided with VALID utf-8 BOM test."});

  // Valid label, but the input starts with FF FE, which is not valid UTF-8.
  // In fatal mode this is expected to raise EncodingError.
  testBOMCharset({encoding: "utf-8", fatal: true,
                  data: foreignMark.concat(asciiBytes), error: "EncodingError",
                  msg: "valid utf-8 encoding provided with invalid utf-8 fatal BOM test."});

  // Same input in non-fatal mode: each of the two bytes is expected to come
  // out as U+FFFD.
  testBOMCharset({encoding: "utf-8", data: foreignMark.concat(asciiBytes),
                  expected: "\ufffd\ufffd" + asciiText,
                  msg: "valid utf-8 encoding provided with invalid utf-8 BOM test."});

  // Empty label: expected to raise EncodingError regardless of the input.
  testBOMCharset({encoding: "", data: foreignMark.concat(asciiBytes),
                  error: "EncodingError",
                  msg: "empty encoding provided with invalid utf-8 BOM test."});
}
/**
 * Exercises combinations of a caller-supplied label and a byte order mark
 * that may or may not agree with it:
 *  - "utf-16be" / "utf-16le" with the opposite-endian mark: the two mark
 *    bytes are expected to decode to U+FFFE in front of the text;
 *  - "utf-16" with either mark: the text is expected without any mark;
 *  - "utf-8" with FE FF in front: EncodingError in fatal mode, two U+FFFD
 *    otherwise;
 *  - "greek" with FE FF in front: EncodingError in fatal mode, otherwise the
 *    bytes are decoded through the greek table.
 * Every combination is run once with fatal: true and once without.
 */
function testMoreBOMEncoding() {
var expectedString = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\"";
// Testing user provided encoding is UTF-16BE & bom encoding is utf-16le
// (FF FE followed by big-endian code units)
var data = [0xFF, 0xFE, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x00, 0x2C, 0x00, 0x20, 0x04, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x00, 0x2D, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x00, 0x2E, 0x00, 0x22];
testBOMCharset({encoding: "utf-16be", fatal: true, data: data, expected: "\ufffe" + expectedString,
msg: "test decoder invalid BOM encoding for utf-16be fatal."});
testBOMCharset({encoding: "utf-16be", data: data, expected: "\ufffe" + expectedString,
msg: "test decoder invalid BOM encoding for utf-16be."});
// Testing user provided encoding is UTF-16LE & bom encoding is utf-16be
// (FE FF followed by little-endian code units). Note the messages below say
// "utf-16" although the label under test is "utf-16le".
var dataUTF16 = [0xFE, 0xFF, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x04, 0x2C, 0x00, 0x20, 0x00, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x2D, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x04, 0x2E, 0x00, 0x22, 0x00];
testBOMCharset({encoding: "utf-16le", fatal: true, data: dataUTF16, expected: "\ufffe" + expectedString,
msg: "test decoder invalid BOM encoding for utf-16 fatal."});
testBOMCharset({encoding: "utf-16le", data: dataUTF16, expected: "\ufffe" + expectedString,
msg: "test decoder invalid BOM encoding for utf-16."});
// Testing user provided encoding is UTF-16 & bom encoding is utf-16be
// (FE FF followed by big-endian code units; expected text carries no mark)
data = [0xFE, 0xFF, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x00, 0x2C, 0x00, 0x20, 0x04, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x00, 0x2D, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x00, 0x2E, 0x00, 0x22];
testBOMCharset({encoding: "utf-16", fatal: true, data: data, expected: expectedString,
msg: "test decoder BOM encoding for utf-16 fatal."});
testBOMCharset({encoding: "utf-16", data: data, expected: expectedString,
msg: "test decoder BOM encoding for utf-16."});
// Testing user provided encoding is UTF-16 & bom encoding is utf-16le
// (FF FE followed by little-endian code units; expected text carries no mark)
dataUTF16 = [0xFF, 0xFE, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x04, 0x2C, 0x00, 0x20, 0x00, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x2D, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x04, 0x2E, 0x00, 0x22, 0x00];
testBOMCharset({encoding: "utf-16", fatal: true, data: dataUTF16, expected: expectedString,
msg: "test decoder BOM encoding for utf-16 fatal."});
testBOMCharset({encoding: "utf-16", data: dataUTF16, expected: expectedString,
msg: "test decoder BOM encoding for utf-16."});
// Testing user provided encoding is UTF-8 & bom encoding is utf-16be
// (FE FF followed by the UTF-8 encoding of the sentence)
data = [0xFE, 0xFF, 0x22, 0xd0, 0x92, 0xd1, 0x81, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd1, 0x8b, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x8c, 0xd0, 0xb8, 0x20, 0xd0, 0xbf, 0xd0, 0xbe, 0xd1, 0x85, 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb8, 0x20, 0xd0, 0xb4, 0xd1, 0x80, 0xd1, 0x83, 0xd0, 0xb3, 0x20, 0xd0, 0xbd, 0xd0, 0xb0, 0x20, 0xd0, 0xb4, 0xd1, 0x80, 0xd1, 0x83, 0xd0, 0xb3, 0xd0, 0xb0, 0x2c, 0x20, 0xd0, 0xba, 0xd0, 0xb0, 0xd0, 0xb6, 0xd0, 0xb4, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x8c, 0xd1, 0x8f, 0x20, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0x20, 0xd0, 0xbf, 0xd0, 0xbe, 0x2d, 0xd1, 0x81, 0xd0, 0xb2, 0xd0, 0xbe, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x83, 0x2e, 0x22];
testBOMCharset({encoding: "utf-8", fatal: true, data: data, error: "EncodingError",
msg: "test decoder invalid BOM encoding for valid utf-8 fatal provided label."});
testBOMCharset({encoding: "utf-8", data: data, expected: "\ufffd\ufffd" + expectedString,
msg: "test decoder invalid BOM encoding for valid utf-8 provided label."});
// Testing user provided encoding is non-UTF & bom encoding is utf-16be
// (FE FF followed by single-byte greek data)
data = [0xFE, 0xFF, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe];
// In non-fatal mode the leading FE FF is expected to yield U+03CE U+FFFD.
expectedString = "\u03CE\uFFFD\u2019\xA3\u20AC\u20AF\xA6\xA7\xA8\xA9\u037A\xAB\xAC\xAD\u2015"
+ "\xB0\xB1\xB2\xB3\u0384\u0385\u0386\xB7\u0388\u0389\u038A\xBB\u038C\xBD\u038E\u038F"
+ "\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F"
+ "\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03AA\u03AB\u03AC\u03AD\u03AE\u03AF"
+ "\u03B0\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF"
+ "\u03C0\u03C1\u03C2\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u03CA\u03CB\u03CC\u03CD\u03CE";
testBOMCharset({encoding: "greek", fatal: true, data: data, error: "EncodingError",
msg: "test decoder encoding provided with invalid BOM encoding for greek."});
testBOMCharset({encoding: "greek", data: data, expected: expectedString,
msg: "test decoder encoding provided with invalid BOM encoding for greek."});
}
/**
 * Runs a single BOM decoding case.
 *
 * @param test Object describing the case:
 *   encoding - label handed to the TextDecoder constructor
 *   fatal    - optional; if the property is present it is forwarded as the
 *              constructor's {fatal: ...} option
 *   data     - array of byte values; wrapped in a Uint8Array for decode()
 *   expected - string decode() should return (success cases)
 *   error    - name of the exception expected from the constructor or from
 *              decode() (failure cases)
 *   msg      - message attached to every assertion
 */
function testBOMCharset(test)
{
  var outText;
  try {
    // TextDecoder is a constructor and must be invoked with `new`; a plain
    // call throws TypeError in engines that follow Web IDL, which the catch
    // below would then misreport as an unexpected error name.
    var decoder = 'fatal' in test ?
                  new TextDecoder(test.encoding, {fatal: test.fatal}) :
                  new TextDecoder(test.encoding);
    outText = decoder.decode(new Uint8Array(test.data));
  } catch (e) {
    assert_equals(e.name, test.error, test.msg);
    return;
  }
  // Reaching this point with an expected error means nothing was thrown.
  assert_true(!test.error, test.msg);
  if (outText !== test.expected) {
    // Compare escaped forms so the failure output shows code points.
    assert_equals(escape(outText), escape(test.expected), test.msg + " Code points do not match expected code points.");
  }
}

View File

@ -0,0 +1,25 @@
<!DOCTYPE HTML>
<html>
<head>
<meta charset=utf-8>
<title>Test for Bug 764234</title>
<script type="text/javascript" src="/resources/testharness.js"></script>
<script type="text/javascript" src="/resources/testharnessreport.js"></script>
<script type="text/javascript" src="test_TextDecoder.js"></script>
<script type="text/javascript" src="test_BOMEncoding.js"></script>
</head>
<body>
<div id="log"></div>
<script>
// Runs the TextDecoder option tests followed by the BOM tests. Both entry
// points come from the scripts included in <head>.
function runTest()
{
  runTextDecoderOptions();
  runTextDecoderBOMEnoding();
}
runTest();
</script>
</body>
</html>

View File

@ -0,0 +1,349 @@
/*
* test_TextDecoder.js
* bug 764234 tests
*/
/**
 * Entry point for the TextDecoder tests in this file.
 *
 * Builds one shared fixture (bytes 0xa0-0xda and 0xdf-0xfb together with the
 * string they are expected to decode to) and registers every decoder test
 * with testharness' test(), handing the fixture to the tests that take it.
 */
function runTextDecoderOptions()
{
  const thaiBytes = [0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
                     0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1,
                     0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
                     0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
                     0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
                     0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9,
                     0xda, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
                     0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
                     0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb];
  const thaiText = "\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07"
                 + "\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f"
                 + "\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17"
                 + "\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f"
                 + "\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27"
                 + "\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f"
                 + "\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37"
                 + "\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43"
                 + "\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b"
                 + "\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53"
                 + "\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b";

  // Wraps a (data, expectedString) test so it can be registered as a
  // zero-argument callback that receives the shared fixture.
  function withFixture(fn) {
    return function() {
      fn(thaiBytes, thaiText);
    };
  }

  test(testDecoderGetEncoding, "testDecoderGetEncoding");
  test(testDecodeGreek, "testDecodeGreek");
  test(withFixture(testConstructorFatalOption), "testConstructorFatalOption");
  test(withFixture(testConstructorEncodingOption), "testConstructorEncodingOption");
  test(withFixture(testDecodeStreamOption), "testDecodeStreamOption");
  test(testDecodeStreamCompositions, "testDecodeStreamCompositions");
  test(withFixture(testDecodeABVOption), "testDecodeABVOption");
  test(testDecoderForThaiEncoding, "testDecoderForThaiEncoding");
}
/*
 * function testConstructorFatalOption()
 *
 * - Checks that a decoder can be constructed with the fatal option set to
 *   false and to true.
 * - Each decoder is then given empty input and is expected to return "".
 * - No stream option is passed to decode().
 * - Neither parameter is used by this test.
 */
function testConstructorFatalOption(data, expectedString)
{
  var fatalCases = [
    {fatal: false, msg: "constructor fatal option set to false test."},
    {fatal: true, msg: "constructor fatal option set to true test."},
  ];
  fatalCases.forEach(function(fatalCase) {
    // Empty input; only the constructor option differs between the cases.
    testCharset({fatal: fatalCase.fatal, encoding: "iso-8859-11", input: [],
                 expected: "", msg: fatalCase.msg});
  });
}
/**
 * Checks the encoding label accepted by the TextDecoder constructor.
 *
 * @param aData            bytes to decode
 * @param aExpectedString  text aData decodes to under "iso-8859-11"
 */
function testConstructorEncodingOption(aData, aExpectedString)
{
  // A supported label decodes the fixture to the expected text.
  testCharset({encoding: "iso-8859-11", input: aData, expected: aExpectedString,
               msg: "decoder testing constructor valid encoding."});

  // Labels that are expected to be rejected with EncodingError: an unknown
  // name, whitespace, null and the empty string.
  var rejectedLabels = [
    {encoding: "asdfasdf", msg: "constructor encoding, invalid encoding test."},
    {encoding: " ", msg: "constructor encoding, spaces encoding test."},
    {encoding: null, msg: "constructor encoding, \"null\" encoding test."},
    {encoding: "", msg: "constuctor encoding, empty encoding test."},
  ];
  rejectedLabels.forEach(function(rejected) {
    testCharset({encoding: rejected.encoding, input: aData,
                 error: "EncodingError", msg: rejected.msg});
  });

  // Decoding the same bytes as utf-8 is expected to give a run of 88
  // replacement characters (U+FFFD).
  var replacementRun = new Array(88 + 1).join("\ufffd");
  testCharset({encoding: "utf-8", input: aData, expected: replacementRun,
               msg: "constuctor encoding, utf-8 test."});
}
/*
 * function testDecodeStreamOption()
 *
 * - Feeds the iso-8859-11 fixture to one decoder in three consecutive chunks
 *   and checks the text returned for each chunk.
 * - The three-chunk sequence is run with the stream option left unset, set
 *   on every chunk, and set on the first chunk only.
 * - A split utf-8 sequence (0xC2 then 0x80) is decoded without the stream
 *   option, in non-fatal and fatal mode.
 * - Neither parameter is used; the test carries its own chunked data.
 */
function testDecodeStreamOption(data, expectedString)
{
  const chunkBytes = [
    [0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
     0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
     0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
     0xb9, 0xba, 0xbb, 0xbc, 0xbd],
    [0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
     0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
     0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
     0xd8, 0xd9, 0xda, 0xdf, 0xe0, 0xe1, 0xe2],
    [0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
     0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
     0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb]
  ];
  const chunkText = [
    "\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07"
    + "\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f"
    + "\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17"
    + "\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d",
    "\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25"
    + "\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d"
    + "\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35"
    + "\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41"
    + "\u0e42",
    "\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a"
    + "\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51"
    + "\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58"
    + "\u0e59\u0e5a\u0e5b"
  ];

  // Builds the per-chunk step list; streamFlags[k] says whether chunk k is
  // decoded with {stream: true}. Steps without the flag carry no stream
  // property at all.
  function chunkSteps(streamFlags) {
    return chunkBytes.map(function(bytes, k) {
      var step = {input: bytes, expected: chunkText[k]};
      if (streamFlags[k]) {
        step.stream = true;
      }
      return step;
    });
  }

  // Stream option never set.
  testCharset({encoding: "iso-8859-11", array: chunkSteps([false, false, false]),
               msg: "decode() stream test zero."});
  // Stream option set on every chunk.
  testCharset({encoding: "iso-8859-11", array: chunkSteps([true, true, true]),
               msg: "decode() stream test one."});
  // Stream option set on the first chunk only.
  testCharset({encoding: "iso-8859-11", array: chunkSteps([true, false, false]),
               msg: "decode() stream test two."});

  // A two-byte utf-8 sequence split across two non-streaming calls: each half
  // is expected to produce U+FFFD on its own.
  testCharset({encoding: "utf-8", array: [
    {input: [0xC2], expected: "\uFFFD"},
    {input: [0x80], expected: "\uFFFD"},
  ], msg: "decode() stream test utf-8."});

  // Same split in fatal mode: an EncodingError is expected instead.
  testCharset({encoding: "utf-8", fatal: true, array: [
    {input: [0xC2], error: "EncodingError"},
    {input: [0x80], error: "EncodingError"},
  ], msg: "decode() stream test utf-8 fatal."});
}
/**
 * Streams each input through a decoder using every possible way of cutting
 * the input into consecutive non-empty chunks.
 *
 * Each entry's `expected` array is indexed by input byte: element k is the
 * text that appears once byte k has been consumed. The expected output for a
 * chunk is the concatenation of the entries for the bytes in that chunk.
 */
function testDecodeStreamCompositions() {
var tests = [
{encoding: "utf-8", input: [0xC2,0x80], expected: ["","\x80"]},
{encoding: "utf-8", input: [0xEF,0xBB,0xBF,0xC2,0x80], expected: ["","","","","\x80"]},
{encoding: "utf-16", input: [0x01,0x00], expected: ["","\x01"]},
{encoding: "utf-16", input: [0x01,0x00,0x03,0x02], expected: ["","\x01","","\u0203"]},
{encoding: "utf-16", input: [0xFF,0xFE], expected: ["",""]},
{encoding: "utf-16", input: [0xFF,0xFE,0x01,0x00], expected: ["","","","\x01"]},
{encoding: "utf-16", input: [0xFF,0xFE,0xFF,0xFE], expected: ["","","","\uFEFF"]},
{encoding: "utf-16", input: [0xFF,0xFE,0xFE,0xFF], expected: ["","","","\uFFFE"]},
{encoding: "utf-16", input: [0xFE,0xFF], expected: ["",""]},
{encoding: "utf-16", input: [0xFE,0xFF,0x01,0x00], expected: ["","","","\u0100"]},
{encoding: "utf-16", input: [0xFE,0xFF,0xFF,0xFE], expected: ["","","","\uFFFE"]},
{encoding: "utf-16", input: [0xFE,0xFF,0xFE,0xFF], expected: ["","","","\uFEFF"]},
{encoding: "utf-16le", input: [0x01,0x00], expected: ["","\x01"]},
{encoding: "utf-16le", input: [0x01,0x00,0x03,0x02], expected: ["","\x01","","\u0203"]},
{encoding: "utf-16le", input: [0xFF,0xFE,0x01,0x00], expected: ["","","","\x01"]},
{encoding: "utf-16le", input: [0xFE,0xFF,0x01,0x00], expected: ["","\uFFFE","","\x01"]},
{encoding: "utf-16be", input: [0x01,0x00], expected: ["","\u0100"]},
{encoding: "utf-16be", input: [0x01,0x00,0x03,0x02], expected: ["","\u0100","","\u0302"]},
{encoding: "utf-16be", input: [0xFF,0xFE,0x01,0x00], expected: ["","\uFFFE","","\u0100"]},
{encoding: "utf-16be", input: [0xFE,0xFF,0x01,0x00], expected: ["","","","\u0100"]},
{encoding: "shift_jis", input: [0x81,0x40], expected: ["","\u3000"]},
];
tests.forEach(function(t) {
// a: chunk sizes chosen so far; n: number of input bytes not yet assigned.
// The remaining n bytes first become one final chunk, that composition is
// tested, and then the final chunk is shrunk one byte at a time while the
// bytes taken off it are split further by the recursive call.
(function generateCompositions(a, n) {
a.push(n);
var l = a.length - 1;
var array=[];
// o is the running byte offset. It is advanced by the `o+=a[i]` inside the
// `expected` slice, which is evaluated after the `input` slice has read the
// old offset, so the statement order here matters.
for (var i = 0, o = 0; i <= l; i++) {
array.push({
input: t.input.slice(o, o+a[i]),
expected: t.expected.slice(o, o+=a[i]).join(""),
// every chunk except the last is decoded with {stream: true}
stream: i < l
});
}
testCharset({encoding: t.encoding, array: array,
msg: "decode() stream test " + t.encoding + " " + a.join("-") + "."});
while (a[l] > 1) {
a[l]--;
// recurse on a copy so this level's size list is not extended
generateCompositions(a.slice(0), n - a[l]);
}
})([], t.input.length);
});
}
/*
 * function testDecodeABVOption()
 *
 * - ABV stands for ArrayBufferView.
 * - Every case uses the "iso-8859-11" label and sets neither the fatal nor
 *   the stream option.
 * - Only the value handed to decode() changes from case to case.
 */
function testDecodeABVOption(data, expectedString)
{
  var viewCases = [
    // the full fixture
    {input: data, expected: expectedString,
     msg: "decode test ABV valid data."},
    // an empty view
    {input: [], expected: "",
     msg: "decode test ABV empty data."},
    // one-element arrays holding strings; the expected output is a single
    // NUL, i.e. the string element ends up as byte 0 in the Uint8Array
    {input: ["\u0020\u0020"], expected: "\0",
     msg: "text decoding ABV string test."},
    {input: [""], expected: "\0",
     msg: "text decoding ABV empty string test."},
    // null in place of a view
    {input: null, expected: "",
     msg: "text decoding ABV null test."},
  ];
  viewCases.forEach(function(viewCase) {
    testCharset({encoding: "iso-8859-11", input: viewCase.input,
                 expected: viewCase.expected, msg: viewCase.msg});
  });
}
/**
 * Decodes a run of high bytes (0xa0-0xfe, with 0xae and 0xd2 left out) using
 * the "greek" label and compares the result with the expected text.
 */
function testDecodeGreek()
{
  var greekBytes = [0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8,
                    0xa9, 0xaa, 0xab, 0xac, 0xad, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4,
                    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
                    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca,
                    0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd3, 0xd4, 0xd5, 0xd6,
                    0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1,
                    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
                    0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
                    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe];
  var greekText = "\u00a0\u2018\u2019\u00a3\u20ac\u20af\u00a6\u00a7\u00a8"
                + "\u00a9\u037a\u00ab\u00ac\u00ad\u2015\u00b0\u00b1"
                + "\u00b2\u00b3\u0384\u0385\u0386\u00b7\u0388\u0389"
                + "\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391"
                + "\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399"
                + "\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1"
                + "\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa"
                + "\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2"
                + "\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba"
                + "\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2"
                + "\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca"
                + "\u03cb\u03cc\u03cd\u03ce";
  testCharset({
    encoding: "greek",
    input: greekBytes,
    expected: greekText,
    msg: "decode greek test."
  });
}
/**
 * Decodes the Thai fixture once for every label in `aliases` and expects the
 * same text each time.
 *
 * The alias list was previously declared but never used, so only the
 * "iso-8859-11" spelling was exercised. All four spellings are labels the
 * Encoding Standard maps to the same encoding.
 */
function testDecoderForThaiEncoding()
{
  const data = [0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
                0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1,
                0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
                0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
                0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
                0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9,
                0xda, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
                0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
                0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb];
  const expectedString = "\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07"
                       + "\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f"
                       + "\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17"
                       + "\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f"
                       + "\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27"
                       + "\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f"
                       + "\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37"
                       + "\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43"
                       + "\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b"
                       + "\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53"
                       + "\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b";
  const aliases = [ "ISO-8859-11", "iso-8859-11", "iso8859-11", "iso885911" ];

  aliases.forEach(function(alias) {
    // The label is included in the message so a failure names the alias.
    testCharset({encoding: alias, input: data, expected: expectedString,
                 msg: "decoder testing valid ISO-8859-11 encoding (label \"" + alias + "\")."});
  });
}
/**
 * Checks the `encoding` attribute reported by a decoder for a set of labels,
 * and that an unsupported label makes the constructor throw.
 *
 * Each table entry has either `encoding` (the name the decoder must report)
 * or `error` (the name of the exception the constructor must throw).
 */
function testDecoderGetEncoding()
{
  var labelEncodings = [
    {label: "utf-16", encoding: "utf-16"},
    {label: "utf-16le", encoding: "utf-16"},
    {label: "euc-kr", encoding: "euc-kr"},
    {label: "x-windows-949", error: "EncodingError"},
  ];

  labelEncodings.forEach(function(le){
    var decoder;
    // Only the constructor call is guarded. The assertions below must stay
    // outside the try block: they signal failure by throwing, and catching
    // that would replace the real failure with a bogus "wrong exception
    // name" report.
    try {
      decoder = new TextDecoder(le.label);
    } catch (e) {
      assert_equals(e.name, le.error, le.label + " label encoding unsupported test.");
      return;
    }
    if (le.encoding) {
      assert_equals(decoder.encoding, le.encoding, le.label + " label encoding test.");
    } else {
      assert_unreached(le.label + " label encoding unsupported test should throw " + le.error);
    }
  });
}
/**
 * Runs one decoder case, optionally made of several consecutive decode()
 * calls on the same decoder.
 *
 * @param test Object describing the case:
 *   encoding - label handed to the TextDecoder constructor
 *   fatal    - optional; when truthy it is passed as {fatal: ...}
 *   error    - name of the exception expected from the constructor
 *   msg      - message prefix for every assertion
 *   array    - optional list of steps; when absent, `test` itself is the
 *              single step
 * Each step provides:
 *   input    - array of byte values (wrapped in a Uint8Array), or null
 *   stream   - optional; when truthy decode() gets {stream: ...}
 *   expected - string decode() should return
 *   error    - name of the exception expected from decode()
 */
function testCharset(test)
{
  try {
    var fatal = test.fatal ? {fatal: test.fatal} : null;
    // TextDecoder is a constructor and must be invoked with `new`; a plain
    // call throws TypeError in engines that follow Web IDL.
    var decoder = new TextDecoder(test.encoding, fatal);
  } catch (e) {
    assert_equals(e.name, test.error, test.msg + " error thrown from the constructor.");
    return;
  }

  var array = test.array || [test];
  var num_strings = array.length;
  for (var i = 0; i < num_strings; i++) {
    var decodeView = array[i].input !== null ? new Uint8Array(array[i].input) : null;
    var stream = array[i].stream ? {stream: array[i].stream} : null;
    var outText;
    try {
      outText = decoder.decode(decodeView, stream);
    } catch (e) {
      // A throwing step ends the case; later steps are not run.
      assert_equals(e.name, array[i].error, test.msg + " error thrown from decode().");
      return;
    }

    var expected = array[i].expected;
    if (outText !== expected) {
      // Compare escaped forms so the failure output shows code points.
      assert_equals(escape(outText), escape(expected), test.msg + " Code points do not match expected code points.");
      break;
    }
  }
  // A constructor error was expected but construction succeeded.
  assert_true(!test.error, test.msg);
}

View File

@ -0,0 +1,23 @@
<!DOCTYPE HTML>
<html>
<head>
<meta charset=utf-8>
<title>Test for Bug 764234</title>
<script type="text/javascript" src="/resources/testharness.js"></script>
<script type="text/javascript" src="/resources/testharnessreport.js"></script>
<script type="text/javascript" src="test_TextEncoder.js"></script>
</head>
<body>
<div id="log"></div>
<script>
// Runs the TextEncoder tests; the entry point comes from test_TextEncoder.js,
// which is included in <head>.
function runTest()
{
  runTextEncoderTests();
}
runTest();
</script>
</body>
</html>

View File

@ -0,0 +1,274 @@
/*
* test_TextEncoder.js
* bug 764234 tests
*/
/**
 * Entry point for the TextEncoder tests in this file.
 *
 * Builds one shared fixture (a string of U+00A0 plus Thai code points and the
 * byte sequence it is expected to encode to) and registers every encoder
 * test with testharness' test().
 */
function runTextEncoderTests()
{
  var thaiText = "\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09"
               + "\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14"
               + "\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f"
               + "\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a"
               + "\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35"
               + "\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44"
               + "\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f"
               + "\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a"
               + "\u0e5b";

  // Expected bytes, laid out one code point per group: two bytes for
  // U+00A0, then three bytes for each of the remaining code points.
  var thaiBytes = [
    0xC2, 0xA0,
    0xE0, 0xB8, 0x81,  0xE0, 0xB8, 0x82,  0xE0, 0xB8, 0x83,  0xE0, 0xB8, 0x84,
    0xE0, 0xB8, 0x85,  0xE0, 0xB8, 0x86,  0xE0, 0xB8, 0x87,  0xE0, 0xB8, 0x88,
    0xE0, 0xB8, 0x89,  0xE0, 0xB8, 0x8A,  0xE0, 0xB8, 0x8B,  0xE0, 0xB8, 0x8C,
    0xE0, 0xB8, 0x8D,  0xE0, 0xB8, 0x8E,  0xE0, 0xB8, 0x8F,  0xE0, 0xB8, 0x90,
    0xE0, 0xB8, 0x91,  0xE0, 0xB8, 0x92,  0xE0, 0xB8, 0x93,  0xE0, 0xB8, 0x94,
    0xE0, 0xB8, 0x95,  0xE0, 0xB8, 0x96,  0xE0, 0xB8, 0x97,  0xE0, 0xB8, 0x98,
    0xE0, 0xB8, 0x99,  0xE0, 0xB8, 0x9A,  0xE0, 0xB8, 0x9B,  0xE0, 0xB8, 0x9C,
    0xE0, 0xB8, 0x9D,  0xE0, 0xB8, 0x9E,  0xE0, 0xB8, 0x9F,  0xE0, 0xB8, 0xA0,
    0xE0, 0xB8, 0xA1,  0xE0, 0xB8, 0xA2,  0xE0, 0xB8, 0xA3,  0xE0, 0xB8, 0xA4,
    0xE0, 0xB8, 0xA5,  0xE0, 0xB8, 0xA6,  0xE0, 0xB8, 0xA7,  0xE0, 0xB8, 0xA8,
    0xE0, 0xB8, 0xA9,  0xE0, 0xB8, 0xAA,  0xE0, 0xB8, 0xAB,  0xE0, 0xB8, 0xAC,
    0xE0, 0xB8, 0xAD,  0xE0, 0xB8, 0xAE,  0xE0, 0xB8, 0xAF,  0xE0, 0xB8, 0xB0,
    0xE0, 0xB8, 0xB1,  0xE0, 0xB8, 0xB2,  0xE0, 0xB8, 0xB3,  0xE0, 0xB8, 0xB4,
    0xE0, 0xB8, 0xB5,  0xE0, 0xB8, 0xB6,  0xE0, 0xB8, 0xB7,  0xE0, 0xB8, 0xB8,
    0xE0, 0xB8, 0xB9,  0xE0, 0xB8, 0xBA,  0xE0, 0xB8, 0xBF,  0xE0, 0xB9, 0x80,
    0xE0, 0xB9, 0x81,  0xE0, 0xB9, 0x82,  0xE0, 0xB9, 0x83,  0xE0, 0xB9, 0x84,
    0xE0, 0xB9, 0x85,  0xE0, 0xB9, 0x86,  0xE0, 0xB9, 0x87,  0xE0, 0xB9, 0x88,
    0xE0, 0xB9, 0x89,  0xE0, 0xB9, 0x8A,  0xE0, 0xB9, 0x8B,  0xE0, 0xB9, 0x8C,
    0xE0, 0xB9, 0x8D,  0xE0, 0xB9, 0x8E,  0xE0, 0xB9, 0x8F,  0xE0, 0xB9, 0x90,
    0xE0, 0xB9, 0x91,  0xE0, 0xB9, 0x92,  0xE0, 0xB9, 0x93,  0xE0, 0xB9, 0x94,
    0xE0, 0xB9, 0x95,  0xE0, 0xB9, 0x96,  0xE0, 0xB9, 0x97,  0xE0, 0xB9, 0x98,
    0xE0, 0xB9, 0x99,  0xE0, 0xB9, 0x9A,  0xE0, 0xB9, 0x9B
  ];

  // Wraps a (data, expected) test so it can be registered as a zero-argument
  // callback that receives the shared fixture.
  function withFixture(fn) {
    return function() {
      fn(thaiText, thaiBytes);
    };
  }

  test(testEncoderGetEncoding, "testEncoderGetEncoding");
  test(testInvalidSequence, "testInvalidSequence");
  test(testEncodeUTF16ToUTF16, "testEncodeUTF16ToUTF16");
  test(withFixture(testConstructorEncodingOption), "testConstructorEncodingOption");
  test(withFixture(testEncodingValues), "testEncodingValues");
  test(withFixture(testInputString), "testInputString");
  test(testStreamingOptions, "testStreamingOptions");
}
/**
 * Encodes a string that contains U+FFFD between Thai letters as utf-8 and
 * expects the replacement character to come out as its own three-byte
 * sequence (EF BF BD) alongside the encoded letters.
 */
function testInvalidSequence()
{
  testSingleString({
    encoding: "utf-8",
    input: "\u0e43\u0e44\ufffd\u0e45",
    expected: [0xE0, 0xB9, 0x83,
               0xE0, 0xB9, 0x84,
               0xEF, 0xBF, 0xBD,
               0xE0, 0xB9, 0x85],
    msg: "encoder with replacement character test."
  });
}
/**
 * Encodes Thai text with the (mixed-case) label "Utf-16" and expects two
 * bytes per code unit, low byte first.
 */
function testEncodeUTF16ToUTF16()
{
  var thaiLetters = "\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c"
                  + "\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56"
                  + "\u0e57\u0e58\u0e59\u0e5a\u0e5b";
  var littleEndianBytes = [
    0x43, 0x0E, 0x44, 0x0E, 0x45, 0x0E, 0x46, 0x0E, 0x47, 0x0E,
    0x48, 0x0E, 0x49, 0x0E, 0x4A, 0x0E, 0x4B, 0x0E, 0x4C, 0x0E,
    0x4D, 0x0E, 0x4E, 0x0E, 0x4F, 0x0E, 0x50, 0x0E, 0x51, 0x0E,
    0x52, 0x0E, 0x53, 0x0E, 0x54, 0x0E, 0x55, 0x0E, 0x56, 0x0E,
    0x57, 0x0E, 0x58, 0x0E, 0x59, 0x0E, 0x5A, 0x0E, 0x5B, 0x0E
  ];
  testSingleString({
    encoding: "Utf-16",
    input: thaiLetters,
    expected: littleEndianBytes,
    msg: "testing encoding from utf-16 to utf-16 zero."
  });
}
/**
 * Checks the encoding label accepted by the TextEncoder constructor.
 *
 * @param aData            string to encode
 * @param aExpectedString  bytes aData encodes to as utf-8
 */
function testConstructorEncodingOption(aData, aExpectedString)
{
  // A supported label, written in upper case.
  testSingleString({encoding: "UTF-8", input: aData, expected: aExpectedString,
                    msg: "testing encoding with valid utf-8 encoding."});

  // Labels that are expected to be rejected with EncodingError: whitespace,
  // an unknown name, null and the empty string.
  var rejectedLabels = [
    {encoding: " ", msg: "constructor encoding, spaces encoding test."},
    {encoding: "asdfasdf", msg: "constructor encoding, invalid encoding test."},
    {encoding: null, msg: "constructor encoding, \"null\" encoding test."},
    {encoding: "", msg: "constructor encoding, empty encoding test."},
  ];
  rejectedLabels.forEach(function(rejected) {
    testSingleString({encoding: rejected.encoding, input: aData,
                      error: "EncodingError", msg: rejected.msg});
  });
}
/**
 * Checks that constructing an encoder for a non-UTF encoding label is
 * rejected with an EncodingError.
 *
 * @param aData           Sample string used as the input to encode.
 * @param aExpectedString Unused; kept so the signature matches the sibling
 *                        tests invoked with (data, expectedString).
 */
function testEncodingValues(aData, aExpectedString)
{
  var encoding = "ISO-8859-11";
  // The label goes in `encoding` and the sample text in `input`; the two were
  // previously swapped, so the ISO-8859-11 label itself was never exercised.
  testSingleString({encoding: encoding, input: aData, error: "EncodingError",
                    msg: "encoder encoding values test."});
}
/**
 * Exercises encode() with trivial input strings.
 *
 * @param aData           Unused; kept so the signature matches the sibling
 *                        tests invoked with (data, expectedString).
 * @param aExpectedString Unused; see aData.
 */
function testInputString(aData, aExpectedString)
{
  // Empty input string must encode to no bytes.
  testSingleString({encoding: "utf-8", input: "", expected: [],
                    msg: "encoder null input string test."});

  // Two U+0020 spaces must encode to two 0x20 bytes. The input is written
  // with escapes so the number of spaces cannot be altered by whitespace
  // normalization of the file.
  testSingleString({encoding: "utf-8", input: "\u0020\u0020", expected: [32, 32],
                    msg: "spaces as input string."});
}
/**
 * Runs one encode() test case.
 *
 * @param test Object with the fields:
 *             - encoding: label passed to the TextEncoder constructor
 *             - input:    string passed to encode()
 *             - stream:   truthy to pass {stream: true} to encode()
 *             - expected: array-like of expected byte values (success cases)
 *             - error:    expected exception name (failure cases)
 *             - msg:      message attached to every assertion
 */
function testSingleString(test)
{
  var outText;
  try {
    var stream = test.stream ? {stream: true} : null;
    // Construct with `new`; interface constructors are not meant to be
    // invoked as plain functions.
    outText = new TextEncoder(test.encoding).encode(test.input, stream);
  } catch (e) {
    // Either the constructor or encode() threw: the case passes only if this
    // is the exception the test asked for.
    assert_equals(e.name, test.error, test.msg);
    return;
  }
  // Reaching this point means nothing threw, so no error may be expected.
  assert_true(!test.error, test.msg);

  if (outText.length !== test.expected.length) {
    assert_equals(outText.length, test.expected.length, test.msg + " length mismatch");
    return;
  }

  for (var i = 0; i < outText.length; i++) {
    if (outText[i] != test.expected[i]) {
      // Report both byte sequences as escaped strings. Pass the typed array
      // itself: its ArrayBuffer has no indexed elements to stringify.
      assert_equals(escape(stringFromArray(outText)), escape(stringFromArray(test.expected)),
                    test.msg + " Bytes do not match expected bytes.");
      return;
    }
  }
}
/**
 * Builds a string whose i-th character has the char code a[i].
 *
 * @param a Array or array-like (e.g. a typed array) of char codes.
 * @return The resulting string; "" for an empty input.
 */
function stringFromArray(a) {
  // Array.prototype.map is generic, so it also works on typed arrays; it
  // returns a plain Array here, which join() then concatenates.
  return Array.prototype.map.call(a, function(v) {
    return String.fromCharCode(v);
  }).join('');
}
/**
 * Feeds three consecutive chunks of text to a single encoder, with and
 * without the stream option, and checks the bytes produced for each chunk.
 */
function testStreamingOptions()
{
  // Three chunks: U+00A0 followed by Thai characters, split arbitrarily.
  var data = [
    "\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a"
    + "\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14"
    + "\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d",
    "\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28"
    + "\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32"
    + "\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40"
    + "\u0e41\u0e42",
    "\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d"
    + "\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57"
    + "\u0e58\u0e59\u0e5a\u0e5b"
  ];

  // utf-8 bytes expected for each chunk of `data`, in the same order.
  var expected = [[0xC2, 0xA0, 0xE0, 0xB8, 0x81, 0xE0, 0xB8, 0x82, 0xE0,
                   0xB8, 0x83, 0xE0, 0xB8, 0x84, 0xE0, 0xB8, 0x85, 0xE0,
                   0xB8, 0x86, 0xE0, 0xB8, 0x87, 0xE0, 0xB8, 0x88, 0xE0,
                   0xB8, 0x89, 0xE0, 0xB8, 0x8A, 0xE0, 0xB8, 0x8B, 0xE0,
                   0xB8, 0x8C, 0xE0, 0xB8, 0x8D, 0xE0, 0xB8, 0x8E, 0xE0,
                   0xB8, 0x8F, 0xE0, 0xB8, 0x90, 0xE0, 0xB8, 0x91, 0xE0,
                   0xB8, 0x92, 0xE0, 0xB8, 0x93, 0xE0, 0xB8, 0x94, 0xE0,
                   0xB8, 0x95, 0xE0, 0xB8, 0x96, 0xE0, 0xB8, 0x97, 0xE0,
                   0xB8, 0x98, 0xE0, 0xB8, 0x99, 0xE0, 0xB8, 0x9A, 0xE0,
                   0xB8, 0x9B, 0xE0, 0xB8, 0x9C, 0xE0, 0xB8, 0x9D],
                  [0xE0, 0xB8, 0x9E, 0xE0, 0xB8, 0x9F, 0xE0, 0xB8, 0xA0,
                   0xE0, 0xB8, 0xA1, 0xE0, 0xB8, 0xA2, 0xE0, 0xB8, 0xA3,
                   0xE0, 0xB8, 0xA4, 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0xA6,
                   0xE0, 0xB8, 0xA7, 0xE0, 0xB8, 0xA8, 0xE0, 0xB8, 0xA9,
                   0xE0, 0xB8, 0xAA, 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0xAC,
                   0xE0, 0xB8, 0xAD, 0xE0, 0xB8, 0xAE, 0xE0, 0xB8, 0xAF,
                   0xE0, 0xB8, 0xB0, 0xE0, 0xB8, 0xB1, 0xE0, 0xB8, 0xB2,
                   0xE0, 0xB8, 0xB3, 0xE0, 0xB8, 0xB4, 0xE0, 0xB8, 0xB5,
                   0xE0, 0xB8, 0xB6, 0xE0, 0xB8, 0xB7, 0xE0, 0xB8, 0xB8,
                   0xE0, 0xB8, 0xB9, 0xE0, 0xB8, 0xBA, 0xE0, 0xB8, 0xBF,
                   0xE0, 0xB9, 0x80, 0xE0, 0xB9, 0x81, 0xE0, 0xB9, 0x82],
                  [0xE0, 0xB9, 0x83, 0xE0, 0xB9, 0x84, 0xE0, 0xB9, 0x85,
                   0xE0, 0xB9, 0x86, 0xE0, 0xB9, 0x87, 0xE0, 0xB9, 0x88,
                   0xE0, 0xB9, 0x89, 0xE0, 0xB9, 0x8A, 0xE0, 0xB9, 0x8B,
                   0xE0, 0xB9, 0x8C, 0xE0, 0xB9, 0x8D, 0xE0, 0xB9, 0x8E,
                   0xE0, 0xB9, 0x8F, 0xE0, 0xB9, 0x90, 0xE0, 0xB9, 0x91,
                   0xE0, 0xB9, 0x92, 0xE0, 0xB9, 0x93, 0xE0, 0xB9, 0x94,
                   0xE0, 0xB9, 0x95, 0xE0, 0xB9, 0x96, 0xE0, 0xB9, 0x97,
                   0xE0, 0xB9, 0x98, 0xE0, 0xB9, 0x99, 0xE0, 0xB9, 0x9A,
                   0xE0, 0xB9, 0x9B]];

  // utf-16 bytes expected for each chunk, written explicitly low byte first.
  // Viewing a Uint16Array's buffer as bytes would instead yield the host's
  // byte order, which is only little-endian on little-endian machines.
  var expectedUTF16 = data.map(function(d) {
    var bytes = [];
    arrayFromString(d).forEach(function(unit) {
      bytes.push(unit & 0xFF, unit >> 8);
    });
    return new Uint8Array(bytes);
  });

  // STREAMING TEST ONE: test streaming three valid strings with stream option
  // set to true for all three.
  testArrayOfStrings({encoding: "utf-8", array: [
    {input: data[0], stream: true, expected: expected[0]},
    {input: data[1], stream: true, expected: expected[1]},
    {input: data[2], stream: true, expected: expected[2]},
  ], msg: "streaming test one."});

  // STREAMING TEST TWO: utf-16 encoder; the first string is encoded with
  // stream: true, the second and third without the stream option.
  testArrayOfStrings({encoding: "utf-16", array: [
    {input: data[0], stream: true, expected: expectedUTF16[0]},
    {input: data[1], expected: expectedUTF16[1]},
    {input: data[2], expected: expectedUTF16[2]},
  ], msg: "streaming test two."});
}
/**
 * Converts a string into an array of its UTF-16 code unit values.
 *
 * @param s The string to convert.
 * @return Array with one number per code unit of s; [] for "".
 */
function arrayFromString(s) {
  // split('') yields one-code-unit strings, so index 0 is the whole element.
  // Uses the standard instance method rather than the non-standard
  // String.charCodeAt generic.
  return s.split('').map(function(c) {
    return c.charCodeAt(0);
  });
}
/**
 * Runs a sequence of encode() calls against one encoder instance.
 *
 * @param test Object with the fields:
 *             - encoding: label passed to the TextEncoder constructor
 *             - array:    list of {input, stream, expected} entries, encoded
 *                         in order with the same encoder
 *             - error:    expected exception name if construction must fail
 *             - msg:      message attached to every assertion
 */
function testArrayOfStrings(test)
{
  var encoder;
  try {
    // Construct with `new`; interface constructors are not meant to be
    // invoked as plain functions.
    encoder = new TextEncoder(test.encoding);
  } catch (e) {
    assert_equals(e.name, test.error, test.msg);
    return;
  }
  // Construction succeeded, so no error may be expected.
  assert_true(!test.error, test.msg);

  var array = test.array;
  for (var i = 0; i < array.length; i += 1) {
    var stream = array[i].stream ? {stream: true} : null;
    var view = encoder.encode(array[i].input, stream);
    var expected = array[i].expected;

    // Compare lengths first: the byte loop below only walks `view`, so an
    // output shorter than expected would otherwise pass unnoticed.
    if (view.length !== expected.length) {
      assert_equals(view.length, expected.length, test.msg + " length mismatch");
      return;
    }

    for (var j = 0; j < view.length; j++) {
      if (view[j] !== expected[j]) {
        // The message lives on the test object; there is no `msg` variable
        // in this scope.
        assert_equals(view[j], expected[j], test.msg + " Bytes do not match expected bytes.");
        return;
      }
    }
  }
}
/**
 * Checks that the `encoding` attribute of an encoder reports the expected
 * name for each label it was constructed with.
 */
function testEncoderGetEncoding()
{
  // Both labels are expected to report "utf-16".
  var labelEncodings = [
    {label: "utf-16", encoding: "utf-16"},
    {label: "utf-16le", encoding: "utf-16"},
  ];

  labelEncodings.forEach(function(le) {
    // Construct with `new`; interface constructors are not meant to be
    // invoked as plain functions.
    var encoder = new TextEncoder(le.label);
    assert_equals(encoder.encoding, le.encoding, le.label + " label encoding test.");
  });
}

View File

@ -0,0 +1,29 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/.
*
* The origin of this IDL file is
* http://wiki.whatwg.org/wiki/StringEncoding
*
* Copyright © 2006 The WHATWG Contributors
* http://wiki.whatwg.org/wiki/WHATWG_Wiki:Copyrights
*/
// Decoder interface. The constructor takes an optional encoding label, which
// defaults to "utf-8", and an optional TextDecoderOptions dictionary.
[Constructor(optional DOMString encoding = "utf-8",
             optional TextDecoderOptions options)]
interface TextDecoder {
  // Readonly, so there is no setter for a [SetterThrows] annotation to
  // apply to; the attribute carries no throwing annotation.
  readonly attribute DOMString encoding;

  // Returns a DOMString for the given view. Both arguments are optional and
  // `view` defaults to null. Marked [Throws]: the call may raise an exception.
  [Throws]
  DOMString decode(optional ArrayBufferView? view = null,
                   optional TextDecodeOptions options);
};
// Options dictionary accepted as the optional second argument of the
// TextDecoder constructor.
dictionary TextDecoderOptions {
// Defaults to false.
// NOTE(review): presumably selects throwing on malformed input instead of
// lenient decoding — confirm against the StringEncoding proposal.
boolean fatal = false;
};
// Options dictionary accepted as the optional second argument of
// TextDecoder.decode().
dictionary TextDecodeOptions {
// Defaults to false.
// NOTE(review): presumably signals that further input chunks will follow —
// confirm against the StringEncoding proposal.
boolean stream = false;
};

View File

@ -0,0 +1,23 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/.
*
* The origin of this IDL file is
* http://wiki.whatwg.org/wiki/StringEncoding
*
* Copyright © 2006 The WHATWG Contributors
* http://wiki.whatwg.org/wiki/WHATWG_Wiki:Copyrights
*/
// Encoder interface. The constructor takes an optional encoding label; no
// default value is declared for it at the IDL level.
[Constructor(optional DOMString encoding)]
interface TextEncoder {
  // Readonly, so there is no setter for a [SetterThrows] annotation to
  // apply to; the attribute carries no throwing annotation.
  readonly attribute DOMString encoding;

  // Returns a Uint8Array for the given (nullable) string, with an optional
  // TextEncodeOptions dictionary. Marked [Throws]: the call may raise an
  // exception.
  [Throws]
  Uint8Array encode(DOMString? string, optional TextEncodeOptions options);
};
// Options dictionary accepted as the optional second argument of
// TextEncoder.encode().
dictionary TextEncodeOptions {
// Defaults to false.
// NOTE(review): presumably signals that further input chunks will follow —
// confirm against the StringEncoding proposal.
boolean stream = false;
};

View File

@ -39,6 +39,8 @@ webidl_files = \
SVGPathSegList.webidl \
SVGPointList.webidl \
SVGTransformList.webidl \
TextDecoder.webidl \
TextEncoder.webidl \
WebSocket.webidl \
XMLHttpRequest.webidl \
XMLHttpRequestEventTarget.webidl \

View File

@ -65,6 +65,7 @@ SHARED_LIBRARY_LIBS = \
$(DEPTH)/dom/contacts/$(LIB_PREFIX)jsdomcontacts_s.$(LIB_SUFFIX) \
$(DEPTH)/dom/alarm/$(LIB_PREFIX)domalarm_s.$(LIB_SUFFIX) \
$(DEPTH)/dom/devicestorage/$(LIB_PREFIX)domdevicestorage_s.$(LIB_SUFFIX) \
$(DEPTH)/dom/encoding/$(LIB_PREFIX)domencoding_s.$(LIB_SUFFIX) \
$(DEPTH)/dom/file/$(LIB_PREFIX)domfile_s.$(LIB_SUFFIX) \
$(DEPTH)/dom/power/$(LIB_PREFIX)dom_power_s.$(LIB_SUFFIX) \
$(DEPTH)/dom/settings/$(LIB_PREFIX)jsdomsettings_s.$(LIB_SUFFIX) \

View File

@ -661,6 +661,10 @@ typedef enum tag_nsresult
/* XXX Should be JavaScript native errors */
NS_ERROR_TYPE_ERR = FAILURE(26),
NS_ERROR_RANGE_ERR = FAILURE(27),
/* StringEncoding API errors from http://wiki.whatwg.org/wiki/StringEncoding */
NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR = FAILURE(28),
NS_ERROR_DOM_ENCODING_NOT_UTF_ERR = FAILURE(29),
NS_ERROR_DOM_ENCODING_DECODE_ERR = FAILURE(30),
/* DOM error codes defined by us */
NS_ERROR_DOM_SECMAN_ERR = FAILURE(1001),
NS_ERROR_DOM_WRONG_TYPE_ERR = FAILURE(1002),