Bug 801487 - Remove encoding detection using BOM. r=sicking

This commit is contained in:
Masatoshi Kimura 2012-11-06 18:23:14 -05:00
parent c95b61723c
commit 84ffa7eecf
5 changed files with 16 additions and 54 deletions

View File

@ -21,16 +21,6 @@ TextDecoder::Init(const nsAString& aEncoding,
nsAutoString label(aEncoding);
EncodingUtils::TrimSpaceCharacters(label);
// If label is a case-insensitive match for "utf-16"
// then set the internal useBOM flag.
if (label.LowerCaseEqualsLiteral("utf-16")) {
mUseBOM = true;
mIsUTF16Family = true;
mEncoding = "utf-16le";
// If BOM is used, we can't determine the converter yet.
return;
}
// Let encoding be the result of getting an encoding from label.
// If encoding is failure, throw a TypeError.
if (!EncodingUtils::FindEncodingForLabel(label, mEncoding)) {
@ -46,12 +36,6 @@ TextDecoder::Init(const nsAString& aEncoding,
// set the internal fatal flag of the decoder object.
mFatal = aFatal.fatal;
CreateDecoder(aRv);
}
void
TextDecoder::CreateDecoder(ErrorResult& aRv)
{
// Create a decoder object for mEncoding.
nsCOMPtr<nsICharsetConverterManager> ccm =
do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID);
@ -72,12 +56,10 @@ TextDecoder::CreateDecoder(ErrorResult& aRv)
}
void
TextDecoder::ResetDecoder(bool aResetOffset)
TextDecoder::ResetDecoder()
{
mDecoder->Reset();
if (aResetOffset) {
mOffset = 0;
}
mOffset = 0;
}
void
@ -191,22 +173,7 @@ TextDecoder::HandleBOM(const char*& aData, uint32_t& aLength,
strcmp(encoding, mEncoding)) {
// If the stream doesn't start with a BOM or the BOM doesn't match the
// encoding, feed a BOM to work around the decoder's bug (bug 634541).
if (!mUseBOM) {
FeedBytes(!strcmp(mEncoding, "utf-16le") ? "\xFF\xFE" : "\xFE\xFF");
}
}
if (mUseBOM) {
// Select a decoder corresponding to the BOM.
if (!*encoding) {
encoding = "utf-16le";
}
// If the endianness has not changed, reuse the decoder.
if (mDecoder && !strcmp(encoding, mEncoding)) {
ResetDecoder(false);
} else {
mEncoding = encoding;
CreateDecoder(aRv);
}
FeedBytes(!strcmp(mEncoding, "utf-16le") ? "\xFF\xFE" : "\xFE\xFF");
}
FeedBytes(mInitialBytes, &aOutString);
}
@ -234,7 +201,7 @@ TextDecoder::GetEncoding(nsAString& aEncoding)
// "utf-16".
// This workaround should not be exposed to the public API and so "utf-16"
// is returned by GetEncoding() if the internal encoding name is "utf-16le".
if (mUseBOM || !strcmp(mEncoding, "utf-16le")) {
if (!strcmp(mEncoding, "utf-16le")) {
aEncoding.AssignLiteral("utf-16");
return;
}

View File

@ -41,8 +41,7 @@ public:
}
TextDecoder(nsISupports* aGlobal)
: mGlobal(aGlobal)
, mFatal(false), mUseBOM(false), mOffset(0), mIsUTF16Family(false)
: mGlobal(aGlobal), mFatal(false), mOffset(0), mIsUTF16Family(false)
{
MOZ_ASSERT(aGlobal);
SetIsDOMBinding();
@ -97,7 +96,6 @@ private:
nsCOMPtr<nsIUnicodeDecoder> mDecoder;
nsCOMPtr<nsISupports> mGlobal;
bool mFatal;
bool mUseBOM;
uint8_t mOffset;
char mInitialBytes[3];
bool mIsUTF16Family;
@ -117,8 +115,7 @@ private:
ErrorResult& aRv);
// Internal helper functions.
void CreateDecoder(ErrorResult& aRv);
void ResetDecoder(bool aResetOffset = true);
void ResetDecoder();
void HandleBOM(const char*& aData, uint32_t& aLength,
const TextDecodeOptions& aOptions,
nsAString& aOutString, ErrorResult& aRv);

View File

@ -65,18 +65,17 @@ function testMoreBOMEncoding() {
// Testing user provided encoding is UTF-16LE & bom encoding is utf-16be
var dataUTF16 = [0xFE, 0xFF, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x04, 0x2C, 0x00, 0x20, 0x00, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x2D, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x04, 0x2E, 0x00, 0x22, 0x00];
testBOMCharset({encoding: "utf-16le", fatal: true, data: dataUTF16, expected: "\ufffe" + expectedString,
msg: "test decoder invalid BOM encoding for utf-16 fatal."});
msg: "test decoder invalid BOM encoding for utf-16le fatal."});
testBOMCharset({encoding: "utf-16le", data: dataUTF16, expected: "\ufffe" + expectedString,
msg: "test decoder invalid BOM encoding for utf-16."});
msg: "test decoder invalid BOM encoding for utf-16le."});
// Testing user provided encoding is UTF-16 & bom encoding is utf-16be
data = [0xFE, 0xFF, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x00, 0x2C, 0x00, 0x20, 0x04, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x00, 0x2D, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x00, 0x2E, 0x00, 0x22];
testBOMCharset({encoding: "utf-16", fatal: true, data: data, expected: expectedString,
msg: "test decoder BOM encoding for utf-16 fatal."});
testBOMCharset({encoding: "utf-16", fatal: true, data: dataUTF16, expected: "\ufffe" + expectedString,
msg: "test decoder invalid BOM encoding for utf-16 fatal."});
testBOMCharset({encoding: "utf-16", data: data, expected: expectedString,
msg: "test decoder BOM encoding for utf-16."});
testBOMCharset({encoding: "utf-16", data: dataUTF16, expected: "\ufffe" + expectedString,
msg: "test decoder invalid BOM encoding for utf-16."});
// Testing user provided encoding is UTF-16 & bom encoding is utf-16le
dataUTF16 = [0xFF, 0xFE, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x04, 0x2C, 0x00, 0x20, 0x00, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x2D, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x04, 0x2E, 0x00, 0x22, 0x00];

View File

@ -184,10 +184,10 @@ function testDecodeStreamCompositions() {
{encoding: "utf-16", input: [0xFF,0xFE,0x01,0x00], expected: ["","","","\x01"]},
{encoding: "utf-16", input: [0xFF,0xFE,0xFF,0xFE], expected: ["","","","\uFEFF"]},
{encoding: "utf-16", input: [0xFF,0xFE,0xFE,0xFF], expected: ["","","","\uFFFE"]},
{encoding: "utf-16", input: [0xFE,0xFF], expected: ["",""]},
{encoding: "utf-16", input: [0xFE,0xFF,0x01,0x00], expected: ["","","","\u0100"]},
{encoding: "utf-16", input: [0xFE,0xFF,0xFF,0xFE], expected: ["","","","\uFFFE"]},
{encoding: "utf-16", input: [0xFE,0xFF,0xFE,0xFF], expected: ["","","","\uFEFF"]},
{encoding: "utf-16", input: [0xFE,0xFF], expected: ["","\uFFFE"]},
{encoding: "utf-16", input: [0xFE,0xFF,0x01,0x00], expected: ["","\uFFFE","","\x01"]},
{encoding: "utf-16", input: [0xFE,0xFF,0xFF,0xFE], expected: ["","\uFFFE","","\uFEFF"]},
{encoding: "utf-16", input: [0xFE,0xFF,0xFE,0xFF], expected: ["","\uFFFE","","\uFFFE"]},
{encoding: "utf-16le", input: [0x01,0x00], expected: ["","\x01"]},
{encoding: "utf-16le", input: [0x01,0x00,0x03,0x02], expected: ["","\x01","","\u0203"]},
{encoding: "utf-16le", input: [0xFF,0xFE,0x01,0x00], expected: ["","","","\x01"]},

View File

@ -273,7 +273,6 @@ test(
equal(TextDecoder('utf-16le').decode(new Uint8Array(utf16le)), string);
equal(TextDecoder('utf-16be').decode(new Uint8Array(utf16be)), string);
equal(TextDecoder('utf-16').decode(new Uint8Array(utf16le)), string);
equal(TextDecoder('utf-16').decode(new Uint8Array(utf16be)), string);
/*
// TODO: New API?