gecko/intl/uconv/tests/unit/test_bug335531.js

229 lines
11 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Test case for bug 335531
*
* Uses nsIConverterInputStream to decode UTF-32 text with all combinations
* of UTF-32BE and UTF-32LE with and without BOM.
*
* Sample text is: "Все счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему."
*
* The enclosing quotation marks are included in the sample text to test that
* UTF-32LE is recognized even when there is no BOM and the UTF-32LE decoder is
* not explicitly called. This only works when the first character of the text
* is an eight-bit character.
*/
// Percent-encoded byte order mark for big-endian input (bytes 00 00 FE FF);
// makeText() prepends it to the UTF32BE sample when a BOM is requested.
const beBOM="%00%00%FE%FF";
// Percent-encoded byte order mark for little-endian input (bytes FF FE 00 00);
// makeText() prepends it to every sample other than UTF32BE.
const leBOM="%FF%FE%00%00";
// The BOM as it appears in decoded output (U+FEFF); testCase() expects it at
// the start of the result when the UTF-32BE / UTF-32LE decoder is used on
// input that carries a BOM.
const outBOM="\uFEFF";
// The sample sentence as percent-encoded big-endian 4-byte units, without a
// BOM. Looked up by makeText() under the name "sample" + "UTF32BE".
const sampleUTF32BE="%00%00%00%22%00%00%04%12%00%00%04%41%00%00%04%35%00%00%00%20%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%4B%00%00%04%35%00%00%00%20%00%00%04%41%00%00%04%35%00%00%04%3C%00%00%04%4C%00%00%04%38%00%00%00%20%00%00%04%3F%00%00%04%3E%00%00%04%45%00%00%04%3E%00%00%04%36%00%00%04%38%00%00%00%20%00%00%04%34%00%00%04%40%00%00%04%43%00%00%04%33%00%00%00%20%00%00%04%3D%00%00%04%30%00%00%00%20%00%00%04%34%00%00%04%40%00%00%04%43%00%00%04%33%00%00%04%30%00%00%00%2C%00%00%00%20%00%00%04%3A%00%00%04%30%00%00%04%36%00%00%04%34%00%00%04%30%00%00%04%4F%00%00%00%20%00%00%04%3D%00%00%04%35%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%30%00%00%04%4F%00%00%00%20%00%00%04%41%00%00%04%35%00%00%04%3C%00%00%04%4C%00%00%04%4F%00%00%00%20%00%00%04%3D%00%00%04%35%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%30%00%00%00%20%00%00%04%3F%00%00%04%3E%00%00%00%2D%00%00%04%41%00%00%04%32%00%00%04%3E%00%00%04%35%00%00%04%3C%00%00%04%43%00%00%00%2E%00%00%00%22";
// The same sample sentence as percent-encoded little-endian 4-byte units,
// without a BOM. Looked up by makeText() under the name "sample" + "UTF32LE".
const sampleUTF32LE="%22%00%00%00%12%04%00%00%41%04%00%00%35%04%00%00%20%00%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%4B%04%00%00%35%04%00%00%20%00%00%00%41%04%00%00%35%04%00%00%3C%04%00%00%4C%04%00%00%38%04%00%00%20%00%00%00%3F%04%00%00%3E%04%00%00%45%04%00%00%3E%04%00%00%36%04%00%00%38%04%00%00%20%00%00%00%34%04%00%00%40%04%00%00%43%04%00%00%33%04%00%00%20%00%00%00%3D%04%00%00%30%04%00%00%20%00%00%00%34%04%00%00%40%04%00%00%43%04%00%00%33%04%00%00%30%04%00%00%2C%00%00%00%20%00%00%00%3A%04%00%00%30%04%00%00%36%04%00%00%34%04%00%00%30%04%00%00%4F%04%00%00%20%00%00%00%3D%04%00%00%35%04%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%30%04%00%00%4F%04%00%00%20%00%00%00%41%04%00%00%35%04%00%00%3C%04%00%00%4C%04%00%00%4F%04%00%00%20%00%00%00%3D%04%00%00%35%04%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%30%04%00%00%20%00%00%00%3F%04%00%00%3E%04%00%00%2D%00%00%00%41%04%00%00%32%04%00%00%3E%04%00%00%35%04%00%00%3C%04%00%00%43%04%00%00%2E%00%00%00%22%00%00%00";
// The decoded sample text (including the enclosing quotation marks) as a
// JavaScript string with no leading BOM; testCase() compares the decoder
// output against this value, optionally prefixed with outBOM.
const expectedNoBOM = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\"";
/**
 * Builds the percent-encoded payload that is appended to the test data: URI.
 *
 * @param withBOM  when true, the matching byte order mark is prepended
 *                 (beBOM for "UTF32BE", leBOM for anything else).
 * @param charset  name of the sample to use: "UTF32BE" or "UTF32LE".
 * @return the percent-encoded sample text, optionally preceded by a BOM.
 * @throws Error if charset does not name one of the known samples.
 */
function makeText(withBOM, charset)
{
  // Explicit lookup table instead of eval("sample" + charset): no code is
  // generated at run time and an unknown name produces a descriptive error.
  var samples = {
    "UTF32BE": sampleUTF32BE,
    "UTF32LE": sampleUTF32LE
  };
  if (!Object.prototype.hasOwnProperty.call(samples, charset)) {
    throw new Error("makeText: unknown sample charset '" + charset + "'");
  }

  var theText = samples[charset];
  if (withBOM) {
    theText = (charset === "UTF32BE" ? beBOM : leBOM) + theText;
  }
  return theText;
}
/**
 * Decodes the sample text through nsIConverterInputStream and checks the
 * result against the expected string.
 *
 * @param withBOM       whether the input data starts with a byte order mark.
 * @param charset       which sample to feed in ("UTF32BE" or "UTF32LE");
 *                      passed on to makeText().
 * @param charsetDec    charset declared in the data: URI.
 * @param decoder       charset name handed to the converter input stream.
 * @param bufferLength  buffer size handed to the converter input stream.
 * @throws Error if the converter does not implement
 *         nsIUnicharLineInputStream.
 */
function testCase(withBOM, charset, charsetDec, decoder, bufferLength)
{
  var dataURI = "data:text/plain;charset=" + charsetDec + "," +
                makeText(withBOM, charset);

  var IOService = Components.Constructor("@mozilla.org/network/io-service;1",
                                         "nsIIOService");
  var ConverterInputStream =
      Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
                             "nsIConverterInputStream",
                             "init");

  var ios = new IOService();
  var channel = ios.newChannel(dataURI, "", null);
  var testInputStream = channel.open();
  // 0xFFFD is passed as the replacement character argument of init().
  var testConverter = new ConverterInputStream(testInputStream,
                                               decoder,
                                               bufferLength,
                                               0xFFFD);

  var outStr = "";
  try {
    if (!(testConverter instanceof
          Components.interfaces.nsIUnicharLineInputStream)) {
      // Throw a real Error (not a bare string) so the failure has a stack.
      throw new Error("not line input stream");
    }

    var more;
    do {
      // read the line and check for eof
      var line = {};
      more = testConverter.readLine(line);
      outStr += line.value;
    } while (more);
  } finally {
    // Release the converter (and the stream it wraps) even on failure.
    testConverter.close();
  }

  var expected = expectedNoBOM;
  // BE / LE decoder wouldn't strip the BOM
  if (withBOM && (decoder === "UTF-32BE" || decoder === "UTF-32LE")) {
    expected = outBOM + expectedNoBOM;
  }

  do_check_eq(outStr, expected);
}
// Tests conversion of one to three byte(s) from UTF-32 to Unicode.
// Whatever the converter does with such input (return text or throw), the
// outcome compared by testCase2() is a single replacement character.
const expectedString = "\ufffd";
const charset = "UTF-32";

/**
 * Feeds inString to an nsIScriptableUnicodeConverter configured for the
 * module-level charset and checks the escaped result against expectedString.
 * If the conversion throws, a replacement character is used as the result.
 *
 * @param inString  the raw input string to convert.
 */
function testCase2(inString) {
  var UnicodeConverter = Components.Constructor(
      "@mozilla.org/intl/scriptableunicodeconverter",
      "nsIScriptableUnicodeConverter");

  var decoder = new UnicodeConverter();
  decoder.charset = charset;

  var decoded;
  try {
    var head = decoder.ConvertToUnicode(inString);
    var tail = decoder.Finish();
    decoded = head + tail;
  } catch (ex) {
    // A failed conversion counts as having produced U+FFFD.
    decoded = "\ufffd";
  }

  // Compare escaped forms so that a mismatch is readable in the log.
  do_check_eq(escape(decoded), escape(expectedString));
}
/*
* Uses nsIConverterInputStream to decode UTF-32 text with surrogate characters
*
* Sample text is: "g" in Mathematical Bold Symbols (U+1D420)
*
* The test uses buffers of 4 different lengths to test end of buffer in the
* middle of a UTF-32 character
*/
// Each testN below is a percent-encoded big-endian input that testCase3()
// appends to a data: URI; expectedN is the string the decoder must produce.

// Single supplementary character
// expected: surrogate pair
const test0="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%00%2D%00%00%00%2D";
const expected0 = "--\uD835\uDC20--";
// High surrogate followed by low surrogate (invalid in UTF-32)
// expected: two replacement chars
const test1="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%DC%20%00%00%00%2D%00%00%00%2D";
const expected1 = "--\uFFFD\uFFFD--";
// Lone high surrogate
// expected: one replacement char
const test2="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%00%2D%00%00%00%2D";
const expected2 = "--\uFFFD--";
// Lone low surrogate
// expected: one replacement char
const test3="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%00%2D%00%00%00%2D";
const expected3 = "--\uFFFD--";
// Two high surrogates
// expected: two replacement chars
const test4="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%D8%35%00%00%00%2D%00%00%00%2D";
const expected4 = "--\uFFFD\uFFFD--";
// Two low surrogates
// expected: two replacement chars
const test5="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%DC%20%00%00%00%2D%00%00%00%2D";
const expected5 = "--\uFFFD\uFFFD--";
// Low surrogate followed by high surrogate
// expected: two replacement chars
const test6="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%D8%35%00%00%00%2D%00%00%00%2D";
const expected6 = "--\uFFFD\uFFFD--";
// Lone high surrogate followed by supplementary character
// expected: replacement char followed by surrogate pair
const test7="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%01%D4%20%00%00%00%2D%00%00%00%2D";
const expected7 = "--\uFFFD\uD835\uDC20--";
// Lone low surrogate followed by supplementary character
// expected: replacement char followed by surrogate pair
const test8="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%01%D4%20%00%00%00%2D%00%00%00%2D";
const expected8 = "--\uFFFD\uD835\uDC20--";
// Supplementary character followed by lone high surrogate
// expected: surrogate pair followed by replacement char
const test9="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%D8%35%00%00%00%2D%00%00%00%2D";
const expected9 = "--\uD835\uDC20\uFFFD--";
// Supplementary character followed by lone low surrogate
// expected: surrogate pair followed by replacement char
const test10="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%DC%20%00%00%00%2D%00%00%00%2D";
const expected10 = "--\uD835\uDC20\uFFFD--";
// Lone high surrogate at the end of the input
// expected: one replacement char (invalid in UTF-32)
const test11="%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%D8%35";
const expected11 = "----\uFFFD";
// Incomplete code unit (a single trailing byte) at the end of the input
// expected: nothing
const test12="%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%00%2D%D8";
const expected12 = "----";
/**
 * Decodes input number testNumber (test0 .. test12) with the "UTF-32BE"
 * decoder through nsIConverterInputStream and checks the result against the
 * matching expectedN string.
 *
 * @param testNumber    index of the testN / expectedN pair to use (0..12).
 * @param bufferLength  buffer size handed to the converter input stream.
 * @throws Error if testNumber does not select a known test, or if the
 *         converter does not implement nsIUnicharLineInputStream.
 */
function testCase3(testNumber, bufferLength)
{
  // Explicit tables instead of eval("test" + n) / eval("expected" + n): no
  // code is generated at run time and a bad index gives a descriptive error.
  var inputs = [test0, test1, test2, test3, test4, test5, test6,
                test7, test8, test9, test10, test11, test12];
  var expectedOutputs = [expected0, expected1, expected2, expected3,
                         expected4, expected5, expected6, expected7,
                         expected8, expected9, expected10, expected11,
                         expected12];
  if (typeof inputs[testNumber] !== "string") {
    throw new Error("testCase3: unknown test number " + testNumber);
  }

  var dataURI = "data:text/plain;charset=UTF32BE," + inputs[testNumber];

  var IOService = Components.Constructor("@mozilla.org/network/io-service;1",
                                         "nsIIOService");
  var ConverterInputStream =
      Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
                             "nsIConverterInputStream",
                             "init");

  var ios = new IOService();
  var channel = ios.newChannel(dataURI, "", null);
  var testInputStream = channel.open();
  // 0xFFFD is passed as the replacement character argument of init().
  var testConverter = new ConverterInputStream(testInputStream,
                                               "UTF-32BE",
                                               bufferLength,
                                               0xFFFD);

  var outStr = "";
  try {
    if (!(testConverter instanceof
          Components.interfaces.nsIUnicharLineInputStream)) {
      // Throw a real Error (not a bare string) so the failure has a stack.
      throw new Error("not line input stream");
    }

    var more;
    do {
      // read the line and check for eof
      var line = {};
      more = testConverter.readLine(line);
      outStr += line.value;
    } while (more);
  } finally {
    // Release the converter (and the stream it wraps) even on failure.
    testConverter.close();
  }

  // escape the strings before comparing for better readability
  do_check_eq(escape(outStr), escape(expectedOutputs[testNumber]));
}
/**
 * Test entry point. Runs, in order:
 *  - testCase() for every combination of buffer length (64, 65), BOM
 *    presence, and sample/decoder pairing;
 *  - testCase2() for inputs of one, two and three characters;
 *  - testCase3() for tests 0..12, each with buffer lengths 4..7.
 */
function run_test()
{
  var bufferSizes = [64, 65];
  var bomFlags = [true, false];
  /*               sample      decoder     */
  var variants = [["UTF32LE", "UTF-32"],
                  ["UTF32BE", "UTF-32"],
                  ["UTF32LE", "UTF-32LE"],
                  ["UTF32BE", "UTF-32BE"]];

  for (var s = 0; s < bufferSizes.length; s++) {
    for (var b = 0; b < bomFlags.length; b++) {
      for (var v = 0; v < variants.length; v++) {
        // The charset declared in the data: URI is always "UTF-32".
        testCase(bomFlags[b], variants[v][0], "UTF-32", variants[v][1],
                 bufferSizes[s]);
      }
    }
  }

  var shortInputs = ["A", "AB", "ABC"];
  for (var i = 0; i < shortInputs.length; i++) {
    testCase2(shortInputs[i]);
  }

  for (var testNumber = 0; testNumber < 13; testNumber++) {
    for (var length = 4; length <= 7; length++) {
      testCase3(testNumber, length);
    }
  }
}