Bug 1181345 - Make DevToolsUtils.fetch work with files that are not UTF-8 encoded. r=past

These changes restore the previous behavior in which the data is decoded
according to a locale specific default charset if the UTF-8 conversion fails.
This allows the method to properly decode both UTF-8 data and data that has
been encoded with the locale default charset (ISO-8859-1 in most cases) without
any charset hints from the source.

The conversion is now a two step process:
1. Convert the stream content to a string using the locale specific encoding.
2. Try to reinterpret the data according to UTF-8 rules. If it succeeds, return
   the conversion result. If it fails, return the original string.
This commit is contained in:
Sami Jaktholm 2015-07-11 13:45:59 +03:00
parent c057758f9f
commit e8a735b004
2 changed files with 51 additions and 17 deletions

View File

@ -434,6 +434,10 @@ exports.defineLazyGetter(this, "TextDecoder", () => {
return Cu.import("resource://gre/modules/osfile.jsm", {}).TextDecoder;
});
// Defines NetworkHelper through defineLazyGetter; the callback loads it from
// devtools/toolkit/webconsole/network-helper (presumably deferred until the
// first access -- confirm against defineLazyGetter). mainThreadFetch calls
// NetworkHelper.convertToUnicode() to decode the fetched content.
exports.defineLazyGetter(this, "NetworkHelper", () => {
return require("devtools/toolkit/webconsole/network-helper");
});
/**
* Performs a request to load the desired URL and returns a promise.
*
@ -491,15 +495,27 @@ function mainThreadFetch(aURL, aOptions={ loadFromCache: true,
}
try {
let charset = channel.contentCharset || aOptions.charset || "UTF-8";
// We cannot let NetUtil do the charset conversion: if charset information
// is not available and our default guess is wrong, the method might fail
// and we would lose the stream data. That would leave us unable to fall
// back to the locale default encoding (bug 1181345).
// NetUtil handles charset conversion.
// Read and decode the data according to the locale default encoding.
let available = stream.available();
let source = NetUtil.readInputStreamToString(stream, available, {charset});
let source = NetUtil.readInputStreamToString(stream, available);
stream.close();
// If the channel or the caller has correct charset information, the
// content will be decoded correctly. If we have to fall back to UTF-8 and
// the guess is wrong, the conversion fails and convertToUnicode returns
// the input unmodified. Essentially we try to decode the data as UTF-8
// and if that fails, we use the locale specific default encoding. This is
// the best we can do if the source does not provide charset info.
let charset = channel.contentCharset || aOptions.charset || "UTF-8";
let unicodeSource = NetworkHelper.convertToUnicode(source, charset);
deferred.resolve({
content: source,
content: unicodeSource,
contentType: request.contentType
});
} catch (ex) {

View File

@ -8,7 +8,13 @@
const { FileUtils } = Cu.import("resource://gre/modules/FileUtils.jsm");
const { OS } = Cu.import("resource://gre/modules/osfile.jsm", {});
const TEST_CONTENT = "let a = 1 + 1";
const TEST_CONTENT = "aéd";
// The TEST_CONTENT encoded as UTF-8.
const UTF8_TEST_BUFFER = new Uint8Array([0x61, 0xc3, 0xa9, 0x64]);
// The TEST_CONTENT encoded as ISO 8859-1.
const ISO_8859_1_BUFFER = new Uint8Array([0x61, 0xe9, 0x64]);
/**
* Tests that URLs with arrows pointing to an actual source are handled properly
@ -34,6 +40,30 @@ add_task(function* test_empty() {
deepEqual(content, "", "The empty file was read correctly.");
});
/**
 * Verifies that DevToolsUtils.fetch returns TEST_CONTENT for a temporary file
 * whose bytes are the UTF-8 encoding of that string.
 */
add_task(function* test_encoding_utf8() {
  const filePath = createTemporaryFile().path;
  yield OS.File.writeAtomic(filePath, UTF8_TEST_BUFFER);

  const fetchedContent = (yield DevToolsUtils.fetch(filePath)).content;
  deepEqual(fetchedContent, TEST_CONTENT,
            "The UTF-8 encoded file was correctly read.");
});
/**
 * Verifies that DevToolsUtils.fetch returns TEST_CONTENT for a temporary file
 * whose bytes are the ISO 8859-1 (Latin-1) encoding of that string.
 */
add_task(function* test_encoding_iso_8859_1() {
  const filePath = createTemporaryFile().path;
  yield OS.File.writeAtomic(filePath, ISO_8859_1_BUFFER);

  const fetchedContent = (yield DevToolsUtils.fetch(filePath)).content;
  deepEqual(fetchedContent, TEST_CONTENT,
            "The ISO 8859-1 encoded file was correctly read.");
});
/**
* Test that non-existent files are handled correctly.
*/
@ -46,18 +76,6 @@ add_task(function* test_missing() {
});
});
/**
* Tests that existing files are handled correctly: TEST_CONTENT is written as
* UTF-8 to the temporary file obtained from createTemporaryFile(".js"), the
* file is fetched through a "file://" URL, and the fetched content is compared
* against TEST_CONTENT.
*/
add_task(function* test_normal() {
let { path } = createTemporaryFile(".js");
yield OS.File.writeAtomic(path, TEST_CONTENT, { encoding: "utf-8" });
let { content } = yield DevToolsUtils.fetch("file://" + path);
deepEqual(content, TEST_CONTENT, "The file contents were correctly read.");
});
/**
* Test that URLs without file:// scheme work.
*/