diff --git a/netwerk/streamconv/converters/nsUnknownDecoder.cpp b/netwerk/streamconv/converters/nsUnknownDecoder.cpp index 185bccf8fe2..c9a4f424020 100644 --- a/netwerk/streamconv/converters/nsUnknownDecoder.cpp +++ b/netwerk/streamconv/converters/nsUnknownDecoder.cpp @@ -686,13 +686,15 @@ nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) // Make sure to do a case-sensitive exact match comparison here. Apache // 1.x just sends text/plain for "unknown", while Apache 2.x sends // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to - // be different, sends text/plain with iso-8859-1 charset. Don't do - // general case-insensitive comparison, since we really want to apply this - // crap as rarely as we can. + // be different, sends text/plain with iso-8859-1 charset. For extra fun, + // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general + // case-insensitive comparison, since we really want to apply this crap as + // rarely as we can. if (!contentType.EqualsLiteral("text/plain") || (!contentTypeHdr.EqualsLiteral("text/plain") && !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") && - !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1"))) { + !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) { return; } diff --git a/netwerk/test/unit/test_plaintext_sniff.js b/netwerk/test/unit/test_plaintext_sniff.js new file mode 100644 index 00000000000..9d2b6198969 --- /dev/null +++ b/netwerk/test/unit/test_plaintext_sniff.js @@ -0,0 +1,180 @@ +// Test the plaintext-or-binary sniffer + +do_import_script("netwerk/test/httpserver/httpd.js"); + +// List of Content-Type headers to test. For each header we have an array. +// The first element in the array is the Content-Type header string. The +// second element in the array is a boolean indicating whether we allow +// sniffing for that type. 
+var contentTypeHeaderList =  // entries are [ headerString, sniffingAllowed ]
+[
+  [ "text/plain", true ],  // the four exact header values the sniffer accepts
+  [ "text/plain; charset=ISO-8859-1", true ],
+  [ "text/plain; charset=iso-8859-1", true ],
+  [ "text/plain; charset=UTF-8", true ],
+  [ "text/plain; charset=unknown", false ],  // any other charset value
+  [ "text/plain; param", false ],  // an extra parameter defeats the exact match
+  [ "text/plain; charset=ISO-8859-1; param", false ],
+  [ "text/plain; charset=iso-8859-1; param", false ],
+  [ "text/plain; charset=UTF-8; param", false ],
+  [ "text/plain; charset=utf-8", false ],  // match is case-sensitive and exact
+  [ "text/plain; charset=utf8", false ],
+  [ "text/plain; charset=UTF8", false ],
+  [ "text/plain; charset=iSo-8859-1", false ]
+];
+
+// List of response bodies to test. For each response we have an array. The
+// first element in the array is the body string. The second element in the
+// array is a boolean indicating whether that string should sniff as binary.
+var bodyList =
+[
+  [ "Plaintext", false ]  // seed entry; the loops further down append the rest
+];
+
+// List of possible BOMs, written with one string character per BOM byte
+var BOMList =
+[
+  "\xFE\xFF", // UTF-16BE
+  "\xFF\xFE", // UTF-16LE
+  "\xEF\xBB\xBF", // UTF-8
+  "\x00\x00\xFE\xFF", // UCS-4BE
+  "\x00\x00\xFF\xFE" // UCS-4LE
+];
+
+// Build up bodyList. The things we treat as binary are ASCII codes 0-8,
+// 14-26, 28-31. That is, the control char range, except for tab, newline,
+// vertical tab, form feed, carriage return, and ESC (this last being used by
+// Shift_JIS, apparently).
+function isBinaryChar(ch) {  // ch is a numeric char code
+  return (0 <= ch && ch <= 8) || (14 <= ch && ch <= 26) ||
+         (28 <= ch && ch <= 31);
+}
+
+// Single-char bodies for every ASCII code 0-127; binary iff isBinaryChar()
+var i;
+for (i = 0; i <= 127; ++i) {
+  bodyList.push([ String.fromCharCode(i), isBinaryChar(i) ]);
+}
+
+// BOM plus two chars (at least 4 chars total): a BOM must prevent binary sniffing
+var j;
+for (i = 0; i <= 127; ++i) {
+  for (j = 0; j < BOMList.length; ++j) {
+    bodyList.push([ BOMList[j] + String.fromCharCode(i, i), false ]);
+  }
+}
+
+// BOM plus one char: 2-byte BOMs give 3 chars, too few for the BOM to count
+for (i = 0; i <= 127; ++i) {
+  for (j = 0; j < BOMList.length; ++j) {
+    bodyList.push([ BOMList[j] + String.fromCharCode(i),
+                    BOMList[j].length == 2 && isBinaryChar(i) ]);
+  }
+}
+// Create an HTTP channel for /<headerIdx>/<bodyIdx> with sniffers enabled
+function makeChan(headerIdx, bodyIdx) {
+  var ios = Components.classes["@mozilla.org/network/io-service;1"]
+                      .getService(Components.interfaces.nsIIOService);
+  var chan =
+    ios.newChannel("http://localhost:4444/" + headerIdx + "/" + bodyIdx, null,
+                   null)
+       .QueryInterface(Components.interfaces.nsIHttpChannel);
+  // Request that content sniffers be called for this load
+  chan.loadFlags |=
+    Components.interfaces.nsIChannel.LOAD_CALL_CONTENT_SNIFFERS;
+
+  return chan;
+}
+// Build the listener that checks the sniffed type for one header/body pair
+function makeListener(headerIdx, bodyIdx) {
+  var listener = {
+    onStartRequest : function test_onStartR(request, ctx) {
+      try {
+        var chan = request.QueryInterface(Components.interfaces.nsIChannel);
+        var type = chan.contentType;
+        // Binary type is expected only if header AND body both call for it
+        var expectedType =
+          contentTypeHeaderList[headerIdx][1] && bodyList[bodyIdx][1] ?
+            "application/x-vnd.mozilla.guess-from-ext" : "text/plain";
+        if (expectedType != type) {
+          do_throw("Unexpected sniffed type '" + type + "'. " +
+                   "Should be '" + expectedType + "'. " +
+                   "Header is ['" +
+                   contentTypeHeaderList[headerIdx][0] + "', " +
+                   contentTypeHeaderList[headerIdx][1] + "]. " +
+                   "Body is ['" +
+                   bodyList[bodyIdx][0].toSource() + "', " +
+                   bodyList[bodyIdx][1] +
+                   "].");
+        }
+        do_check_eq(expectedType, type);
+      } catch (e) {
+        do_throw("Unexpected exception: " + e);
+      }
+      // Type checked; throw to abort the request (onDataAvailable must not fire)
+      throw Components.results.NS_ERROR_ABORT;
+    },
+
+    onDataAvailable: function test_ODA() {
+      do_throw("Should not get any data!");
+    },
+
+    onStopRequest: function test_onStopR(request, ctx, status) {
+      // Advance to next test: header index varies fastest, then body index
+      ++headerIdx;
+      if (headerIdx == contentTypeHeaderList.length) {
+        headerIdx = 0;
+        ++bodyIdx;
+      }
+      // All bodies done: stop the server; otherwise chain the next request
+      if (bodyIdx == bodyList.length) {
+        httpserv.stop();
+      } else {
+        doTest(headerIdx, bodyIdx);
+      }
+      // Pairs with the do_test_pending() call in doTest() for this request
+      do_test_finished();
+    }
+  };
+
+  return listener;
+}
+// Start one asynchronous request for the given header/body pair
+function doTest(headerIdx, bodyIdx) {
+  var chan = makeChan(headerIdx, bodyIdx);
+
+  var listener = makeListener(headerIdx, bodyIdx);
+
+  chan.asyncOpen(listener, null);
+  // Pairs with do_test_finished() in the listener's onStopRequest
+  do_test_pending();
+}
+// Respond with header #headerIdx as Content-Type and body #bodyIdx as payload
+function createResponse(headerIdx, bodyIdx, metadata, response) {
+  response.setHeader("Content-Type", contentTypeHeaderList[headerIdx][0]);
+  response.bodyOutputStream.write(bodyList[bodyIdx][0],
+                                  bodyList[bodyIdx][0].length);
+}
+// Return a path handler closed over one (headerIdx, bodyIdx) pair
+function makeHandler(headerIdx, bodyIdx) {
+  var f =
+    function handlerClosure(metadata, response) {
+      return createResponse(headerIdx, bodyIdx, metadata, response);
+    };
+  return f;
+}
+
+var httpserv;  // created in run_test(), stopped after the last request
+function run_test() {
+  httpserv = new nsHttpServer();
+  // One path per header/body combination: "/<headerIdx>/<bodyIdx>"
+  for (i = 0; i < contentTypeHeaderList.length; ++i) {
+    for (j = 0; j < bodyList.length; ++j) {
+      httpserv.registerPathHandler("/" + i + "/" + j, makeHandler(i, j));
+    }
+  }
+
+  httpserv.start(4444);
+  // Start the chain at (0, 0); each onStopRequest launches the next request
+  doTest(0, 0);
+}