Bug 601933: remove RFC 2047 encoding support for HTTP header field parameters. r=jduell

This commit is contained in:
Julian Reschke 2013-02-21 05:36:00 -08:00
parent 8133e088d0
commit c87fff2ca2
6 changed files with 121 additions and 82 deletions

View File

@ -5782,9 +5782,9 @@ nsresult
nsContentTypeParser::GetParameter(const char* aParameterName, nsAString& aResult)
{
NS_ENSURE_TRUE(mService, NS_ERROR_FAILURE);
return mService->GetParameter(mString, aParameterName,
EmptyCString(), false, nullptr,
aResult);
return mService->GetParameterHTTP(mString, aParameterName,
EmptyCString(), false, nullptr,
aResult);
}
/* static */

View File

@ -2045,8 +2045,8 @@ NS_GetContentDispositionFromHeader(const nsACString& aHeader, nsIChannel *aChan
}
nsAutoString dispToken;
rv = mimehdrpar->GetParameter(aHeader, "", fallbackCharset, true, nullptr,
dispToken);
rv = mimehdrpar->GetParameterHTTP(aHeader, "", fallbackCharset, true, nullptr,
dispToken);
if (NS_FAILED(rv)) {
// special case (see bug 272541): empty disposition type handled as "inline"
@ -2083,9 +2083,9 @@ NS_GetFilenameFromDisposition(nsAString& aFilename,
if (url)
url->GetOriginCharset(fallbackCharset);
// Get the value of 'filename' parameter
rv = mimehdrpar->GetParameter(aDisposition, "filename",
fallbackCharset, true, nullptr,
aFilename);
rv = mimehdrpar->GetParameterHTTP(aDisposition, "filename",
fallbackCharset, true, nullptr,
aFilename);
if (NS_FAILED(rv)) {
aFilename.Truncate();

View File

@ -12,14 +12,14 @@
#include "nsISupports.idl"
[scriptable, uuid(ddbbdfb8-a1c0-4dd5-a31b-5d2a7a3bb6ec)]
[scriptable, uuid(9c9252a1-fdaf-40a2-9c2b-a3dc45e28dde)]
interface nsIMIMEHeaderParam : nsISupports {
/**
* Given the value of a single header field (such as
* Content-Disposition and Content-Type) and the name of a parameter
* (e.g. filename, name, charset), returns the value of the parameter.
* The value is obtained by decoding RFC 2231-style encoding,
* The value is obtained by decoding RFC 2231/5987-style encoding,
* RFC 2047-style encoding, and converting to UniChar(UTF-16)
* from charset specified in RFC 2231/2047 encoding, UTF-8,
* <code>aFallbackCharset</code>, the locale charset as fallback if
@ -33,14 +33,17 @@ interface nsIMIMEHeaderParam : nsISupports {
* with several non-standard-compliant cases mentioned below.
*
* <p>
* Note that a lot of MUAs and HTTP servers put RFC 2047-encoded parameters
* in mail headers and HTTP headers. Unfortunately, this includes Mozilla
* as of 2003-05-30. Even more standard-ignorant MUAs, web servers and
* application servers put 'raw 8bit characters'. This will try to cope
* with all these cases as gracefully as possible. Additionally, it
* returns the language tag if the parameter is encoded per RFC 2231 and
* Note that a lot of MUAs put RFC 2047-encoded parameters in mail headers. Unfortunately,
* this includes Mozilla as of 2003-05-30. Even more standard-ignorant MUAs,
* web servers and application servers put 'raw 8bit characters'. This will
* try to cope with all these cases as gracefully as possible. Additionally,
* it returns the language tag if the parameter is encoded per RFC 2231 and
* includes lang.
*
* <p>
* Note that GetParameterHTTP skips some of the workarounds used for
* mail (MIME) header fields, and thus SHOULD be used from non-mail
* code.
*
*
* @param aHeaderVal a header string to get the value of a parameter
@ -59,7 +62,6 @@ interface nsIMIMEHeaderParam : nsISupports {
* nsMemory::Free it.
* @return the value of <code>aParamName</code> in Unichar(UTF-16).
*/
AString getParameter(in ACString aHeaderVal,
in string aParamName,
in ACString aFallbackCharset,
@ -68,10 +70,10 @@ interface nsIMIMEHeaderParam : nsISupports {
/**
* Like getParameter, but using RFC 5987 instead of 2231. This removes
* support for header parameter continuations (foo*0, foo*1, etc).
* Like getParameter, but disabling encodings and workarounds specific to
* MIME (as opposed to HTTP).
*/
AString getParameter5987(in ACString aHeaderVal,
AString getParameterHTTP(in ACString aHeaderVal,
in string aParamName,
in ACString aFallbackCharset,
in boolean aTryLocaleCharset,
@ -89,9 +91,8 @@ interface nsIMIMEHeaderParam : nsISupports {
* non-interoperable usage.
*
* <p>
* This code is currently not used inside nsMIMEHeaderParamImpl, but
* might be in the future. New code that needs RFC2231/5987
* encoding should use this one.
* Code that parses HTTP header fields (as opposed to MIME header fields)
* should use this function.
*
* @param aParamVal a header field parameter to decode.
* @param aLang will be set to the language part (possibly

View File

@ -32,6 +32,8 @@ static char *DecodeQ(const char *, uint32_t);
static bool Is7bitNonAsciiString(const char *, uint32_t);
static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &);
static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
static nsresult internalDecodeParameter(const nsACString&, const char*,
const char*, bool, bool, nsACString&);
// XXX The chance of UTF-7 being used in the message header is really
// low, but in theory it's possible.
@ -49,18 +51,18 @@ nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
bool aTryLocaleCharset,
char **aLang, nsAString& aResult)
{
return DoGetParameter(aHeaderVal, aParamName, RFC_2231_DECODING,
return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
aFallbackCharset, aTryLocaleCharset, aLang, aResult);
}
NS_IMETHODIMP
nsMIMEHeaderParamImpl::GetParameter5987(const nsACString& aHeaderVal,
nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
const char *aParamName,
const nsACString& aFallbackCharset,
bool aTryLocaleCharset,
char **aLang, nsAString& aResult)
{
return DoGetParameter(aHeaderVal, aParamName, RFC_5987_DECODING,
return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
aFallbackCharset, aTryLocaleCharset, aLang, aResult);
}
@ -90,7 +92,8 @@ nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal,
// if necessary.
nsAutoCString str1;
rv = DecodeParameter(med, charset.get(), nullptr, false, str1);
rv = internalDecodeParameter(med, charset.get(), nullptr, false,
aDecoding == MIME_FIELD_ENCODING, str1);
NS_ENSURE_SUCCESS(rv, rv);
if (!aFallbackCharset.IsEmpty())
@ -347,7 +350,7 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
char **aLang,
char **aResult)
{
return DoParameterInternal(aHeaderValue, aParamName, RFC_2231_DECODING,
return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
aCharset, aLang, aResult);
}
@ -371,7 +374,9 @@ nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
nsAutoCString charset;
bool acceptContinuations = (aDecoding != RFC_5987_DECODING);
// change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
// continuations for HTTP header fields later on, see bug 776324
bool acceptContinuations = true;
const char *str = aHeaderValue;
@ -722,13 +727,10 @@ increment_str:
return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
}
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
const char* aDefaultCharset,
bool aOverrideCharset,
bool aEatContinuations,
nsACString& aResult)
nsresult
internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset,
bool aOverrideCharset, bool aEatContinuations,
nsACString& aResult)
{
aResult.Truncate();
if (!aHeaderVal)
@ -763,6 +765,18 @@ nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
return NS_OK;
}
// Interface method for decoding an RFC 2047-encoded header value.
// It forwards all of its arguments, unchanged and in the same order, to the
// non-member helper internalDecodeRFC2047Header() and returns that helper's
// result; the decoded text is written to aResult by the helper.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
const char* aDefaultCharset,
bool aOverrideCharset,
bool aEatContinuations,
nsACString& aResult)
{
return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset,
aOverrideCharset, aEatContinuations,
aResult);
}
// true if the character is allowed in a RFC 5987 value
// see RFC 5987, Section 3.2.1, "attr-char"
bool IsRFC5987AttrChar(char aChar)
@ -882,12 +896,10 @@ nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
return NS_OK;
}
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
const char* aCharset,
const char* aDefaultCharset,
bool aOverrideCharset,
nsACString& aResult)
nsresult
internalDecodeParameter(const nsACString& aParamValue, const char* aCharset,
const char* aDefaultCharset, bool aOverrideCharset,
bool aDecode2047, nsACString& aResult)
{
aResult.Truncate();
// If aCharset is given, aParamValue was obtained from RFC2231/5987
@ -921,19 +933,33 @@ nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
}
aResult = unQuoted;
nsresult rv = NS_OK;
nsAutoCString decoded;
if (aDecode2047) {
nsAutoCString decoded;
// Try RFC 2047 encoding, instead.
nsresult rv = DecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
aOverrideCharset, true, decoded);
if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
aResult = decoded;
// Try RFC 2047 encoding, instead.
rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
aOverrideCharset, true, decoded);
if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
aResult = decoded;
}
return rv;
}
// Interface method for decoding a single header field parameter value.
// It delegates to the non-member helper internalDecodeParameter(), passing
// its own arguments through unchanged and a literal `true` for the helper's
// aDecode2047 flag, so the RFC 2047 decoding step stays enabled for callers
// of this method. The helper's result code is returned as-is.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
const char* aCharset,
const char* aDefaultCharset,
bool aOverrideCharset,
nsACString& aResult)
{
return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset,
aOverrideCharset, true, aResult);
}
#define ISHEXCHAR(c) \
((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
(0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \

View File

@ -16,11 +16,9 @@ public:
nsMIMEHeaderParamImpl() {}
virtual ~nsMIMEHeaderParamImpl() {}
private:
// Toggles support for RFC 2231 decoding, or RFC 5987 (5987 profiles 2231
// for use in HTTP, and, for instance, drops support for continuations)
enum ParamDecoding {
RFC_2231_DECODING = 1,
RFC_5987_DECODING
MIME_FIELD_ENCODING = 1,
HTTP_FIELD_ENCODING
};
nsresult DoGetParameter(const nsACString& aHeaderVal,

View File

@ -11,14 +11,15 @@ var DQUOTE = '"';
// Test array:
// - element 0: "Content-Disposition" header to test
// under RFC 2231 (email):
// under MIME (email):
// - element 1: correct value returned for disposition-type (empty param name)
// - element 2: correct value for filename returned
// under RFC 5987 (HTTP):
// (note: 5987-mode not yet in use, see bug 601933)
// under HTTP:
// (currently supports continuations; expected results without continuations
// are commented out for now)
// - element 3: correct value returned for disposition-type (empty param name)
// - element 4: correct value for filename returned
//
//
// 3 and 4 may be left out if they are identical
var tests = [
@ -57,22 +58,22 @@ var tests = [
// continuations not part of RFC 5987 (bug 610054)
["attachment; filename*0=foo; filename*1=bar",
"attachment", "foobar",
"attachment", Cr.NS_ERROR_INVALID_ARG],
/* "attachment", Cr.NS_ERROR_INVALID_ARG */],
// Return first continuation (invalid; error recovery)
["attachment; filename*0=first; filename*0=wrong; filename=basic",
"attachment", "first",
"attachment", "basic"],
/* "attachment", "basic" */],
// Only use correctly ordered continuations (invalid; error recovery)
["attachment; filename*0=first; filename*1=second; filename*0=wrong",
"attachment", "firstsecond",
"attachment", Cr.NS_ERROR_INVALID_ARG],
/* "attachment", Cr.NS_ERROR_INVALID_ARG */],
// prefer continuation to basic (unless RFC 5987)
["attachment; filename=basic; filename*0=foo; filename*1=bar",
"attachment", "foobar",
"attachment", "basic"],
/* "attachment", "basic" */],
// Prefer extended to basic and/or (broken or not) continuation
// (invalid; error recovery)
@ -88,19 +89,19 @@ var tests = [
// (invalid; error recovery)
["attachment; filename*0=foo; filename*2=bar",
"attachment", "foo",
"attachment", Cr.NS_ERROR_INVALID_ARG],
/* "attachment", Cr.NS_ERROR_INVALID_ARG */],
// Don't allow leading 0's (*01) (invalid; error recovery)
["attachment; filename*0=foo; filename*01=bar",
"attachment", "foo",
"attachment", Cr.NS_ERROR_INVALID_ARG],
/* "attachment", Cr.NS_ERROR_INVALID_ARG */],
// continuations should prevail over non-extended (unless RFC 5987)
["attachment; filename=basic; filename*0*=UTF-8''multi;\r\n"
+ " filename*1=line;\r\n"
+ " filename*2*=%20extended",
"attachment", "multiline extended",
"attachment", "basic"],
/* "attachment", "basic" */],
// Gaps should result in returning only value until gap hit
// (invalid; error recovery)
@ -108,7 +109,7 @@ var tests = [
+ " filename*1=line;\r\n"
+ " filename*3*=%20extended",
"attachment", "multiline",
"attachment", "basic"],
/* "attachment", "basic" */],
// First series, only please, and don't slurp up higher elements (*2 in this
// case) from later series into earlier one (invalid; error recovery)
@ -118,7 +119,7 @@ var tests = [
+ " filename*1=bad;\r\n"
+ " filename*2=evil",
"attachment", "multiline",
"attachment", "basic"],
/* "attachment", "basic" */],
// RFC 2231 not clear on correct outcome: we prefer non-continued extended
// (invalid; error recovery)
@ -132,13 +133,13 @@ var tests = [
["attachment; filename*0=UTF-8''unescaped;\r\n"
+ " filename*1*=%20so%20includes%20UTF-8''%20in%20value",
"attachment", "UTF-8''unescaped so includes UTF-8'' in value",
"attachment", Cr.NS_ERROR_INVALID_ARG],
/* "attachment", Cr.NS_ERROR_INVALID_ARG */],
// sneaky: if unescaped, make sure we leave UTF-8'' in value
["attachment; filename=basic; filename*0=UTF-8''unescaped;\r\n"
+ " filename*1*=%20so%20includes%20UTF-8''%20in%20value",
"attachment", "UTF-8''unescaped so includes UTF-8'' in value",
"attachment", "basic"],
/* "attachment", "basic" */],
// Prefer basic over invalid continuation
// (invalid; error recovery)
@ -153,7 +154,7 @@ var tests = [
+ " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a;\r\n"
+ " filename*11=b; filename*12=c;filename*13=d;filename*14=e;filename*15=f\r\n",
"attachment", "0123456789abcdef",
"attachment", "basic"],
/* "attachment", "basic" */],
// support digits over 10 (detect gaps)
["attachment; filename=basic; filename*0*=UTF-8''0;\r\n"
@ -161,7 +162,7 @@ var tests = [
+ " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a;\r\n"
+ " filename*11=b; filename*12=c;filename*14=e\r\n",
"attachment", "0123456789abc",
"attachment", "basic"],
/* "attachment", "basic" */],
// return nothing: invalid
// (invalid; error recovery)
@ -195,38 +196,38 @@ var tests = [
+ " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a;\r\n"
+ " filename*11=b; filename*12=c;filename*13=d;filename*15=f;filename*14=e;\r\n",
"attachment", "0123456789abcdef",
"attachment", "basic"],
/* "attachment", "basic" */],
// check non-digits in sequence numbers
["attachment; filename=basic; filename*0*=UTF-8''0;\r\n"
+ " filename*1a=1\r\n",
"attachment", "0",
"attachment", "basic"],
/* "attachment", "basic" */],
// check duplicate sequence numbers
["attachment; filename=basic; filename*0*=UTF-8''0;\r\n"
+ " filename*0=bad; filename*1=1;\r\n",
"attachment", "0",
"attachment", "basic"],
/* "attachment", "basic" */],
// check overflow
["attachment; filename=basic; filename*0*=UTF-8''0;\r\n"
+ " filename*11111111111111111111111111111111111111111111111111111111111=1",
"attachment", "0",
"attachment", "basic"],
/* "attachment", "basic" */],
// check underflow
["attachment; filename=basic; filename*0*=UTF-8''0;\r\n"
+ " filename*-1=1",
"attachment", "0",
"attachment", "basic"],
/* "attachment", "basic" */],
// check mixed token/quoted-string
["attachment; filename=basic; filename*0=\"0\";\r\n"
+ " filename*1=1;\r\n"
+ " filename*2*=%32",
"attachment", "012",
"attachment", "basic"],
/* "attachment", "basic" */],
// check empty sequence number
["attachment; filename=basic; filename**=UTF-8''0\r\n",
@ -261,7 +262,20 @@ var tests = [
// test empty param
["attachment; filename=",
"attachment", ""],
// Bug 601933: RFC 2047 does not apply to parameters (at least in HTTP)
["attachment; filename==?ISO-8859-1?Q?foo-=E4.html?=",
"attachment", "foo-\u00e4.html",
"attachment", "=?ISO-8859-1?Q?foo-=E4.html?="],
["attachment; filename=\"=?ISO-8859-1?Q?foo-=E4.html?=\"",
"attachment", "foo-\u00e4.html",
"attachment", "=?ISO-8859-1?Q?foo-=E4.html?="],
// format sent by GMail as of 2012-07-23 (5987 overrides 2047)
["attachment; filename=\"=?ISO-8859-1?Q?foo-=E4.html?=\"; filename*=UTF-8''5987",
"attachment", "5987"],
// Bug 651185: double quotes around 2231/5987 encoded param
// Change reverted due to backwards compat issues with various web services,
// such as OWA (Bug 703015), plus similar problems in Thunderbird. If this
@ -389,12 +403,12 @@ var tests = [
['attachment; filename=basic; filename*0="foo"; filename*1="\\b\\a\\r.html"',
"attachment", "foobar.html",
"attachment", "basic"],
/* "attachment", "basic" */],
// unmatched escape char
['attachment; filename=basic; filename*0="foo"; filename*1="\\b\\a\\',
"attachment", "fooba\\",
"attachment", "basic"],
/* "attachment", "basic" */],
// Bug 732369: Content-Disposition parser does not require presence of ";" between params
// optimally, this would not even return the disposition type "attachment"
@ -469,7 +483,7 @@ function do_tests(whichRFC)
if (whichRFC == 0)
result = mhp.getParameter(tests[i][0], "", "UTF-8", true, unused);
else
result = mhp.getParameter5987(tests[i][0], "", "UTF-8", true, unused);
result = mhp.getParameterHTTP(tests[i][0], "", "UTF-8", true, unused);
do_check_eq(result, expectedDt);
}
@ -493,7 +507,7 @@ function do_tests(whichRFC)
if (whichRFC == 0)
result = mhp.getParameter(tests[i][0], "filename", "UTF-8", true, unused);
else
result = mhp.getParameter5987(tests[i][0], "filename", "UTF-8", true, unused);
result = mhp.getParameterHTTP(tests[i][0], "filename", "UTF-8", true, unused);
do_check_eq(result, expectedFn);
}