Bug 610054 - Clean up MIME header parsing (allow different behavior for HTTP and EMail); r=bz

This commit is contained in:
julian.reschke@gmx.de 2011-10-10 15:27:05 +01:00
parent ae39c696bb
commit 0c968928b1
4 changed files with 295 additions and 101 deletions

View File

@ -39,7 +39,7 @@
/*
* This interface allows any module to access the routine
* for MIME header parameter parsing (RFC 2231)
* for MIME header parameter parsing (RFC 2231/5987)
*/
#include "nsISupports.idl"
@ -97,6 +97,18 @@ interface nsIMIMEHeaderParam : nsISupports {
in ACString aFallbackCharset,
in boolean aTryLocaleCharset,
out string aLang);
/**
* Like getParameter, but using RFC 5987 instead of 2231. This removes
* support for header parameter continuations (foo*0, foo*1, etc), so only
* the plain form (foo=) and the single extended form (foo*=) are
* recognized. The arguments have the same meaning as for getParameter.
*/
AString getParameter5987(in ACString aHeaderVal,
in string aParamName,
in ACString aFallbackCharset,
in boolean aTryLocaleCharset,
out string aLang);
/**
* Given the value of a single header field (such as
* Content-Disposition and Content-Type) and the name of a parameter

View File

@ -75,23 +75,47 @@ static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
NS_IMPL_ISUPPORTS1(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
// XXX : aTryLocaleCharset is not yet effective.
// RFC 2231 flavour of the parameter lookup. Every argument is handed to
// DoGetParameter unchanged; the only thing decided here is the decoding mode.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
const char *aParamName,
const nsACString& aFallbackCharset,
bool aTryLocaleCharset,
char **aLang, nsAString& aResult)
{
// RFC_2231_DECODING leaves parameter continuations (foo*0, foo*1, ...) enabled.
return DoGetParameter(aHeaderVal, aParamName, RFC_2231_DECODING,
aFallbackCharset, aTryLocaleCharset, aLang, aResult);
}
// RFC 5987 flavour of the parameter lookup. Every argument is handed to
// DoGetParameter unchanged; the only thing decided here is the decoding mode.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::GetParameter5987(const nsACString& aHeaderVal,
const char *aParamName,
const nsACString& aFallbackCharset,
bool aTryLocaleCharset,
char **aLang, nsAString& aResult)
{
// RFC_5987_DECODING makes the parser ignore continuation segments
// (foo*0, foo*1, ...); only foo= and foo*= are considered.
return DoGetParameter(aHeaderVal, aParamName, RFC_5987_DECODING,
aFallbackCharset, aTryLocaleCharset, aLang, aResult);
}
// XXX : aTryLocaleCharset is not yet effective.
nsresult
nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal,
const char *aParamName,
ParamDecoding aDecoding,
const nsACString& aFallbackCharset,
bool aTryLocaleCharset,
char **aLang, nsAString& aResult)
{
aResult.Truncate();
nsresult rv;
// get parameter (decode RFC 2231 if it's RFC 2231-encoded and
// return charset.)
// get parameter (decode RFC 2231/5987 when applicable, as specified by
// aDecoding (5987 being a subset of 2231) and return charset.)
nsXPIDLCString med;
nsXPIDLCString charset;
rv = GetParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
getter_Copies(charset), aLang, getter_Copies(med));
rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
aDecoding, getter_Copies(charset), aLang,
getter_Copies(med));
if (NS_FAILED(rv))
return rv;
@ -159,6 +183,20 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
char **aLang,
char **aResult)
{
return DoParameterInternal(aHeaderValue, aParamName, RFC_2231_DECODING,
aCharset, aLang, aResult);
}
nsresult
nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
const char *aParamName,
ParamDecoding aDecoding,
char **aCharset,
char **aLang,
char **aResult)
{
if (!aHeaderValue || !*aHeaderValue || !aResult)
return NS_ERROR_INVALID_ARG;
@ -215,9 +253,13 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
// title*2="Else..."
// D. title*0="Hey, what you think you are doing?"
// title*1="There is no charset and lang info."
// RFC5987: only A and B
PRInt32 paramLen = strlen(aParamName);
bool haveCaseAValue = false;
PRInt32 nextContinuation = 0; // next value in series, or -1 if error
while (*str) {
const char *tokenStart = str;
const char *tokenEnd = 0;
@ -276,6 +318,12 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
seenEquals &&
!nsCRT::strncasecmp(tokenStart, aParamName, paramLen))
{
if (*aResult)
{
// either seen earlier caseA value already--we prefer first--or caseA
// came after a continuation: either way, prefer other value
goto increment_str;
}
// if the parameter spans across multiple lines we have to strip out the
// line continuation -- jht 4/29/98
nsCAutoString tempStr(valueStart, valueEnd - valueStart);
@ -288,7 +336,8 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
*aResult = res;
// keep going, we may find a RFC 2231 encoded alternative
haveCaseAValue = true;
// keep going, we may find a RFC 2231/5987 encoded alternative
}
// case B, C, and D
else if (tokenEnd - tokenStart > paramLen &&
@ -296,14 +345,26 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
seenEquals &&
*(tokenStart + paramLen) == '*')
{
const char *cp = tokenStart + paramLen + 1; // 1st char pass '*'
const char *cp = tokenStart + paramLen + 1; // 1st char past '*'
bool needUnescape = *(tokenEnd - 1) == '*';
// the 1st line of a multi-line parameter or a single line that needs
// unescaping. ( title*0*= or title*= )
// only allowed for token form, not for quoted-string
if (!needUnquote &&
((*cp == '0' && needUnescape) || (tokenEnd - tokenStart == paramLen + 1)))
bool caseB = (tokenEnd - tokenStart) == paramLen + 1;
bool caseCorDStart = (*cp == '0') && needUnescape;
bool acceptContinuations = (aDecoding != RFC_5987_DECODING);
// CaseB and start of CaseC: requires charset and optional language
// in quotes (quotes required even if lang is blank)
if (!needUnquote && (caseB || (caseCorDStart && acceptContinuations)))
{
if (caseCorDStart) {
if (nextContinuation++ != 0)
{
// error: already started a continuation. Skip future
// continuations and return whatever initial parts were in order.
nextContinuation = -1;
goto increment_str;
}
}
// look for single quotation mark(')
const char *sQuote1 = PL_strchr(valueStart, 0x27);
const char *sQuote2 = (char *) (sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nsnull);
@ -340,9 +401,10 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
{
if (*aResult)
{
// drop non-2231-encoded value, instead prefer the one using
// the RFC2231 encoding
// caseA value already read, or caseC/D value already read
// but we're now reading caseB: either way, drop old value
nsMemory::Free(*aResult);
haveCaseAValue = false;
}
*aResult = (char *) nsMemory::Alloc(valueEnd - (sQuote2 + 1) + 1);
if (*aResult)
@ -352,17 +414,35 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
if (needUnescape)
{
nsUnescape(*aResult);
if (tokenEnd - tokenStart == paramLen + 1)
// we're done; this is case B
return NS_OK;
if (caseB)
return NS_OK; // caseB wins over everything else
}
}
}
} // end of if-block : title*0*= or title*=
// a line of multiline param with no need for unescaping : title*[0-9]=
// or 2nd or later lines of a multiline param : title*[1-9]*=
else if (nsCRT::IsAsciiDigit(PRUnichar(*cp)))
// caseD: a line of multiline param with no need for unescaping : title*[0-9]=
// or 2nd or later lines of a caseC param : title*[1-9]*=
else if (acceptContinuations && nsCRT::IsAsciiDigit(PRUnichar(*cp)))
{
PRInt32 nextSegment = atoi(cp);
// no leading zeros allowed except for ... position 0
bool broken = nextSegment > 0 && *cp == '0';
if (broken || nextSegment != nextContinuation++)
{
// error: gap in continuation or unnecessary leading 0.
// Skip future continuations and return whatever initial parts were
// in order.
nextContinuation = -1;
goto increment_str;
}
if (haveCaseAValue && *aResult)
{
// drop caseA value
nsMemory::Free(*aResult);
*aResult = 0;
haveCaseAValue = false;
}
PRInt32 len = 0;
if (*aResult) // 2nd or later lines of multiline parameter
{
@ -374,11 +454,11 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
}
*aResult = ns;
}
else if (*cp == '0') // must be; 1st line : title*0=
else
{
NS_ASSERTION(*cp == '0', "Not first value in continuation"); // must be; 1st line : title*0=
*aResult = (char *) nsMemory::Alloc(valueEnd - valueStart + 1);
}
// else {} something is really wrong; out of memory
if (*aResult)
{
// append a partial value
@ -394,7 +474,7 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
// str now points after the end of the value.
// skip over whitespace, ';', whitespace.
increment_str:
while (nsCRT::IsAsciiSpace(*str)) ++str;
if (*str == ';') ++str;
while (nsCRT::IsAsciiSpace(*str)) ++str;
@ -425,8 +505,8 @@ nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
// aDefaultCharset is specified, decodes RFC 2047 encoding and converts
// to UTF-8. Otherwise, just strips away CRLF.
if (PL_strstr(aHeaderVal, "=?") ||
aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
Is7bitNonAsciiString(aHeaderVal, PL_strlen(aHeaderVal)))) {
(aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
Is7bitNonAsciiString(aHeaderVal, PL_strlen(aHeaderVal))))) {
DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
} else if (aEatContinuations &&
(PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
@ -455,7 +535,7 @@ nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
nsACString& aResult)
{
aResult.Truncate();
// If aCharset is given, aParamValue was obtained from RFC2231
// If aCharset is given, aParamValue was obtained from RFC2231/5987
// encoding and we're pretty sure that it's in aCharset.
if (aCharset && *aCharset)
{
@ -501,9 +581,9 @@ nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
}
#define ISHEXCHAR(c) \
(0x30 <= PRUint8(c) && PRUint8(c) <= 0x39 || \
0x41 <= PRUint8(c) && PRUint8(c) <= 0x46 || \
0x61 <= PRUint8(c) && PRUint8(c) <= 0x66)
((0x30 <= PRUint8(c) && PRUint8(c) <= 0x39) || \
(0x41 <= PRUint8(c) && PRUint8(c) <= 0x46) || \
(0x61 <= PRUint8(c) && PRUint8(c) <= 0x66))
// Decode Q encoding (RFC 2047).
// static

View File

@ -47,6 +47,29 @@ public:
nsMIMEHeaderParamImpl() {}
virtual ~nsMIMEHeaderParamImpl() {}
private:
// Selects which parameter-encoding profile the parser applies: full RFC 2231,
// or RFC 5987 (5987 profiles 2231 for use in HTTP, and, for instance, drops
// support for continuations).
enum ParamDecoding {
// Continuation segments (foo*0, foo*1, ...) are accepted.
RFC_2231_DECODING = 1,
// Continuation segments are ignored; only foo= and foo*= are considered.
RFC_5987_DECODING
};
// Shared implementation behind GetParameter and GetParameter5987. Those
// public methods forward all of their arguments here and differ only in the
// aDecoding value they supply.
nsresult DoGetParameter(const nsACString& aHeaderVal,
const char *aParamName,
ParamDecoding aDecoding,
const nsACString& aFallbackCharset,
bool aTryLocaleCharset,
char **aLang,
nsAString& aResult);
// Shared implementation behind GetParameterInternal (which always passes
// RFC_2231_DECODING) and DoGetParameter (which passes its own aDecoding
// through). Returns NS_ERROR_INVALID_ARG when aHeaderValue is null or empty,
// or when aResult is null.
nsresult DoParameterInternal(const char *aHeaderValue,
const char *aParamName,
ParamDecoding aDecoding,
char **aCharset,
char **aLang,
char **aResult);
};
#endif

View File

@ -6,106 +6,185 @@
*
* See also <http://greenbytes.de/tech/webdav/rfc5987.html#rfc.section.4.2>
*/
const Cr = Components.results;
// Test array:
// - element 0: "Content-Disposition" header to test for 'filename' parameter
// - element 1: correct value returned under RFC 2231 (email)
// - element 2: correct value returned under RFC 5987 (HTTP)
var tests = [
// No filename parameter: return nothing
["attachment;",
Cr.NS_ERROR_INVALID_ARG, Cr.NS_ERROR_INVALID_ARG],
// basic
["attachment; filename=basic",
"basic", "basic"],
// extended
["attachment; filename*=UTF-8''extended",
"extended", "extended"],
// prefer extended to basic
["attachment; filename=basic; filename*=UTF-8''extended",
"extended", "extended"],
// prefer extended to basic
["attachment; filename*=UTF-8''extended; filename=basic",
"extended", "extended"],
// use first basic value
["attachment; filename=first; filename=wrong",
"first", "first"],
// old school bad HTTP servers: missing 'attachment' or 'inline'
["filename=old",
"old", "old"],
["attachment; filename*=UTF-8''extended",
"extended", "extended"],
["attachment; filename*0=foo; filename*1=bar",
"foobar", Cr.NS_ERROR_INVALID_ARG],
// Return first continuation
["attachment; filename*0=first; filename*0=wrong; filename=basic",
"first", "basic"],
// Only use correctly ordered continuations
["attachment; filename*0=first; filename*1=second; filename*0=wrong",
"firstsecond", Cr.NS_ERROR_INVALID_ARG],
// prefer continuation to basic (unless RFC 5987)
["attachment; filename=basic; filename*0=foo; filename*1=bar",
"foobar", "basic"],
// Prefer extended to basic and/or (broken or not) continuation
["attachment; filename=basic; filename*0=first; filename*0=wrong; filename*=UTF-8''extended",
"extended", "extended"],
var succeed = [
["Content-Disposition: attachment; filename=basic; filename*=UTF-8''extended",
"extended"],
["Content-Disposition: attachment; filename*=UTF-8''extended; filename=basic",
"extended"],
["Content-Disposition: attachment; filename=basic",
"basic"],
["Content-Disposition: attachment; filename*=UTF-8''extended",
"extended"],
["Content-Disposition: attachment; filename*0=foo; filename*1=bar",
"foobar"],
/* BROKEN: we prepend 'basic' to result
["Content-Disposition: attachment; filename=basic; filename*0=foo; filename*1=bar",
"foobar"],
*/
// RFC 2231 not clear on correct outcome: we prefer non-continued extended
["Content-Disposition: attachment; filename=basic; filename*=UTF-8''extended; filename*0=foo; filename*1=bar",
"extended"],
/* BROKEN: not checking order yet
["attachment; filename=basic; filename*=UTF-8''extended; filename*0=foo; filename*1=bar",
"extended", "extended"],
// Gaps should result in returning only value until gap hit
["Content-Disposition: attachment; filename*0=foo; filename*2=bar",
"foo"],
*/
/* BROKEN: don't check for leading 0s yet
// Don't handle leading 0's (*01)
["Content-Disposition: attachment; filename*0=foo; filename*01=bar",
"foo"],
*/
["Content-Disposition: attachment; filename=basic; filename*0*=UTF-8''multi\r\n"
["attachment; filename*0=foo; filename*2=bar",
"foo", Cr.NS_ERROR_INVALID_ARG],
// Don't allow leading 0's (*01)
["attachment; filename*0=foo; filename*01=bar",
"foo", Cr.NS_ERROR_INVALID_ARG],
// continuations should prevail over non-extended (unless RFC 5987)
["attachment; filename=basic; filename*0*=UTF-8''multi\r\n"
+ " filename*1=line\r\n"
+ " filename*2*=%20extended",
"multiline extended"],
/* BROKEN: not checking order yet
"multiline extended", "basic"],
// Gaps should result in returning only value until gap hit
["Content-Disposition: attachment; filename=basic; filename*0*=UTF-8''multi\r\n"
["attachment; filename=basic; filename*0*=UTF-8''multi\r\n"
+ " filename*1=line\r\n"
+ " filename*3*=%20extended",
"multiline"],
*/
"multiline", "basic"],
// Use only the first series, and don't slurp up higher elements (*2 in this
// case) from a later series into the earlier one
["attachment; filename=basic; filename*0*=UTF-8''multi\r\n"
+ " filename*1=line\r\n"
+ " filename*0*=UTF-8''wrong\r\n"
+ " filename*1=bad\r\n"
+ " filename*2=evil",
"multiline", "basic"],
// RFC 2231 not clear on correct outcome: we prefer non-continued extended
["Content-Disposition: attachment; filename=basic; filename*0=UTF-8''multi\r\n"
["attachment; filename=basic; filename*0=UTF-8''multi\r\n"
+ " filename*=UTF-8''extended\r\n"
+ " filename*1=line\r\n"
+ " filename*2*=%20extended",
"extended"],
"extended", "extended"],
// sneaky: if unescaped, make sure we leave UTF-8'' in value
["Content-Disposition: attachment; filename*0=UTF-8''unescaped\r\n"
["attachment; filename*0=UTF-8''unescaped\r\n"
+ " filename*1*=%20so%20includes%20UTF-8''%20in%20value",
"UTF-8''unescaped so includes UTF-8'' in value"],
/* BROKEN: we prepend 'basic' to result
"UTF-8''unescaped so includes UTF-8'' in value", Cr.NS_ERROR_INVALID_ARG],
// sneaky: if unescaped, make sure we leave UTF-8'' in value
["Content-Disposition: attachment; filename=basic; filename*0=UTF-8''unescaped\r\n"
["attachment; filename=basic; filename*0=UTF-8''unescaped\r\n"
+ " filename*1*=%20so%20includes%20UTF-8''%20in%20value",
"UTF-8''unescaped so includes UTF-8'' in value"],
*/
/* BROKEN: we append filename*1 to 'basic'
// Also not sure if this is the spec'd behavior here:
["Content-Disposition: attachment; filename=basic; filename*1=multi\r\n"
"UTF-8''unescaped so includes UTF-8'' in value", "basic"],
// Prefer basic over invalid continuation
["attachment; filename=basic; filename*1=multi\r\n"
+ " filename*2=line\r\n"
+ " filename*3*=%20extended",
"basic"],
*/
];
"basic", "basic"],
var broken = [
["Content-Disposition: attachment; filename*1=multi\r\n"
// support digits over 10
["attachment; filename=basic; filename*0*=UTF-8''0\r\n"
+ " filename*1=1; filename*2=2;filename*3=3;filename*4=4;filename*5=5\r\n"
+ " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a\r\n"
+ " filename*11=b; filename*12=c;filename*13=d;filename*14=e;filename*15=f\r\n",
"0123456789abcdef", "basic"],
// support digits over 10 (check ordering)
["attachment; filename=basic; filename*0*=UTF-8''0\r\n"
+ " filename*1=1; filename*2=2;filename*3=3;filename*4=4;filename*5=5\r\n"
+ " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a\r\n"
+ " filename*11=b; filename*12=c;filename*13=d;filename*15=f;filename*14=e\r\n",
"0123456789abcd" /* should see the 'f', see bug 588414 */, "basic"],
// support digits over 10 (detect gaps)
["attachment; filename=basic; filename*0*=UTF-8''0\r\n"
+ " filename*1=1; filename*2=2;filename*3=3;filename*4=4;filename*5=5\r\n"
+ " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a\r\n"
+ " filename*11=b; filename*12=c;filename*14=e\r\n",
"0123456789abc", "basic"],
// return nothing: invalid
["attachment; filename*1=multi\r\n"
+ " filename*2=line\r\n"
+ " filename*3*=%20extended",
"param continuation must start from 0: should fail"],
Cr.NS_ERROR_INVALID_ARG, Cr.NS_ERROR_INVALID_ARG],
];
function run_test() {
function do_tests(whichRFC)
{
var mhp = Components.classes["@mozilla.org/network/mime-hdrparam;1"]
.getService(Components.interfaces.nsIMIMEHeaderParam);
var unused = { value : null };
for (var i = 0; i < succeed.length; ++i) {
dump("Testing " + succeed[i] + "\n");
for (var i = 0; i < tests.length; ++i) {
dump("Testing " + tests[i] + "\n");
try {
do_check_eq(mhp.getParameter(succeed[i][0], "filename", "UTF-8", true, unused),
succeed[i][1]);
} catch (e) {}
}
// Check failure cases
for (var i = 0; i < broken.length; ++i) {
dump("Testing " + broken[i] + "\n");
try {
var result = mhp.getParameter(broken[i][0], "filename", "UTF-8", true, unused);
// No exception? Error.
do_check_eq(broken[i][1], "instead got: " + result);
} catch (e) {
// .result set if getParameter failed: check for correct error code
if (e.result)
do_check_eq(e.result, Components.results.NS_ERROR_OUT_OF_MEMORY);
var result;
if (whichRFC == 1)
result = mhp.getParameter(tests[i][0], "filename", "UTF-8", true, unused);
else
result = mhp.getParameter5987(tests[i][0], "filename", "UTF-8", true, unused);
do_check_eq(result, tests[i][whichRFC]);
}
catch (e) {
// Tests can also succeed by expecting to fail with given error code
if (e.result) {
// Allow following tests to run by catching exception from do_check_eq()
try {
do_check_eq(e.result, tests[i][whichRFC]);
} catch(e) {}
}
continue;
}
}
}
// Runs every row of `tests` through both decoders. The argument passed to
// do_tests doubles as the column index of the expected value in each row:
// 1 selects getParameter (RFC 2231), 2 selects getParameter5987 (RFC 5987).
function run_test() {
// Test RFC 2231
do_tests(1);
// Test RFC 5987
do_tests(2);
}