diff --git a/netwerk/mime/nsIMIMEHeaderParam.idl b/netwerk/mime/nsIMIMEHeaderParam.idl index f7f1afb2567..dab064cb092 100644 --- a/netwerk/mime/nsIMIMEHeaderParam.idl +++ b/netwerk/mime/nsIMIMEHeaderParam.idl @@ -39,7 +39,7 @@ /* * This interface allows any module to access the routine - * for MIME header parameter parsing (RFC 2231) + * for MIME header parameter parsing (RFC 2231/5987) */ #include "nsISupports.idl" @@ -97,6 +97,18 @@ interface nsIMIMEHeaderParam : nsISupports { in ACString aFallbackCharset, in boolean aTryLocaleCharset, out string aLang); + + + /** + * Like getParameter, but using RFC 5987 instead of 2231. This removes + * support for header parameter continuations (foo*0, foo*1, etc). + */ + AString getParameter5987(in ACString aHeaderVal, + in string aParamName, + in ACString aFallbackCharset, + in boolean aTryLocaleCharset, + out string aLang); + /** * Given the value of a single header field (such as * Content-Disposition and Content-Type) and the name of a parameter diff --git a/netwerk/mime/nsMIMEHeaderParamImpl.cpp b/netwerk/mime/nsMIMEHeaderParamImpl.cpp index 7b5b7b3fb7e..aa0162f3e98 100644 --- a/netwerk/mime/nsMIMEHeaderParamImpl.cpp +++ b/netwerk/mime/nsMIMEHeaderParamImpl.cpp @@ -75,23 +75,47 @@ static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&); NS_IMPL_ISUPPORTS1(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam) -// XXX : aTryLocaleCharset is not yet effective. 
NS_IMETHODIMP nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, const char *aParamName, const nsACString& aFallbackCharset, bool aTryLocaleCharset, char **aLang, nsAString& aResult) +{ + return DoGetParameter(aHeaderVal, aParamName, RFC_2231_DECODING, + aFallbackCharset, aTryLocaleCharset, aLang, aResult); +} + +NS_IMETHODIMP +nsMIMEHeaderParamImpl::GetParameter5987(const nsACString& aHeaderVal, + const char *aParamName, + const nsACString& aFallbackCharset, + bool aTryLocaleCharset, + char **aLang, nsAString& aResult) +{ + return DoGetParameter(aHeaderVal, aParamName, RFC_5987_DECODING, + aFallbackCharset, aTryLocaleCharset, aLang, aResult); +} + +// XXX : aTryLocaleCharset is not yet effective. +nsresult +nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, + const char *aParamName, + ParamDecoding aDecoding, + const nsACString& aFallbackCharset, + bool aTryLocaleCharset, + char **aLang, nsAString& aResult) { aResult.Truncate(); nsresult rv; - // get parameter (decode RFC 2231 if it's RFC 2231-encoded and - // return charset.) + // get parameter (decode RFC 2231/5987 when applicable, as specified by + // aDecoding (5987 being a subset of 2231) and return charset.) 
nsXPIDLCString med; nsXPIDLCString charset; - rv = GetParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, - getter_Copies(charset), aLang, getter_Copies(med)); + rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, + aDecoding, getter_Copies(charset), aLang, + getter_Copies(med)); if (NS_FAILED(rv)) return rv; @@ -159,6 +183,20 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, char **aLang, char **aResult) { + return DoParameterInternal(aHeaderValue, aParamName, RFC_2231_DECODING, + aCharset, aLang, aResult); +} + + +nsresult +nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, + const char *aParamName, + ParamDecoding aDecoding, + char **aCharset, + char **aLang, + char **aResult) +{ + if (!aHeaderValue || !*aHeaderValue || !aResult) return NS_ERROR_INVALID_ARG; @@ -215,9 +253,13 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, // title*2="Else..." // D. title*0="Hey, what you think you are doing?" // title*1="There is no charset and lang info." 
- + // RFC5987: only A and B + PRInt32 paramLen = strlen(aParamName); + bool haveCaseAValue = false; + PRInt32 nextContinuation = 0; // next value in series, or -1 if error + while (*str) { const char *tokenStart = str; const char *tokenEnd = 0; @@ -276,6 +318,12 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, seenEquals && !nsCRT::strncasecmp(tokenStart, aParamName, paramLen)) { + if (*aResult) + { + // either seen earlier caseA value already--we prefer first--or caseA + // came after a continuation: either way, prefer other value + goto increment_str; + } // if the parameter spans across multiple lines we have to strip out the // line continuation -- jht 4/29/98 nsCAutoString tempStr(valueStart, valueEnd - valueStart); @@ -288,7 +336,8 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, *aResult = res; - // keep going, we may find a RFC 2231 encoded alternative + haveCaseAValue = true; + // keep going, we may find a RFC 2231/5987 encoded alternative } // case B, C, and D else if (tokenEnd - tokenStart > paramLen && @@ -296,14 +345,26 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, seenEquals && *(tokenStart + paramLen) == '*') { - const char *cp = tokenStart + paramLen + 1; // 1st char pass '*' + const char *cp = tokenStart + paramLen + 1; // 1st char past '*' bool needUnescape = *(tokenEnd - 1) == '*'; - // the 1st line of a multi-line parameter or a single line that needs - // unescaping. 
( title*0*= or title*= ) - // only allowed for token form, not for quoted-string - if (!needUnquote && - ((*cp == '0' && needUnescape) || (tokenEnd - tokenStart == paramLen + 1))) + + bool caseB = (tokenEnd - tokenStart) == paramLen + 1; + bool caseCorDStart = (*cp == '0') && needUnescape; + bool acceptContinuations = (aDecoding != RFC_5987_DECODING); + + // CaseB and start of CaseC: requires charset and optional language + // in quotes (quotes required even if lang is blank) + if (!needUnquote && (caseB || (caseCorDStart && acceptContinuations))) { + if (caseCorDStart) { + if (nextContinuation++ != 0) + { + // error: already started a continuation. Skip future + // continuations and return whatever initial parts were in order. + nextContinuation = -1; + goto increment_str; + } + } // look for single quotation mark(') const char *sQuote1 = PL_strchr(valueStart, 0x27); const char *sQuote2 = (char *) (sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nsnull); @@ -340,9 +401,10 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, { if (*aResult) { - // drop non-2231-encoded value, instead prefer the one using - // the RFC2231 encoding + // caseA value already read, or caseC/D value already read + // but we're now reading caseB: either way, drop old value nsMemory::Free(*aResult); + haveCaseAValue = false; } *aResult = (char *) nsMemory::Alloc(valueEnd - (sQuote2 + 1) + 1); if (*aResult) @@ -352,17 +414,35 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, if (needUnescape) { nsUnescape(*aResult); - if (tokenEnd - tokenStart == paramLen + 1) - // we're done; this is case B - return NS_OK; + if (caseB) + return NS_OK; // caseB wins over everything else } } } } // end of if-block : title*0*= or title*= - // a line of multiline param with no need for unescaping : title*[0-9]= - // or 2nd or later lines of a multiline param : title*[1-9]*= - else if (nsCRT::IsAsciiDigit(PRUnichar(*cp))) + // caseD: a line of multiline param with no need for 
unescaping : title*[0-9]= + // or 2nd or later lines of a caseC param : title*[1-9]*= + else if (acceptContinuations && nsCRT::IsAsciiDigit(PRUnichar(*cp))) { + PRInt32 nextSegment = atoi(cp); + // no leading zeros allowed except for ... position 0 + bool broken = nextSegment > 0 && *cp == '0'; + + if (broken || nextSegment != nextContinuation++) + { + // error: gap in continuation or unnecessary leading 0. + // Skip future continuations and return whatever initial parts were + // in order. + nextContinuation = -1; + goto increment_str; + } + if (haveCaseAValue && *aResult) + { + // drop caseA value + nsMemory::Free(*aResult); + *aResult = 0; + haveCaseAValue = false; + } PRInt32 len = 0; if (*aResult) // 2nd or later lines of multiline parameter { @@ -374,11 +454,11 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, } *aResult = ns; } - else if (*cp == '0') // must be; 1st line : title*0= + else { + NS_ASSERTION(*cp == '0', "Not first value in continuation"); // must be; 1st line : title*0= *aResult = (char *) nsMemory::Alloc(valueEnd - valueStart + 1); } - // else {} something is really wrong; out of memory if (*aResult) { // append a partial value @@ -394,7 +474,7 @@ nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, // str now points after the end of the value. // skip over whitespace, ';', whitespace. - +increment_str: while (nsCRT::IsAsciiSpace(*str)) ++str; if (*str == ';') ++str; while (nsCRT::IsAsciiSpace(*str)) ++str; @@ -425,8 +505,8 @@ nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, // aDefaultCharset is specified, decodes RFC 2047 encoding and converts // to UTF-8. Otherwise, just strips away CRLF. 
if (PL_strstr(aHeaderVal, "=?") || - aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || - Is7bitNonAsciiString(aHeaderVal, PL_strlen(aHeaderVal)))) { + (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || + Is7bitNonAsciiString(aHeaderVal, PL_strlen(aHeaderVal))))) { DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult); } else if (aEatContinuations && (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) { @@ -455,7 +535,7 @@ nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue, nsACString& aResult) { aResult.Truncate(); - // If aCharset is given, aParamValue was obtained from RFC2231 + // If aCharset is given, aParamValue was obtained from RFC2231/5987 // encoding and we're pretty sure that it's in aCharset. if (aCharset && *aCharset) { @@ -501,9 +581,9 @@ nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue, } #define ISHEXCHAR(c) \ - (0x30 <= PRUint8(c) && PRUint8(c) <= 0x39 || \ - 0x41 <= PRUint8(c) && PRUint8(c) <= 0x46 || \ - 0x61 <= PRUint8(c) && PRUint8(c) <= 0x66) + ((0x30 <= PRUint8(c) && PRUint8(c) <= 0x39) || \ + (0x41 <= PRUint8(c) && PRUint8(c) <= 0x46) || \ + (0x61 <= PRUint8(c) && PRUint8(c) <= 0x66)) // Decode Q encoding (RFC 2047). 
// static diff --git a/netwerk/mime/nsMIMEHeaderParamImpl.h b/netwerk/mime/nsMIMEHeaderParamImpl.h index 9267155d3ae..d52721446cb 100644 --- a/netwerk/mime/nsMIMEHeaderParamImpl.h +++ b/netwerk/mime/nsMIMEHeaderParamImpl.h @@ -47,6 +47,29 @@ public: nsMIMEHeaderParamImpl() {} virtual ~nsMIMEHeaderParamImpl() {} +private: + // Toggles support for RFC 2231 decoding, or RFC 5987 (5987 profiles 2231 + // for use in HTTP, and, for instance, drops support for continuations) + enum ParamDecoding { + RFC_2231_DECODING = 1, + RFC_5987_DECODING + }; + + nsresult DoGetParameter(const nsACString& aHeaderVal, + const char *aParamName, + ParamDecoding aDecoding, + const nsACString& aFallbackCharset, + bool aTryLocaleCharset, + char **aLang, + nsAString& aResult); + + nsresult DoParameterInternal(const char *aHeaderValue, + const char *aParamName, + ParamDecoding aDecoding, + char **aCharset, + char **aLang, + char **aResult); + }; #endif diff --git a/netwerk/test/unit/test_MIME_params.js b/netwerk/test/unit/test_MIME_params.js index f8bbdec0e7e..a75b23b8972 100644 --- a/netwerk/test/unit/test_MIME_params.js +++ b/netwerk/test/unit/test_MIME_params.js @@ -6,106 +6,185 @@ * * See also */ +const Cr = Components.results; + +// Test array: +// - element 0: "Content-Disposition" header to test for 'filename' parameter +// - element 1: correct value returned under RFC 2231 (email) +// - element 2: correct value returned under RFC 5987 (HTTP) + +var tests = [ + // No filename parameter: return nothing + ["attachment;", + Cr.NS_ERROR_INVALID_ARG, Cr.NS_ERROR_INVALID_ARG], + + // basic + ["attachment; filename=basic", + "basic", "basic"], + + // extended + ["attachment; filename*=UTF-8''extended", + "extended", "extended"], + + // prefer extended to basic + ["attachment; filename=basic; filename*=UTF-8''extended", + "extended", "extended"], + + // prefer extended to basic + ["attachment; filename*=UTF-8''extended; filename=basic", + "extended", "extended"], + + // use first basic value + 
["attachment; filename=first; filename=wrong", + "first", "first"], + + // old school bad HTTP servers: missing 'attachment' or 'inline' + ["filename=old", + "old", "old"], + + ["attachment; filename*=UTF-8''extended", + "extended", "extended"], + + ["attachment; filename*0=foo; filename*1=bar", + "foobar", Cr.NS_ERROR_INVALID_ARG], + + // Return first continuation + ["attachment; filename*0=first; filename*0=wrong; filename=basic", + "first", "basic"], + + // Only use correctly ordered continuations + ["attachment; filename*0=first; filename*1=second; filename*0=wrong", + "firstsecond", Cr.NS_ERROR_INVALID_ARG], + + // prefer continuation to basic (unless RFC 5987) + ["attachment; filename=basic; filename*0=foo; filename*1=bar", + "foobar", "basic"], + + // Prefer extended to basic and/or (broken or not) continuation + ["attachment; filename=basic; filename*0=first; filename*0=wrong; filename*=UTF-8''extended", + "extended", "extended"], -var succeed = [ - ["Content-Disposition: attachment; filename=basic; filename*=UTF-8''extended", - "extended"], - ["Content-Disposition: attachment; filename*=UTF-8''extended; filename=basic", - "extended"], - ["Content-Disposition: attachment; filename=basic", - "basic"], - ["Content-Disposition: attachment; filename*=UTF-8''extended", - "extended"], - ["Content-Disposition: attachment; filename*0=foo; filename*1=bar", - "foobar"], -/* BROKEN: we prepend 'basic' to result - ["Content-Disposition: attachment; filename=basic; filename*0=foo; filename*1=bar", - "foobar"], -*/ // RFC 2231 not clear on correct outcome: we prefer non-continued extended - ["Content-Disposition: attachment; filename=basic; filename*=UTF-8''extended; filename*0=foo; filename*1=bar", - "extended"], -/* BROKEN: not checking order yet + ["attachment; filename=basic; filename*=UTF-8''extended; filename*0=foo; filename*1=bar", + "extended", "extended"], + // Gaps should result in returning only value until gap hit - ["Content-Disposition: attachment; 
filename*0=foo; filename*2=bar", - "foo"], -*/ -/* BROKEN: don't check for leading 0s yet - // Don't handle leading 0's (*01) - ["Content-Disposition: attachment; filename*0=foo; filename*01=bar", - "foo"], -*/ - ["Content-Disposition: attachment; filename=basic; filename*0*=UTF-8''multi\r\n" + ["attachment; filename*0=foo; filename*2=bar", + "foo", Cr.NS_ERROR_INVALID_ARG], + + // Don't allow leading 0's (*01) + ["attachment; filename*0=foo; filename*01=bar", + "foo", Cr.NS_ERROR_INVALID_ARG], + + // continuations should prevail over non-extended (unless RFC 5987) + ["attachment; filename=basic; filename*0*=UTF-8''multi\r\n" + " filename*1=line\r\n" + " filename*2*=%20extended", - "multiline extended"], -/* BROKEN: not checking order yet + "multiline extended", "basic"], + // Gaps should result in returning only value until gap hit - ["Content-Disposition: attachment; filename=basic; filename*0*=UTF-8''multi\r\n" + ["attachment; filename=basic; filename*0*=UTF-8''multi\r\n" + " filename*1=line\r\n" + " filename*3*=%20extended", - "multiline"], -*/ + "multiline", "basic"], + + // First series, only please, and don't slurp up higher elements (*2 in this + // case) from later series into earlier one + ["attachment; filename=basic; filename*0*=UTF-8''multi\r\n" + + " filename*1=line\r\n" + + " filename*0*=UTF-8''wrong\r\n" + + " filename*1=bad\r\n" + + " filename*2=evil", + "multiline", "basic"], + // RFC 2231 not clear on correct outcome: we prefer non-continued extended - ["Content-Disposition: attachment; filename=basic; filename*0=UTF-8''multi\r\n" + ["attachment; filename=basic; filename*0=UTF-8''multi\r\n" + " filename*=UTF-8''extended\r\n" + " filename*1=line\r\n" + " filename*2*=%20extended", - "extended"], + "extended", "extended"], + // sneaky: if unescaped, make sure we leave UTF-8'' in value - ["Content-Disposition: attachment; filename*0=UTF-8''unescaped\r\n" + ["attachment; filename*0=UTF-8''unescaped\r\n" + " 
filename*1*=%20so%20includes%20UTF-8''%20in%20value", - "UTF-8''unescaped so includes UTF-8'' in value"], -/* BROKEN: we prepend 'basic' to result + "UTF-8''unescaped so includes UTF-8'' in value", Cr.NS_ERROR_INVALID_ARG], + // sneaky: if unescaped, make sure we leave UTF-8'' in value - ["Content-Disposition: attachment; filename=basic; filename*0=UTF-8''unescaped\r\n" + ["attachment; filename=basic; filename*0=UTF-8''unescaped\r\n" + " filename*1*=%20so%20includes%20UTF-8''%20in%20value", - "UTF-8''unescaped so includes UTF-8'' in value"], -*/ -/* BROKEN: we append filename*1 to 'basic' - // Also not sure if this is the spec'd behavior here: - ["Content-Disposition: attachment; filename=basic; filename*1=multi\r\n" + "UTF-8''unescaped so includes UTF-8'' in value", "basic"], + + // Prefer basic over invalid continuation + ["attachment; filename=basic; filename*1=multi\r\n" + " filename*2=line\r\n" + " filename*3*=%20extended", - "basic"], -*/ -]; + "basic", "basic"], -var broken = [ - ["Content-Disposition: attachment; filename*1=multi\r\n" + // support digits over 10 + ["attachment; filename=basic; filename*0*=UTF-8''0\r\n" + + " filename*1=1; filename*2=2;filename*3=3;filename*4=4;filename*5=5\r\n" + + " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a\r\n" + + " filename*11=b; filename*12=c;filename*13=d;filename*14=e;filename*15=f\r\n", + "0123456789abcdef", "basic"], + + // support digits over 10 (check ordering) + ["attachment; filename=basic; filename*0*=UTF-8''0\r\n" + + " filename*1=1; filename*2=2;filename*3=3;filename*4=4;filename*5=5\r\n" + + " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a\r\n" + + " filename*11=b; filename*12=c;filename*13=d;filename*15=f;filename*14=e\r\n", + "0123456789abcd" /* should see the 'f', see bug 588414 */, "basic"], + + // support digits over 10 (detect gaps) + ["attachment; filename=basic; filename*0*=UTF-8''0\r\n" + + " filename*1=1; 
filename*2=2;filename*3=3;filename*4=4;filename*5=5\r\n" + + " filename*6=6; filename*7=7;filename*8=8;filename*9=9;filename*10=a\r\n" + + " filename*11=b; filename*12=c;filename*14=e\r\n", + "0123456789abc", "basic"], + + // return nothing: invalid + ["attachment; filename*1=multi\r\n" + " filename*2=line\r\n" + " filename*3*=%20extended", - "param continuation must start from 0: should fail"], + Cr.NS_ERROR_INVALID_ARG, Cr.NS_ERROR_INVALID_ARG], + ]; - -function run_test() { - +function do_tests(whichRFC) +{ var mhp = Components.classes["@mozilla.org/network/mime-hdrparam;1"] .getService(Components.interfaces.nsIMIMEHeaderParam); var unused = { value : null }; - for (var i = 0; i < succeed.length; ++i) { - dump("Testing " + succeed[i] + "\n"); + for (var i = 0; i < tests.length; ++i) { + dump("Testing " + tests[i] + "\n"); try { - do_check_eq(mhp.getParameter(succeed[i][0], "filename", "UTF-8", true, unused), - succeed[i][1]); - } catch (e) {} - } - - // Check failure cases - for (var i = 0; i < broken.length; ++i) { - dump("Testing " + broken[i] + "\n"); - try { - var result = mhp.getParameter(broken[i][0], "filename", "UTF-8", true, unused); - // No exception? Error. 
- do_check_eq(broken[i][1], "instead got: " + result); - } catch (e) { - // .result set if getParameter failed: check for correct error code - if (e.result) - do_check_eq(e.result, Components.results.NS_ERROR_OUT_OF_MEMORY); + var result; + if (whichRFC == 1) + result = mhp.getParameter(tests[i][0], "filename", "UTF-8", true, unused); + else + result = mhp.getParameter5987(tests[i][0], "filename", "UTF-8", true, unused); + do_check_eq(result, tests[i][whichRFC]); + } + catch (e) { + // Tests can also succeed by expecting to fail with given error code + if (e.result) { + // Allow following tests to run by catching exception from do_check_eq() + try { + do_check_eq(e.result, tests[i][whichRFC]); + } catch(e) {} + } + continue; } } } +function run_test() { + + // Test RFC 2231 + do_tests(1); + + // Test RFC 5987 + do_tests(2); +} +