Bug 1101625 part 1 - Don't escape all non-ASCII characters when unsafe characters are found, just escape the unsafe characters. r=smontagu

This commit is contained in:
Mats Palmgren 2014-12-01 15:55:15 +00:00
parent 91c612cc56
commit 9ce25f95ec
5 changed files with 92 additions and 29 deletions

View File

@ -26,9 +26,8 @@ interface nsITextToSubURI : nsISupports
* give the original escaped string
* <li> In case of a conversion error, the URI fragment (escaped) is
* assumed to be in UTF-8 and converted to AString (UTF-16)
* <li> In case of successful conversion to unicode but it contains
* any characters in network.IDN.blacklist_chars (except space)
* then the final result is escaped
* <li> In case of successful conversion any resulting character listed
* in network.IDN.blacklist_chars (except space) is escaped
* <li> Always succeeds (callers don't need to do error checking)
* </ul>
*

View File

@ -35,14 +35,9 @@ static const char16_t sNetworkIDNBlacklistChars[] =
0x3014, 0x3015, 0x3033, 0x3164, 0x321D, 0x321E, 0x33AE, 0x33AF,
0x33C6, 0x33DF, 0xA789, 0xFE14, 0xFE15, 0xFE3F, 0xFE5D, 0xFE5E,
0xFEFF, 0xFF0E, 0xFF0F, 0xFF61, 0xFFA0, 0xFFF9, 0xFFFA, 0xFFFB,
0xFFFC, 0xFFFD,
'\0'
0xFFFC, 0xFFFD
};
nsTextToSubURI::nsTextToSubURI()
{
mUnsafeChars.SetIsVoid(true);
}
nsTextToSubURI::~nsTextToSubURI()
{
}
@ -246,29 +241,30 @@ NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
}
// if there are any characters that are unsafe for IRIs, reescape.
if (mUnsafeChars.IsVoid()) {
if (mUnsafeChars.IsEmpty()) {
nsCOMPtr<nsISupportsString> blacklist;
nsresult rv = mozilla::Preferences::GetComplex("network.IDN.blacklist_chars",
NS_GET_IID(nsISupportsString),
getter_AddRefs(blacklist));
if (NS_SUCCEEDED(rv)) {
blacklist->ToString(getter_Copies(mUnsafeChars));
mUnsafeChars.StripChars(" "); // we allow SPACE in this method
MOZ_ASSERT(!mUnsafeChars.IsVoid());
nsString chars;
blacklist->ToString(getter_Copies(chars));
chars.StripChars(" "); // we allow SPACE in this method
mUnsafeChars.AppendElements(chars.Data(), chars.Length());
} else {
NS_WARNING("Failed to get the 'network.IDN.blacklist_chars' preference");
}
// We check IsEmpty() intentionally here because an empty (or just spaces)
// pref value is likely a mistake/error of some sort.
if (mUnsafeChars.IsEmpty()) {
mUnsafeChars.AppendElements(sNetworkIDNBlacklistChars,
mozilla::ArrayLength(sNetworkIDNBlacklistChars));
}
mUnsafeChars.Sort();
}
// We check IsEmpty() intentionally here instead of IsVoid() because an
// empty (or just spaces) pref value is likely a mistake/error of some sort.
const char16_t* unsafeChars =
mUnsafeChars.IsEmpty() ? sNetworkIDNBlacklistChars : mUnsafeChars;
if (PromiseFlatString(_retval).FindCharInSet(unsafeChars) != kNotFound) {
// Note that this reescapes all non-ASCII characters in the URI, not just
// the unsafe characters.
nsString reescapedSpec;
_retval = NS_EscapeURL(_retval, esc_OnlyNonASCII, reescapedSpec);
}
const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval);
nsString reescapedSpec;
_retval = NS_EscapeURL(unescapedResult, mUnsafeChars, reescapedSpec);
return NS_OK;
}

View File

@ -7,15 +7,13 @@
#include "nsITextToSubURI.h"
#include "nsString.h"
#include "nsTArray.h"
//==============================================================
class nsTextToSubURI: public nsITextToSubURI {
NS_DECL_ISUPPORTS
NS_DECL_NSITEXTTOSUBURI
public:
nsTextToSubURI();
private:
virtual ~nsTextToSubURI();
@ -33,8 +31,9 @@ private:
bool aIRI,
nsAString &_retval);
// Void until we get the pref "network.IDN.blacklist_chars" successfully.
nsXPIDLString mUnsafeChars;
// Characters from the pref "network.IDN.blacklist_chars", or a built-in
// fallback if reading the pref fails.
nsTArray<char16_t> mUnsafeChars;
};
#endif // nsTextToSubURI_h__

View File

@ -7,6 +7,8 @@
#include "nsEscape.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/BinarySearch.h"
#include "nsTArray.h"
#include "nsCRT.h"
#include "plstr.h"
@ -42,6 +44,8 @@ static const int netCharType[256] =
#define IS_OK(C) (netCharType[((unsigned int)(C))] & (aFlags))
#define HEX_ESCAPE '%'
static const uint32_t ENCODE_MAX_LEN = 6; // %uABCD
static uint32_t
AppendPercentHex(char* aBuffer, unsigned char aChar)
{
@ -392,7 +396,6 @@ T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
return false;
}
const uint32_t ENCODE_MAX_LEN = 6; // %uABCD
bool forced = !!(aFlags & esc_Forced);
bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
@ -476,6 +479,59 @@ NS_EscapeURL(const nsSubstring& aStr, uint32_t aFlags, nsSubstring& aResult)
return aStr;
}
// Scans aStr forward from index aStart and reports the position of the first
// character that is also present in aForbidden.
//
// aForbidden is probed with mozilla::BinarySearch, so it is expected to be
// sorted in ascending order.
//
// Returns true and stores the matching position in *aIndex when a forbidden
// character is found; returns false and leaves *aIndex untouched otherwise.
static bool
FindFirstMatchFrom(const nsAFlatString& aStr, size_t aStart,
                   const nsTArray<char16_t>& aForbidden, size_t* aIndex)
{
  const size_t forbiddenCount = aForbidden.Length();
  const size_t strLen = aStr.Length();
  size_t pos = aStart;
  while (pos < strLen) {
    // BinarySearch requires an out-param for the match position inside
    // aForbidden; only its boolean result matters here.
    size_t ignoredSlot;
    if (mozilla::BinarySearch(aForbidden, 0, forbiddenCount, aStr[pos],
                              &ignoredSlot)) {
      *aIndex = pos;
      return true;
    }
    ++pos;
  }
  return false;
}
// Percent-escapes every character of aStr that is listed in aForbidden.
//
// aStr       the input string.
// aForbidden the characters to escape; it is handed to FindFirstMatchFrom,
//            which binary-searches it, so it must be sorted (low to high).
// aResult    receives the escaped string, but only if at least one character
//            had to be escaped; otherwise it is not modified.
//
// Returns aResult if something was escaped, or aStr itself if nothing was.
const nsSubstring&
NS_EscapeURL(const nsAFlatString& aStr, const nsTArray<char16_t>& aForbidden,
             nsSubstring& aResult)
{
  const size_t strLen = aStr.Length();
  bool didEscape = false;
  size_t i = 0;  // start of the not-yet-copied part of aStr
  size_t j;      // index of the next forbidden character at or after i
  while (i < strLen &&
         MOZ_UNLIKELY(FindFirstMatchFrom(aStr, i, aForbidden, &j))) {
    if (!didEscape) {
      // First hit: start building the result lazily so that the common
      // "nothing to escape" case never touches aResult.
      didEscape = true;
      aResult.Truncate();
      aResult.SetCapacity(strLen);
    }
    if (j != i) {
      // The substring from 'i' up to 'j' that needs no escaping.
      aResult.Append(nsDependentSubstring(aStr, i, j - i));
    }
    char16_t buffer[ENCODE_MAX_LEN];
    const uint32_t escapedLen = ::AppendPercentHex(buffer, aStr[j]);
    MOZ_ASSERT(escapedLen <= ENCODE_MAX_LEN, "buffer overflow");
    aResult.Append(buffer, escapedLen);
    i = j + 1;
  }

  if (MOZ_UNLIKELY(didEscape)) {
    if (i < strLen) {
      // The tail of the string that needs no escaping.
      aResult.Append(nsDependentSubstring(aStr, i, strLen - i));
    }
    return aResult;
  }
  return aStr;
}
#define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
bool

View File

@ -166,6 +166,19 @@ NS_UnescapeURL(const nsCSubstring& aStr, uint32_t aFlags, nsCSubstring& aResult)
const nsSubstring&
NS_EscapeURL(const nsSubstring& aStr, uint32_t aFlags, nsSubstring& aResult);
/**
* Percent-escapes all characters in aStr that occur in aForbidden.
* @param aStr the input URL string
* @param aForbidden the characters that should be escaped if found in aStr
* @note that aForbidden MUST be sorted (low to high)
* @param aResult the result if some characters were escaped
* @return aResult if some characters were escaped, or aStr otherwise (aResult
* is unmodified in that case)
*/
const nsSubstring&
NS_EscapeURL(const nsAFlatString& aStr, const nsTArray<char16_t>& aForbidden,
nsSubstring& aResult);
/**
* CString version of nsEscape. Returns true on success, false
* on out of memory. To reverse this function, use NS_UnescapeURL.