relanding 402008.

This commit is contained in:
dwitte@stanford.edu 2007-12-03 22:30:47 -08:00
parent 1a3396c0ad
commit c35938c835
5 changed files with 101 additions and 100 deletions

View File

@ -65,8 +65,6 @@ PRBool nsStandardURL::gInitialized = PR_FALSE;
PRBool nsStandardURL::gEscapeUTF8 = PR_TRUE;
PRBool nsStandardURL::gAlwaysEncodeInUTF8 = PR_TRUE;
PRBool nsStandardURL::gEncodeQueryInUTF8 = PR_TRUE;
PRBool nsStandardURL::gShowPunycode = PR_FALSE;
nsIPrefBranch *nsStandardURL::gIDNWhitelistPrefBranch = nsnull;
#if defined(PR_LOGGING)
//
@ -140,8 +138,6 @@ end:
#define NS_NET_PREF_ENABLEIDN "network.enableIDN"
#define NS_NET_PREF_ALWAYSENCODEINUTF8 "network.standard-url.encode-utf8"
#define NS_NET_PREF_ENCODEQUERYINUTF8 "network.standard-url.encode-query-utf8"
#define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"
#define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."
NS_IMPL_ISUPPORTS1(nsStandardURL::nsPrefObserver, nsIObserver)
@ -315,17 +311,8 @@ nsStandardURL::InitGlobalObjects()
prefBranch->AddObserver(NS_NET_PREF_ALWAYSENCODEINUTF8, obs.get(), PR_FALSE);
prefBranch->AddObserver(NS_NET_PREF_ENCODEQUERYINUTF8, obs.get(), PR_FALSE);
prefBranch->AddObserver(NS_NET_PREF_ENABLEIDN, obs.get(), PR_FALSE);
prefBranch->AddObserver(NS_NET_PREF_SHOWPUNYCODE, obs.get(), PR_FALSE);
PrefsChanged(prefBranch, nsnull);
nsCOMPtr<nsIPrefService> prefs = do_QueryInterface(prefBranch);
if (prefs) {
nsCOMPtr<nsIPrefBranch> branch;
if (NS_SUCCEEDED(prefs->GetBranch( NS_NET_PREF_IDNWHITELIST,
getter_AddRefs(branch) )))
NS_ADDREF(gIDNWhitelistPrefBranch = branch);
}
}
}
@ -334,7 +321,6 @@ nsStandardURL::ShutdownGlobalObjects()
{
NS_IF_RELEASE(gIDN);
NS_IF_RELEASE(gCharsetMgr);
NS_IF_RELEASE(gIDNWhitelistPrefBranch);
}
//----------------------------------------------------------------------------
@ -395,7 +381,7 @@ nsStandardURL::NormalizeIDN(const nsCSubstring &host, nsCString &result)
// If host is ACE, then convert to UTF-8. Else, if host is already UTF-8,
// then make sure it is normalized per IDN.
// this function returns PR_TRUE iff it writes something to |result|.
// this function returns PR_TRUE if normalization succeeds.
// NOTE: As a side-effect this function sets mHostEncoding. While it would
// be nice to avoid side-effects in this function, the implementation of
@ -405,23 +391,13 @@ nsStandardURL::NormalizeIDN(const nsCSubstring &host, nsCString &result)
NS_ASSERTION(mHostEncoding == eEncoding_ASCII, "unexpected default encoding");
if (IsASCII(host)) {
PRBool isACE;
if (gIDN &&
NS_SUCCEEDED(gIDN->IsACE(host, &isACE)) && isACE &&
NS_SUCCEEDED(ACEtoDisplayIDN(host, result))) {
mHostEncoding = eEncoding_UTF8;
return PR_TRUE;
}
}
else {
mHostEncoding = eEncoding_UTF8;
if (gIDN && NS_SUCCEEDED(UTF8toDisplayIDN(host, result))) {
// normalization could result in an ASCII only hostname
if (IsASCII(result))
mHostEncoding = eEncoding_ASCII;
return PR_TRUE;
}
PRBool isASCII;
if (gIDN &&
NS_SUCCEEDED(gIDN->ConvertToDisplayIDN(host, &isASCII, result))) {
if (!isASCII)
mHostEncoding = eEncoding_UTF8;
return PR_TRUE;
}
result.Truncate();
@ -861,68 +837,10 @@ nsStandardURL::PrefsChanged(nsIPrefBranch *prefs, const char *pref)
gEncodeQueryInUTF8 = val;
LOG(("encode query in UTF-8 %s\n", gEncodeQueryInUTF8 ? "enabled" : "disabled"));
}
if (PREF_CHANGED(NS_NET_PREF_SHOWPUNYCODE)) {
if (GOT_PREF(NS_NET_PREF_SHOWPUNYCODE, val))
gShowPunycode = val;
LOG(("show punycode %s\n", gShowPunycode ? "enabled" : "disabled"));
}
#undef PREF_CHANGED
#undef GOT_PREF
}
/**
 * Produces the display form of an ACE-encoded host.
 *
 * The host is decoded to UTF-8 only when punycode display is disabled and
 * the host passes the whitelist check; otherwise it is copied through
 * unchanged.
 *
 * @param host    the ACE-encoded host name
 * @param result  receives the display form
 * @return NS_OK when the host is passed through, otherwise whatever
 *         gIDN->ConvertACEtoUTF8() returns
 */
/* static */ nsresult
nsStandardURL::ACEtoDisplayIDN(const nsCSubstring &host, nsCString &result)
{
    // Only consult the whitelist when the user has not forced punycode.
    if (!gShowPunycode && IsInWhitelist(host)) {
        return gIDN->ConvertACEtoUTF8(host, result);
    }

    // Leave the host in its ACE form.
    result = host;
    return NS_OK;
}
/**
 * Produces the display form of a UTF-8 host.
 *
 * The host is normalized first; the normalized form is kept only if the
 * normalizer already turned it into ACE or the host passes the whitelist
 * check. In every other case the host is converted to ACE.
 *
 * @param host    the UTF-8 host name
 * @param result  receives the display form
 * @return NS_OK when the normalized host is used directly, otherwise
 *         whatever gIDN->ConvertUTF8toACE() returns
 */
/* static */ nsresult
nsStandardURL::UTF8toDisplayIDN(const nsCSubstring &host, nsCString &result)
{
    // The hostname has to be normalized before it is tested against the
    // domain whitelist. See bug 315411.
    nsCAutoString normalized;
    if (gShowPunycode || NS_FAILED(gIDN->Normalize(host, normalized))) {
        // Punycode forced, or normalization failed: encode the raw host.
        return gIDN->ConvertUTF8toACE(host, result);
    }

    PRBool normalizedIsACE = PR_FALSE;
    gIDN->IsACE(normalized, &normalizedIsACE);

    // If the normalizer converted the host to ACE, the host may contain
    // unsafe characters, so the ACE form is kept as-is.
    // See bug 283016, bug 301694, and bug 309311.
    if (normalizedIsACE || IsInWhitelist(normalized)) {
        result = normalized;
        return NS_OK;
    }

    // Not whitelisted: fall back to the ACE encoding of the normalized host.
    return gIDN->ConvertUTF8toACE(normalized, result);
}
/**
 * Looks up the part of |host| after its last '.' as a boolean pref on the
 * IDN whitelist pref branch.
 *
 * @param host  the host name to test
 * @return the pref value when the lookup succeeds; PR_FALSE when there is
 *         no whitelist branch, the host contains no '.', or the pref
 *         cannot be read
 */
/* static */ PRBool
nsStandardURL::IsInWhitelist(const nsCSubstring &host)
{
    if (!gIDNWhitelistPrefBranch)
        return PR_FALSE;

    // XXX This code uses strings inefficiently.
    const PRInt32 lastDot = nsCAutoString(host).RFind(".");
    if (lastDot == kNotFound)
        return PR_FALSE;

    // Everything after the last dot is the pref name to query.
    nsCAutoString suffix(Substring(host, lastDot + 1));

    PRBool allowed;
    if (NS_SUCCEEDED(gIDNWhitelistPrefBranch->GetBoolPref(suffix.get(),
                                                          &allowed)))
        return allowed;

    return PR_FALSE;
}
//----------------------------------------------------------------------------
// nsStandardURL::nsISupports
//----------------------------------------------------------------------------

View File

@ -221,11 +221,6 @@ private:
static void PrefsChanged(nsIPrefBranch *prefs, const char *pref);
// IDN routines
static nsresult ACEtoDisplayIDN(const nsCSubstring &in, nsCString &out);
static nsresult UTF8toDisplayIDN(const nsCSubstring &in, nsCString &out);
static PRBool IsInWhitelist(const nsCSubstring &host);
// mSpec contains the normalized version of the URL spec (UTF-8 encoded).
nsCString mSpec;
PRInt32 mDefaultPort;
@ -276,8 +271,6 @@ private:
static PRBool gEscapeUTF8;
static PRBool gAlwaysEncodeInUTF8;
static PRBool gEncodeQueryInUTF8;
static PRBool gShowPunycode;
static nsIPrefBranch *gIDNWhitelistPrefBranch;
};
#define NS_THIS_STANDARDURL_IMPL_CID \

View File

@ -25,6 +25,7 @@
* gagan@netscape.com,
* nhotta@netscape.com,
* william.tan@i-dns.net
* dwitte@stanford.edu
*
*
* Alternatively, the contents of this file may be used under the terms of
@ -58,7 +59,7 @@
* http://search.ietf.org/internet-drafts/draft-ietf-idn-idna-06.txt
*/
[scriptable, uuid(7B67747E-A8C4-4832-80C7-39EBB0C11F94)]
[scriptable, uuid(a592a60e-3621-4f19-a318-2bf233cfad3e)]
interface nsIIDNService : nsISupports
{
/**
@ -86,4 +87,12 @@ interface nsIIDNService : nsISupports
* for callers that want early normalization.
*/
AUTF8String normalize(in AUTF8String input);
/**
* Normalizes and converts a host to UTF-8 if the host is in the IDN
* whitelist, otherwise converts it to ACE. This is useful for display
* purposes and to ensure an encoding consistent with nsIURI::GetHost().
* If the result is ASCII or ACE encoded, |isASCII| will be true.
*/
AUTF8String convertToDisplayIDN(in AUTF8String input, out boolean isASCII);
};

View File

@ -1,4 +1,4 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -57,6 +57,8 @@ static const PRUint32 kMaxDNSNodeLen = 63;
#define NS_NET_PREF_IDNTESTBED "network.IDN_testbed"
#define NS_NET_PREF_IDNPREFIX "network.IDN_prefix"
#define NS_NET_PREF_IDNBLACKLIST "network.IDN.blacklist_chars"
#define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"
#define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."
inline PRBool isOnlySafeChars(const nsAFlatString& in,
const nsAFlatString& blacklist)
@ -77,13 +79,19 @@ NS_IMPL_THREADSAFE_ISUPPORTS3(nsIDNService,
// Sets up the service's preference state: obtains the IDN whitelist pref
// branch, registers this object as an observer of the IDN prefs, and loads
// their current values. Always returns NS_OK, even if the pref service is
// unavailable.
nsresult nsIDNService::Init()
{
// NOTE(review): this declaration and the do_QueryInterface() one a few lines
// below both declare |prefInternal|; in this diff rendering one of them is
// presumably the pre-change line -- confirm against the actual file.
nsCOMPtr<nsIPrefBranch2> prefInternal(do_GetService(NS_PREFSERVICE_CONTRACTID));
// Open the pref branch rooted at NS_NET_PREF_IDNWHITELIST. The result of
// GetBranch() is not checked, so mIDNWhitelistPrefBranch may stay null.
nsCOMPtr<nsIPrefService> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
if (prefs)
prefs->GetBranch(NS_NET_PREF_IDNWHITELIST, getter_AddRefs(mIDNWhitelistPrefBranch));
nsCOMPtr<nsIPrefBranch2> prefInternal(do_QueryInterface(prefs));
if (prefInternal) {
// Observe every pref that prefsChanged() reads.
prefInternal->AddObserver(NS_NET_PREF_IDNTESTBED, this, PR_TRUE);
prefInternal->AddObserver(NS_NET_PREF_IDNPREFIX, this, PR_TRUE);
prefInternal->AddObserver(NS_NET_PREF_IDNBLACKLIST, this, PR_TRUE);
prefInternal->AddObserver(NS_NET_PREF_SHOWPUNYCODE, this, PR_TRUE);
// A null pref name makes prefsChanged() load the show-punycode pref
// unconditionally (see its |!pref| check).
prefsChanged(prefInternal, nsnull);
}
return NS_OK;
}
@ -122,6 +130,11 @@ void nsIDNService::prefsChanged(nsIPrefBranch *prefBranch, const PRUnichar *pref
else
mIDNBlacklist.Truncate();
}
if (!pref || NS_LITERAL_STRING(NS_NET_PREF_SHOWPUNYCODE).Equals(pref)) {
PRBool val;
if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_SHOWPUNYCODE, &val)))
mShowPunycode = val;
}
}
nsIDNService::nsIDNService()
@ -282,6 +295,53 @@ NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input, nsACString & out
return NS_OK;
}
/**
 * Converts a host name to the form used for display.
 *
 * ASCII input is lowercased; if it is ACE, punycode display is off and the
 * host passes isInWhitelist(), it is decoded to UTF-8. Non-ASCII input is
 * normalized and kept as UTF-8 only if it passes isInWhitelist(); otherwise
 * it is converted to ACE.
 *
 * @param input     the host name (ASCII/ACE or UTF-8)
 * @param _isASCII  set to PR_TRUE when |_retval| is ASCII (including ACE)
 * @param _retval   receives the display form
 * @return NS_OK on success, or the failure code from Normalize() or
 *         ConvertUTF8toACE()
 */
NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString & input, PRBool * _isASCII, nsACString & _retval)
{
  if (!IsASCII(input)) {
    // Host is UTF-8. When punycode is forced, try encoding it straight away;
    // a failed conversion falls through to the normalization path below.
    if (mShowPunycode && NS_SUCCEEDED(ConvertUTF8toACE(input, _retval))) {
      *_isASCII = PR_TRUE;
      return NS_OK;
    }

    // The hostname has to be normalized before it is tested against the
    // domain whitelist. See bug 315411.
    nsresult rv = Normalize(input, _retval);
    if (NS_FAILED(rv))
      return rv;

    // Normalization could result in an ASCII-only hostname. Alternatively, if
    // the normalizer converted the host to ACE, the host may contain unsafe
    // characters, so it is left ACE encoded.
    // See bug 283016, bug 301694, and bug 309311.
    *_isASCII = IsASCII(_retval);
    if (*_isASCII || isInWhitelist(_retval))
      return NS_OK;

    // Non-ASCII and not whitelisted: hand back the ACE form instead.
    // NOTE(review): input and output alias here -- presumably
    // ConvertUTF8toACE() copies its input before writing; confirm.
    *_isASCII = PR_TRUE;
    return ConvertUTF8toACE(_retval, _retval);
  }

  // Host is ASCII: canonicalize to lowercase first, for whitelist lookup.
  _retval = input;
  ToLowerCase(_retval);

  PRBool isACE;
  IsACE(_retval, &isACE);

  const PRBool decode = isACE && !mShowPunycode && isInWhitelist(_retval);
  if (!decode) {
    *_isASCII = PR_TRUE;
    return NS_OK;
  }

  // ConvertACEtoUTF8() can't fail, but might return the original ACE string,
  // so the ASCII-ness of the result is re-checked afterwards.
  nsCAutoString aceHost(_retval);
  ConvertACEtoUTF8(aceHost, _retval);
  *_isASCII = IsASCII(_retval);
  return NS_OK;
}
//-----------------------------------------------------------------------------
static void utf16ToUcs4(const nsAString& in, PRUint32 *out, PRUint32 outBufLen, PRUint32 *outLen)
@ -570,3 +630,21 @@ nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out)
return NS_OK;
}
/**
 * Looks up the last dot-separated label of |host| as a boolean pref on the
 * IDN whitelist pref branch.
 *
 * @param host  the host name to test
 * @return the pref value when the lookup succeeds; PR_FALSE when there is
 *         no whitelist branch, the trimmed host contains no '.', or the
 *         pref cannot be read
 */
PRBool nsIDNService::isInWhitelist(const nsACString &host)
{
  if (!mIDNWhitelistPrefBranch)
    return PR_FALSE;

  // Strip '.' characters with Trim() so that a trailing dot does not hide
  // the last label. NOTE(review): Trim() is called with its default
  // arguments, which presumably strip leading dots as well -- confirm.
  nsCAutoString trimmedHost(host);
  trimmedHost.Trim(".");

  const PRInt32 lastDot = trimmedHost.RFind(".");
  if (lastDot == kNotFound)
    return PR_FALSE;

  // Point at the label following the last dot; no extra copy is needed.
  const char *label = trimmedHost.get() + lastDot + 1;

  PRBool allowed;
  if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(label, &allowed)))
    return allowed;

  return PR_FALSE;
}

View File

@ -75,6 +75,7 @@ private:
nsresult encodeToACE(const nsAString& in, nsACString& out);
nsresult stringPrep(const nsAString& in, nsAString& out);
nsresult decodeACE(const nsACString& in, nsACString& out);
PRBool isInWhitelist(const nsACString &host);
void prefsChanged(nsIPrefBranch *prefBranch, const PRUnichar *pref);
PRBool mMultilingualTestBed; // if true generates extra node for mulitlingual testbed
@ -82,6 +83,8 @@ private:
nsCOMPtr<nsIUnicodeNormalizer> mNormalizer;
char mACEPrefix[kACEPrefixLen+1];
nsXPIDLString mIDNBlacklist;
PRBool mShowPunycode;
nsCOMPtr<nsIPrefBranch> mIDNWhitelistPrefBranch;
};
#endif // nsIDNService_h__