Bug 1247835 (part 1) - Use binary search instead of a hash table in nsEffectiveTLDService. r=jduell.

This saves 128 KiB of memory per process. It requires putting the eTLD entries
in sorted order.
This commit is contained in:
Nicholas Nethercote 2016-02-25 13:31:01 +11:00
parent 9ccb6747f8
commit bc81aab021
3 changed files with 74 additions and 88 deletions

View File

@ -30,13 +30,13 @@ NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
const ETLDEntry nsDomainEntry::entries[] = {
const ETLDEntry ETLDEntry::entries[] = {
#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
#include "etld_data.inc"
#undef ETLD_ENTRY
};
const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
const union ETLDEntry::etld_strings ETLDEntry::strings = {
{
#define ETLD_ENTRY(name, ex, wild) name,
#include "etld_data.inc"
@ -44,10 +44,21 @@ const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
}
};
// Looks up aDomain in the static, sorted ETLDEntry::entries table using
// BinarySearchIf with a Cmp comparator built from aDomain. Returns a pointer
// to the matching table entry, or nullptr when no entry compares equal.
/* static */ const ETLDEntry*
ETLDEntry::GetEntry(const char* aDomain)
{
  size_t matchIndex;
  const bool found = BinarySearchIf(entries, 0,
                                    ArrayLength(ETLDEntry::entries),
                                    Cmp(aDomain), &matchIndex);
  // matchIndex is only read when the search reports a match.
  return found ? &entries[matchIndex] : nullptr;
}
// Dummy function to statically ensure that our indices don't overflow
// the storage provided for them.
void
nsDomainEntry::FuncForStaticAsserts(void)
ETLDEntry::FuncForStaticAsserts(void)
{
#define ETLD_ENTRY(name, ex, wild) \
static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
@ -65,33 +76,33 @@ nsDomainEntry::FuncForStaticAsserts(void)
static nsEffectiveTLDService *gService = nullptr;
nsEffectiveTLDService::nsEffectiveTLDService()
: mHash(ArrayLength(nsDomainEntry::entries))
{
}
nsresult
nsEffectiveTLDService::Init()
{
const ETLDEntry *entries = nsDomainEntry::entries;
nsresult rv;
mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
if (NS_FAILED(rv)) return rv;
// Initialize eTLD hash from static array
for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) {
const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index);
#ifdef DEBUG
// Sanity-check the eTLD entries.
for (uint32_t i = 0; i < ArrayLength(ETLDEntry::entries); i++) {
const char* domain = ETLDEntry::entries[i].GetEffectiveTLDName();
nsDependentCString name(domain);
nsAutoCString normalizedName(domain);
NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
"normalization failure!");
NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
#endif
nsDomainEntry *entry = mHash.PutEntry(domain);
NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
entry->SetData(entries[i]);
MOZ_ASSERT(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
"normalization failure!");
MOZ_ASSERT(name.Equals(normalizedName), "domain not normalized!");
// Domains must be in sorted order for binary search to work.
if (i > 0) {
const char* domain0 = ETLDEntry::entries[i - 1].GetEffectiveTLDName();
MOZ_ASSERT(strcmp(domain0, domain) < 0, "domains not in sorted order!");
}
}
#endif
MOZ_ASSERT(!gService);
gService = this;
@ -108,6 +119,10 @@ nsEffectiveTLDService::~nsEffectiveTLDService()
MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
// The amount of heap memory measured here is tiny. It used to be bigger when
// nsEffectiveTLDService used a separate hash table instead of binary search.
// Nonetheless, we keep this code here in anticipation of bug 1083971 which will
// change ETLDEntry::entries to a heap-allocated array modifiable at runtime.
NS_IMETHODIMP
nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
nsISupports* aData, bool aAnonymize)
@ -122,7 +137,6 @@ size_t
nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
{
size_t n = aMallocSizeOf(this);
n += mHash.ShallowSizeOfExcludingThis(aMallocSizeOf);
// Measurement of the following members may be added later if DMD finds it is
// worthwhile:
@ -263,8 +277,8 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
if (*currDomain == '.')
return NS_ERROR_INVALID_ARG;
// perform the hash lookup.
nsDomainEntry *entry = mHash.GetEntry(currDomain);
// Perform the lookup.
const ETLDEntry* entry = ETLDEntry::GetEntry(currDomain);
if (entry) {
if (entry->IsWild() && prevDomain) {
// wildcard rules imply an eTLD one level inferior to the match.

View File

@ -9,85 +9,47 @@
#include "nsIEffectiveTLDService.h"
#include "nsIMemoryReporter.h"
#include "nsTHashtable.h"
#include "nsString.h"
#include "nsCOMPtr.h"
#include "mozilla/Attributes.h"
#include "mozilla/BinarySearch.h"
#include "mozilla/MemoryReporting.h"
class nsIIDNService;
#define ETLD_ENTRY_N_INDEX_BITS 30
// struct for static data generated from effective_tld_names.dat
// One effective-TLD rule: an offset into the packed string table
// (strtab_index) plus two one-bit flags (exception, wild).
struct ETLDEntry {
friend class nsEffectiveTLDService;
public:
// True when the wild bit is set or the exception bit is clear.
bool IsNormal() const { return wild || !exception; }
// True when the exception bit is set.
bool IsException() const { return exception; }
// True when the wild bit is set.
bool IsWild() const { return wild; }
// Returns a pointer to this entry's name: strtab_index bytes into
// strings.strtab.
const char* GetEffectiveTLDName() const
{
return strings.strtab + strtab_index;
}
// Finds the entry for aDomain in the static entries table (defined out of
// line).
static const ETLDEntry* GetEntry(const char* aDomain);
// Bit width of strtab_index; with the two one-bit flags below, the three
// bitfields total 32 bits.
static const size_t ETLD_ENTRY_N_INDEX_BITS = 30;
// These fields must be public to allow static construction.
// NOTE(review): the ETLD_ENTRY initialisers list values as
// { offset, ex, wild }, so the declaration order here presumably has to stay
// as is - confirm before reordering.
uint32_t strtab_index : ETLD_ENTRY_N_INDEX_BITS;
uint32_t exception : 1;
uint32_t wild : 1;
};
// hash entry class
class nsDomainEntry : public PLDHashEntryHdr
{
friend class nsEffectiveTLDService;
public:
// Hash methods
typedef const char* KeyType;
typedef const char* KeyTypePointer;
explicit nsDomainEntry(KeyTypePointer aEntry)
{
}
nsDomainEntry(const nsDomainEntry& toCopy)
{
// if we end up here, things will break. nsTHashtable shouldn't
// allow this, since we set ALLOW_MEMMOVE to true.
NS_NOTREACHED("nsDomainEntry copy constructor is forbidden!");
}
~nsDomainEntry()
{
}
KeyType GetKey() const
{
return GetEffectiveTLDName(mData.strtab_index);
}
bool KeyEquals(KeyTypePointer aKey) const
{
return !strcmp(GetKey(), aKey);
}
static KeyTypePointer KeyToPointer(KeyType aKey)
{
return aKey;
}
static PLDHashNumber HashKey(KeyTypePointer aKey)
{
// PLDHashTable::HashStringKey doesn't use the table parameter, so we can
// safely pass nullptr
return PLDHashTable::HashStringKey(nullptr, aKey);
}
enum { ALLOW_MEMMOVE = true };
void SetData(ETLDEntry entry) { mData = entry; }
bool IsNormal() { return mData.wild || !mData.exception; }
bool IsException() { return mData.exception; }
bool IsWild() { return mData.wild; }
static const char *GetEffectiveTLDName(size_t idx)
{
return strings.strtab + idx;
}
private:
ETLDEntry mData;
// Comparator functor for binary-searching the entries table: holds the name
// being looked up and returns the strcmp() ordering of that name against an
// entry's effective TLD name (negative, zero, or positive).
struct Cmp {
  // The name being searched for; not owned by this object.
  const char* mName;

  explicit Cmp(const char* aName) : mName(aName) {}

  int operator()(const ETLDEntry aEntry) const
  {
    const char* entryName = aEntry.GetEffectiveTLDName();
    return strcmp(mName, entryName);
  }
};
#define ETLD_STR_NUM_1(line) str##line
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
struct etld_string_list {
@ -95,11 +57,17 @@ private:
#include "etld_data.inc"
#undef ETLD_ENTRY
};
// This static string table is all the eTLD domain names packed together.
static const union etld_strings {
struct etld_string_list list;
char strtab[1];
} strings;
// This is the static entries table. Each entry has an index into the string
// table. The entries are in sorted order so that binary search can be used.
static const ETLDEntry entries[];
void FuncForStaticAsserts(void);
// Retire the string-numbering helper macros once the string list has been
// declared.
// NOTE(review): the helper defined above is spelled ETLD_STR_NUM_1, but the
// second #undef names ETLD_STR_NUM1 (no underscore before the 1), so it has
// no effect and ETLD_STR_NUM_1 stays defined after this point. Confirm
// whether any includer relies on that before correcting the spelling.
#undef ETLD_STR_NUM
#undef ETLD_STR_NUM1
@ -124,7 +92,6 @@ private:
nsresult NormalizeHostname(nsCString &aHostname);
~nsEffectiveTLDService();
nsTHashtable<nsDomainEntry> mHash;
nsCOMPtr<nsIIDNService> mIDNService;
};

View File

@ -18,9 +18,9 @@ http://wiki.mozilla.org/Gecko:Effective_TLD_Service
def getEffectiveTLDs(path):
file = codecs.open(path, "r", "UTF-8")
entries = []
domains = set()
while True:
line = file.readline()
for line in file:
# line always contains a line terminator unless the file is empty
if len(line) == 0:
raise StopIteration
@ -34,7 +34,12 @@ def getEffectiveTLDs(path):
assert domain not in domains, \
"repeating domain %s makes no sense" % domain
domains.add(domain)
yield entry
entries.append(entry)
# Sort the entries so we can use binary search on them.
entries.sort(key=EffectiveTLDEntry.domain)
return entries
def _normalizeHostname(domain):
"""