Bug 947745 - Clean up the seer database when it gets too big. r=honzab

This commit is contained in:
Nicholas Hurley 2014-01-17 17:45:48 -08:00
parent 0819be2b4b
commit 2674a1f7ec
6 changed files with 421 additions and 0 deletions

View File

@ -68,6 +68,10 @@ pref("network.http.spdy.push-allowance", 32768);
pref("network.buffer.cache.count", 24); pref("network.buffer.cache.count", 24);
pref("network.buffer.cache.size", 16384); pref("network.buffer.cache.size", 16384);
// predictive actions
pref("network.seer.max-db-size", 2097152); // bytes
pref("network.seer.preserve", 50); // percentage of seer data to keep when cleaning up
/* session history */ /* session history */
pref("browser.sessionhistory.max_total_viewers", 1); pref("browser.sessionhistory.max_total_viewers", 1);
pref("browser.sessionhistory.max_entries", 50); pref("browser.sessionhistory.max_entries", 50);

View File

@ -102,6 +102,10 @@ pref("network.http.spdy.push-allowance", 32768);
pref("network.buffer.cache.count", 24); pref("network.buffer.cache.count", 24);
pref("network.buffer.cache.size", 16384); pref("network.buffer.cache.size", 16384);
// predictive actions
pref("network.seer.max-db-size", 2097152); // bytes
pref("network.seer.preserve", 50); // percentage of seer data to keep when cleaning up
/* history max results display */ /* history max results display */
pref("browser.display.history.maxresults", 100); pref("browser.display.history.maxresults", 100);

View File

@ -1273,6 +1273,8 @@ pref("network.seer.preconnect-min-confidence", 90);
pref("network.seer.preresolve-min-confidence", 60); pref("network.seer.preresolve-min-confidence", 60);
pref("network.seer.redirect-likely-confidence", 75); pref("network.seer.redirect-likely-confidence", 75);
pref("network.seer.max-queue-size", 50); pref("network.seer.max-queue-size", 50);
pref("network.seer.max-db-size", 157286400); // bytes
pref("network.seer.preserve", 80); // percentage of seer data to keep when cleaning up
// The following prefs pertain to the negotiate-auth extension (see bug 17578), // The following prefs pertain to the negotiate-auth extension (see bug 17578),

View File

@ -100,6 +100,11 @@ const int REDIRECT_LIKELY_DEFAULT = 75;
const char SEER_MAX_QUEUE_SIZE_PREF[] = "network.seer.max-queue-size"; const char SEER_MAX_QUEUE_SIZE_PREF[] = "network.seer.max-queue-size";
const uint32_t SEER_MAX_QUEUE_SIZE_DEFAULT = 50; const uint32_t SEER_MAX_QUEUE_SIZE_DEFAULT = 50;
const char SEER_MAX_DB_SIZE_PREF[] = "network.seer.max-db-size";
const int32_t SEER_MAX_DB_SIZE_DEFAULT_BYTES = 150 * 1024 * 1024;
const char SEER_PRESERVE_PERCENTAGE_PREF[] = "network.seer.preserve";
const int32_t SEER_PRESERVE_PERCENTAGE_DEFAULT = 80;
// All these time values are in usec // All these time values are in usec
const long long ONE_DAY = 86400LL * 1000000LL; const long long ONE_DAY = 86400LL * 1000000LL;
const long long ONE_WEEK = 7LL * ONE_DAY; const long long ONE_WEEK = 7LL * ONE_DAY;
@ -192,6 +197,10 @@ Seer::Seer()
,mStartupCount(0) ,mStartupCount(0)
,mQueueSize(0) ,mQueueSize(0)
,mQueueSizeLock("Seer.mQueueSizeLock") ,mQueueSizeLock("Seer.mQueueSizeLock")
,mCleanupScheduled(false)
,mMaxDBSize(SEER_MAX_DB_SIZE_DEFAULT_BYTES)
,mPreservePercentage(SEER_PRESERVE_PERCENTAGE_DEFAULT)
,mLastCleanupTime(0)
{ {
#if defined(PR_LOGGING) #if defined(PR_LOGGING)
gSeerLog = PR_NewLogModule("NetworkSeer"); gSeerLog = PR_NewLogModule("NetworkSeer");
@ -276,6 +285,12 @@ Seer::InstallObserver()
Preferences::AddIntVarCache(&mMaxQueueSize, SEER_MAX_QUEUE_SIZE_PREF, Preferences::AddIntVarCache(&mMaxQueueSize, SEER_MAX_QUEUE_SIZE_PREF,
SEER_MAX_QUEUE_SIZE_DEFAULT); SEER_MAX_QUEUE_SIZE_DEFAULT);
Preferences::AddIntVarCache(&mMaxDBSize, SEER_MAX_DB_SIZE_PREF,
SEER_MAX_DB_SIZE_DEFAULT_BYTES);
Preferences::AddIntVarCache(&mPreservePercentage,
SEER_PRESERVE_PERCENTAGE_PREF,
SEER_PRESERVE_PERCENTAGE_DEFAULT);
return rv; return rv;
} }
@ -320,6 +335,7 @@ class SeerNewTransactionEvent : public nsRunnable
{ {
gSeer->CommitTransaction(); gSeer->CommitTransaction();
gSeer->BeginTransaction(); gSeer->BeginTransaction();
gSeer->MaybeScheduleCleanup();
nsRefPtr<SeerCommitTimerInitEvent> event = new SeerCommitTimerInitEvent(); nsRefPtr<SeerCommitTimerInitEvent> event = new SeerCommitTimerInitEvent();
NS_DispatchToMainThread(event); NS_DispatchToMainThread(event);
return NS_OK; return NS_OK;
@ -574,6 +590,11 @@ Seer::EnsureInitStorage()
"ON moz_hosts (origin);")); "ON moz_hosts (origin);"));
NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_SUCCESS(rv, rv);
rv = mDB->ExecuteSimpleSQL(
NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS host_load_index "
"ON moz_hosts (last_load);"));
NS_ENSURE_SUCCESS(rv, rv);
// And this is the table that keeps track of the hosts for subresources of a // And this is the table that keeps track of the hosts for subresources of a
// pageload. // pageload.
rv = mDB->ExecuteSimpleSQL( rv = mDB->ExecuteSimpleSQL(
@ -675,6 +696,11 @@ Seer::EnsureInitStorage()
"ON moz_startup_pages (uri);")); "ON moz_startup_pages (uri);"));
NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_SUCCESS(rv, rv);
rv = mDB->ExecuteSimpleSQL(
NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS startup_page_hit_index "
"ON moz_startup_pages (last_hit);"));
NS_ENSURE_SUCCESS(rv, rv);
// This table is similar to moz_hosts above, but uses full URIs instead of // This table is similar to moz_hosts above, but uses full URIs instead of
// hosts so that we can get more specific predictions for URIs that people // hosts so that we can get more specific predictions for URIs that people
// visit often (such as their email or social network home pages). // visit often (such as their email or social network home pages).
@ -944,6 +970,8 @@ public:
Telemetry::AccumulateTimeDelta(Telemetry::SEER_PREDICT_WORK_TIME, Telemetry::AccumulateTimeDelta(Telemetry::SEER_PREDICT_WORK_TIME,
startTime); startTime);
gSeer->MaybeScheduleCleanup();
return rv; return rv;
} }
@ -1725,6 +1753,8 @@ public:
Telemetry::AccumulateTimeDelta(Telemetry::SEER_LEARN_WORK_TIME, startTime); Telemetry::AccumulateTimeDelta(Telemetry::SEER_LEARN_WORK_TIME, startTime);
gSeer->MaybeScheduleCleanup();
return rv; return rv;
} }
private: private:
@ -2234,6 +2264,347 @@ Seer::Reset()
return mIOThread->Dispatch(event, NS_DISPATCH_NORMAL); return mIOThread->Dispatch(event, NS_DISPATCH_NORMAL);
} }
// Runnable that Seer::MaybeScheduleCleanup() dispatches to the seer's IO
// thread. Running it performs one cleanup pass over the database and then
// clears the "cleanup scheduled" flag so another pass can be scheduled later.
class SeerCleanupEvent : public nsRunnable
{
public:
  NS_IMETHOD Run() MOZ_OVERRIDE
  {
    // Do the actual work first ...
    gSeer->Cleanup();

    // ... and only then allow MaybeScheduleCleanup() to queue a new event.
    gSeer->mCleanupScheduled = false;

    return NS_OK;
  }
};
// Returns the current size (in bytes) of the db file on disk, computed as
// PRAGMA page_count * PRAGMA page_size. Returns 0 if either value cannot be
// read.
//
// The pending transaction is committed before the PRAGMAs are queried, and
// BeginTransaction() is always called again before returning -- including on
// every failure path -- so the Commit/Begin pair stays balanced.
int64_t
Seer::GetDBFileSize()
{
  MOZ_ASSERT(!NS_IsMainThread(), "GetDBFileSize called on main thread!");

  CommitTransaction();

  int64_t fileSize = 0;

  // Single-pass block: any failure breaks out with fileSize still 0 and falls
  // through to the BeginTransaction() below instead of returning directly.
  do {
    nsCOMPtr<mozIStorageStatement> countStmt = mStatements.GetCachedStatement(
        NS_LITERAL_CSTRING("PRAGMA page_count;"));
    if (!countStmt) {
      break;
    }
    mozStorageStatementScoper scopedCount(countStmt);

    bool hasRows;
    nsresult rv = countStmt->ExecuteStep(&hasRows);
    if (NS_FAILED(rv) || !hasRows) {
      break;
    }

    int64_t pageCount;
    rv = countStmt->GetInt64(0, &pageCount);
    if (NS_FAILED(rv)) {
      break;
    }

    nsCOMPtr<mozIStorageStatement> sizeStmt = mStatements.GetCachedStatement(
        NS_LITERAL_CSTRING("PRAGMA page_size;"));
    if (!sizeStmt) {
      break;
    }
    mozStorageStatementScoper scopedSize(sizeStmt);

    rv = sizeStmt->ExecuteStep(&hasRows);
    if (NS_FAILED(rv) || !hasRows) {
      break;
    }

    int64_t pageSize;
    rv = sizeStmt->GetInt64(0, &pageSize);
    if (NS_FAILED(rv)) {
      break;
    }

    fileSize = pageCount * pageSize;
  } while (false);

  BeginTransaction();

  return fileSize;
}
// Returns the size (in bytes) that the db file will consume on disk AFTER we
// vacuum the db, computed as
// (PRAGMA page_count - PRAGMA freelist_count) * PRAGMA page_size.
// Returns 0 if any of the three values cannot be read.
//
// The pending transaction is committed before the PRAGMAs are queried, and
// BeginTransaction() is always called again before returning -- including on
// every failure path -- so the Commit/Begin pair stays balanced.
int64_t
Seer::GetDBFileSizeAfterVacuum()
{
  MOZ_ASSERT(!NS_IsMainThread(), "GetDBFileSizeAfterVacuum called on main thread!");

  CommitTransaction();

  int64_t fileSize = 0;

  // Single-pass block: any failure breaks out with fileSize still 0 and falls
  // through to the BeginTransaction() below instead of returning directly.
  do {
    nsCOMPtr<mozIStorageStatement> countStmt = mStatements.GetCachedStatement(
        NS_LITERAL_CSTRING("PRAGMA page_count;"));
    if (!countStmt) {
      break;
    }
    mozStorageStatementScoper scopedCount(countStmt);

    bool hasRows;
    nsresult rv = countStmt->ExecuteStep(&hasRows);
    if (NS_FAILED(rv) || !hasRows) {
      break;
    }

    int64_t pageCount;
    rv = countStmt->GetInt64(0, &pageCount);
    if (NS_FAILED(rv)) {
      break;
    }

    nsCOMPtr<mozIStorageStatement> sizeStmt = mStatements.GetCachedStatement(
        NS_LITERAL_CSTRING("PRAGMA page_size;"));
    if (!sizeStmt) {
      break;
    }
    mozStorageStatementScoper scopedSize(sizeStmt);

    rv = sizeStmt->ExecuteStep(&hasRows);
    if (NS_FAILED(rv) || !hasRows) {
      break;
    }

    int64_t pageSize;
    rv = sizeStmt->GetInt64(0, &pageSize);
    if (NS_FAILED(rv)) {
      break;
    }

    nsCOMPtr<mozIStorageStatement> freeStmt = mStatements.GetCachedStatement(
        NS_LITERAL_CSTRING("PRAGMA freelist_count;"));
    if (!freeStmt) {
      break;
    }
    mozStorageStatementScoper scopedFree(freeStmt);

    rv = freeStmt->ExecuteStep(&hasRows);
    if (NS_FAILED(rv) || !hasRows) {
      break;
    }

    int64_t freelistCount;
    rv = freeStmt->GetInt64(0, &freelistCount);
    if (NS_FAILED(rv)) {
      break;
    }

    // Pages on the freelist are what a VACUUM would give back.
    fileSize = (pageCount - freelistCount) * pageSize;
  } while (false);

  BeginTransaction();

  return fileSize;
}
void
Seer::MaybeScheduleCleanup()
{
MOZ_ASSERT(!NS_IsMainThread(), "MaybeScheduleCleanup called on main thread!");
if (mCleanupScheduled) {
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, false);
return;
}
int64_t dbFileSize = GetDBFileSize();
if (dbFileSize < mMaxDBSize) {
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, false);
return;
}
mCleanupScheduled = true;
nsRefPtr<SeerCleanupEvent> event = new SeerCleanupEvent();
nsresult rv = mIOThread->Dispatch(event, NS_DISPATCH_NORMAL);
if (NS_FAILED(rv)) {
mCleanupScheduled = false;
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, false);
} else {
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, true);
}
}
// Age threshold used by Seer::CleanupOrigins(): moz_hosts rows whose last_load
// is at or before (now - CLEANUP_CUTOFF) are deleted during cleanup. Android
// builds use ONE_WEEK; all other builds use ONE_MONTH.
#ifndef ANDROID
static const long long CLEANUP_CUTOFF = ONE_MONTH;
#else
static const long long CLEANUP_CUTOFF = ONE_WEEK;
#endif
// Deletes every row of moz_hosts whose last_load is at or before
// (now - CLEANUP_CUTOFF).
//
// This is best-effort: nothing is reported to the caller if the statement
// cannot be created, bound or executed.
void
Seer::CleanupOrigins(PRTime now)
{
  PRTime cutoff = now - CLEANUP_CUTOFF;

  nsCOMPtr<mozIStorageStatement> deleteOrigins = mStatements.GetCachedStatement(
      NS_LITERAL_CSTRING("DELETE FROM moz_hosts WHERE last_load <= :cutoff"));
  if (!deleteOrigins) {
    return;
  }
  mozStorageStatementScoper scopedOrigins(deleteOrigins);

  // PRTime is a 64-bit value, so the cutoff has to be bound as an int64.
  // Binding it as an int32 silently truncates the timestamp and makes the
  // comparison against last_load meaningless.
  nsresult rv = deleteOrigins->BindInt64ByName(NS_LITERAL_CSTRING("cutoff"),
                                               cutoff);
  RETURN_IF_FAILED(rv);

  deleteOrigins->Execute();
}
// Deletes every row of moz_startup_pages whose last_hit is at or before
// (now - ONE_WEEK).
//
// This is best-effort: nothing is reported to the caller if the statement
// cannot be created, bound or executed.
void
Seer::CleanupStartupPages(PRTime now)
{
  PRTime cutoff = now - ONE_WEEK;

  nsCOMPtr<mozIStorageStatement> deletePages = mStatements.GetCachedStatement(
      NS_LITERAL_CSTRING("DELETE FROM moz_startup_pages WHERE "
                         "last_hit <= :cutoff"));
  if (!deletePages) {
    return;
  }
  mozStorageStatementScoper scopedPages(deletePages);

  // PRTime is a 64-bit value, so the cutoff has to be bound as an int64.
  // Binding it as an int32 silently truncates the timestamp and makes the
  // comparison against last_hit meaningless.
  nsresult rv = deletePages->BindInt64ByName(NS_LITERAL_CSTRING("cutoff"),
                                             cutoff);
  RETURN_IF_FAILED(rv);

  deletePages->Execute();
}
// Returns the number of rows in moz_subresources (SELECT COUNT(id)), or 0 if
// the statement cannot be created or does not yield a row.
int32_t
Seer::GetSubresourceCount()
{
  nsCOMPtr<mozIStorageStatement> countStmt = mStatements.GetCachedStatement(
      NS_LITERAL_CSTRING("SELECT COUNT(id) FROM moz_subresources"));

  if (countStmt) {
    mozStorageStatementScoper scoper(countStmt);

    bool gotRow;
    nsresult rv = countStmt->ExecuteStep(&gotRow);
    if (!NS_FAILED(rv) && gotRow) {
      // The result of GetInt32 is intentionally not checked; the value stays
      // at its initial 0 unless the call stores something else.
      int32_t total = 0;
      countStmt->GetInt32(0, &total);
      return total;
    }
  }

  return 0;
}
void
Seer::Cleanup()
{
MOZ_ASSERT(!NS_IsMainThread(), "Seer::Cleanup called on main thread!");
nsresult rv = EnsureInitStorage();
if (NS_FAILED(rv)) {
return;
}
int64_t dbFileSize = GetDBFileSize();
float preservePercentage = static_cast<float>(mPreservePercentage) / 100.0;
int64_t evictionCutoff = static_cast<int64_t>(mMaxDBSize) * preservePercentage;
if (dbFileSize < evictionCutoff) {
return;
}
CommitTransaction();
BeginTransaction();
PRTime now = PR_Now();
if (mLastCleanupTime) {
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_DELTA,
(now - mLastCleanupTime) / 1000);
}
mLastCleanupTime = now;
CleanupOrigins(now);
CleanupStartupPages(now);
dbFileSize = GetDBFileSizeAfterVacuum();
if (dbFileSize < evictionCutoff) {
// We've deleted enough stuff, time to free up the disk space and be on
// our way.
VacuumDatabase();
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, true);
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_TIME,
(PR_Now() - mLastCleanupTime) / 1000);
return;
}
bool canDelete = true;
while (canDelete && (dbFileSize >= evictionCutoff)) {
int32_t subresourceCount = GetSubresourceCount();
if (!subresourceCount) {
canDelete = false;
break;
}
// DB size scales pretty much linearly with the number of rows in
// moz_subresources, so we can guess how many rows we need to delete pretty
// accurately.
float percentNeeded = static_cast<float>(dbFileSize - evictionCutoff) /
static_cast<float>(dbFileSize);
int32_t subresourcesToDelete = static_cast<int32_t>(percentNeeded * subresourceCount);
if (!subresourcesToDelete) {
// We're getting pretty close to nothing here, anyway, so we may as well
// just trash it all. This delete cascades to moz_subresources, as well.
rv = mDB->ExecuteSimpleSQL(NS_LITERAL_CSTRING("DELETE FROM moz_pages;"));
if (NS_FAILED(rv)) {
canDelete = false;
break;
}
} else {
nsCOMPtr<mozIStorageStatement> deleteStatement = mStatements.GetCachedStatement(
NS_LITERAL_CSTRING("DELETE FROM moz_subresources WHERE id IN "
"(SELECT id FROM moz_subresources ORDER BY "
"last_hit ASC LIMIT :limit);"));
if (!deleteStatement) {
canDelete = false;
break;
}
mozStorageStatementScoper scopedDelete(deleteStatement);
rv = deleteStatement->BindInt32ByName(NS_LITERAL_CSTRING("limit"),
subresourcesToDelete);
if (NS_FAILED(rv)) {
canDelete = false;
break;
}
rv = deleteStatement->Execute();
if (NS_FAILED(rv)) {
canDelete = false;
break;
}
// Now we clean up pages that no longer reference any subresources
rv = mDB->ExecuteSimpleSQL(
NS_LITERAL_CSTRING("DELETE FROM moz_pages WHERE id NOT IN "
"(SELECT DISTINCT(pid) FROM moz_subresources);"));
if (NS_FAILED(rv)) {
canDelete = false;
break;
}
}
if (canDelete) {
dbFileSize = GetDBFileSizeAfterVacuum();
}
}
if (!canDelete || (dbFileSize >= evictionCutoff)) {
// Last-ditch effort to free up space
ResetInternal();
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, false);
} else {
// We do this to actually free up the space on disk
VacuumDatabase();
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, true);
}
Telemetry::Accumulate(Telemetry::SEER_CLEANUP_TIME,
(PR_Now() - mLastCleanupTime) / 1000);
}
// Runs VACUUM on the seer database so that freed pages are actually returned
// to the filesystem.
//
// The pending transaction is committed first and a fresh one is begun
// afterwards; the three steps must stay in this order. The result of the
// VACUUM statement itself is ignored.
void
Seer::VacuumDatabase()
{
  MOZ_ASSERT(!NS_IsMainThread(), "VacuumDatabase called on main thread!");

  // Close out the current transaction before vacuuming ...
  CommitTransaction();

  mDB->ExecuteSimpleSQL(NS_LITERAL_CSTRING("VACUUM;"));

  // ... and open a new one afterwards.
  BeginTransaction();
}
#ifdef SEER_TESTS #ifdef SEER_TESTS
class SeerPrepareForDnsTestEvent : public nsRunnable class SeerPrepareForDnsTestEvent : public nsRunnable
{ {

View File

@ -64,6 +64,7 @@ private:
friend class SeerDBShutdownRunner; friend class SeerDBShutdownRunner;
friend class SeerCommitTimerInitEvent; friend class SeerCommitTimerInitEvent;
friend class SeerNewTransactionEvent; friend class SeerNewTransactionEvent;
friend class SeerCleanupEvent;
void CheckForAndDeleteOldDBFile(); void CheckForAndDeleteOldDBFile();
nsresult EnsureInitStorage(); nsresult EnsureInitStorage();
@ -165,6 +166,16 @@ private:
mDB->CommitTransaction(); mDB->CommitTransaction();
} }
int64_t GetDBFileSize();
int64_t GetDBFileSizeAfterVacuum();
void MaybeScheduleCleanup();
void Cleanup();
void CleanupOrigins(PRTime now);
void CleanupStartupPages(PRTime now);
int32_t GetSubresourceCount();
void VacuumDatabase();
// Observer-related stuff // Observer-related stuff
nsresult InstallObserver(); nsresult InstallObserver();
void RemoveObserver(); void RemoveObserver();
@ -220,6 +231,11 @@ private:
friend class SeerPrepareForDnsTestEvent; friend class SeerPrepareForDnsTestEvent;
void PrepareForDnsTestInternal(int64_t timestamp, const nsACString &uri); void PrepareForDnsTestInternal(int64_t timestamp, const nsACString &uri);
#endif #endif
bool mCleanupScheduled;
int32_t mMaxDBSize;
int32_t mPreservePercentage;
PRTime mLastCleanupTime;
}; };
} // ::mozilla::net } // ::mozilla::net

View File

@ -2231,6 +2231,30 @@
"n_buckets": 10, "n_buckets": 10,
"description": "How long it takes from the time Predict() is called to the time we figure out there's nothing to do" "description": "How long it takes from the time Predict() is called to the time we figure out there's nothing to do"
}, },
"SEER_CLEANUP_DELTA": {
"expires_in_version": "never",
"kind": "exponential",
"high": "60000",
"n_buckets": 50,
"description": "How long between seer db cleanups, in ms"
},
"SEER_CLEANUP_SUCCEEDED": {
"expires_in_version": "never",
"kind": "boolean",
"description": "Whether or not the seer cleanup succeeded"
},
"SEER_CLEANUP_TIME": {
"expires_in_version": "never",
"kind": "exponential",
"high": "5000",
"n_buckets": 10,
"description": "How long it takes to run the seer cleanup, in ms"
},
"SEER_CLEANUP_SCHEDULED": {
"expires_in_version": "never",
"kind": "boolean",
"description": "Whether or not we actually try the cleanup method when we think about it"
},
"FIND_PLUGINS": { "FIND_PLUGINS": {
"expires_in_version": "never", "expires_in_version": "never",
"kind": "exponential", "kind": "exponential",