Bug 965587 - Add lock to Firefox Health Report uploading; r=rnewman

Previously, it was technically possible for the FHR client to have
multiple simultaneous uploads. While this should never occur in
well-behaving systems, server logs have indicated that this behavior
might be occurring.

This patch adds a lock around uploading to ensure only 1 upload
operation may be active at a given time.

To measure the impact of this change in the wild, we added a counter
that increments whenever a held lock is encountered.

--HG--
extra : rebase_source : f0bb5065a3618cd335b6b6f31e4e68850f31f151
This commit is contained in:
Gregory Szorc 2014-01-29 15:28:57 -08:00
parent 66539bb295
commit b14e377e09
5 changed files with 151 additions and 39 deletions

View File

@ -148,6 +148,7 @@ this.DataSubmissionRequest.prototype = Object.freeze({
SUBMISSION_SUCCESS: "success",
SUBMISSION_FAILURE_SOFT: "failure-soft",
SUBMISSION_FAILURE_HARD: "failure-hard",
UPLOAD_IN_PROGRESS: "upload-in-progress",
/**
* No submission was attempted because no data was available.
@ -210,6 +211,16 @@ this.DataSubmissionRequest.prototype = Object.freeze({
this.promise.resolve(this);
return this.promise.promise;
},
/**
* The request was aborted because an upload was already in progress.
*/
onUploadInProgress: function (reason=null) {
this.state = this.UPLOAD_IN_PROGRESS;
this.reason = reason;
this.promise.resolve(this);
return this.promise.promise;
},
});
/**

View File

@ -996,6 +996,16 @@ org.mozilla.healthreport.submissions
This measurement contains a history of FHR's own data submission activity.
It was added in Firefox 23 in early May 2013.
Version 2
^^^^^^^^^
This is the same as version 1 except an additional field has been added.
uploadAlreadyInProgress
A request for upload was initiated while another upload was in progress.
This should not occur in well-behaving clients. It (along with a lock
preventing simultaneous upload) was added to ensure this never occurs.
Version 1
^^^^^^^^^

View File

@ -1144,6 +1144,7 @@ AbstractHealthReporter.prototype = Object.freeze({
*/
this.HealthReporter = function (branch, policy, sessionRecorder, stateLeaf=null) {
this._stateLeaf = stateLeaf;
this._uploadInProgress = false;
AbstractHealthReporter.call(this, branch, policy, sessionRecorder);
@ -1311,6 +1312,7 @@ this.HealthReporter.prototype = Object.freeze({
onSubmissionSuccess: function () {},
onSubmissionFailureSoft: function () {},
onSubmissionFailureHard: function () {},
onUploadInProgress: function () {},
};
this._uploadData(request);
@ -1374,6 +1376,18 @@ this.HealthReporter.prototype = Object.freeze({
},
_uploadData: function (request) {
// Under ideal circumstances, clients should never race to this
// function. However, server logs have observed behavior where
// racing to this function could be a cause. So, this lock was
// instituted.
if (this._uploadInProgress) {
this._log.warn("Upload requested but upload already in progress.");
let provider = this.getProvider("org.mozilla.healthreport");
let promise = provider.recordEvent("uploadAlreadyInProgress");
request.onUploadInProgress("Upload already in progress.");
return promise;
}
let id = CommonUtils.generateUUID();
this._log.info("Uploading data to server: " + this.serverURI + " " +
@ -1382,40 +1396,48 @@ this.HealthReporter.prototype = Object.freeze({
let now = this._now();
return Task.spawn(function doUpload() {
let payload = yield this.getJSONPayload();
let histogram = Services.telemetry.getHistogramById(TELEMETRY_PAYLOAD_SIZE_UNCOMPRESSED);
histogram.add(payload.length);
let lastID = this.lastSubmitID;
yield this._state.addRemoteID(id);
let hrProvider = this.getProvider("org.mozilla.healthreport");
if (hrProvider) {
let event = lastID ? "continuationUploadAttempt"
: "firstDocumentUploadAttempt";
hrProvider.recordEvent(event, now);
}
TelemetryStopwatch.start(TELEMETRY_UPLOAD, this);
let result;
try {
let options = {
deleteIDs: this._state.remoteIDs.filter((x) => { return x != id; }),
telemetryCompressed: TELEMETRY_PAYLOAD_SIZE_COMPRESSED,
};
result = yield client.uploadJSON(this.serverNamespace, id, payload,
options);
TelemetryStopwatch.finish(TELEMETRY_UPLOAD, this);
} catch (ex) {
TelemetryStopwatch.cancel(TELEMETRY_UPLOAD, this);
if (hrProvider) {
hrProvider.recordEvent("uploadClientFailure", now);
}
throw ex;
}
// The test for upload locking monkeypatches getJSONPayload.
// If the next two lines change, be sure to verify the test is
// accurate!
this._uploadInProgress = true;
let payload = yield this.getJSONPayload();
yield this._onBagheeraResult(request, false, now, result);
let histogram = Services.telemetry.getHistogramById(TELEMETRY_PAYLOAD_SIZE_UNCOMPRESSED);
histogram.add(payload.length);
let lastID = this.lastSubmitID;
yield this._state.addRemoteID(id);
let hrProvider = this.getProvider("org.mozilla.healthreport");
if (hrProvider) {
let event = lastID ? "continuationUploadAttempt"
: "firstDocumentUploadAttempt";
hrProvider.recordEvent(event, now);
}
TelemetryStopwatch.start(TELEMETRY_UPLOAD, this);
let result;
try {
let options = {
deleteIDs: this._state.remoteIDs.filter((x) => { return x != id; }),
telemetryCompressed: TELEMETRY_PAYLOAD_SIZE_COMPRESSED,
};
result = yield client.uploadJSON(this.serverNamespace, id, payload,
options);
TelemetryStopwatch.finish(TELEMETRY_UPLOAD, this);
} catch (ex) {
TelemetryStopwatch.cancel(TELEMETRY_UPLOAD, this);
if (hrProvider) {
hrProvider.recordEvent("uploadClientFailure", now);
}
throw ex;
}
yield this._onBagheeraResult(request, false, now, result);
} finally {
this._uploadInProgress = false;
}
}.bind(this));
},

View File

@ -1367,6 +1367,27 @@ HealthReportSubmissionMeasurement1.prototype = Object.freeze({
},
});
function HealthReportSubmissionMeasurement2() {
Metrics.Measurement.call(this);
}
HealthReportSubmissionMeasurement2.prototype = Object.freeze({
__proto__: Metrics.Measurement.prototype,
name: "submissions",
version: 2,
fields: {
firstDocumentUploadAttempt: DAILY_COUNTER_FIELD,
continuationUploadAttempt: DAILY_COUNTER_FIELD,
uploadSuccess: DAILY_COUNTER_FIELD,
uploadTransportFailure: DAILY_COUNTER_FIELD,
uploadServerFailure: DAILY_COUNTER_FIELD,
uploadClientFailure: DAILY_COUNTER_FIELD,
uploadAlreadyInProgress: DAILY_COUNTER_FIELD,
},
});
this.HealthReportProvider = function () {
Metrics.Provider.call(this);
}
@ -1376,10 +1397,13 @@ HealthReportProvider.prototype = Object.freeze({
name: "org.mozilla.healthreport",
measurementTypes: [HealthReportSubmissionMeasurement1],
measurementTypes: [
HealthReportSubmissionMeasurement1,
HealthReportSubmissionMeasurement2,
],
recordEvent: function (event, date=new Date()) {
let m = this.getMeasurement("submissions", 1);
let m = this.getMeasurement("submissions", 2);
return this.enqueueStorageOperation(function recordCounter() {
return m.incrementDailyCounter(event, date);
});

View File

@ -75,7 +75,7 @@ function getHealthReportProviderValues(reporter, day=null) {
return Task.spawn(function getValues() {
let p = reporter.getProvider("org.mozilla.healthreport");
do_check_neq(p, null);
let m = p.getMeasurement("submissions", 1);
let m = p.getMeasurement("submissions", 2);
do_check_neq(m, null);
let data = yield reporter._storage.getMeasurementValues(m.id);
@ -86,6 +86,7 @@ function getHealthReportProviderValues(reporter, day=null) {
do_check_true(data.days.hasDay(day));
let serializer = m.serializer(m.SERIALIZE_JSON)
let json = serializer.daily(data.days.getDay(day));
do_check_eq(json._v, 2);
throw new Task.Result(json);
});
@ -565,7 +566,6 @@ add_task(function test_data_submission_transport_failure() {
do_check_eq(request.state, request.SUBMISSION_FAILURE_SOFT);
let data = yield getHealthReportProviderValues(reporter, new Date());
do_check_eq(data._v, 1);
do_check_eq(data.firstDocumentUploadAttempt, 1);
do_check_eq(data.uploadTransportFailure, 1);
do_check_eq(Object.keys(data).length, 3);
@ -592,7 +592,6 @@ add_task(function test_data_submission_server_failure() {
do_check_eq(request.state, request.SUBMISSION_FAILURE_HARD);
let data = yield getHealthReportProviderValues(reporter, now);
do_check_eq(data._v, 1);
do_check_eq(data.firstDocumentUploadAttempt, 1);
do_check_eq(data.uploadServerFailure, 1);
do_check_eq(Object.keys(data).length, 3);
@ -631,7 +630,6 @@ add_task(function test_data_submission_success() {
do_check_true("DummyConstantProvider.DummyMeasurement" in o.data.last);
let data = yield getHealthReportProviderValues(reporter, now);
do_check_eq(data._v, 1);
do_check_eq(data.continuationUploadAttempt, 1);
do_check_eq(data.uploadSuccess, 1);
do_check_eq(Object.keys(data).length, 3);
@ -682,7 +680,6 @@ add_task(function test_recurring_daily_pings() {
// now() on the health reporter instance wasn't munged. So, we should see
// both requests attributed to the same day.
let data = yield getHealthReportProviderValues(reporter, new Date());
do_check_eq(data._v, 1);
do_check_eq(data.firstDocumentUploadAttempt, 1);
do_check_eq(data.continuationUploadAttempt, 1);
do_check_eq(data.uploadSuccess, 2);
@ -720,6 +717,54 @@ add_task(function test_request_remote_data_deletion() {
}
});
add_task(function test_multiple_simultaneous_uploads() {
let [reporter, server] = yield getReporterAndServer("multiple_simultaneous_uploads");
try {
let d1 = Promise.defer();
let d2 = Promise.defer();
let t1 = new Date(Date.now() - 1000);
let t2 = new Date(t1.getTime() + 500);
let r1 = new DataSubmissionRequest(d1, t1);
let r2 = new DataSubmissionRequest(d2, t2);
let getPayloadDeferred = Promise.defer();
Object.defineProperty(reporter, "getJSONPayload", {
configurable: true,
value: () => {
getPayloadDeferred.resolve();
delete reporter["getJSONPayload"];
return reporter.getJSONPayload();
},
});
let p1 = reporter.requestDataUpload(r1);
yield getPayloadDeferred.promise;
do_check_true(reporter._uploadInProgress);
let p2 = reporter.requestDataUpload(r2);
yield p1;
yield p2;
do_check_eq(r1.state, r1.SUBMISSION_SUCCESS);
do_check_eq(r2.state, r2.UPLOAD_IN_PROGRESS);
// They should both be resolved already.
yield d1;
yield d2;
let data = yield getHealthReportProviderValues(reporter, t1);
do_check_eq(data.firstDocumentUploadAttempt, 1);
do_check_false("continuationUploadAttempt" in data);
do_check_eq(data.uploadSuccess, 1);
do_check_eq(data.uploadAlreadyInProgress, 1);
} finally {
reporter._shutdown();
yield shutdownServer(server);
}
});
add_task(function test_policy_accept_reject() {
let [reporter, server] = yield getReporterAndServer("policy_accept_reject");