Bug 1047472 - Make parseSubmissionURL return the string offset for each search term r=paolo

This commit is contained in:
Tim Taubert 2014-08-06 20:36:35 +02:00
parent 641e64fe3f
commit c5806ca208
3 changed files with 104 additions and 25 deletions

View File

@ -184,7 +184,7 @@ interface nsISearchEngine : nsISupports
AString getResultDomain([optional] in AString responseType);
};
[scriptable, uuid(856a31ff-b451-4101-b12e-ff399485ac8a)]
[scriptable, uuid(0dc93e51-a7bf-4a16-862d-4b3469ff6206)]
interface nsISearchParseSubmissionResult : nsISupports
{
/**
@ -199,6 +199,20 @@ interface nsISearchParseSubmissionResult : nsISupports
* terms were specified or the URL does not represent a search submission.
*/
readonly attribute AString terms;
/**
* The offset of the string |terms| in the URL passed in to
* nsIBrowserSearchService::parseSubmissionURL, or -1 if the URL does not
* represent a search submission.
*/
readonly attribute long termsOffset;
/**
* The length of the |terms| in the original encoding of the URL passed in to
* nsIBrowserSearchService::parseSubmissionURL. If the search term in the
* original URL is encoded then this will be bigger than |terms.length|.
*/
readonly attribute long termsLength;
};
[scriptable, uuid(9fc39136-f08b-46d3-b232-96f4b7b0e235)]

View File

@ -2842,9 +2842,11 @@ Submission.prototype = {
}
// nsISearchParseSubmissionResult
/**
 * Plain holder for the outcome of parsing a search submission URL. The
 * constructor only stores its arguments; they are exposed read-only through
 * the getters on ParseSubmissionResult.prototype.
 *
 * @param aEngine
 *        The engine the URL was matched to (null when there is no match).
 * @param aTerms
 *        The search terms as a string ("" when there is no match).
 * @param aTermsOffset
 *        Offset of the terms inside the URL string that was parsed.
 * @param aTermsLength
 *        Length of the terms as they appear in the URL string that was parsed.
 */
function ParseSubmissionResult(aEngine, aTerms) {
function ParseSubmissionResult(aEngine, aTerms, aTermsOffset, aTermsLength) {
this._engine = aEngine;
this._terms = aTerms;
this._termsOffset = aTermsOffset;
this._termsLength = aTermsLength;
}
ParseSubmissionResult.prototype = {
get engine() {
@ -2853,11 +2855,17 @@ ParseSubmissionResult.prototype = {
// Search terms supplied to the constructor; "" when no terms were found.
get terms() {
return this._terms;
},
// Offset of the search terms within the parsed URL string, as supplied to the
// constructor; -1 is used when the URL is not a search submission.
get termsOffset() {
return this._termsOffset;
},
// Length of the search terms as they appear in the parsed URL string (i.e.
// before decoding), as supplied to the constructor.
get termsLength() {
return this._termsLength;
},
QueryInterface: XPCOMUtils.generateQI([Ci.nsISearchParseSubmissionResult]),
}
// Shared, frozen "no match" result: no engine, empty terms, a terms offset of
// -1 and a terms length of 0. Frozen so the single instance can be handed out
// to every caller without risk of modification.
const gEmptyParseSubmissionResult =
Object.freeze(new ParseSubmissionResult(null, ""));
Object.freeze(new ParseSubmissionResult(null, "", -1, 0));
function executeSoon(func) {
Services.tm.mainThread.dispatch(func, Ci.nsIThread.DISPATCH_NORMAL);
@ -4356,6 +4364,23 @@ SearchService.prototype = {
return gEmptyParseSubmissionResult;
}
// Compute where the search terms sit in the caller's original string.
// |offset| starts just past the first "?" and |length| stays 0 unless the
// terms parameter is found by the loop below.
// NOTE(review): if aURL contained no "?", indexOf would return -1 and the scan
// would start at index 0 — presumably the earlier part of this method has
// already rejected such URLs; confirm.
let length = 0;
let offset = aURL.indexOf("?") + 1;
let query = aURL.slice(offset);
// Iterate a second time over the original input string to determine the
// correct search term offset and length in the original encoding.
// NOTE(review): |query| runs to the end of aURL, so a trailing "#fragment"
// would be counted as part of the last parameter's value — confirm whether
// URLs with fragments can reach this point.
// NOTE(review): parameter names are compared in their raw (undecoded) form.
for (let param of query.split("&")) {
let equalPos = param.indexOf("=");
if (equalPos != -1 &&
param.substr(0, equalPos) == mapEntry.termsParameterName) {
// This is the parameter we are looking for.
// Skip the name and the "=" so that |offset| points at the first character
// of the value; |length| is the size of the still-encoded value.
offset += equalPos + 1;
length = param.length - equalPos - 1;
break;
}
// Not the terms parameter: advance past it and the "&" that follows.
offset += param.length + 1;
}
// Decode the terms using the charset defined in the search engine.
let terms;
try {
@ -4370,7 +4395,7 @@ SearchService.prototype = {
}
LOG("Match found. Terms: " + terms);
return new ParseSubmissionResult(mapEntry.engine, terms);
return new ParseSubmissionResult(mapEntry.engine, terms, offset, length);
},
// nsIObserver

View File

@ -21,11 +21,13 @@ add_task(function* test_parseSubmissionURL() {
Services.search.removeEngine(engine);
}
let [engine1, engine2, engine3] = yield addTestEngines([
let [engine1, engine2, engine3, engine4] = yield addTestEngines([
{ name: "Test search engine", xmlFileName: "engine.xml" },
{ name: "Test search engine (fr)", xmlFileName: "engine-fr.xml" },
{ name: "bacon_addParam", details: ["", "bacon_addParam", "Search Bacon",
"GET", "http://www.bacon.test/find"] },
{ name: "idn_addParam", details: ["", "idn_addParam", "Search IDN",
"GET", "http://www.xn--bcher-kva.ch/search"] },
// The following engines cannot identify the search parameter.
{ name: "A second test engine", xmlFileName: "engine2.xml" },
{ name: "Sherlock test search engine", srcFileName: "engine.src",
@ -35,34 +37,67 @@ add_task(function* test_parseSubmissionURL() {
]);
engine3.addParam("q", "{searchTerms}", null);
engine4.addParam("q", "{searchTerms}", null);
// Test the first engine, whose URLs use UTF-8 encoding.
let result = Services.search.parseSubmissionURL(
"http://www.google.com/search?q=caff%C3%A8");
let url = "http://www.google.com/search?foo=bar&q=caff%C3%A8";
let result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine1);
do_check_eq(result.terms, "caff\u00E8");
do_check_true(url.slice(result.termsOffset).startsWith("caff%C3%A8"));
do_check_eq(result.termsLength, "caff%C3%A8".length);
// The second engine uses a locale-specific domain that is an alternate domain
// of the first one, but the second engine should get priority when matching.
// The URL used with this engine uses ISO-8859-1 encoding instead.
let result = Services.search.parseSubmissionURL(
"http://www.google.fr/search?q=caff%E8");
url = "http://www.google.fr/search?q=caff%E8";
result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine2);
do_check_eq(result.terms, "caff\u00E8");
do_check_true(url.slice(result.termsOffset).startsWith("caff%E8"));
do_check_eq(result.termsLength, "caff%E8".length);
// Test a domain that is an alternate domain of those defined. In this case,
// the first matching engine from the ordered list should be returned.
let result = Services.search.parseSubmissionURL(
"http://www.google.co.uk/search?q=caff%C3%A8");
url = "http://www.google.co.uk/search?q=caff%C3%A8";
result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine1);
do_check_eq(result.terms, "caff\u00E8");
do_check_true(url.slice(result.termsOffset).startsWith("caff%C3%A8"));
do_check_eq(result.termsLength, "caff%C3%A8".length);
// We support parsing URLs from a dynamically added engine. Those engines use
// windows-1252 encoding by default.
let result = Services.search.parseSubmissionURL(
"http://www.bacon.test/find?q=caff%E8");
url = "http://www.bacon.test/find?q=caff%E8";
result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine3);
do_check_eq(result.terms, "caff\u00E8");
do_check_true(url.slice(result.termsOffset).startsWith("caff%E8"));
do_check_eq(result.termsLength, "caff%E8".length);
// Test URLs with unescaped unicode characters.
url = "http://www.google.com/search?q=foo+b\u00E4r";
result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine1);
do_check_eq(result.terms, "foo b\u00E4r");
do_check_true(url.slice(result.termsOffset).startsWith("foo+b\u00E4r"));
do_check_eq(result.termsLength, "foo+b\u00E4r".length);
// Test search engines with unescaped IDNs.
url = "http://www.b\u00FCcher.ch/search?q=foo+bar";
result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine4);
do_check_eq(result.terms, "foo bar");
do_check_true(url.slice(result.termsOffset).startsWith("foo+bar"));
do_check_eq(result.termsLength, "foo+bar".length);
// Test search engines with escaped IDNs.
url = "http://www.xn--bcher-kva.ch/search?q=foo+bar";
result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine4);
do_check_eq(result.terms, "foo bar");
do_check_true(url.slice(result.termsOffset).startsWith("foo+bar"));
do_check_eq(result.termsLength, "foo+bar".length);
// Parsing of parameters from an engine template URL is not supported.
do_check_eq(Services.search.parseSubmissionURL(
@ -79,38 +114,43 @@ add_task(function* test_parseSubmissionURL() {
"http://getfirefox.com/?q=test").engine, null);
// HTTP and HTTPS schemes are interchangeable.
let result = Services.search.parseSubmissionURL(
"https://www.google.com/search?q=caff%C3%A8");
url = "https://www.google.com/search?q=caff%C3%A8";
result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine1);
do_check_eq(result.terms, "caff\u00E8");
do_check_true(url.slice(result.termsOffset).startsWith("caff%C3%A8"));
// Decoding search terms with multiple spaces should work.
let result = Services.search.parseSubmissionURL(
"http://www.google.com/search?q=+with++spaces+");
result = Services.search.parseSubmissionURL(
"http://www.google.com/search?q=+with++spaces+");
do_check_eq(result.engine, engine1);
do_check_eq(result.terms, " with spaces ");
// An empty query parameter should work the same.
let result = Services.search.parseSubmissionURL(
"http://www.google.com/search?q=");
url = "http://www.google.com/search?q=";
result = Services.search.parseSubmissionURL(url);
do_check_eq(result.engine, engine1);
do_check_eq(result.terms, "");
do_check_eq(result.termsOffset, url.length);
// There should be no match when the path is different.
let result = Services.search.parseSubmissionURL(
"http://www.google.com/search/?q=test");
result = Services.search.parseSubmissionURL(
"http://www.google.com/search/?q=test");
do_check_eq(result.engine, null);
do_check_eq(result.terms, "");
do_check_eq(result.termsOffset, -1);
// There should be no match when the argument is different.
let result = Services.search.parseSubmissionURL(
"http://www.google.com/search?q2=test");
result = Services.search.parseSubmissionURL(
"http://www.google.com/search?q2=test");
do_check_eq(result.engine, null);
do_check_eq(result.terms, "");
do_check_eq(result.termsOffset, -1);
// There should be no match for URIs that are not HTTP or HTTPS.
let result = Services.search.parseSubmissionURL(
"file://localhost/search?q=test");
result = Services.search.parseSubmissionURL(
"file://localhost/search?q=test");
do_check_eq(result.engine, null);
do_check_eq(result.terms, "");
do_check_eq(result.termsOffset, -1);
});