mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 1131911 - Extract page metadata extraction from Social.jsm to its own JSM. r=mixedpuppy
This commit is contained in:
parent
ba60837bc8
commit
fe28e7efde
@ -637,8 +637,8 @@ SocialShare = {
|
||||
// containing the open graph data.
|
||||
let _dataFn;
|
||||
if (!pageData || sharedURI == gBrowser.currentURI) {
|
||||
messageManager.addMessageListener("Social:PageDataResult", _dataFn = (msg) => {
|
||||
messageManager.removeMessageListener("Social:PageDataResult", _dataFn);
|
||||
messageManager.addMessageListener("PageMetadata:PageDataResult", _dataFn = (msg) => {
|
||||
messageManager.removeMessageListener("PageMetadata:PageDataResult", _dataFn);
|
||||
let pageData = msg.json;
|
||||
if (graphData) {
|
||||
// overwrite data retreived from page with data given to us as a param
|
||||
@ -648,17 +648,17 @@ SocialShare = {
|
||||
}
|
||||
this.sharePage(providerOrigin, pageData, target);
|
||||
});
|
||||
gBrowser.selectedBrowser.messageManager.sendAsyncMessage("Social:GetPageData");
|
||||
gBrowser.selectedBrowser.messageManager.sendAsyncMessage("PageMetadata:GetPageData");
|
||||
return;
|
||||
}
|
||||
// if this is a share of a selected item, get any microdata
|
||||
if (!pageData.microdata && target) {
|
||||
messageManager.addMessageListener("Social:PageDataResult", _dataFn = (msg) => {
|
||||
messageManager.removeMessageListener("Social:PageDataResult", _dataFn);
|
||||
messageManager.addMessageListener("PageMetadata:MicrodataResult", _dataFn = (msg) => {
|
||||
messageManager.removeMessageListener("PageMetadata:MicrodataResult", _dataFn);
|
||||
pageData.microdata = msg.data;
|
||||
this.sharePage(providerOrigin, pageData, target);
|
||||
});
|
||||
gBrowser.selectedBrowser.messageManager.sendAsyncMessage("Social:GetMicrodata", null, target);
|
||||
gBrowser.selectedBrowser.messageManager.sendAsyncMessage("PageMetadata:GetMicrodata", null, target);
|
||||
return;
|
||||
}
|
||||
this.currentShare = pageData;
|
||||
|
@ -32,6 +32,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "AboutReader",
|
||||
"resource://gre/modules/AboutReader.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "ReaderMode",
|
||||
"resource://gre/modules/ReaderMode.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "PageMetadata",
|
||||
"resource://gre/modules/PageMetadata.jsm");
|
||||
XPCOMUtils.defineLazyGetter(this, "SimpleServiceDiscovery", function() {
|
||||
let ssdp = Cu.import("resource://gre/modules/SimpleServiceDiscovery.jsm", {}).SimpleServiceDiscovery;
|
||||
// Register targets
|
||||
@ -1000,30 +1002,29 @@ addEventListener("pageshow", function(event) {
|
||||
}
|
||||
});
|
||||
|
||||
let SocialMessenger = {
|
||||
let PageMetadataMessenger = {
|
||||
init: function() {
|
||||
addMessageListener("Social:GetPageData", this);
|
||||
addMessageListener("Social:GetMicrodata", this);
|
||||
|
||||
XPCOMUtils.defineLazyGetter(this, "og", function() {
|
||||
let tmp = {};
|
||||
Cu.import("resource:///modules/Social.jsm", tmp);
|
||||
return tmp.OpenGraphBuilder;
|
||||
});
|
||||
addMessageListener("PageMetadata:GetPageData", this);
|
||||
addMessageListener("PageMetadata:GetMicrodata", this);
|
||||
},
|
||||
receiveMessage: function(aMessage) {
|
||||
switch(aMessage.name) {
|
||||
case "Social:GetPageData":
|
||||
sendAsyncMessage("Social:PageDataResult", this.og.getData(content.document));
|
||||
case "PageMetadata:GetPageData": {
|
||||
let result = PageMetadata.getData(content.document);
|
||||
sendAsyncMessage("PageMetadata:PageDataResult", result);
|
||||
break;
|
||||
case "Social:GetMicrodata":
|
||||
}
|
||||
|
||||
case "PageMetadata:GetMicrodata": {
|
||||
let target = aMessage.objects;
|
||||
sendAsyncMessage("Social:PageDataResult", this.og.getMicrodata(content.document, target));
|
||||
let result = PageMetadata.getMicrodata(content.document, target);
|
||||
sendAsyncMessage("PageMetadata:MicrodataResult", result);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
SocialMessenger.init();
|
||||
PageMetadataMessenger.init();
|
||||
|
||||
addEventListener("ActivateSocialFeature", function (aEvent) {
|
||||
let document = content.document;
|
||||
|
@ -150,21 +150,21 @@
|
||||
let URLTemplate = provider.markURL;
|
||||
let _dataFn;
|
||||
if (!pageData) {
|
||||
messageManager.addMessageListener("Social:PageDataResult", _dataFn = (msg) => {
|
||||
messageManager.removeMessageListener("Social:PageDataResult", _dataFn);
|
||||
messageManager.addMessageListener("PageMetadata:PageDataResult", _dataFn = (msg) => {
|
||||
messageManager.removeMessageListener("PageMetadata:PageDataResult", _dataFn);
|
||||
this.loadPanel(msg.json, target);
|
||||
});
|
||||
gBrowser.selectedBrowser.messageManager.sendAsyncMessage("Social:GetPageData");
|
||||
gBrowser.selectedBrowser.messageManager.sendAsyncMessage("PageMetadata:GetPageData");
|
||||
return;
|
||||
}
|
||||
// if this is a share of a selected item, get any microdata
|
||||
if (!pageData.microdata && target) {
|
||||
messageManager.addMessageListener("Social:PageDataResult", _dataFn = (msg) => {
|
||||
messageManager.removeMessageListener("Social:PageDataResult", _dataFn);
|
||||
messageManager.addMessageListener("PageMetadata:MicrodataResult", _dataFn = (msg) => {
|
||||
messageManager.removeMessageListener("PageMetadata:MicrodataResult", _dataFn);
|
||||
pageData.microdata = msg.data;
|
||||
this.loadPanel(pageData, target);
|
||||
});
|
||||
gBrowser.selectedBrowser.messageManager.sendAsyncMessage("Social:GetMicrodata", null, target);
|
||||
gBrowser.selectedBrowser.messageManager.sendAsyncMessage("PageMetadata:GetMicrodata", null, target);
|
||||
return;
|
||||
}
|
||||
this.pageData = pageData;
|
||||
|
@ -24,6 +24,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "CustomizableUI",
|
||||
"resource:///modules/CustomizableUI.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "SocialService",
|
||||
"resource://gre/modules/SocialService.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "PageMetadata",
|
||||
"resource://gre/modules/PageMetadata.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "PlacesUtils",
|
||||
"resource://gre/modules/PlacesUtils.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "PrivateBrowsingUtils",
|
||||
@ -31,9 +33,6 @@ XPCOMUtils.defineLazyModuleGetter(this, "PrivateBrowsingUtils",
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "Promise",
|
||||
"resource://gre/modules/Promise.jsm");
|
||||
|
||||
XPCOMUtils.defineLazyServiceGetter(this, "unescapeService",
|
||||
"@mozilla.org/feed-unescapehtml;1",
|
||||
"nsIScriptableUnescapeHTML");
|
||||
|
||||
function promiseSetAnnotation(aURI, providerList) {
|
||||
let deferred = Promise.defer();
|
||||
@ -528,180 +527,4 @@ this.OpenGraphBuilder = {
|
||||
endpointURL = endpointURL + "?" + str.join("&");
|
||||
return endpointURL;
|
||||
},
|
||||
|
||||
getData: function(aDocument, target) {
|
||||
let res = {
|
||||
url: this._validateURL(aDocument, aDocument.documentURI),
|
||||
title: aDocument.title,
|
||||
previews: []
|
||||
};
|
||||
this._getMetaData(aDocument, res);
|
||||
this._getLinkData(aDocument, res);
|
||||
this._getPageData(aDocument, res);
|
||||
res.microdata = this.getMicrodata(aDocument, target);
|
||||
return res;
|
||||
},
|
||||
|
||||
getMicrodata: function (aDocument, target) {
|
||||
return getMicrodata(aDocument, target);
|
||||
},
|
||||
|
||||
_getMetaData: function(aDocument, o) {
|
||||
// query for standardized meta data
|
||||
let els = aDocument.querySelectorAll("head > meta[property], head > meta[name]");
|
||||
if (els.length < 1)
|
||||
return;
|
||||
let url;
|
||||
for (let el of els) {
|
||||
let value = el.getAttribute("content")
|
||||
if (!value)
|
||||
continue;
|
||||
value = unescapeService.unescape(value.trim());
|
||||
let key = el.getAttribute("property") || el.getAttribute("name");
|
||||
if (!key)
|
||||
continue;
|
||||
// There are a wide array of possible meta tags, expressing articles,
|
||||
// products, etc. so all meta tags are passed through but we touch up the
|
||||
// most common attributes.
|
||||
o[key] = value;
|
||||
switch (key) {
|
||||
case "title":
|
||||
case "og:title":
|
||||
o.title = value;
|
||||
break;
|
||||
case "description":
|
||||
case "og:description":
|
||||
o.description = value;
|
||||
break;
|
||||
case "og:site_name":
|
||||
o.siteName = value;
|
||||
break;
|
||||
case "medium":
|
||||
case "og:type":
|
||||
o.medium = value;
|
||||
break;
|
||||
case "og:video":
|
||||
url = this._validateURL(aDocument, value);
|
||||
if (url)
|
||||
o.source = url;
|
||||
break;
|
||||
case "og:url":
|
||||
url = this._validateURL(aDocument, value);
|
||||
if (url)
|
||||
o.url = url;
|
||||
break;
|
||||
case "og:image":
|
||||
url = this._validateURL(aDocument, value);
|
||||
if (url)
|
||||
o.previews.push(url);
|
||||
break;
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
_getLinkData: function(aDocument, o) {
|
||||
let els = aDocument.querySelectorAll("head > link[rel], head > link[id]");
|
||||
for (let el of els) {
|
||||
let url = el.getAttribute("href");
|
||||
if (!url)
|
||||
continue;
|
||||
url = this._validateURL(aDocument, unescapeService.unescape(url.trim()));
|
||||
switch (el.getAttribute("rel") || el.getAttribute("id")) {
|
||||
case "shorturl":
|
||||
case "shortlink":
|
||||
o.shortUrl = url;
|
||||
break;
|
||||
case "canonicalurl":
|
||||
case "canonical":
|
||||
o.url = url;
|
||||
break;
|
||||
case "image_src":
|
||||
o.previews.push(url);
|
||||
break;
|
||||
case "alternate":
|
||||
// expressly for oembed support but we're liberal here and will let
|
||||
// other alternate links through. oembed defines an href, supplied by
|
||||
// the site, where you can fetch additional meta data about a page.
|
||||
// We'll let the client fetch the oembed data themselves, but they
|
||||
// need the data from this link.
|
||||
if (!o.alternate)
|
||||
o.alternate = [];
|
||||
o.alternate.push({
|
||||
"type": el.getAttribute("type"),
|
||||
"href": el.getAttribute("href"),
|
||||
"title": el.getAttribute("title")
|
||||
})
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
// scrape through the page for data we want
|
||||
_getPageData: function(aDocument, o) {
|
||||
if (o.previews.length < 1)
|
||||
o.previews = this._getImageUrls(aDocument);
|
||||
},
|
||||
|
||||
_validateURL: function(aDocument, url) {
|
||||
let docURI = Services.io.newURI(aDocument.documentURI, null, null);
|
||||
let uri = Services.io.newURI(docURI.resolve(url), null, null);
|
||||
if (["http", "https", "ftp", "ftps"].indexOf(uri.scheme) < 0)
|
||||
return null;
|
||||
uri.userPass = "";
|
||||
return uri.spec;
|
||||
},
|
||||
|
||||
_getImageUrls: function(aDocument) {
|
||||
let l = [];
|
||||
let els = aDocument.querySelectorAll("img");
|
||||
for (let el of els) {
|
||||
let src = el.getAttribute("src");
|
||||
if (src) {
|
||||
l.push(this._validateURL(aDocument, unescapeService.unescape(src)));
|
||||
// we don't want a billion images
|
||||
if (l.length > 5)
|
||||
break;
|
||||
}
|
||||
}
|
||||
return l;
|
||||
}
|
||||
};
|
||||
|
||||
// getMicrodata (and getObject) based on wg algorythm to convert microdata to json
|
||||
// http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata-2.html#json
|
||||
function getMicrodata(document, target) {
|
||||
|
||||
function _getObject(item) {
|
||||
let result = {};
|
||||
if (item.itemType.length)
|
||||
result.types = [i for (i of item.itemType)];
|
||||
if (item.itemId)
|
||||
result.itemId = item.itemid;
|
||||
if (item.properties.length)
|
||||
result.properties = {};
|
||||
for (let elem of item.properties) {
|
||||
let value;
|
||||
if (elem.itemScope)
|
||||
value = _getObject(elem);
|
||||
else if (elem.itemValue)
|
||||
value = elem.itemValue;
|
||||
// handle mis-formatted microdata
|
||||
else if (elem.hasAttribute("content"))
|
||||
value = elem.getAttribute("content");
|
||||
|
||||
for (let prop of elem.itemProp) {
|
||||
if (!result.properties[prop])
|
||||
result.properties[prop] = [];
|
||||
result.properties[prop].push(value);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
let result = { items: [] };
|
||||
let elms = target ? [target] : document.getItems();
|
||||
for (let el of elms) {
|
||||
if (el.itemScope)
|
||||
result.items.push(_getObject(el));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
345
toolkit/modules/PageMetadata.jsm
Normal file
345
toolkit/modules/PageMetadata.jsm
Normal file
@ -0,0 +1,345 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
"use strict";
|
||||
|
||||
this.EXPORTED_SYMBOLS = ["PageMetadata"];
|
||||
|
||||
const {classes: Cc, interfaces: Ci, utils: Cu, results: Cr} = Components;
|
||||
|
||||
Cu.import("resource://gre/modules/Services.jsm");
|
||||
Cu.import("resource://gre/modules/XPCOMUtils.jsm");
|
||||
|
||||
XPCOMUtils.defineLazyServiceGetter(this, "UnescapeService",
|
||||
"@mozilla.org/feed-unescapehtml;1",
|
||||
"nsIScriptableUnescapeHTML");
|
||||
|
||||
|
||||
/**
|
||||
* Maximum number of images to discover in the document, when no preview images
|
||||
* are explicitly specified by the metadata.
|
||||
* @type {Number}
|
||||
*/
|
||||
const DISCOVER_IMAGES_MAX = 5;
|
||||
|
||||
|
||||
/**
|
||||
* Extract metadata and microdata from a HTML document.
|
||||
* @type {Object}
|
||||
*/
|
||||
this.PageMetadata = {
|
||||
/**
|
||||
* Get all metadata from an HTML document. This includes:
|
||||
* - URL
|
||||
* - title
|
||||
* - Metadata specified in <meta> tags, including OpenGraph data
|
||||
* - Links specified in <link> tags (short, canonical, preview images, alternative)
|
||||
* - Content that can be found in the page content that we consider useful metadata
|
||||
* - Microdata, as defined by the spec:
|
||||
* http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata-2.html
|
||||
*
|
||||
* @param {Document} document - Document to extract data from.
|
||||
* @returns {Object} Object containing the various metadata, normalized to
|
||||
* merge some common alternative names for metadata.
|
||||
*/
|
||||
getData(document) {
|
||||
let result = {
|
||||
url: this._validateURL(document, document.documentURI),
|
||||
title: document.title,
|
||||
previews: [],
|
||||
};
|
||||
|
||||
this._getMetaData(document, result);
|
||||
this._getLinkData(document, result);
|
||||
this._getPageData(document, result);
|
||||
result.microdata = this.getMicrodata(document);
|
||||
|
||||
return result;
|
||||
},
|
||||
|
||||
/**
|
||||
* Get all microdata from an HTML document, or from only a given element.
|
||||
*
|
||||
* Returns an object in the format:
|
||||
* {
|
||||
* "items": [
|
||||
* {
|
||||
* "type": [ "<TYPE>" ],
|
||||
* "properties": {
|
||||
* "<PROPERTY-NAME>": [ "<PROPERTY-VALUE>", ... ],
|
||||
* ...,
|
||||
* }
|
||||
* },
|
||||
* ...,
|
||||
* ]
|
||||
* }
|
||||
*
|
||||
* @note This is based on wg algorythm to convert microdata to json
|
||||
* http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata-2.html#json
|
||||
*
|
||||
* @param {Document} document - Document to extract data from.
|
||||
* @param {Element} [target] - Optional element to restrict microdata lookup to.
|
||||
* @return {Object} Object describing the found microdata.
|
||||
*/
|
||||
getMicrodata(document, target = null) {
|
||||
function getObject(item) {
|
||||
let result = {};
|
||||
|
||||
if (item.itemType.length) {
|
||||
result.types = [...item.itemType];
|
||||
}
|
||||
|
||||
if (item.itemId) {
|
||||
result.itemId = item.itemId;
|
||||
}
|
||||
|
||||
if (item.properties.length) {
|
||||
result.properties = {};
|
||||
}
|
||||
|
||||
for (let elem of item.properties) {
|
||||
let value;
|
||||
if (elem.itemScope) {
|
||||
value = getObject(elem);
|
||||
} else if (elem.itemValue) {
|
||||
value = elem.itemValue;
|
||||
} else if (elem.hasAttribute("content")) {
|
||||
// Handle mis-formatted microdata.
|
||||
value = elem.getAttribute("content");
|
||||
}
|
||||
|
||||
for (let prop of elem.itemProp) {
|
||||
if (!result.properties[prop]) {
|
||||
result.properties[prop] = [];
|
||||
}
|
||||
|
||||
result.properties[prop].push(value);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
let result = { items: [] };
|
||||
let elements = target ? [target] : document.getItems();
|
||||
|
||||
for (let element of elements) {
|
||||
if (element.itemScope) {
|
||||
result.items.push(getObject(element));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
},
|
||||
|
||||
/**
|
||||
* Get metadata as defined in <meta> tags.
|
||||
* This adds properties to an existing result object.
|
||||
*
|
||||
* @param {Document} document - Document to extract data from.
|
||||
* @param {Object} result - Existing result object to add properties to.
|
||||
*/
|
||||
_getMetaData(document, result) {
|
||||
// Query for standardized meta data.
|
||||
let elements = document.querySelectorAll("head > meta[property], head > meta[name]");
|
||||
if (elements.length < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (let element of elements) {
|
||||
let value = element.getAttribute("content")
|
||||
if (!value) {
|
||||
continue;
|
||||
}
|
||||
value = UnescapeService.unescape(value.trim());
|
||||
|
||||
let key = element.getAttribute("property") || element.getAttribute("name");
|
||||
if (!key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// There are a wide array of possible meta tags, expressing articles,
|
||||
// products, etc. so all meta tags are passed through but we touch up the
|
||||
// most common attributes.
|
||||
result[key] = value;
|
||||
|
||||
switch (key) {
|
||||
case "title":
|
||||
case "og:title": {
|
||||
result.title = value;
|
||||
break;
|
||||
}
|
||||
|
||||
case "description":
|
||||
case "og:description": {
|
||||
result.description = value;
|
||||
break;
|
||||
}
|
||||
|
||||
case "og:site_name": {
|
||||
result.siteName = value;
|
||||
break;
|
||||
}
|
||||
|
||||
case "medium":
|
||||
case "og:type": {
|
||||
result.medium = value;
|
||||
break;
|
||||
}
|
||||
|
||||
case "og:video": {
|
||||
let url = this._validateURL(document, value);
|
||||
if (url) {
|
||||
result.source = url;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case "og:url": {
|
||||
let url = this._validateURL(document, value);
|
||||
if (url) {
|
||||
result.url = url;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case "og:image": {
|
||||
let url = this._validateURL(document, value);
|
||||
if (url) {
|
||||
result.previews.push(url);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Get metadata as defined in <link> tags.
|
||||
* This adds properties to an existing result object.
|
||||
*
|
||||
* @param {Document} document - Document to extract data from.
|
||||
* @param {Object} result - Existing result object to add properties to.
|
||||
*/
|
||||
_getLinkData: function(document, result) {
|
||||
let elements = document.querySelectorAll("head > link[rel], head > link[id]");
|
||||
|
||||
for (let element of elements) {
|
||||
let url = element.getAttribute("href");
|
||||
if (!url) {
|
||||
continue;
|
||||
}
|
||||
url = this._validateURL(document, UnescapeService.unescape(url.trim()));
|
||||
|
||||
let key = element.getAttribute("rel") || element.getAttribute("id");
|
||||
if (!key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (key) {
|
||||
case "shorturl":
|
||||
case "shortlink": {
|
||||
result.shortUrl = url;
|
||||
break;
|
||||
}
|
||||
|
||||
case "canonicalurl":
|
||||
case "canonical": {
|
||||
result.url = url;
|
||||
break;
|
||||
}
|
||||
|
||||
case "image_src": {
|
||||
result.previews.push(url);
|
||||
break;
|
||||
}
|
||||
|
||||
case "alternate": {
|
||||
// Expressly for oembed support but we're liberal here and will let
|
||||
// other alternate links through. oembed defines an href, supplied by
|
||||
// the site, where you can fetch additional meta data about a page.
|
||||
// We'll let the client fetch the oembed data themselves, but they
|
||||
// need the data from this link.
|
||||
if (!result.alternate) {
|
||||
result.alternate = [];
|
||||
}
|
||||
|
||||
result.alternate.push({
|
||||
type: element.getAttribute("type"),
|
||||
href: element.getAttribute("href"),
|
||||
title: element.getAttribute("title")
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Scrape thought the page content for additional content that may be used to
|
||||
* suppliment explicitly defined metadata. This includes:
|
||||
* - First few images, when no preview image metadata is explicitly defined.
|
||||
*
|
||||
* This adds properties to an existing result object.
|
||||
*
|
||||
* @param {Document} document - Document to extract data from.
|
||||
* @param {Object} result - Existing result object to add properties to.
|
||||
*/
|
||||
_getPageData(document, result) {
|
||||
if (result.previews.length < 1) {
|
||||
result.previews = this._getImageUrls(document);
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Find the first few images in a document, for use as preview images.
|
||||
* Will return upto DISCOVER_IMAGES_MAX number of images.
|
||||
*
|
||||
* @note This is not very clever. It does not (yet) check if any of the
|
||||
* images may be appropriate as a preview image.
|
||||
*
|
||||
* @param {Document} document - Document to extract data from.
|
||||
* @return {[string]} Array of URLs.
|
||||
*/
|
||||
_getImageUrls(document) {
|
||||
let result = [];
|
||||
let elements = document.querySelectorAll("img");
|
||||
|
||||
for (let element of elements) {
|
||||
let src = element.getAttribute("src");
|
||||
if (src) {
|
||||
result.push(this._validateURL(document, UnescapeService.unescape(src)));
|
||||
|
||||
// We don't want a billion images.
|
||||
// TODO: Move this magic number to a const.
|
||||
if (result.length > DISCOVER_IMAGES_MAX) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
},
|
||||
|
||||
/**
|
||||
* Validate a URL. This involves resolving the URL if it's relative to the
|
||||
* document location, ensuring it's using an expected scheme, and stripping
|
||||
* the userPass portion of the URL.
|
||||
*
|
||||
* @param {Document} document - Document to use as the root location for a relative URL.
|
||||
* @param {string} url - URL to validate.
|
||||
* @return {string} Result URL.
|
||||
*/
|
||||
_validateURL(document, url) {
|
||||
let docURI = Services.io.newURI(document.documentURI, null, null);
|
||||
let uri = Services.io.newURI(docURI.resolve(url), null, null);
|
||||
|
||||
if (["http", "https"].indexOf(uri.scheme) < 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
uri.userPass = "";
|
||||
|
||||
return uri.spec;
|
||||
},
|
||||
};
|
@ -31,6 +31,7 @@ EXTRA_JS_MODULES += [
|
||||
'Log.jsm',
|
||||
'NewTabUtils.jsm',
|
||||
'PageMenu.jsm',
|
||||
'PageMetadata.jsm',
|
||||
'PermissionsUtils.jsm',
|
||||
'PopupNotifications.jsm',
|
||||
'Preferences.jsm',
|
||||
|
@ -1,6 +1,7 @@
|
||||
[DEFAULT]
|
||||
support-files =
|
||||
dummy_page.html
|
||||
metadata_*.html
|
||||
|
||||
[browser_Battery.js]
|
||||
[browser_Deprecated.js]
|
||||
@ -8,5 +9,6 @@ support-files =
|
||||
skip-if = e10s # Bug ?????? - test already uses content scripts, but still fails only under e10s.
|
||||
[browser_Geometry.js]
|
||||
[browser_InlineSpellChecker.js]
|
||||
[browser_PageMetadata.js]
|
||||
[browser_RemoteWebNavigation.js]
|
||||
[browser_Troubleshoot.js]
|
||||
|
43
toolkit/modules/tests/browser/browser_PageMetadata.js
Normal file
43
toolkit/modules/tests/browser/browser_PageMetadata.js
Normal file
@ -0,0 +1,43 @@
|
||||
/**
|
||||
* Tests PageMetadata.jsm, which extracts metadata and microdata from a
|
||||
* document.
|
||||
*/
|
||||
|
||||
let {PageMetadata} = Cu.import("resource://gre/modules/PageMetadata.jsm", {});
|
||||
|
||||
let rootURL = "http://example.com/browser/toolkit/modules/tests/browser/";
|
||||
|
||||
/**
 * Request a file below rootURL and hand it back as a parsed document.
 *
 * @param {string} fileName - File name, relative to rootURL.
 * @return {Promise} Resolves with the request's responseXML once the load
 *                   event fires; rejects with an Error on a request error.
 */
function promiseDocument(fileName) {
  return new Promise((resolve, reject) => {
    let request = new XMLHttpRequest();
    request.open("GET", rootURL + fileName);
    request.responseType = "document";
    request.onload = () => resolve(request.responseXML);
    request.onerror = () => reject(new Error("Error loading document"));
    request.send();
  });
}
|
||||
|
||||
/**
 * Load a simple document and check the url, title and description that
 * PageMetadata.getData extracts from it.
 */
add_task(function* simpleDoc() {
  let fileName = "metadata_simple.html";
  info(`Loading a simple page, ${fileName}`);

  let doc = yield promiseDocument(fileName);
  Assert.notEqual(doc, null,
                  "Should have a document to analyse");

  let data = PageMetadata.getData(doc);
  Assert.notEqual(data, null,
                  "Should have non-null result");
  Assert.equal(data.url, rootURL + fileName,
               "Should have expected url property");
  Assert.equal(data.title, "Test Title",
               "Should have expected title property");
  // The message names the property actually under test, so a failure is not
  // misreported as a title mismatch.
  Assert.equal(data.description, "A very simple test page",
               "Should have expected description property");
});
|
10
toolkit/modules/tests/browser/metadata_simple.html
Normal file
10
toolkit/modules/tests/browser/metadata_simple.html
Normal file
@ -0,0 +1,10 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test Title</title>
|
||||
<meta property="description" content="A very simple test page">
|
||||
</head>
|
||||
<body>
|
||||
Llama.
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue
Block a user