Bug 1131911 - Extract page metadata extraction from Social.jsm to its own JSM. r=mixedpuppy

2024-09-13 09:24:08 -07:00 · 2015-03-06 11:43:05 +13:00 · 2015-03-06 11:43:05 +13:00 · fe28e7efde
commit fe28e7efde
parent ba60837bc8
9 changed files with 430 additions and 205 deletions
--- a/browser/base/content/browser-social.js
+++ b/browser/base/content/browser-social.js
@ -637,8 +637,8 @@ SocialShare = {
    // containing the open graph data.
    let _dataFn;
    if (!pageData || sharedURI == gBrowser.currentURI) {
-      messageManager.addMessageListener("Social:PageDataResult", _dataFn = (msg) => {
-        messageManager.removeMessageListener("Social:PageDataResult", _dataFn);
+      messageManager.addMessageListener("PageMetadata:PageDataResult", _dataFn = (msg) => {
+        messageManager.removeMessageListener("PageMetadata:PageDataResult", _dataFn);
        let pageData = msg.json;
        if (graphData) {
          // overwrite data retreived from page with data given to us as a param
@ -648,17 +648,17 @@ SocialShare = {
        }
        this.sharePage(providerOrigin, pageData, target);
      });
-      gBrowser.selectedBrowser.messageManager.sendAsyncMessage("Social:GetPageData");
+      gBrowser.selectedBrowser.messageManager.sendAsyncMessage("PageMetadata:GetPageData");
      return;
    }
    // if this is a share of a selected item, get any microdata
    if (!pageData.microdata && target) {
-      messageManager.addMessageListener("Social:PageDataResult", _dataFn = (msg) => {
-        messageManager.removeMessageListener("Social:PageDataResult", _dataFn);
+      messageManager.addMessageListener("PageMetadata:MicrodataResult", _dataFn = (msg) => {
+        messageManager.removeMessageListener("PageMetadata:MicrodataResult", _dataFn);
        pageData.microdata = msg.data;
        this.sharePage(providerOrigin, pageData, target);
      });
-      gBrowser.selectedBrowser.messageManager.sendAsyncMessage("Social:GetMicrodata", null, target);
+      gBrowser.selectedBrowser.messageManager.sendAsyncMessage("PageMetadata:GetMicrodata", null, target);
      return;
    }
    this.currentShare = pageData;
--- a/browser/base/content/content.js
+++ b/browser/base/content/content.js
@ -32,6 +32,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "AboutReader",
  "resource://gre/modules/AboutReader.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "ReaderMode",
  "resource://gre/modules/ReaderMode.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "PageMetadata",
+  "resource://gre/modules/PageMetadata.jsm");
 XPCOMUtils.defineLazyGetter(this, "SimpleServiceDiscovery", function() {
  let ssdp = Cu.import("resource://gre/modules/SimpleServiceDiscovery.jsm", {}).SimpleServiceDiscovery;
  // Register targets
@ -1000,30 +1002,29 @@ addEventListener("pageshow", function(event) {
  }
 });

-let SocialMessenger = {
+let PageMetadataMessenger = {
  init: function() {
-    addMessageListener("Social:GetPageData", this);
-    addMessageListener("Social:GetMicrodata", this);
-
-    XPCOMUtils.defineLazyGetter(this, "og", function() {
-      let tmp = {};
-      Cu.import("resource:///modules/Social.jsm", tmp);
-      return tmp.OpenGraphBuilder;
-    });
+    addMessageListener("PageMetadata:GetPageData", this);
+    addMessageListener("PageMetadata:GetMicrodata", this);
  },
  receiveMessage: function(aMessage) {
    switch(aMessage.name) {
-      case "Social:GetPageData":
-        sendAsyncMessage("Social:PageDataResult", this.og.getData(content.document));
+      case "PageMetadata:GetPageData": {
+        let result = PageMetadata.getData(content.document);
+        sendAsyncMessage("PageMetadata:PageDataResult", result);
        break;
-      case "Social:GetMicrodata":
+      }
+
+      case "PageMetadata:GetMicrodata": {
        let target = aMessage.objects;
-        sendAsyncMessage("Social:PageDataResult", this.og.getMicrodata(content.document, target));
+        let result = PageMetadata.getMicrodata(content.document, target);
+        sendAsyncMessage("PageMetadata:MicrodataResult", result);
        break;
+      }
    }
  }
 }
-SocialMessenger.init();
+PageMetadataMessenger.init();

 addEventListener("ActivateSocialFeature", function (aEvent) {
  let document = content.document;
--- a/browser/base/content/socialmarks.xml
+++ b/browser/base/content/socialmarks.xml
@ -150,21 +150,21 @@
        let URLTemplate = provider.markURL;
        let _dataFn;
        if (!pageData) {
-          messageManager.addMessageListener("Social:PageDataResult", _dataFn = (msg) => {
-            messageManager.removeMessageListener("Social:PageDataResult", _dataFn);
+          messageManager.addMessageListener("PageMetadata:PageDataResult", _dataFn = (msg) => {
+            messageManager.removeMessageListener("PageMetadata:PageDataResult", _dataFn);
            this.loadPanel(msg.json, target);
          });
-          gBrowser.selectedBrowser.messageManager.sendAsyncMessage("Social:GetPageData");
+          gBrowser.selectedBrowser.messageManager.sendAsyncMessage("PageMetadata:GetPageData");
          return;
        }
        // if this is a share of a selected item, get any microdata
        if (!pageData.microdata && target) {
-          messageManager.addMessageListener("Social:PageDataResult", _dataFn = (msg) => {
-            messageManager.removeMessageListener("Social:PageDataResult", _dataFn);
+          messageManager.addMessageListener("PageMetadata:MicrodataResult", _dataFn = (msg) => {
+            messageManager.removeMessageListener("PageMetadata:MicrodataResult", _dataFn);
            pageData.microdata = msg.data;
            this.loadPanel(pageData, target);
          });
-          gBrowser.selectedBrowser.messageManager.sendAsyncMessage("Social:GetMicrodata", null, target);
+          gBrowser.selectedBrowser.messageManager.sendAsyncMessage("PageMetadata:GetMicrodata", null, target);
          return;
        }
        this.pageData = pageData;
--- a/browser/modules/Social.jsm
+++ b/browser/modules/Social.jsm
@ -24,6 +24,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "CustomizableUI",
  "resource:///modules/CustomizableUI.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "SocialService",
  "resource://gre/modules/SocialService.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "PageMetadata",
+  "resource://gre/modules/PageMetadata.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "PlacesUtils",
  "resource://gre/modules/PlacesUtils.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "PrivateBrowsingUtils",
@ -31,9 +33,6 @@ XPCOMUtils.defineLazyModuleGetter(this, "PrivateBrowsingUtils",
 XPCOMUtils.defineLazyModuleGetter(this, "Promise",
  "resource://gre/modules/Promise.jsm");

-XPCOMUtils.defineLazyServiceGetter(this, "unescapeService",
-                                   "@mozilla.org/feed-unescapehtml;1",
-                                   "nsIScriptableUnescapeHTML");

 function promiseSetAnnotation(aURI, providerList) {
  let deferred = Promise.defer();
@ -528,180 +527,4 @@ this.OpenGraphBuilder = {
      endpointURL = endpointURL + "?" + str.join("&");
    return endpointURL;
  },
-
-  getData: function(aDocument, target) {
-    let res = {
-      url: this._validateURL(aDocument, aDocument.documentURI),
-      title: aDocument.title,
-      previews: []
-    };
-    this._getMetaData(aDocument, res);
-    this._getLinkData(aDocument, res);
-    this._getPageData(aDocument, res);
-    res.microdata = this.getMicrodata(aDocument, target);
-    return res;
-  },
-
-  getMicrodata: function (aDocument, target) {
-    return getMicrodata(aDocument, target);
-  },
-
-  _getMetaData: function(aDocument, o) {
-    // query for standardized meta data
-    let els = aDocument.querySelectorAll("head > meta[property], head > meta[name]");
-    if (els.length < 1)
-      return;
-    let url;
-    for (let el of els) {
-      let value = el.getAttribute("content")
-      if (!value)
-        continue;
-      value = unescapeService.unescape(value.trim());
-      let key = el.getAttribute("property") || el.getAttribute("name");
-      if (!key)
-        continue;
-      // There are a wide array of possible meta tags, expressing articles,
-      // products, etc. so all meta tags are passed through but we touch up the
-      // most common attributes.
-      o[key] = value;
-      switch (key) {
-        case "title":
-        case "og:title":
-          o.title = value;
-          break;
-        case "description":
-        case "og:description":
-          o.description = value;
-          break;
-        case "og:site_name":
-          o.siteName = value;
-          break;
-        case "medium":
-        case "og:type":
-          o.medium = value;
-          break;
-        case "og:video":
-          url = this._validateURL(aDocument, value);
-          if (url)
-            o.source = url;
-          break;
-        case "og:url":
-          url = this._validateURL(aDocument, value);
-          if (url)
-            o.url = url;
-          break;
-        case "og:image":
-          url = this._validateURL(aDocument, value);
-          if (url)
-            o.previews.push(url);
-          break;
-      }
-    }
-  },
-
-  _getLinkData: function(aDocument, o) {
-    let els = aDocument.querySelectorAll("head > link[rel], head > link[id]");
-    for (let el of els) {
-      let url = el.getAttribute("href");
-      if (!url)
-        continue;
-      url = this._validateURL(aDocument, unescapeService.unescape(url.trim()));
-      switch (el.getAttribute("rel") || el.getAttribute("id")) {
-        case "shorturl":
-        case "shortlink":
-          o.shortUrl = url;
-          break;
-        case "canonicalurl":
-        case "canonical":
-          o.url = url;
-          break;
-        case "image_src":
-          o.previews.push(url);
-          break;
-        case "alternate":
-          // expressly for oembed support but we're liberal here and will let
-          // other alternate links through. oembed defines an href, supplied by
-          // the site, where you can fetch additional meta data about a page.
-          // We'll let the client fetch the oembed data themselves, but they
-          // need the data from this link.
-          if (!o.alternate)
-            o.alternate = [];
-          o.alternate.push({
-            "type": el.getAttribute("type"),
-            "href": el.getAttribute("href"),
-            "title": el.getAttribute("title")
-          })
-      }
-    }
-  },
-
-  // scrape through the page for data we want
-  _getPageData: function(aDocument, o) {
-    if (o.previews.length < 1)
-      o.previews = this._getImageUrls(aDocument);
-  },
-
-  _validateURL: function(aDocument, url) {
-    let docURI = Services.io.newURI(aDocument.documentURI, null, null);
-    let uri = Services.io.newURI(docURI.resolve(url), null, null);
-    if (["http", "https", "ftp", "ftps"].indexOf(uri.scheme) < 0)
-      return null;
-    uri.userPass = "";
-    return uri.spec;
-  },
-
-  _getImageUrls: function(aDocument) {
-    let l = [];
-    let els = aDocument.querySelectorAll("img");
-    for (let el of els) {
-      let src = el.getAttribute("src");
-      if (src) {
-        l.push(this._validateURL(aDocument, unescapeService.unescape(src)));
-        // we don't want a billion images
-        if (l.length > 5)
-          break;
-      }
-    }
-    return l;
-  }
 };
-
-// getMicrodata (and getObject) based on wg algorythm to convert microdata to json
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata-2.html#json
-function  getMicrodata(document, target) {
-
-  function _getObject(item) {
-    let result = {};
-    if (item.itemType.length)
-      result.types = [i for (i of item.itemType)];
-    if (item.itemId)
-      result.itemId = item.itemid;
-    if (item.properties.length)
-      result.properties = {};
-    for (let elem of item.properties) {
-      let value;
-      if (elem.itemScope)
-        value = _getObject(elem);
-      else if (elem.itemValue)
-        value = elem.itemValue;
-      // handle mis-formatted microdata
-      else if (elem.hasAttribute("content"))
-        value = elem.getAttribute("content");
-
-      for (let prop of elem.itemProp) {
-        if (!result.properties[prop])
-          result.properties[prop] = [];
-        result.properties[prop].push(value);
-      }
-    }
-    return result;
-  }
-
-  let result = { items: [] };
-  let elms = target ? [target] : document.getItems();
-  for (let el of elms) {
-    if (el.itemScope)
-      result.items.push(_getObject(el));
-  }
-  return result;
-}
--- a/toolkit/modules/PageMetadata.jsm
+++ b/toolkit/modules/PageMetadata.jsm
@ -0,0 +1,345 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+this.EXPORTED_SYMBOLS = ["PageMetadata"];
+
+const {classes: Cc, interfaces: Ci, utils: Cu, results: Cr} = Components;
+
+Cu.import("resource://gre/modules/Services.jsm");
+Cu.import("resource://gre/modules/XPCOMUtils.jsm");
+
+XPCOMUtils.defineLazyServiceGetter(this, "UnescapeService",
+                                   "@mozilla.org/feed-unescapehtml;1",
+                                   "nsIScriptableUnescapeHTML");
+
+
+/**
+ * Maximum number of images to discover in the document, when no preview images
+ * are explicitly specified by the metadata.
+ * @type {Number}
+ */
+const DISCOVER_IMAGES_MAX  = 5;
+
+
+/**
+ * Extract metadata and microdata from a HTML document.
+ * @type {Object}
+ */
+this.PageMetadata = {
+  /**
+   * Get all metadata from an HTML document. This includes:
+   * - URL
+   * - title
+   * - Metadata specified in <meta> tags, including OpenGraph data
+   * - Links specified in <link> tags (short, canonical, preview images, alternative)
+   * - Preview images found in the page content, used when no metadata declares any
+   * - Microdata, as defined by the spec:
+   *   http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata-2.html
+   *
+   * @param {Document} document - Document to extract data from.
+   * @return {Object} Object containing the various metadata, normalized to
+   *                  merge some common alternative names for metadata.
+   */
+  getData(document) {
+    let result = {
+      url: this._validateURL(document, document.documentURI),
+      title: document.title,
+      previews: [],
+    };
+
+    this._getMetaData(document, result);
+    this._getLinkData(document, result);
+    this._getPageData(document, result);
+    result.microdata = this.getMicrodata(document);
+
+    return result;
+  },
+
+  /**
+   * Get all microdata from an HTML document, or from only a given element.
+   *
+   * Returns an object in the format (an item with an id also gets an "itemId" key):
+   *   {
+   *     "items": [
+   *       {
+   *         "types": [ "<TYPE>", ... ],
+   *         "properties": {
+   *            "<PROPERTY-NAME>": [ "<PROPERTY-VALUE>", ... ],
+   *            ...,
+   *         }
+   *       },
+   *       ...,
+   *     ]
+   *   }
+   *
+   * @note This is based on the WHATWG algorithm to convert microdata to JSON:
+   *      http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata-2.html#json
+   *
+   * @param {Document} document - Document to extract data from.
+   * @param {Element} [target] - Optional element to restrict microdata lookup to.
+   * @return {Object} Object describing the found microdata.
+   */
+  getMicrodata(document, target = null) {
+    function getObject(item) {
+      let result = {};
+
+      if (item.itemType.length) {
+        result.types = [...item.itemType];
+      }
+
+      if (item.itemId) {
+        result.itemId = item.itemId;
+      }
+
+      if (item.properties.length) {
+        result.properties = {};
+      }
+
+      for (let elem of item.properties) {
+        let value;
+        if (elem.itemScope) {
+          value = getObject(elem);
+        } else if (elem.itemValue) {
+          value = elem.itemValue;
+        } else if (elem.hasAttribute("content")) {
+          // Handle mis-formatted microdata.
+          value = elem.getAttribute("content");
+        }
+
+        for (let prop of elem.itemProp) {
+          if (!result.properties[prop]) {
+            result.properties[prop] = [];
+          }
+
+          result.properties[prop].push(value);
+        }
+      }
+
+      return result;
+    }
+
+    let result = { items: [] };
+    let elements = target ? [target] : document.getItems();
+
+    for (let element of elements) {
+      if (element.itemScope) {
+        result.items.push(getObject(element));
+      }
+    }
+
+    return result;
+  },
+
+  /**
+   * Get metadata as defined in <meta> tags.
+   * This adds properties to an existing result object.
+   *
+   * @param {Document} document - Document to extract data from.
+   * @param {Object}  result - Existing result object to add properties to.
+   */
+  _getMetaData(document, result) {
+    // Fields this module builds itself; a page-supplied <meta> of the same name
+    // must not replace them (e.g. turn the previews array into a string).
+    const OWN_FIELDS = ["url", "shortUrl", "previews", "alternate"];
+
+    // Query for standardized meta data.
+    let elements = document.querySelectorAll("head > meta[property], head > meta[name]");
+    for (let element of elements) {
+      let value = element.getAttribute("content");
+      if (!value) {
+        continue;
+      }
+      value = UnescapeService.unescape(value.trim());
+
+      let key = element.getAttribute("property") || element.getAttribute("name");
+      if (!key || OWN_FIELDS.indexOf(key) >= 0) {
+        continue;
+      }
+
+      // There are a wide array of possible meta tags, expressing articles,
+      // products, etc. so all meta tags are passed through but we touch up the
+      // most common attributes.
+      result[key] = value;
+
+      switch (key) {
+        case "title":
+        case "og:title": {
+          result.title = value;
+          break;
+        }
+
+        case "description":
+        case "og:description": {
+          result.description = value;
+          break;
+        }
+
+        case "og:site_name": {
+          result.siteName = value;
+          break;
+        }
+
+        case "medium":
+        case "og:type": {
+          result.medium = value;
+          break;
+        }
+
+        case "og:video": {
+          let url = this._validateURL(document, value);
+          if (url) {
+            result.source = url;
+          }
+          break;
+        }
+
+        case "og:url": {
+          let url = this._validateURL(document, value);
+          if (url) {
+            result.url = url;
+          }
+          break;
+        }
+
+        case "og:image": {
+          let url = this._validateURL(document, value);
+          if (url) {
+            result.previews.push(url);
+          }
+          break;
+        }
+      }
+    }
+  },
+
+  /**
+   * Get metadata as defined in <link> tags.
+   * This adds properties to an existing result object.
+   *
+   * @param {Document} document - Document to extract data from.
+   * @param {Object}  result - Existing result object to add properties to.
+   */
+  _getLinkData(document, result) {
+    let elements = document.querySelectorAll("head > link[rel], head > link[id]");
+
+    for (let element of elements) {
+      let url = element.getAttribute("href");
+      if (!url) {
+        continue;
+      }
+      url = this._validateURL(document, UnescapeService.unescape(url.trim()));
+
+      let key = element.getAttribute("rel") || element.getAttribute("id");
+      if (!url && key !== "alternate") {
+        continue; // href was rejected; only "alternate" records the raw href.
+      }
+
+      switch (key) {
+        case "shorturl":
+        case "shortlink": {
+          result.shortUrl = url;
+          break;
+        }
+
+        case "canonicalurl":
+        case "canonical": {
+          result.url = url;
+          break;
+        }
+
+        case "image_src": {
+          result.previews.push(url);
+          break;
+        }
+
+        case "alternate": {
+          // Expressly for oembed support but we're liberal here and will let
+          // other alternate links through. oembed defines an href, supplied by
+          // the site, where you can fetch additional meta data about a page.
+          // We'll let the client fetch the oembed data themselves, but they
+          // need the data from this link.
+          if (!result.alternate) {
+            result.alternate = [];
+          }
+          result.alternate.push({
+            type: element.getAttribute("type"),
+            href: element.getAttribute("href"),
+            title: element.getAttribute("title")
+          });
+          break;
+        }
+      }
+    }
+  },
+
+  /**
+   * Scrape through the page content for additional content that may be used to
+   * supplement explicitly defined metadata. This includes:
+   * - First few images, when no preview image metadata is explicitly defined.
+   *
+   * This adds properties to an existing result object.
+   *
+   * @param {Document} document - Document to extract data from.
+   * @param {Object}  result - Existing result object to add properties to.
+   */
+  _getPageData(document, result) {
+    if (result.previews.length < 1) {
+      result.previews = this._getImageUrls(document);
+    }
+  },
+
+  /**
+   * Find the first few images in a document, for use as preview images.
+   * Will return up to DISCOVER_IMAGES_MAX images.
+   *
+   * @note This is not very clever. It does not (yet) check if any of the
+   *       images may be appropriate as a preview image.
+   *
+   * @param {Document} document - Document to extract data from.
+   * @return {string[]} Array of validated URLs (rejected sources are skipped).
+   */
+  _getImageUrls(document) {
+    let result = [];
+    let elements = document.querySelectorAll("img");
+
+    for (let element of elements) {
+      let src = element.getAttribute("src");
+      let url = src && this._validateURL(document, UnescapeService.unescape(src));
+      if (!url) {
+        // No usable source: missing src, or a scheme _validateURL rejects.
+        continue;
+      }
+      result.push(url);
+      if (result.length >= DISCOVER_IMAGES_MAX) {
+        break; // We don't want a billion images.
+      }
+    }
+
+    return result;
+  },
+
+  /**
+   * Validate a URL. This involves resolving the URL if it's relative to the
+   * document location, ensuring it's using an expected scheme, and stripping
+   * the userPass portion of the URL.
+   *
+   * @param {Document} document - Document to use as the root location for a relative URL.
+   * @param {string} url - URL to validate.
+   * @return {string|null} Result URL, or null if its scheme is not http or https.
+   */
+  _validateURL(document, url) {
+    let docURI = Services.io.newURI(document.documentURI, null, null);
+    let uri = Services.io.newURI(docURI.resolve(url), null, null);
+
+    if (["http", "https"].indexOf(uri.scheme) < 0) {
+      return null;
+    }
+
+    uri.userPass = "";
+
+    return uri.spec;
+  },
+};
--- a/toolkit/modules/moz.build
+++ b/toolkit/modules/moz.build
@ -31,6 +31,7 @@ EXTRA_JS_MODULES += [
    'Log.jsm',
    'NewTabUtils.jsm',
    'PageMenu.jsm',
+    'PageMetadata.jsm',
    'PermissionsUtils.jsm',
    'PopupNotifications.jsm',
    'Preferences.jsm',
--- a/toolkit/modules/tests/browser/browser.ini
+++ b/toolkit/modules/tests/browser/browser.ini
@ -1,6 +1,7 @@
 [DEFAULT]
 support-files =
  dummy_page.html
+  metadata_*.html

 [browser_Battery.js]
 [browser_Deprecated.js]
@ -8,5 +9,6 @@ support-files =
 skip-if = e10s # Bug ?????? - test already uses content scripts, but still fails only under e10s.
 [browser_Geometry.js]
 [browser_InlineSpellChecker.js]
+[browser_PageMetadata.js]
 [browser_RemoteWebNavigation.js]
 [browser_Troubleshoot.js]
--- a/toolkit/modules/tests/browser/browser_PageMetadata.js
+++ b/toolkit/modules/tests/browser/browser_PageMetadata.js
@ -0,0 +1,43 @@
+/**
+ * Tests PageMetadata.jsm, which extracts metadata and microdata from a
+ * document.
+ */
+
+let {PageMetadata} = Cu.import("resource://gre/modules/PageMetadata.jsm", {});
+
+let rootURL = "http://example.com/browser/toolkit/modules/tests/browser/";
+
+function promiseDocument(fileName) { // Resolves with the document at rootURL + fileName.
+  let url = rootURL + fileName;
+
+  return new Promise((resolve, reject) => {
+    let xhr = new XMLHttpRequest();
+    xhr.onload = () => resolve(xhr.responseXML); // the parsed document
+    xhr.onerror = () => reject(new Error("Error loading document"));
+    xhr.open("GET", url);
+    xhr.responseType = "document"; // have XHR parse the response for us
+    xhr.send();
+  });
+}
+
+/**
+ * Load a simple document and check its url, title and description metadata.
+ */
+add_task(function* simpleDoc() {
+  let fileName = "metadata_simple.html";
+  info(`Loading a simple page, ${fileName}`);
+
+  let doc = yield promiseDocument(fileName);
+  Assert.notEqual(doc, null,
+                  "Should have a document to analyse");
+
+  let data = PageMetadata.getData(doc);
+  Assert.notEqual(data, null,
+                  "Should have non-null result");
+  Assert.equal(data.url, rootURL + fileName,
+               "Should have expected url property");
+  Assert.equal(data.title, "Test Title",
+               "Should have expected title property");
+  Assert.equal(data.description, "A very simple test page",
+               "Should have expected description property");
+});
--- a/toolkit/modules/tests/browser/metadata_simple.html
+++ b/toolkit/modules/tests/browser/metadata_simple.html
@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>Test Title</title>
+    <meta property="description" content="A very simple test page">
+  </head>
+  <body>
+    Llama.
+  </body>
+</html>