gecko/toolkit/components/feeds/FeedProcessor.js

1925 lines
64 KiB
JavaScript
Raw Normal View History

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Robert Sayre.
* Portions created by the Initial Developer are Copyright (C) 2006
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Ben Goodger <beng@google.com>
* Myk Melez <myk@mozilla.org>
* Michael Ventnor <m.ventnor@gmail.com>
* Will Guaraldi <will.guaraldi@pculture.org>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/**
 * Emits a debug line through the platform-provided dump() function.
 *
 * @param str The message; it is written as "*** " + str followed by a newline.
 */
function LOG(str) {
  var line = "*** " + str + "\n";
  dump(line);
}
const Ci = Components.interfaces;
const Cc = Components.classes;
const Cr = Components.results;
Components.utils.import("resource://gre/modules/XPCOMUtils.jsm");
Components.utils.import("resource://gre/modules/ISO8601DateUtils.jsm");
// Contract ID, class ID and class name for each component defined in this file.
const FP_CONTRACTID = "@mozilla.org/feed-processor;1";
const FP_CLASSID = Components.ID("{26acb1f0-28fc-43bc-867a-a46aabc85dd4}");
const FP_CLASSNAME = "Feed Processor";
const FR_CONTRACTID = "@mozilla.org/feed-result;1";
const FR_CLASSID = Components.ID("{072a5c3d-30c6-4f07-b87f-9f63d51403f2}");
const FR_CLASSNAME = "Feed Result";
const FEED_CONTRACTID = "@mozilla.org/feed;1";
const FEED_CLASSID = Components.ID("{5d0cfa97-69dd-4e5e-ac84-f253162e8f9a}");
const FEED_CLASSNAME = "Feed";
const ENTRY_CONTRACTID = "@mozilla.org/feed-entry;1";
const ENTRY_CLASSID = Components.ID("{8e4444ff-8e99-4bdd-aa7f-fb3c1c77319f}");
const ENTRY_CLASSNAME = "Feed Entry";
const TEXTCONSTRUCT_CONTRACTID = "@mozilla.org/feed-textconstruct;1";
const TEXTCONSTRUCT_CLASSID =
  Components.ID("{b992ddcd-3899-4320-9909-924b3e72c922}");
const TEXTCONSTRUCT_CLASSNAME = "Feed Text Construct";
const GENERATOR_CONTRACTID = "@mozilla.org/feed-generator;1";
const GENERATOR_CLASSID =
  Components.ID("{414af362-9ad8-4296-898e-62247f25a20e}");
const GENERATOR_CLASSNAME = "Feed Generator";
const PERSON_CONTRACTID = "@mozilla.org/feed-person;1";
const PERSON_CLASSID = Components.ID("{95c963b7-20b2-11db-92f6-001422106990}");
const PERSON_CLASSNAME = "Feed Person";

// Contract IDs of the platform services and helper classes used below.
// (The first two declarations previously relied on automatic semicolon
// insertion; they are now terminated explicitly like their neighbours.)
const IO_CONTRACTID = "@mozilla.org/network/io-service;1";
const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1";
const ARRAY_CONTRACTID = "@mozilla.org/array;1";
const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1";
const UNESCAPE_CONTRACTID = "@mozilla.org/feed-unescapehtml;1";

// IO service instance, fetched on first use by strToURI.
var gIoService = null;

// Namespace URIs referenced directly by the code in this file.
const XMLNS = "http://www.w3.org/XML/1998/namespace";
const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/";
const WAIROLE_NS = "http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#";
/***** Some general utils *****/
/**
 * Turns a string into a URI object through the IO service, which is fetched
 * on first use and cached in gIoService.
 *
 * @param link The URI string to parse.
 * @param base Optional base URI for resolving a relative link; any falsy
 *             value is passed on as null.
 *
 * @returns Whatever newURI returns, or null if newURI throws.
 */
function strToURI(link, base) {
  var baseURI = base ? base : null;
  if (!gIoService) {
    gIoService = Cc[IO_CONTRACTID].getService(Ci.nsIIOService);
  }
  var result;
  try {
    result = gIoService.newURI(link, null, baseURI);
  }
  catch(e) {
    // An unparsable link is reported as "no URI" rather than as an error.
    result = null;
  }
  return result;
}
/**
 * @returns true when a passes isObject and its constructor is this
 *          global's Array; false otherwise.
 */
function isArray(a) {
  if (!isObject(a))
    return false;
  return a.constructor == Array;
}
/**
 * @returns true for any non-null value whose typeof is "object", otherwise
 *          the result of isFunction(a).
 */
function isObject(a) {
  if (a && typeof a == "object")
    return true;
  return isFunction(a);
}
/**
 * @returns true when typeof a is "function".
 */
function isFunction(a) {
  var kind = typeof a;
  return kind == "function";
}
/**
 * Tests whether an object can be queried for an interface.
 *
 * @param a   The object to test.
 * @param iid The interface passed to a.QueryInterface.
 *
 * @returns true if a.QueryInterface(iid) completes, false if it throws
 *          (including when a has no QueryInterface at all).
 */
function isIID(a, iid) {
  try {
    a.QueryInterface(iid);
  }
  catch(e) {
    return false;
  }
  return true;
}
/**
 * @returns the result of isIID for Ci.nsIArray.
 */
function isIArray(a) {
  var supportsArray = isIID(a, Ci.nsIArray);
  return supportsArray;
}
/**
 * @returns the result of isIID for Ci.nsIFeedContainer.
 */
function isIFeedContainer(a) {
  var supportsContainer = isIID(a, Ci.nsIFeedContainer);
  return supportsContainer;
}
/**
 * Removes every "<...>" run (a "<", one or more non-">" characters, then
 * ">") from a string.
 *
 * @param someHTML The markup string.
 * @returns The string with those runs deleted.
 */
function stripTags(someHTML) {
  var tagPattern = /<[^>]+>/g;
  return someHTML.replace(tagPattern, "");
}
// Prefix under which link relations may also be written as full IANA URIs.
const IANA_URI = "http://www.iana.org/assignments/relation/";
/**
 * Searches through an array of links and returns a JS array
 * of matching property bags.
 *
 * @param rel   The relation to look for, e.g. "alternate" or "enclosure".
 * @param links An array-like with length and queryElementAt(); each element
 *              is queried as nsIPropertyBag2.
 *
 * @returns The bags that have an "href" key and whose "rel" equals rel or
 *          IANA_URI + rel. A bag with no (or an empty) "rel" matches only
 *          when rel is "alternate".
 */
function findAtomLinks(rel, links) {
  var matches = [];
  for (var idx = 0; idx < links.length; ++idx) {
    var bag = links.queryElementAt(idx, Ci.nsIPropertyBag2);
    // atom:link MUST have @href
    if (!bagHasKey(bag, "href"))
      continue;
    var relValue = bagHasKey(bag, "rel") ? bag.getPropertyAsAString("rel")
                                         : null;
    var isImplicitAlternate = !relValue && rel == "alternate";
    // The last test catches relations specified by IANA URI.
    if (isImplicitAlternate || relValue == rel || relValue == IANA_URI + rel)
      matches.push(bag);
  }
  return matches;
}
/**
 * Escapes the five XML special characters in a string.
 *
 * @param s The string to escape.
 * @returns s with & > < " ' replaced by their entity references.
 */
function xmlEscape(s) {
  // "&" goes first so the ampersands introduced by the later
  // replacements are not escaped a second time.
  return s.replace(/&/g, "&amp;")
          .replace(/>/g, "&gt;")
          .replace(/</g, "&lt;")
          .replace(/"/g, "&quot;")
          .replace(/'/g, "&apos;");
}
/**
 * Linear search using loose (==) equality.
 *
 * @param array   An array-like with length and indexed elements.
 * @param element The value to look for.
 * @returns true if some array[i] == element, otherwise false.
 */
function arrayContains(array, element) {
  var idx = 0;
  while (idx < array.length) {
    if (array[idx] == element)
      return true;
    ++idx;
  }
  return false;
}
// XXX add hasKey to nsIPropertyBag
/**
 * Reports whether a property bag has a key by attempting to read it.
 *
 * @param bag The bag; its getProperty(key) is expected to throw for a
 *            missing key.
 * @param key The property name.
 * @returns true if bag.getProperty(key) completes, false if anything throws
 *          (which includes bag being null or undefined).
 */
function bagHasKey(bag, key) {
  var present = true;
  try {
    bag.getProperty(key);
  }
  catch (e) {
    present = false;
  }
  return present;
}
/**
 * Builds a function that reads one fixed property out of a property bag.
 *
 * @param key The property name the returned getter will read.
 *
 * @returns A function taking a bag and returning bag.getProperty(key), or
 *          null when that call throws.
 */
function makePropGetter(key) {
  return function FeedPropGetter(bag) {
    try {
      // Return the value directly. The previous "return value = ..." wrote
      // to an undeclared global, which leaks a variable and, in strict code,
      // throws into the catch below so that null was always returned.
      return bag.getProperty(key);
    }
    catch(e) {
      return null;
    }
  };
}
/**
 * Converts a date string understood by ISO8601DateUtils.parse into the
 * format produced by Date.prototype.toUTCString.
 *
 * @param dateString The date string to parse.
 * @returns The parsed date's toUTCString() value.
 */
function W3CToIETFDate(dateString) {
  return ISO8601DateUtils.parse(dateString).toUTCString();
}
const RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

// namespace map: namespace URI -> the fixed prefix used for field names in
// this file. An empty prefix means the element name is used unqualified.
var gNamespaces = {
  "http://webns.net/mvcb/": "admin",
  "http://backend.userland.com/rss": "",
  "http://blogs.law.harvard.edu/tech/rss": "",
  "http://www.w3.org/2005/Atom": "atom",
  "http://purl.org/atom/ns#": "atom03",
  "http://purl.org/rss/1.0/modules/content/": "content",
  "http://purl.org/dc/elements/1.1/": "dc",
  "http://purl.org/dc/terms/": "dcterms",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
  "http://purl.org/rss/1.0/": "rss1",
  "http://my.netscape.com/rdf/simple/0.9/": "rss1",
  "http://wellformedweb.org/CommentAPI/": "wfw",
  "http://purl.org/rss/1.0/modules/wiki/": "wiki",
  "http://www.w3.org/XML/1998/namespace": "xml",
  "http://search.yahoo.com/mrss/": "media",
  "http://search.yahoo.com/mrss": "media"
};
// We allow a very small set of namespaces in XHTML content,
// for attributes only. Maps namespace URI -> the prefix written out
// for attributes in that namespace.
var gAllowedXHTMLNamespaces = {
  "http://www.w3.org/XML/1998/namespace": "xml",
  "http://www.w3.org/TR/xhtml2": "xhtml2",
  "http://www.w3.org/2005/07/aaa": "aaa",
  // if someone ns qualifies XHTML, we have to prefix it to avoid an
  // attribute collision.
  "http://www.w3.org/1999/xhtml": "xhtml"
};
/**
 * Implementation of nsIFeedResult. The constructor sets nothing; every
 * instance starts with the defaults declared on the prototype.
 */
function FeedResult() {}

FeedResult.prototype = {
  // Default values for the result's fields.
  bozo: false,
  doc: null,
  version: null,
  headers: null,
  uri: null,
  stylesheet: null,

  // Not supported: always throws NS_ERROR_NOT_IMPLEMENTED.
  registerExtensionPrefix: function FR_registerExtensionPrefix(ns, prefix) {
    throw Cr.NS_ERROR_NOT_IMPLEMENTED;
  },

  // XPCOM stuff
  classID: FR_CLASSID,
  QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedResult])
};
/**
 * Feed constructor: starts every scalar property at null, gives the three
 * list properties their own fresh mutable arrays, and marks the feed as a
 * plain TYPE_FEED with no enclosures.
 */
function Feed() {
  // Each list property gets its own array instance.
  function newMutableArray() {
    return Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
  }

  this.subtitle = null;
  this.title = null;
  this.items = newMutableArray();
  this.link = null;
  this.id = null;
  this.generator = null;
  this.authors = newMutableArray();
  this.contributors = newMutableArray();
  this.baseURI = null;
  this.enclosureCount = 0;
  this.type = Ci.nsIFeed.TYPE_FEED;
}
/**
 * Shared behaviour of Feed objects (nsIFeed / nsIFeedContainer).
 *
 * The methods read this.fields, a property bag that the constructor does not
 * create; it has to be assigned before normalize() is called.
 */
Feed.prototype = {
  // For each Feed property, the field names fieldsToObj looks up in
  // this.fields. Every name in a list is tried, and a later hit overwrites
  // an earlier one. A two-element array pairs a field name with a function
  // applied to the value. (The "title" key used to be declared twice with
  // the same list; the redundant second copy has been removed.)
  searchLists: {
    title: ["title", "rss1:title", "atom03:title", "atom:title"],
    subtitle: ["description","dc:description","rss1:description",
               "atom03:tagline","atom:subtitle"],
    items: ["items","atom03_entries","entries"],
    id: ["atom:id","rdf:about"],
    generator: ["generator"],
    authors : ["authors"],
    contributors: ["contributors"],
    link: [["link",strToURI],["rss1:link",strToURI]],
    categories: ["categories", "dc:subject"],
    rights: ["atom03:rights","atom:rights"],
    cloud: ["cloud"],
    image: ["image", "rss1:image", "atom:logo"],
    textInput: ["textInput", "rss1:textinput"],
    skipDays: ["skipDays"],
    skipHours: ["skipHours"],
    updated: ["pubDate", "lastBuildDate", "atom03:modified", "dc:date",
              "dcterms:modified", "atom:updated"]
  },

  /**
   * Copies the parsed fields onto this object and post-processes them:
   * unwraps skipDays/skipHours, parses the updated date, resolves the Atom
   * link and the image URL, computes the enclosure count and feed type, and
   * finally replaces the title/subtitle entries in the bag by their text.
   */
  normalize: function Feed_normalize() {
    fieldsToObj(this, this.searchLists);
    if (this.skipDays)
      this.skipDays = this.skipDays.getProperty("days");
    if (this.skipHours)
      this.skipHours = this.skipHours.getProperty("hours");

    if (this.updated)
      this.updated = dateParse(this.updated);

    // Assign Atom link if needed
    if (bagHasKey(this.fields, "links"))
      this._atomLinksToURI();

    this._calcEnclosureCountAndFeedType();

    // Resolve relative image links
    if (this.image && bagHasKey(this.image, "url"))
      this._resolveImageLink();

    this._resetBagMembersToRawText([this.searchLists.subtitle,
                                    this.searchLists.title]);
  },

  /**
   * Walks every entry's enclosures, counts them by the leading part of
   * their "type" (audio, image, video, anything else), and stores the
   * total in this.enclosureCount and the derived kind in this.type.
   */
  _calcEnclosureCountAndFeedType: function Feed_calcEnclosureCountAndFeedType() {
    var entries_with_enclosures = 0;
    var audio_count = 0;
    var image_count = 0;
    var video_count = 0;
    var other_count = 0;

    for (var i = 0; i < this.items.length; ++i) {
      var entry = this.items.queryElementAt(i, Ci.nsIFeedEntry);
      entry.QueryInterface(Ci.nsIFeedContainer);

      if (entry.enclosures && entry.enclosures.length > 0) {
        ++entries_with_enclosures;

        for (var e = 0; e < entry.enclosures.length; ++e) {
          var enc = entry.enclosures.queryElementAt(e, Ci.nsIWritablePropertyBag2);
          if (enc.hasKey("type")) {
            var enctype = enc.get("type");

            if (/^audio/.test(enctype)) {
              ++audio_count;
            } else if (/^image/.test(enctype)) {
              ++image_count;
            } else if (/^video/.test(enctype)) {
              ++video_count;
            } else {
              ++other_count;
            }
          } else {
            // An enclosure without a type counts as "other".
            ++other_count;
          }
        }
      }
    }

    var feedtype = Ci.nsIFeed.TYPE_FEED;

    // For a feed to be marked as TYPE_VIDEO, TYPE_AUDIO and TYPE_IMAGE,
    // we enforce two things:
    //
    //    1. all entries must have at least one enclosure
    //    2. all enclosures must be video for TYPE_VIDEO, audio for TYPE_AUDIO or image
    //       for TYPE_IMAGE
    //
    // Otherwise it's a TYPE_FEED.
    if (entries_with_enclosures == this.items.length && other_count == 0) {
      if (audio_count > 0 && !video_count && !image_count) {
        feedtype = Ci.nsIFeed.TYPE_AUDIO;
      } else if (image_count > 0 && !audio_count && !video_count) {
        feedtype = Ci.nsIFeed.TYPE_IMAGE;
      } else if (video_count > 0 && !audio_count && !image_count) {
        feedtype = Ci.nsIFeed.TYPE_VIDEO;
      }
    }

    this.type = feedtype;
    this.enclosureCount = other_count + video_count + audio_count + image_count;
  },

  /**
   * Sets this.link from the first "alternate" link in the "links" field,
   * resolved against that link's xml:base when it has one.
   */
  _atomLinksToURI: function Feed_linkToURI() {
    var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray);
    var alternates = findAtomLinks("alternate", links);
    if (alternates.length > 0) {
      var href = alternates[0].getPropertyAsAString("href");
      var base;
      if (bagHasKey(alternates[0], "xml:base"))
        base = alternates[0].getPropertyAsAString("xml:base");
      this.link = this._resolveURI(href, base);
    }
  },

  /**
   * Rewrites the image's "url" property to its resolved spec. The URL is
   * left untouched when it cannot be resolved.
   */
  _resolveImageLink: function Feed_resolveImageLink() {
    var base;
    if (bagHasKey(this.image, "xml:base"))
      base = this.image.getPropertyAsAString("xml:base");
    var url = this._resolveURI(this.image.getPropertyAsAString("url"), base);
    if (url)
      this.image.setPropertyAsAString("url", url.spec);
  },

  /**
   * Resolves linkSpec against baseSpec (itself resolved against
   * this.baseURI), or directly against this.baseURI when no baseSpec is
   * given.
   *
   * @returns The value produced by strToURI, or null if an exception was
   *          thrown (the exception is logged).
   */
  _resolveURI: function Feed_resolveURI(linkSpec, baseSpec) {
    var uri = null;
    try {
      var base = baseSpec ? strToURI(baseSpec, this.baseURI) : this.baseURI;
      uri = strToURI(linkSpec, base);
    }
    catch(e) {
      LOG(e);
    }

    return uri;
  },

  // reset the bag to raw contents, not text constructs
  //
  // fieldLists is an array of field-name lists; every listed field that is
  // present in this.fields is replaced by the .text of its current value.
  _resetBagMembersToRawText: function Feed_resetBagMembers(fieldLists) {
    for (var i=0; i<fieldLists.length; i++) {
      for (var j=0; j<fieldLists[i].length; j++) {
        if (bagHasKey(this.fields, fieldLists[i][j])) {
          var textConstruct = this.fields.getProperty(fieldLists[i][j]);
          this.fields.setPropertyAsAString(fieldLists[i][j],
                                           textConstruct.text);
        }
      }
    }
  },

  // XPCOM stuff
  classID: FEED_CLASSID,
  QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeed, Ci.nsIFeedContainer])
}
/**
 * Entry constructor: starts every scalar property at null, creates an empty
 * writable property bag for the parsed fields, and gives authors and
 * contributors their own mutable arrays.
 */
function Entry() {
  // Each list property gets its own array instance.
  function newMutableArray() {
    return Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
  }

  this.summary = null;
  this.content = null;
  this.title = null;
  this.fields = Cc["@mozilla.org/hash-property-bag;1"]
                  .createInstance(Ci.nsIWritablePropertyBag2);
  this.link = null;
  this.id = null;
  this.baseURI = null;
  this.updated = null;
  this.published = null;
  this.authors = newMutableArray();
  this.contributors = newMutableArray();
}
/**
 * Shared behaviour of Entry objects (nsIFeedEntry / nsIFeedContainer).
 */
Entry.prototype = {
  fields: null,
  enclosures: null,
  mediaContent: null,

  // For each Entry property, the field names fieldsToObj looks up in
  // this.fields. Every name in a list is tried, and a later hit overwrites
  // an earlier one. A two-element array pairs a field name with a function
  // applied to the value.
  searchLists: {
    title: ["title", "rss1:title", "atom03:title", "atom:title"],
    link: [["link",strToURI],["rss1:link",strToURI]],
    id: [["guid", makePropGetter("guid")], "rdf:about",
         "atom03:id", "atom:id"],
    authors : ["authors"],
    contributors: ["contributors"],
    summary: ["description", "rss1:description", "dc:description",
              "atom03:summary", "atom:summary"],
    content: ["content:encoded","atom03:content","atom:content"],
    rights: ["atom03:rights","atom:rights"],
    published: ["pubDate", "atom03:issued", "dcterms:issued", "atom:published"],
    updated: ["pubDate", "atom03:modified", "dc:date", "dcterms:modified",
              "atom:updated"]
  },

  /**
   * Copies the parsed fields onto this object and post-processes them:
   * resolves the Atom link, collects enclosures, falls back to a permalink
   * guid for the link, parses the dates, and finally replaces the
   * content/summary/title entries in the bag by their text.
   */
  normalize: function Entry_normalize() {
    fieldsToObj(this, this.searchLists);

    // Assign Atom link if needed
    if (bagHasKey(this.fields, "links"))
      this._atomLinksToURI();

    // Populate enclosures array
    this._populateEnclosures();

    // The link might be a guid w/ permalink=true
    if (!this.link && bagHasKey(this.fields, "guid")) {
      var guid = this.fields.getProperty("guid");
      // A guid is treated as a permalink unless it says "false".
      var isPermaLink = true;

      if (bagHasKey(guid, "isPermaLink"))
        isPermaLink = guid.getProperty("isPermaLink").toLowerCase() != "false";

      if (guid && isPermaLink)
        this.link = strToURI(guid.getProperty("guid"));
    }

    if (this.updated)
      this.updated = dateParse(this.updated);
    if (this.published)
      this.published = dateParse(this.published);

    this._resetBagMembersToRawText([this.searchLists.content,
                                    this.searchLists.summary,
                                    this.searchLists.title]);
  },

  /**
   * Gathers enclosures from every supported source into this.enclosures.
   */
  _populateEnclosures: function Entry_populateEnclosures() {
    if (bagHasKey(this.fields, "links"))
      this._atomLinksToEnclosures();

    // Add RSS2 enclosure to enclosures
    if (bagHasKey(this.fields, "enclosure"))
      this._enclosureToEnclosures();

    // Add media:content to enclosures
    if (bagHasKey(this.fields, "mediacontent"))
      this._mediacontentToEnclosures();

    // Add media:content in media:group to enclosures
    if (bagHasKey(this.fields, "mediagroup"))
      this._mediagroupToEnclosures();
  },

  // url -> enclosure bag already appended to this.enclosures; created on
  // first use by _addToEnclosures.
  __enclosure_map: null,

  /**
   * Appends new_enc to this.enclosures unless its url is missing or empty.
   * If an enclosure with the same url was added before, nothing is
   * appended; instead the earlier bag receives new_enc's "type" and
   * "length" where it lacks them.
   */
  _addToEnclosures: function Entry_addToEnclosures(new_enc) {
    // items we add to the enclosures array get displayed in the FeedWriter and
    // they must have non-empty urls.
    if (!bagHasKey(new_enc, "url") || new_enc.getPropertyAsAString("url") == "")
      return;

    if (this.__enclosure_map == null)
      this.__enclosure_map = {};

    var previous_enc = this.__enclosure_map[new_enc.getPropertyAsAString("url")];

    if (previous_enc != undefined) {
      previous_enc.QueryInterface(Ci.nsIWritablePropertyBag2);

      if (!bagHasKey(previous_enc, "type") && bagHasKey(new_enc, "type"))
        previous_enc.setPropertyAsAString("type", new_enc.getPropertyAsAString("type"));

      if (!bagHasKey(previous_enc, "length") && bagHasKey(new_enc, "length"))
        previous_enc.setPropertyAsAString("length", new_enc.getPropertyAsAString("length"));

      return;
    }

    if (this.enclosures == null) {
      this.enclosures = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
      this.enclosures.QueryInterface(Ci.nsIMutableArray);
    }

    this.enclosures.appendElement(new_enc, false);
    this.__enclosure_map[new_enc.getPropertyAsAString("url")] = new_enc;
  },

  /**
   * Adds one enclosure per Atom link with rel="enclosure".
   */
  _atomLinksToEnclosures: function Entry_linkToEnclosure() {
    var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray);
    var enc_links = findAtomLinks("enclosure", links);

    for (var i = 0; i < enc_links.length; ++i) {
      var link = enc_links[i];

      // an enclosure must have an href; skip just this link so that an
      // empty href does not discard the enclosure links that follow it.
      if (!(link.getProperty("href")))
        continue;

      var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);

      // copy Atom bits over to equivalent enclosure bits
      enc.setPropertyAsAString("url", link.getPropertyAsAString("href"));
      if (bagHasKey(link, "type"))
        enc.setPropertyAsAString("type", link.getPropertyAsAString("type"));
      if (bagHasKey(link, "length"))
        enc.setPropertyAsAString("length", link.getPropertyAsAString("length"));

      this._addToEnclosures(enc);
    }
  },

  /**
   * Adds the RSS2 <enclosure> bag itself as an enclosure.
   */
  _enclosureToEnclosures: function Entry_enclosureToEnclosures() {
    var enc = this.fields.getPropertyAsInterface("enclosure", Ci.nsIPropertyBag2);

    // Check for the key before reading it, so that an enclosure without a
    // url attribute is skipped instead of letting getProperty throw out of
    // normalize().
    if (!bagHasKey(enc, "url") || !(enc.getProperty("url")))
      return;

    this._addToEnclosures(enc);
  },

  /**
   * Adds an enclosure for every top-level media:content element.
   */
  _mediacontentToEnclosures: function Entry_mediacontentToEnclosures() {
    var mediacontent = this.fields.getPropertyAsInterface("mediacontent", Ci.nsIArray);
    this._mediaContentArrayToEnclosures(mediacontent);
  },

  /**
   * Adds an enclosure for every media:content element inside media:group.
   */
  _mediagroupToEnclosures: function Entry_mediagroupToEnclosures() {
    var group = this.fields.getPropertyAsInterface("mediagroup", Ci.nsIPropertyBag2);
    var content = group.getPropertyAsInterface("mediacontent", Ci.nsIArray);
    this._mediaContentArrayToEnclosures(content);
  },

  /**
   * Shared worker for the two media:content sources: converts each element
   * of contentArray that has a url into an enclosure bag and adds it.
   */
  _mediaContentArrayToEnclosures: function Entry_mediaContentArrayToEnclosures(contentArray) {
    for (var i = 0; i < contentArray.length; ++i) {
      var contentElement = contentArray.queryElementAt(i, Ci.nsIWritablePropertyBag2);

      // media:content don't require url, but if it's not there, we should
      // skip it.
      if (!bagHasKey(contentElement, "url"))
        continue;

      var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);

      // copy media:content bits over to equivalent enclosure bits
      enc.setPropertyAsAString("url", contentElement.getPropertyAsAString("url"));
      if (bagHasKey(contentElement, "type")) {
        enc.setPropertyAsAString("type", contentElement.getPropertyAsAString("type"));
      }
      if (bagHasKey(contentElement, "fileSize")) {
        enc.setPropertyAsAString("length", contentElement.getPropertyAsAString("fileSize"));
      }

      this._addToEnclosures(enc);
    }
  },

  // XPCOM stuff
  classID: ENTRY_CLASSID,
  QueryInterface: XPCOMUtils.generateQI(
    [Ci.nsIFeedEntry, Ci.nsIFeedContainer]
  )
}

// Entry reuses these Feed implementations unchanged.
Entry.prototype._atomLinksToURI = Feed.prototype._atomLinksToURI;
Entry.prototype._resolveURI = Feed.prototype._resolveURI;
Entry.prototype._resetBagMembersToRawText =
  Feed.prototype._resetBagMembersToRawText;
// TextConstruct represents an element that could contain (X)HTML
//
// A new construct has no language, base or text, is of type "text", and
// holds a reference to the HTML-unescaping service.
function TextConstruct() {
  var unescapeService =
    Cc[UNESCAPE_CONTRACTID].getService(Ci.nsIScriptableUnescapeHTML);

  this.lang = null;
  this.base = null;
  this.type = "text";
  this.text = null;
  this.unescapeHTML = unescapeService;
}
TextConstruct.prototype = {
  /**
   * @returns this.text as is for type "text"; for any other type, the text
   *          with tags stripped and then passed through the unescape
   *          service.
   */
  plainText: function TC_plainText() {
    if (this.type == "text")
      return this.text;
    return this.unescapeHTML.unescape(stripTags(this.text));
  },

  /**
   * Builds a document fragment holding this construct's content.
   *
   * @param element For type "text", the element whose ownerDocument creates
   *                the nodes; otherwise passed through to parseFragment.
   *
   * @returns A fragment with a single text node for type "text", the
   *          result of parseFragment for "xhtml" (parsed as XML) and
   *          "html", and null for any other type.
   */
  createDocumentFragment: function TC_createDocumentFragment(element) {
    var kind = this.type;

    if (kind == "text") {
      var ownerDoc = element.ownerDocument;
      var fragment = ownerDoc.createDocumentFragment();
      fragment.appendChild(ownerDoc.createTextNode(this.text));
      return fragment;
    }

    if (kind != "xhtml" && kind != "html")
      return null;

    var parseAsXML = (kind == "xhtml");
    return this.unescapeHTML.parseFragment(this.text, parseAsXML,
                                           this.base, element);
  },

  // XPCOM stuff
  classID: TEXTCONSTRUCT_CLASSID,
  QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedTextConstruct])
}
// Generator represents the software that produced the feed
//
// All properties start out null; version, uri and agent are filled in by
// the "attributes" setter on the prototype.
function Generator() {
  this.lang = null;
  this.agent = null;
  this.version = null;
  this.uri = null;

  // nsIFeedElementBase
  this._attributes = null;   // backing store for the attributes accessor
  this.baseURI = null;
}
Generator.prototype = {
  // Returns the attribute set most recently assigned.
  get attributes() {
    return this._attributes;
  },

  /**
   * Stores the attribute set and derives version and uri from it: version
   * from "version", uri from "uri" (or "url" when "uri" is empty), both
   * looked up with an empty namespace. If an rdf:resource attribute is
   * present (RSS1), it becomes the agent and replaces the uri.
   */
  set attributes(value) {
    this._attributes = value;

    this.version = value.getValueFromName("","version");

    var plainURI = value.getValueFromName("","uri") ||
                   value.getValueFromName("","url");
    this.uri = strToURI(plainURI, this.baseURI);

    // RSS1
    var rdfResource = value.getValueFromName(RDF_NS,"resource");
    if (rdfResource) {
      this.agent = rdfResource;
      this.uri = strToURI(rdfResource, this.baseURI);
    }
  },

  // XPCOM stuff
  classID: GENERATOR_CLASSID,
  QueryInterface: XPCOMUtils.generateQI(
    [Ci.nsIFeedGenerator, Ci.nsIFeedElementBase]
  )
}
/**
 * Person implements nsIFeedPerson: a name, a uri and an email, all of
 * which start out null.
 */
function Person() {
  this.name = null;
  this.uri = null;
  this.email = null;

  // nsIFeedElementBase
  this.attributes = null;
  this.baseURI = null;
}

Person.prototype = {
  // XPCOM stuff
  classID: PERSON_CLASSID,
  QueryInterface: XPCOMUtils.generateQI(
    [Ci.nsIFeedPerson, Ci.nsIFeedElementBase]
  )
};
/**
 * Map a list of fields into properties on a container.
 *
 * For every key, each entry of its search list is looked up in
 * container.fields. Every truthy value found is assigned to container[key],
 * so when several entries match, the last one wins. Lookups that throw are
 * treated as "not found".
 *
 * @param container An nsIFeedContainer
 * @param fields    A list of fields to search for. List members can
 *                  be a list, in which case the second member is
 *                  transformation function (like parseInt).
 */
function fieldsToObj(container, fields) {
  for (var key in fields) {
    var candidates = fields[key];
    for (var idx = 0; idx < candidates.length; ++idx) {
      var spec = candidates[idx];
      var hasTransform = isArray(spec);
      var fieldName = hasTransform ? spec[0] : spec;

      var found = null;
      try {
        found = container.fields.getProperty(fieldName);
      }
      catch(e) {
        // missing field: leave found as null
      }

      if (!found)
        continue;
      container[key] = hasTransform ? spec[1](found) : found;
    }
  }
}
/**
 * Lower cases an element's localName property
 * @param element A DOM element.
 *
 * @returns The lower case localName property of the specified element
 */
function LC(element) {
  var name = element.localName;
  return name.toLowerCase();
}
// TODO move these post-processor functions
// create a generator element
//
// Stores the trimmed element text s as the generator's agent and returns
// the same generator object.
function atomGenerator(s, generator) {
  generator.QueryInterface(Ci.nsIFeedGenerator);
  var agentName = s.trim();
  generator.agent = agentName;
  return generator;
}
// post-process atom:logo to create an RSS2-like structure
//
// Stores the trimmed element text s as the "url" property of the logo bag.
// Unlike its sibling post-processors this one returns nothing.
function atomLogo(s, logo) {
  var logoURL = s.trim();
  logo.setPropertyAsAString("url", logoURL);
}
// post-process an RSS category, map it to the Atom fields.
//
// Stores the trimmed element text s as the "term" property of the category
// bag and returns that bag.
function rssCatTerm(s, cat) {
  // add slash handling?
  var term = s.trim();
  cat.setPropertyAsAString("term", term);
  return cat;
}
// post-process a GUID
//
// Stores the trimmed element text s as the "guid" property of the guid bag
// and returns that bag.
function rssGuid(s, guid) {
  var identifier = s.trim();
  guid.setPropertyAsAString("guid", identifier);
  return guid;
}
// post-process an RSS author element
//
// It can contain a field like this:
//
//  <author>lawyer@boyer.net (Lawyer Boyer)</author>
//
// or, delightfully, a field like this:
//
//  <dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
//
// We want to split this up and assign it to corresponding Atom
// fields.
//
// @param s      The raw element text.
// @param author The person object to fill in; it is queried for
//               nsIFeedPerson first.
// @returns The same author object, with name and (when one can be
//          identified) email set.
function rssAuthor(s,author) {
  author.QueryInterface(Ci.nsIFeedPerson);
  // check for RSS2 string format
  var chars = s.trim();
  var matches = chars.match(/(.*)\((.*)\)/);
  var emailCheck =
    /^([a-zA-Z0-9_\.\-])+\@(([a-zA-Z0-9\-])+\.)+([a-zA-Z0-9]{2,4})+$/;
  if (matches) {
    var match1 = matches[1].trim();
    var match2 = matches[2].trim();
    if (match2.indexOf("mailto:") == 0)
      match2 = match2.substring(7);
    // Whichever side of the parentheses looks like an address is the
    // email; the other side is the name.
    if (emailCheck.test(match1)) {
      author.email = match1;
      author.name = match2;
    }
    else if (emailCheck.test(match2)) {
      author.email = match2;
      author.name = match1;
    }
    else {
      // put it back together
      author.name = match1 + " (" + match2 + ")";
    }
  }
  else {
    author.name = chars;
    // Only treat the text as an email when it actually contains "@".
    // indexOf returns -1 when the character is absent, so the result must
    // be compared with -1 rather than used as a boolean.
    if (chars.indexOf('@') != -1)
      author.email = chars;
  }
  return author;
}
//
// skipHours and skipDays map to arrays, so we need to change the
// string to an nsISupports in order to stick it in there.
//
// Returns a new supports-string instance whose data is s.
//
function rssArrayElement(s) {
  var wrapped = Cc["@mozilla.org/supports-string;1"]
                  .createInstance(Ci.nsISupportsString);
  wrapped.data = s;
  wrapped.QueryInterface(Ci.nsISupportsString);
  return wrapped;
}
/***** Some feed utils from TBird *****/
/**
 * Tests a RFC822 date against a regex.
 * @param aDateStr A string to test as an RFC822 date.
 *
 * @returns A boolean indicating whether the string is a valid RFC822 date.
 */
function isValidRFC822Date(aDateStr) {
  // A fresh RegExp is compiled from the pattern source on every call.
  return new RegExp(RFC822_RE).test(aDateStr);
}

// Regular expression matching RFC822 dates
// (optional weekday, day, month name, 2- or 4-digit year, time with
// optional seconds, then a numeric or named zone).
const RFC822_RE = "^((Mon|Tue|Wed|Thu|Fri|Sat|Sun)([a-z]+)?,? *)?\\d\\d?"
  + " +(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)([a-z]+)?"
  + " +\\d\\d(\\d\\d)? +\\d?\\d:\\d\\d(:\\d\\d)?"
  + " +([+-]?\\d\\d\\d\\d|GMT|UT[C]?|(E|C|M|P)(ST|DT)|[A-IK-Z])$";
/**
 * XXX -- need to decide what this should return.
 * XXX -- Is there a Date class usable from C++?
 *
 * Tries parsing various date formats.
 * @param dateString
 *        A string that is supposedly an RFC822 or RFC3339 date.
 * @returns One of, in order of precedence:
 *          - the W3CToIETFDate conversion, if the trimmed string starts
 *            with four digits;
 *          - the trimmed string itself, if it is a valid RFC822 date;
 *          - Date.toString() of the value read as a count of seconds, if
 *            that falls in the current year or the two before it;
 *          - null when nothing applies.  XXX--fixme
 */
function dateParse(dateString) {
  var date = dateString.trim();

  // Every branch works on the trimmed string; the W3C branch used to be
  // handed the untrimmed input.
  if (date.search(/^\d\d\d\d/) != -1) //Could be a ISO8601/W3C date
    return W3CToIETFDate(date);

  if (isValidRFC822Date(date))
    return date;

  var seconds = parseInt(date, 10);
  if (!isNaN(seconds)) {
    //It's an integer, so maybe it's a timestamp
    var d = new Date(seconds * 1000);
    var now = new Date();
    var yeardiff = now.getFullYear() - d.getFullYear();
    if ((yeardiff >= 0) && (yeardiff < 3)) {
      // it's quite likely the correct date. 3 years is an arbitrary cutoff,
      // but this is an invalid date format, and there's no way to verify
      // its correctness.
      return d.toString();
    }
  }

  // Can't help.
  return null;
}
const XHTML_NS = "http://www.w3.org/1999/xhtml";

// The XHTMLHandler handles inline XHTML found in things like atom:summary
//
// processor   - receives the serialized markup via returnFromXHTMLHandler
// isAtom      - when true, a depth-1 <div> wrapper is left out of the output
// waiPrefixes - object whose keys are the prefixes currently bound to
//               WAIROLE_NS; updated by the prefix-mapping callbacks
function XHTMLHandler(processor, isAtom, waiPrefixes) {
  this._buf = "";
  this._processor = processor;
  this._depth = 0;
  this._isAtom = isAtom;
  // a stack of lists tracking in-scope namespaces
  this._inScopeNS = [];
  this._waiPrefixes = waiPrefixes;
}

// The fidelity can be improved here, to allow handling of stuff like
// SVG and MathML. XXX
XHTMLHandler.prototype = {

  // look back up at the declared namespaces
  // we always use the same prefixes for our safe stuff
  //
  // Returns true if ns was recorded at any level of the scope stack.
  _isInScope: function XH__isInScope(ns) {
    var stack = this._inScopeNS;
    for (var level = 0; level < stack.length; ++level) {
      var declared = stack[level];
      for (var n = 0; n < declared.length; ++n) {
        if (declared[n] == ns)
          return true;
      }
    }
    return false;
  },

  startDocument: function XH_startDocument() {
  },

  endDocument: function XH_endDocument() {
  },

  // Serializes the start tag of an XHTML element into the buffer. Elements
  // outside the XHTML namespace only affect depth and scope tracking.
  startElement: function XH_startElement(uri, localName, qName, attributes) {
    ++this._depth;
    this._inScopeNS.push([]);

    // RFC4287 requires XHTML to be wrapped in a div that is *not* part of
    // the content. This prevents people from screwing up namespaces, but
    // we need to skip it here.
    if (this._isAtom && this._depth == 1 && localName == "div")
      return;

    // If it's an XHTML element, record it. Otherwise, it's ignored.
    if (uri != XHTML_NS)
      return;

    this._buf += "<" + localName;

    for (var i = 0; i < attributes.length; ++i) {
      var attrURI = attributes.getURI(i);

      // XHTML attributes aren't in a namespace
      if (attrURI == "") {
        this._buf += (" " + attributes.getLocalName(i) + "='" +
                      xmlEscape(attributes.getValue(i)) + "'");
        continue;
      }

      // write a small set of allowed attribute namespaces;
      // attributes in any other namespace are dropped
      var prefix = gAllowedXHTMLNamespaces[attrURI];
      if (prefix == null)
        continue;

      // The attribute value we'll attempt to write
      var attributeValue = xmlEscape(attributes.getValue(i));

      // More QName abuse from W3C
      var rolePrefix = "";
      if (attributes.getLocalName(i) == "role") {
        for (var aPrefix in this._waiPrefixes) {
          if (attributeValue.indexOf(aPrefix + ":") != 0)
            continue;

          // Now, due to the terrible layer mismatch
          // that is QNames in content, we have to see
          // if the attribute value clashes with our
          // namespace declarations.
          var isCollision = false;
          for (var uriKey in gAllowedXHTMLNamespaces) {
            if (gAllowedXHTMLNamespaces[uriKey] == aPrefix)
              isCollision = true;
          }

          if (isCollision) {
            // Rename the prefix (by appending the attribute index) and
            // rewrite the value to use the renamed prefix.
            rolePrefix = aPrefix + i;
            attributeValue =
              rolePrefix + ":" +
              attributeValue.substring(aPrefix.length + 1);
          } else {
            rolePrefix = aPrefix;
          }
          // only the first matching prefix is used
          break;
        }

        if (rolePrefix)
          this._buf += (" xmlns:" + rolePrefix +
                        "='" + WAIROLE_NS + "'");
      }

      // it's an allowed attribute NS.
      // write the attribute
      this._buf += (" " + prefix + ":" +
                    attributes.getLocalName(i) +
                    "='" + attributeValue + "'");

      // write an xmlns declaration if necessary
      if (prefix != "xml" && !this._isInScope(attrURI)) {
        this._inScopeNS[this._inScopeNS.length - 1].push(attrURI);
        this._buf += " xmlns:" + prefix + "='" + attrURI + "'";
      }
    }

    this._buf += ">";
  },

  // Serializes the end tag of an XHTML element, or hands the accumulated
  // buffer back to the processor once the enclosing element is closed.
  endElement: function XH_endElement(uri, localName, qName) {
    --this._depth;
    this._inScopeNS.pop();

    // We need to skip outer divs in Atom. See comment in startElement.
    if (this._isAtom && this._depth == 0 && localName == "div")
      return;

    // When we peek too far, go back to the main processor
    if (this._depth < 0) {
      var markup = this._buf.trim();
      this._processor.returnFromXHTMLHandler(markup, uri, localName, qName);
      return;
    }

    // If it's an XHTML element, record it. Otherwise, it's ignored.
    if (uri == XHTML_NS)
      this._buf += "</" + localName + ">";
  },

  // Text is appended in escaped form.
  characters: function XH_characters(data) {
    this._buf += xmlEscape(data);
  },

  // Remembers non-empty prefixes that get bound to the WAI role namespace.
  startPrefixMapping: function XH_startPrefixMapping(prefix, uri) {
    if (prefix && uri == WAIROLE_NS)
      this._waiPrefixes[prefix] = WAIROLE_NS;
  },

  // Forgets a non-empty prefix when its mapping ends.
  endPrefixMapping: function FP_endPrefixMapping(prefix) {
    if (prefix)
      delete this._waiPrefixes[prefix];
  },

  processingInstruction: function XH_processingInstruction() {
  },
}
/**
 * The ExtensionHandler deals with elements we haven't explicitly
 * added to our transition table in the FeedProcessor.
 *
 * It remembers the outermost element it is given, collects that element's
 * text, and reports back through processor.returnFromExtHandler when the
 * element closes.
 */
function ExtensionHandler(processor) {
  this._buf = "";
  this._depth = 0;
  this._hasChildElements = false;

  // The FeedProcessor
  this._processor = processor;

  // Fields of the outermost extension element.
  this._localName = null;
  this._uri = null;
  this._qName = null;
  this._attrs = null;
}

ExtensionHandler.prototype = {
  startDocument: function EH_startDocument() {
  },
  endDocument: function EH_endDocument() {
  },

  // Records the outermost element's identity and notes whether any nested
  // element has been seen. (Two locals that were computed from gNamespaces
  // here but never read have been removed.)
  startElement: function EH_startElement(uri, localName, qName, attrs) {
    ++this._depth;

    if (this._depth == 1) {
      this._uri = uri;
      this._localName = localName;
      this._qName = qName;
      this._attrs = attrs;
    }

    // if we descend into another element, we won't send text
    this._hasChildElements = (this._depth > 1);
  },

  // When the outermost element closes, reports its uri, local name,
  // attributes and trimmed text (null if it had child elements).
  endElement: function EH_endElement(uri, localName, qName) {
    --this._depth;
    if (this._depth == 0) {
      var text = this._hasChildElements ? null : this._buf.trim();
      this._processor.returnFromExtHandler(this._uri, this._localName,
                                           text, this._attrs);
    }
  },

  // Text is collected only until the first child element is seen.
  characters: function EH_characters(data) {
    if (!this._hasChildElements)
      this._buf += data;
  },
  startPrefixMapping: function EH_startPrefixMapping() {
  },
  endPrefixMapping: function EH_endPrefixMapping() {
  },
  processingInstruction: function EH_processingInstruction() {
  },
};
/**
 * ElementInfo is a simple container object that describes
 * some characteristics of a feed element. For example, it
 * says whether an element can be expected to appear more
 * than once inside a given entry or feed.
 *
 * The four arguments are stored under the same names; isWrapper is
 * always false for this kind of info object.
 */
function ElementInfo(fieldName, containerClass, closeFunc, isArray) {
  // stored verbatim from the arguments
  this.fieldName = fieldName;
  this.containerClass = containerClass;
  this.closeFunc = closeFunc;
  this.isArray = isArray;

  // fixed marker distinguishing this from WrapperElementInfo
  this.isWrapper = false;
}
/**
 * FeedElementInfo represents a feed element, usually the root.
 *
 * Stores the field name and feed version it is given; isWrapper is
 * always false.
 */
function FeedElementInfo(fieldName, feedVersion) {
  // fixed marker distinguishing this from WrapperElementInfo
  this.isWrapper = false;

  // stored verbatim from the arguments
  this.fieldName = fieldName;
  this.feedVersion = feedVersion;
}
/**
 * Some feed formats include vestigial wrapper elements that we don't
 * want to include in our object model, but we do need to keep track
 * of during parsing.
 *
 * Stores the field name it is given; isWrapper is always true.
 */
function WrapperElementInfo(fieldName) {
  // fixed marker identifying a wrapper element
  this.isWrapper = true;

  // stored verbatim from the argument
  this.fieldName = fieldName;
}
/***** The Processor *****/
// Constructor: creates the SAX reader and the feed-level property bag,
// resets all per-parse state, and builds the two lookup tables used while
// parsing (_textConstructs and the _trans state-transition table).
function FeedProcessor() {
  this._reader = Cc[SAX_CONTRACTID].createInstance(Ci.nsISAXXMLReader);
  this._buf = "";
  this._feed = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
  this._handlerStack = [];
  this._xmlBaseStack = []; // sparse array keyed to nesting depth
  this._depth = 0;
  this._state = "START";
  this._result = null;
  this._extensionHandler = null;
  this._xhtmlHandler = null;
  this._haveSentResult = false;

  // http://www.w3.org/WAI/PF/GUI/ uses QNames in content :(
  this._waiPrefixes = {};

  // The nsIFeedResultListener waiting for the parse results
  this.listener = null;

  // These elements can contain (X)HTML or plain text.
  // This table records the default treatment of each of them.
  this._textConstructs = {
    "atom:title": "text",
    "atom:summary": "text",
    "atom:rights": "text",
    "atom:content": "text",
    "atom:subtitle": "text",
    "description": "html",
    "rss1:description": "html",
    "dc:description": "html",
    "content:encoded": "html",
    "title": "text",
    "rss1:title": "text",
    "atom03:title": "text",
    "atom03:tagline": "text",
    "atom03:summary": "text",
    "atom03:content": "text"
  };

  this._stack = [];

  // Small factories for the ElementInfo shapes that recur in the table.
  var personClass = Cc[PERSON_CONTRACTID];
  var entryClass = Cc[ENTRY_CONTRACTID];
  var generatorClass = Cc[GENERATOR_CONTRACTID];

  // Repeatable person element (authors, contributors).
  function people(fieldName, closeFunc) {
    return new ElementInfo(fieldName, personClass, closeFunc, true);
  }
  // Repeatable entry/item element.
  function entryList(fieldName) {
    return new ElementInfo(fieldName, entryClass, null, true);
  }
  // Single generator element.
  function generator(closeFunc) {
    return new ElementInfo("generator", generatorClass, closeFunc, false);
  }
  // Single element with no dedicated container class.
  function bag(fieldName, closeFunc) {
    return new ElementInfo(fieldName, null, closeFunc, false);
  }
  // Repeatable element with no dedicated container class.
  function bagList(fieldName, closeFunc) {
    return new ElementInfo(fieldName, null, closeFunc, true);
  }

  // State-transition table: current state -> element key -> element info.
  this._trans = {
    "START": {
      // If we hit a root RSS element, treat as RSS2.
      "rss": new FeedElementInfo("RSS2", "rss2"),

      // If we hit an RDF element, it could be RSS1, but we can't
      // verify that until we hit a rss1:channel element.
      "rdf:RDF": new WrapperElementInfo("RDF"),

      // If we hit an Atom 1.0 element, treat as Atom 1.0.
      "atom:feed": new FeedElementInfo("Atom", "atom"),

      // Treat as Atom 0.3
      "atom03:feed": new FeedElementInfo("Atom03", "atom03")
    },

    /********* RSS2 **********/
    "IN_RSS2": {
      "channel": new WrapperElementInfo("channel")
    },

    "IN_CHANNEL": {
      "item": entryList("items"),
      "managingEditor": people("authors", rssAuthor),
      "dc:creator": people("authors", rssAuthor),
      "dc:author": people("authors", rssAuthor),
      "dc:contributor": people("contributors", rssAuthor),
      "category": bagList("categories", rssCatTerm),
      "cloud": bag("cloud", null),
      "image": bag("image", null),
      "textInput": bag("textInput", null),
      "skipDays": bag("skipDays", null),
      "skipHours": bag("skipHours", null),
      "generator": generator(atomGenerator)
    },

    "IN_ITEMS": {
      "author": people("authors", rssAuthor),
      "dc:creator": people("authors", rssAuthor),
      "dc:author": people("authors", rssAuthor),
      "dc:contributor": people("contributors", rssAuthor),
      "category": bagList("categories", rssCatTerm),
      "enclosure": bag("enclosure", null),
      "media:content": bagList("mediacontent", null),
      "media:group": bag("mediagroup", null),
      "guid": bag("guid", rssGuid)
    },

    "IN_SKIPDAYS": {
      "day": bagList("days", rssArrayElement)
    },

    "IN_SKIPHOURS": {
      "hour": bagList("hours", rssArrayElement)
    },

    "IN_MEDIAGROUP": {
      "media:content": bagList("mediacontent", null)
    },

    /********* RSS1 **********/
    "IN_RDF": {
      // If we hit a rss1:channel, we can verify that we have RSS1
      "rss1:channel": new FeedElementInfo("rdf_channel", "rss1"),
      "rss1:image": bag("image", null),
      "rss1:textinput": bag("textInput", null),
      "rss1:item": entryList("items")
    },

    "IN_RDF_CHANNEL": {
      "admin:generatorAgent": generator(null),
      "dc:creator": people("authors", rssAuthor),
      "dc:author": people("authors", rssAuthor),
      "dc:contributor": people("contributors", rssAuthor)
    },

    /********* ATOM 1.0 **********/
    "IN_ATOM": {
      "atom:author": people("authors", null),
      "atom:generator": generator(atomGenerator),
      "atom:contributor": people("contributors", null),
      "atom:link": bagList("links", null),
      "atom:logo": bag("atom:logo", atomLogo),
      "atom:entry": entryList("entries")
    },

    "IN_ENTRIES": {
      "atom:author": people("authors", null),
      "atom:contributor": people("contributors", null),
      "atom:link": bagList("links", null)
    },

    /********* ATOM 0.3 **********/
    "IN_ATOM03": {
      "atom03:author": people("authors", null),
      "atom03:contributor": people("contributors", null),
      "atom03:link": bagList("links", null),
      "atom03:entry": entryList("atom03_entries"),
      "atom03:generator": generator(atomGenerator)
    },

    "IN_ATOM03_ENTRIES": {
      "atom03:author": people("authors", null),
      "atom03:contributor": people("contributors", null),
      "atom03:link": bagList("links", null),
      "atom03:entry": entryList("atom03_entries")
    }
  };
}
// See startElement for a long description of how feeds are processed.
FeedProcessor.prototype = {
// Set ourselves as the SAX handler, and set the base URI
_init: function FP_init(uri) {
this._reader.contentHandler = this;
this._reader.errorHandler = this;
this._result = Cc[FR_CONTRACTID].createInstance(Ci.nsIFeedResult);
if (uri) {
this._result.uri = uri;
this._reader.baseURI = uri;
this._xmlBaseStack[0] = uri;
}
},
// This function is called once we figure out what type of feed
// we're dealing with. Some feed types require digging a bit further
// than the root.
_docVerified: function FP_docVerified(version) {
this._result.doc = Cc[FEED_CONTRACTID].createInstance(Ci.nsIFeed);
this._result.doc.baseURI =
this._xmlBaseStack[this._xmlBaseStack.length - 1];
this._result.doc.fields = this._feed;
this._result.version = version;
},
// When we're done with the feed, let the listener know what
// happened.
_sendResult: function FP_sendResult() {
this._haveSentResult = true;
try {
// Can be null when a non-feed is fed to us
if (this._result.doc)
this._result.doc.normalize();
}
catch (e) {
LOG("FIXME: " + e);
}
try {
if (this.listener != null)
this.listener.handleResult(this._result);
}
finally {
this._result = null;
}
},
// Parsing functions
parseFromStream: function FP_parseFromStream(stream, uri) {
this._init(uri);
this._reader.parseFromStream(stream, null, stream.available(),
"application/xml");
this._reader = null;
},
parseFromString: function FP_parseFromString(inputString, uri) {
this._init(uri);
this._reader.parseFromString(inputString, "application/xml");
this._reader = null;
},
parseAsync: function FP_parseAsync(requestObserver, uri) {
this._init(uri);
this._reader.parseAsync(requestObserver);
},
// nsIStreamListener
// The XMLReader will throw sensible exceptions if these get called
// out of order.
onStartRequest: function FP_onStartRequest(request, context) {
// this will throw if the request is not a channel, but so will nsParser.
var channel = request.QueryInterface(Ci.nsIChannel);
channel.contentType = "application/vnd.mozilla.maybe.feed";
this._reader.onStartRequest(request, context);
},
onStopRequest: function FP_onStopRequest(request, context, statusCode) {
try {
this._reader.onStopRequest(request, context, statusCode);
}
finally {
this._reader = null;
}
},
onDataAvailable:
function FP_onDataAvailable(request, context, inputStream, offset, count) {
this._reader.onDataAvailable(request, context, inputStream, offset, count);
},
// nsISAXErrorHandler
// We only care about fatal errors. When this happens, we may have
// parsed through the feed metadata and some number of entries. The
// listener can still show some of that data if it wants, and we'll
// set the bozo bit to indicate we were unable to parse all the way
// through.
fatalError: function FP_reportError() {
this._result.bozo = true;
//XXX need to QI to FeedProgressListener
if (!this._haveSentResult)
this._sendResult();
},
// nsISAXContentHandler
startDocument: function FP_startDocument() {
//LOG("----------");
},
endDocument: function FP_endDocument() {
if (!this._haveSentResult)
this._sendResult();
},
// The transitions defined above identify elements that contain more
// than just text. For example RSS items contain many fields, and so
// do Atom authors. The only commonly used elements that contain
// mixed content are Atom Text Constructs of type="xhtml", which we
// delegate to another handler for cleaning. That leaves a couple
// different types of elements to deal with: those that should occur
// only once, such as title elements, and those that can occur
// multiple times, such as the RSS category element and the Atom
// link element. Most of the RSS1/DC elements can occur multiple
// times in theory, but in practice, the only ones that do have
// analogues in Atom.
//
// Some elements are also groups of attributes or sub-elements,
// while others are simple text fields. For the most part, we don't
// have to pay explicit attention to the simple text elements,
// unless we want to post-process the resulting string to transform
// it into some richer object like a Date or URI.
//
// Elements that have more sophisticated content models still end up
// being dictionaries, whether they are based on attributes like RSS
// cloud, sub-elements like Atom author, or even items and
// entries. These elements are treated as "containers". It's
// theoretically possible for a container to have an attribute with
// the same universal name as a sub-element, but none of the feed
// formats allow this by default, and I don't know of any extension that
// works this way.
//
startElement: function FP_startElement(uri, localName, qName, attributes) {
this._buf = "";
++this._depth;
var elementInfo;
//LOG("<" + localName + ">");
// Check for xml:base
var base = attributes.getValueFromName(XMLNS, "base");
if (base) {
this._xmlBaseStack[this._depth] =
strToURI(base, this._xmlBaseStack[this._xmlBaseStack.length - 1]);
}
// To identify the element we're dealing with, we look up the
// namespace URI in our gNamespaces dictionary, which will give us
// a "canonical" prefix for a namespace URI. For example, this
// allows Dublin Core "creator" elements to be consistently mapped
// to "dc:creator", for easy field access by consumer code. This
// strategy also happens to shorten up our state table.
var key = this._prefixForNS(uri) + localName;
// Check to see if we need to hand this off to our XHTML handler.
// The elements we're dealing with will look like this:
//
// <title type="xhtml">
// <div xmlns="http://www.w3.org/1999/xhtml">
// A title with <b>bold</b> and <i>italics</i>.
// </div>
// </title>
//
// When it returns in returnFromXHTMLHandler, the handler should
// give us back a string like this:
//
// "A title with <b>bold</b> and <i>italics</i>."
//
// The Atom spec explicitly says the div is not part of the content,
// and explicitly allows whitespace collapsing.
//
if ((this._result.version == "atom" || this._result.version == "atom03") &&
this._textConstructs[key] != null) {
var type = attributes.getValueFromName("","type");
if (type != null && type.indexOf("xhtml") >= 0) {
this._xhtmlHandler =
new XHTMLHandler(this, (this._result.version == "atom"),
this._waiPrefixes);
this._reader.contentHandler = this._xhtmlHandler;
return;
}
}
// Check our current state, and see if that state has a defined
// transition. For example, this._trans["atom:entry"]["atom:author"]
// will have one, and it tells us to add an item to our authors array.
if (this._trans[this._state] && this._trans[this._state][key]) {
elementInfo = this._trans[this._state][key];
}
else {
// If we don't have a transition, hand off to extension handler
this._extensionHandler = new ExtensionHandler(this);
this._reader.contentHandler = this._extensionHandler;
this._extensionHandler.startElement(uri, localName, qName, attributes);
return;
}
// This distinguishes wrappers like 'channel' from elements
// we'd actually like to do something with (which will test true).
this._handlerStack[this._depth] = elementInfo;
if (elementInfo.isWrapper) {
this._state = "IN_" + elementInfo.fieldName.toUpperCase();
this._stack.push([this._feed, this._state]);
}
else if (elementInfo.feedVersion) {
this._state = "IN_" + elementInfo.fieldName.toUpperCase();
// Check for the older RSS2 variants
if (elementInfo.feedVersion == "rss2")
elementInfo.feedVersion = this._findRSSVersion(attributes);
else if (uri == RSS090NS)
elementInfo.feedVersion = "rss090";
this._docVerified(elementInfo.feedVersion);
this._stack.push([this._feed, this._state]);
this._mapAttributes(this._feed, attributes);
}
else {
this._state = this._processComplexElement(elementInfo, attributes);
}
},
// In the endElement handler, we decrement the stack and look
// for cleanup/transition functions to execute. The second part
// of the state transition works as above in startElement, but
// the state we're looking for is prefixed with an underscore
// to distinguish endElement events from startElement events.
endElement: function FP_endElement(uri, localName, qName) {
var elementInfo = this._handlerStack[this._depth];
//LOG("</" + localName + ">");
if (elementInfo && !elementInfo.isWrapper)
this._closeComplexElement(elementInfo);
// cut down xml:base context
if (this._xmlBaseStack.length == this._depth + 1)
this._xmlBaseStack = this._xmlBaseStack.slice(0, this._depth);
// our new state is whatever is at the top of the stack now
if (this._stack.length > 0)
this._state = this._stack[this._stack.length - 1][1];
this._handlerStack = this._handlerStack.slice(0, this._depth);
--this._depth;
},
// Buffer up character data. The buffer is cleared with every
// opening element.
characters: function FP_characters(data) {
this._buf += data;
},
// TODO: It would be nice to check new prefixes here, and if they
// don't conflict with the ones we've defined, throw them in a
// dictionary to check.
startPrefixMapping: function FP_startPrefixMapping(prefix, uri) {
// Thanks for QNames in content, W3C
// This will even be a perf hit for every single feed
// http://www.w3.org/WAI/PF/GUI/
if (prefix && uri == WAIROLE_NS)
this._waiPrefixes[prefix] = WAIROLE_NS;
},
endPrefixMapping: function FP_endPrefixMapping(prefix) {
if (prefix)
delete this._waiPrefixes[prefix];
},
processingInstruction: function FP_processingInstruction(target, data) {
if (target == "xml-stylesheet") {
var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/);
if (hrefAttribute && hrefAttribute.length == 2)
this._result.stylesheet = strToURI(hrefAttribute[1], this._result.uri);
}
},
// end of nsISAXContentHandler
// Handle our more complicated elements--those that contain
// attributes and child elements.
_processComplexElement:
function FP__processComplexElement(elementInfo, attributes) {
var obj, key, prefix;
// If the container is an entry/item, it'll need to have its
// more esoteric properties put in the 'fields' property bag.
if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) {
obj = elementInfo.containerClass.createInstance(Ci.nsIFeedEntry);
obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1];
this._mapAttributes(obj.fields, attributes);
}
else if (elementInfo.containerClass) {
obj = elementInfo.containerClass.createInstance(Ci.nsIFeedElementBase);
obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1];
obj.attributes = attributes; // just set the SAX attributes
}
else {
obj = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
this._mapAttributes(obj, attributes);
}
// We should have a container/propertyBag that's had its
// attributes processed. Now we need to attach it to its
// container.
var newProp;
// First we'll see what's on top of the stack.
var container = this._stack[this._stack.length - 1][0];
// Check to see if it has the property
var prop;
try {
prop = container.getProperty(elementInfo.fieldName);
}
catch(e) {
}
if (elementInfo.isArray) {
if (!prop) {
container.setPropertyAsInterface(elementInfo.fieldName,
Cc[ARRAY_CONTRACTID].
createInstance(Ci.nsIMutableArray));
}
newProp = container.getProperty(elementInfo.fieldName);
// XXX This QI should not be necessary, but XPConnect seems to fly
// off the handle in the browser, and loses track of the interface
// on large files. Bug 335638.
newProp.QueryInterface(Ci.nsIMutableArray);
newProp.appendElement(obj,false);
// If new object is an nsIFeedContainer, we want to deal with
// its member nsIPropertyBag instead.
if (isIFeedContainer(obj))
newProp = obj.fields;
}
else {
// If it doesn't, set it.
if (!prop) {
container.setPropertyAsInterface(elementInfo.fieldName,obj);
}
newProp = container.getProperty(elementInfo.fieldName);
}
// make our new state name, and push the property onto the stack
var newState = "IN_" + elementInfo.fieldName.toUpperCase();
this._stack.push([newProp, newState, obj]);
return newState;
},
// Sometimes we need reconcile the element content with the object
// model for a given feed. We use helper functions to do the
// munging, but we need to identify array types here, so the munging
// happens only to the last element of an array.
_closeComplexElement: function FP__closeComplexElement(elementInfo) {
var stateTuple = this._stack.pop();
var container = stateTuple[0];
var containerParent = stateTuple[2];
var element = null;
var isArray = isIArray(container);
// If it's an array and we have to post-process,
// grab the last element
if (isArray)
element = container.queryElementAt(container.length - 1, Ci.nsISupports);
else
element = container;
// Run the post-processing function if there is one.
if (elementInfo.closeFunc)
element = elementInfo.closeFunc(this._buf, element);
// If an nsIFeedContainer was on top of the stack,
// we need to normalize it
if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID])
containerParent.normalize();
// If it's an array, re-set the last element
if (isArray)
container.replaceElementAt(element, container.length - 1, false);
},
_prefixForNS: function FP_prefixForNS(uri) {
if (!uri)
return "";
var prefix = gNamespaces[uri];
if (prefix)
return prefix + ":";
if (uri.toLowerCase().indexOf("http://backend.userland.com") == 0)
return "";
else
return null;
},
_mapAttributes: function FP__mapAttributes(bag, attributes) {
// Cycle through the attributes, and set our properties using the
// prefix:localNames we find in our namespace dictionary.
for (var i = 0; i < attributes.length; ++i) {
var key = this._prefixForNS(attributes.getURI(i)) + attributes.getLocalName(i);
var val = attributes.getValue(i);
bag.setPropertyAsAString(key, val);
}
},
// Only for RSS2esque formats
_findRSSVersion: function FP__findRSSVersion(attributes) {
var versionAttr = attributes.getValueFromName("", "version").trim();
var versions = { "0.91":"rss091",
"0.92":"rss092",
"0.93":"rss093",
"0.94":"rss094" }
if (versions[versionAttr])
return versions[versionAttr];
if (versionAttr.substr(0,2) != "2.")
return "rssUnknown";
return "rss2";
},
// unknown element values are returned here. See startElement above
// for how this works.
returnFromExtHandler:
function FP_returnExt(uri, localName, chars, attributes) {
--this._depth;
// take control of the SAX events
this._reader.contentHandler = this;
if (localName == null && chars == null)
return;
// we don't take random elements inside rdf:RDF
if (this._state == "IN_RDF")
return;
// Grab the top of the stack
var top = this._stack[this._stack.length - 1];
if (!top)
return;
var container = top[0];
// Grab the last element if it's an array
if (isIArray(container)) {
var contract = this._handlerStack[this._depth].containerClass;
// check if it's something specific, but not an entry
if (contract && contract != Cc[ENTRY_CONTRACTID]) {
var el = container.queryElementAt(container.length - 1,
Ci.nsIFeedElementBase);
// XXX there must be a way to flatten these interfaces
if (contract == Cc[PERSON_CONTRACTID])
el.QueryInterface(Ci.nsIFeedPerson);
else
return; // don't know about this interface
var propName = localName;
var prefix = gNamespaces[uri];
// synonyms
if ((uri == "" ||
prefix &&
((prefix.indexOf("atom") > -1) ||
(prefix.indexOf("rss") > -1))) &&
(propName == "url" || propName == "href"))
propName = "uri";
try {
if (el[propName] !== "undefined") {
var propValue = chars;
// convert URI-bearing values to an nsIURI
if (propName == "uri") {
var base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
propValue = strToURI(chars, base);
}
el[propName] = propValue;
}
}
catch(e) {
// ignore XPConnect errors
}
// the rest of the function deals with entry- and feed-level stuff
return;
}
else {
container = container.queryElementAt(container.length - 1,
Ci.nsIWritablePropertyBag2);
}
}
// Make the buffer our new property
var propName = this._prefixForNS(uri) + localName;
// But, it could be something containing HTML. If so,
// we need to know about that.
if (this._textConstructs[propName] != null &&
this._handlerStack[this._depth].containerClass !== null) {
var newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
createInstance(Ci.nsIFeedTextConstruct);
newProp.text = chars;
// Look up the default type in our table
var type = this._textConstructs[propName];
var typeAttribute = attributes.getValueFromName("","type");
if (this._result.version == "atom" && typeAttribute != null) {
type = typeAttribute;
}
else if (this._result.version == "atom03" && typeAttribute != null) {
if (typeAttribute.toLowerCase().indexOf("xhtml") >= 0) {
type = "xhtml";
}
else if (typeAttribute.toLowerCase().indexOf("html") >= 0) {
type = "html";
}
else if (typeAttribute.toLowerCase().indexOf("text") >= 0) {
type = "text";
}
}
// If it's rss feed-level description, it's not supposed to have html
if (this._result.version.indexOf("rss") >= 0 &&
this._handlerStack[this._depth].containerClass != ENTRY_CONTRACTID) {
type = "text";
}
newProp.type = type;
newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
container.setPropertyAsInterface(propName, newProp);
}
else {
container.setPropertyAsAString(propName, chars);
}
},
// Sometimes, we'll hand off SAX handling duties to an XHTMLHandler
// (see above) that will scrape out non-XHTML stuff, normalize
// namespaces, and remove the wrapper div from Atom 1.0. When the
// XHTMLHandler is done, it'll callback here.
returnFromXHTMLHandler:
function FP_returnFromXHTMLHandler(chars, uri, localName, qName) {
// retake control of the SAX content events
this._reader.contentHandler = this;
// Grab the top of the stack
var top = this._stack[this._stack.length - 1];
if (!top)
return;
var container = top[0];
// Assign the property
var newProp = newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
createInstance(Ci.nsIFeedTextConstruct);
newProp.text = chars;
newProp.type = "xhtml";
newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
container.setPropertyAsInterface(this._prefixForNS(uri) + localName,
newProp);
// XHTML will cause us to peek too far. The XHTML handler will
// send us an end element to call. RFC4287-valid feeds allow a
// more graceful way to handle this. Unfortunately, we can't count
// on compliance at this point.
this.endElement(uri, localName, qName);
},
// XPCOM stuff
// classID: the class ID constant FP_CLASSID (defined elsewhere in this
// file).
classID: FP_CLASSID,
// QueryInterface is generated from the list of interfaces this object
// implements: the feed processor interface, the SAX content and error
// handler interfaces, and the stream listener / request observer
// interfaces.
QueryInterface: XPCOMUtils.generateQI(
[Ci.nsIFeedProcessor, Ci.nsISAXContentHandler, Ci.nsISAXErrorHandler,
Ci.nsIStreamListener, Ci.nsIRequestObserver]
)
}
// The component constructors exported by this file.
var components = [
  FeedProcessor,
  FeedResult,
  Feed,
  Entry,
  TextConstruct,
  Generator,
  Person
];
// Factory entry point built from the list above.
// NOTE(review): presumably looked up by name by the component loader --
// confirm before renaming.
var NSGetFactory = XPCOMUtils.generateNSGetFactory(components);