No bug - update readability from github repo, includes fix for bug 1230050, rs=uplift-with-r+-patches-from-github

This commit is contained in:
Gijs Kruitbosch 2016-01-24 23:21:58 +00:00
parent 63b93c5c89
commit 8b153dd066
2 changed files with 14 additions and 11 deletions

View File

@ -33,10 +33,6 @@
*/ */
(function (global) { (function (global) {
function error(m) {
dump("JSDOMParser error: " + m + "\n");
}
// XML only defines these and the numeric ones: // XML only defines these and the numeric ones:
var entityTable = { var entityTable = {
@ -676,9 +672,9 @@
arr.push(" " + attr.name + '=' + quote + val + quote); arr.push(" " + attr.name + '=' + quote + val + quote);
} }
if (child.localName in voidElems) { if (child.localName in voidElems && !child.childNodes.length) {
// if this is a self-closing element, end it here // if this is a self-closing element, end it here
arr.push(">"); arr.push("/>");
} else { } else {
// otherwise, add its children // otherwise, add its children
arr.push(">"); arr.push(">");
@ -849,9 +845,16 @@
// makeElementNode(), which saves us from having to allocate a new array // makeElementNode(), which saves us from having to allocate a new array
// every time. // every time.
this.retPair = []; this.retPair = [];
this.errorState = "";
}; };
JSDOMParser.prototype = { JSDOMParser.prototype = {
error: function(m) {
dump("JSDOMParser error: " + m + "\n");
this.errorState += m + "\n";
},
/** /**
* Look at the next character without advancing the index. * Look at the next character without advancing the index.
*/ */
@ -906,7 +909,7 @@
// After a '=', we should see a '"' for the attribute value // After a '=', we should see a '"' for the attribute value
var c = this.nextChar(); var c = this.nextChar();
if (c !== '"' && c !== "'") { if (c !== '"' && c !== "'") {
error("Error reading attribute " + name + ", expecting '\"'"); this.error("Error reading attribute " + name + ", expecting '\"'");
return; return;
} }
@ -959,12 +962,12 @@
} }
// If this is a self-closing tag, read '/>' // If this is a self-closing tag, read '/>'
var closed = tag in voidElems; var closed = false;
if (c === "/") { if (c === "/") {
closed = true; closed = true;
c = this.nextChar(); c = this.nextChar();
if (c !== ">") { if (c !== ">") {
error("expected '>' to close " + tag); this.error("expected '>' to close " + tag);
return false; return false;
} }
} }
@ -1134,7 +1137,7 @@
} }
var closingTag = "</" + localName + ">"; var closingTag = "</" + localName + ">";
if (!this.match(closingTag)) { if (!this.match(closingTag)) {
error("expected '" + closingTag + "'"); this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length));
return null; return null;
} }
} }

View File

@ -117,7 +117,7 @@ Readability.prototype = {
unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|menu|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i, unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|menu|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
okMaybeItsACandidate: /and|article|body|column|main|shadow/i, okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i, positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
negative: /hidden|banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i, negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i, extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
byline: /byline|author|dateline|writtenby/i, byline: /byline|author|dateline|writtenby/i,
replaceFonts: /<(\/?)font[^>]*>/gi, replaceFonts: /<(\/?)font[^>]*>/gi,