mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 774914/785145 - Convert divs with only a <p> element child into plain <p> elements (r=mfinkle)
This commit is contained in:
parent
19a391a60a
commit
2be189d784
@ -458,18 +458,29 @@ Readability.prototype = {
|
||||
|
||||
// Turn all divs that don't have children block level elements into p's
|
||||
if (node.tagName === "DIV") {
|
||||
if (node.innerHTML.search(this.REGEXPS.divToPElements) === -1) {
|
||||
let newNode = doc.createElement('p');
|
||||
newNode.innerHTML = node.innerHTML;
|
||||
// Sites like http://mobile.slate.com enclose each paragraph with a DIV
|
||||
// element. DIVs with only a P element inside and no text content can be
|
||||
// safely converted into plain P elements to avoid confusing the scoring
|
||||
// algorithm with DIVs which are, in practice, paragraphs.
|
||||
let pIndex = this._getSinglePIndexInsideDiv(node);
|
||||
|
||||
if (node.innerHTML.search(this.REGEXPS.divToPElements) === -1 || pIndex >= 0) {
|
||||
let newNode;
|
||||
if (pIndex >= 0) {
|
||||
newNode = node.childNodes[pIndex];
|
||||
} else {
|
||||
newNode = doc.createElement('p');
|
||||
newNode.innerHTML = node.innerHTML;
|
||||
|
||||
// Manually update allElements since it is not a live NodeList
|
||||
newNode._index = nodeIndex;
|
||||
allElements[nodeIndex] = newNode;
|
||||
|
||||
nodesToScore[nodesToScore.length] = newNode;
|
||||
}
|
||||
|
||||
node.parentNode.replaceChild(newNode, node);
|
||||
|
||||
// Manually update allElements since it is not a live NodeList
|
||||
newNode._index = nodeIndex;
|
||||
allElements[nodeIndex] = newNode;
|
||||
purgeNode(node);
|
||||
|
||||
nodeIndex -= 1;
|
||||
nodesToScore[nodesToScore.length] = node;
|
||||
} else {
|
||||
// EXPERIMENTAL
|
||||
for (let i = 0, il = node.childNodes.length; i < il; i += 1) {
|
||||
@ -709,6 +720,36 @@ Readability.prototype = {
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Get child index of the only P element inside a DIV with no
|
||||
* text content. Returns -1 if the DIV node contains non-empty
|
||||
* text nodes or if it contains other element nodes.
|
||||
*
|
||||
* @param Element
|
||||
**/
|
||||
_getSinglePIndexInsideDiv: function(e) {
|
||||
let childNodes = e.childNodes;
|
||||
let pIndex = -1;
|
||||
|
||||
for (let i = childNodes.length; --i >= 0;) {
|
||||
let node = childNodes[i];
|
||||
|
||||
if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
if (node.tagName !== "P")
|
||||
return -1;
|
||||
|
||||
if (pIndex >= 0)
|
||||
return -1;
|
||||
|
||||
pIndex = i;
|
||||
} else if (node.nodeType == Node.TEXT_NODE && this._getInnerText(node, false)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return pIndex;
|
||||
},
|
||||
|
||||
/**
|
||||
* Get the inner text of a node - cross browser compatibly.
|
||||
* This also strips out any excess whitespace to be found.
|
||||
|
Loading…
Reference in New Issue
Block a user