Bug 964024 - mozTXTToHTMLConv should never convert content inside <style>, <script>, <head> tags. r=honzab

This commit is contained in:
Magnus Melin 2014-02-08 12:39:00 +02:00
parent 66b8dafbab
commit d441437825
2 changed files with 114 additions and 16 deletions

View File

@ -1223,16 +1223,17 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
#endif
// Look for simple entities not included in tags and scan them.
/* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
or in a tag ("<!--[...]-->").
Unescape the rest (text between tags) and pass it to ScanTXT. */
// Skip all tags ("<[...]>") and content in a link tag ("<a [...]</a>"),
// comment tag ("<!--[...]-->"), style tag, script tag or head tag.
// Unescape the rest (text between tags) and pass it to ScanTXT.
for (int32_t i = 0; i < lengthOfInString;)
{
if (aInString[i] == '<') // html tag
{
uint32_t start = uint32_t(i);
if (nsCRT::ToLower((char)aInString[uint32_t(i) + 1]) == 'a')
// if a tag, skip until </a>
int32_t start = i;
if (Substring(aInString, i + 1, 2).LowerCaseEqualsASCII("a "))
// if a tag, skip until </a>.
// Make sure there's a space after, not to match "abbr".
{
i = aInString.Find("</a>", true, i);
if (i == kNotFound)
@ -1240,16 +1241,45 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
else
i += 4;
}
else if (aInString[uint32_t(i) + 1] == '!' && aInString[uint32_t(i) + 2] == '-' &&
aInString[uint32_t(i) + 3] == '-')
//if out-commended code, skip until -->
else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
// if commented-out code, skip until -->
{
i = aInString.Find("-->", false, i);
if (i == kNotFound)
i = lengthOfInString;
else
i += 3;
}
else if (Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
(aInString.CharAt(i + 6) == ' ' || aInString.CharAt(i + 6) == '>'))
// if style tag, skip until </style>
{
i = aInString.Find("</style>", true, i);
if (i == kNotFound)
i = lengthOfInString;
else
i += 8;
}
else if (Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
(aInString.CharAt(i + 7) == ' ' || aInString.CharAt(i + 7) == '>'))
// if script tag, skip until </script>
{
i = aInString.Find("</script>", true, i);
if (i == kNotFound)
i = lengthOfInString;
else
i += 9;
}
else if (Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
(aInString.CharAt(i + 5) == ' ' || aInString.CharAt(i + 5) == '>'))
// if head tag, skip until </head>
// Make sure not to match <header>.
{
i = aInString.Find("</head>", true, i);
if (i == kNotFound)
i = lengthOfInString;
else
i += 7;
}
else // just skip tag (attributes etc.)
{
@ -1259,7 +1289,7 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
else
i++;
}
aOutString.Append(&uniBuffer[start], uint32_t(i) - start);
aOutString.Append(&uniBuffer[start], i - start);
}
else
{

View File

@ -13,7 +13,7 @@ function run_test() {
let converter = Cc["@mozilla.org/txttohtmlconv;1"]
.getService(Ci.mozITXTToHTMLConv);
const tests = [
const scanTXTtests = [
// -- RFC1738
{
input: "RFC1738: <URL:http://mozilla.org> then",
@ -113,15 +113,83 @@ function run_test() {
}
];
const scanHTMLtests = [
{
input: "http://foo.example.com",
shouldChange: true
},
{
input: " <a href='http://a.example.com/'>foo</a>",
shouldChange: false
},
{
input: "<abbr>see http://abbr.example.com</abbr>",
shouldChange: true
},
{
input: "<!-- see http://comment.example.com/ -->",
shouldChange: false
},
{
input: "<!-- greater > -->",
shouldChange: false
},
{
input: "<!-- lesser < -->",
shouldChange: false
},
{
input: "<style id='ex'>background-image: url(http://example.com/ex.png);</style>",
shouldChange: false
},
{
input: "<style>body > p, body > div { color:blue }</style>",
shouldChange: false
},
{
input: "<script>window.location='http://script.example.com/';</script>",
shouldChange: false
},
{
input: "<head><title>http://head.example.com/</title></head>",
shouldChange: false
},
{
input: "<header>see http://header.example.com</header>",
shouldChange: true
},
{
input: "<iframe src='http://iframe.example.com/' />",
shouldChange: false
},
{
input: "broken end <script",
shouldChange: false
},
];
// Builds the href attribute text (leading space included) that the
// converter is expected to emit for the given URL.
function hrefLink(url) {
  let attrOpen = ' href="';
  let attrClose = '"';
  return attrOpen + url + attrClose;
}
for (let i = 0; i < tests.length; i++) {
let output = converter.scanTXT(tests[i].input, Ci.mozITXTToHTMLConv.kURLs);
let link = hrefLink(tests[i].url);
for (let i = 0; i < scanTXTtests.length; i++) {
let t = scanTXTtests[i];
let output = converter.scanTXT(t.input, Ci.mozITXTToHTMLConv.kURLs);
let link = hrefLink(t.url);
if (output.indexOf(link) == -1)
do_throw("Unexpected conversion: input=" + tests[i].input +
do_throw("Unexpected conversion by scanTXT: input=" + t.input +
", output=" + output + ", link=" + link);
}
for (let i = 0; i < scanHTMLtests.length; i++) {
let t = scanHTMLtests[i];
let output = converter.scanHTML(t.input, Ci.mozITXTToHTMLConv.kURLs);
let changed = (t.input != output);
if (changed != t.shouldChange) {
do_throw("Unexpected change by scanHTML: changed=" + changed +
", shouldChange=" + t.shouldChange +
", \ninput=" + t.input +
", \noutput=" + output);
}
}
}