Bug 964024 - mozTXTToHTMLConv should never convert content inside <style>, <script>, <head> tags. r=honzab

This commit is contained in:
Magnus Melin 2014-02-08 12:39:00 +02:00
parent 66b8dafbab
commit d441437825
2 changed files with 114 additions and 16 deletions

View File

@ -1223,16 +1223,17 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
#endif #endif
// Look for simple entities not included in a tags and scan them. // Look for simple entities not included in a tags and scan them.
/* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>") // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"),
or in a tag ("<!--[...]-->"). // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
Unescape the rest (text between tags) and pass it to ScanTXT. */ // Unescape the rest (text between tags) and pass it to ScanTXT.
for (int32_t i = 0; i < lengthOfInString;) for (int32_t i = 0; i < lengthOfInString;)
{ {
if (aInString[i] == '<') // html tag if (aInString[i] == '<') // html tag
{ {
uint32_t start = uint32_t(i); int32_t start = i;
if (nsCRT::ToLower((char)aInString[uint32_t(i) + 1]) == 'a') if (Substring(aInString, i + 1, 2).LowerCaseEqualsASCII("a "))
// if a tag, skip until </a> // if a tag, skip until </a>.
// Make sure there's a space after, not to match "abbr".
{ {
i = aInString.Find("</a>", true, i); i = aInString.Find("</a>", true, i);
if (i == kNotFound) if (i == kNotFound)
@ -1240,16 +1241,45 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
else else
i += 4; i += 4;
} }
else if (aInString[uint32_t(i) + 1] == '!' && aInString[uint32_t(i) + 2] == '-' && else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
aInString[uint32_t(i) + 3] == '-') // if out-commended code, skip until -->
//if out-commended code, skip until -->
{ {
i = aInString.Find("-->", false, i); i = aInString.Find("-->", false, i);
if (i == kNotFound) if (i == kNotFound)
i = lengthOfInString; i = lengthOfInString;
else else
i += 3; i += 3;
}
else if (Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
(aInString.CharAt(i + 6) == ' ' || aInString.CharAt(i + 6) == '>'))
// if style tag, skip until </style>
{
i = aInString.Find("</style>", true, i);
if (i == kNotFound)
i = lengthOfInString;
else
i += 8;
}
else if (Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
(aInString.CharAt(i + 7) == ' ' || aInString.CharAt(i + 7) == '>'))
// if script tag, skip until </script>
{
i = aInString.Find("</script>", true, i);
if (i == kNotFound)
i = lengthOfInString;
else
i += 9;
}
else if (Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
(aInString.CharAt(i + 5) == ' ' || aInString.CharAt(i + 5) == '>'))
// if head tag, skip until </head>
// Make sure not to match <header>.
{
i = aInString.Find("</head>", true, i);
if (i == kNotFound)
i = lengthOfInString;
else
i += 7;
} }
else // just skip tag (attributes etc.) else // just skip tag (attributes etc.)
{ {
@ -1259,7 +1289,7 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
else else
i++; i++;
} }
aOutString.Append(&uniBuffer[start], uint32_t(i) - start); aOutString.Append(&uniBuffer[start], i - start);
} }
else else
{ {

View File

@ -13,7 +13,7 @@ function run_test() {
let converter = Cc["@mozilla.org/txttohtmlconv;1"] let converter = Cc["@mozilla.org/txttohtmlconv;1"]
.getService(Ci.mozITXTToHTMLConv); .getService(Ci.mozITXTToHTMLConv);
const tests = [ const scanTXTtests = [
// -- RFC1738 // -- RFC1738
{ {
input: "RFC1738: <URL:http://mozilla.org> then", input: "RFC1738: <URL:http://mozilla.org> then",
@ -113,15 +113,83 @@ function run_test() {
} }
]; ];
// Cases for scanHTML. Each `input` is run through scanHTML and
// `shouldChange` says whether the output is expected to differ from the input.
const scanHTMLtests = [
// Bare URL outside of any tag: expected to be changed.
{
input: "http://foo.example.com",
shouldChange: true
},
// Content of an <a ...> element: expected to stay as is.
{
input: " <a href='http://a.example.com/'>foo</a>",
shouldChange: false
},
// <abbr> starts with "a" but is not an <a> tag, so its text is still expected to change.
{
input: "<abbr>see http://abbr.example.com</abbr>",
shouldChange: true
},
// URL inside an HTML comment: expected to stay as is.
{
input: "<!-- see http://comment.example.com/ -->",
shouldChange: false
},
// Comment containing a '>' character.
{
input: "<!-- greater > -->",
shouldChange: false
},
// Comment containing a '<' character.
{
input: "<!-- lesser < -->",
shouldChange: false
},
// <style> with an attribute and a URL in its content.
{
input: "<style id='ex'>background-image: url(http://example.com/ex.png);</style>",
shouldChange: false
},
// <style> whose content contains '>' characters.
{
input: "<style>body > p, body > div { color:blue }</style>",
shouldChange: false
},
// URL inside <script> content.
{
input: "<script>window.location='http://script.example.com/';</script>",
shouldChange: false
},
// URL inside <head> content.
{
input: "<head><title>http://head.example.com/</title></head>",
shouldChange: false
},
// <header> starts with "head" but is not <head>, so its text is still expected to change.
{
input: "<header>see http://header.example.com</header>",
shouldChange: true
},
// URL that only appears inside a tag attribute.
{
input: "<iframe src='http://iframe.example.com/' />",
shouldChange: false
},
// Input ending in an unterminated "<script" tag.
{
input: "broken end <script",
shouldChange: false
},
];
function hrefLink(url) { function hrefLink(url) {
return ' href="' + url + '"'; return ' href="' + url + '"';
} }
for (let i = 0; i < tests.length; i++) { for (let i = 0; i < scanTXTtests.length; i++) {
let output = converter.scanTXT(tests[i].input, Ci.mozITXTToHTMLConv.kURLs); let t = scanTXTtests[i];
let link = hrefLink(tests[i].url); let output = converter.scanTXT(t.input, Ci.mozITXTToHTMLConv.kURLs);
let link = hrefLink(t.url);
if (output.indexOf(link) == -1) if (output.indexOf(link) == -1)
do_throw("Unexpected conversion: input=" + tests[i].input + do_throw("Unexpected conversion by scanTXT: input=" + t.input +
", output=" + output + ", link=" + link); ", output=" + output + ", link=" + link);
} }
// Run every scanHTML case and fail when the converter modified (or left
// untouched) an input contrary to what the case declares in shouldChange.
for (let testCase of scanHTMLtests) {
  let converted = converter.scanHTML(testCase.input, Ci.mozITXTToHTMLConv.kURLs);
  let wasModified = (converted != testCase.input);
  if (wasModified == testCase.shouldChange) {
    // Outcome matches the expectation; nothing to report.
    continue;
  }
  do_throw("Unexpected change by scanHTML: changed=" + wasModified +
           ", shouldChange=" + testCase.shouldChange +
           ", \ninput=" + testCase.input +
           ", \noutput=" + converted);
}
} }