Bug 964024 - mozTXTToHTMLConv should never convert content inside <style>, <script>, <head> tags. r=honzab

2024-09-13 09:24:08 -07:00 · 2014-02-08 12:39:00 +02:00 · 2014-02-08 12:39:00 +02:00 · d441437825
commit d441437825
parent 66b8dafbab
2 changed files with 114 additions and 16 deletions
--- a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
+++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
@@ -1223,16 +1223,17 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
 #endif

  // Look for simple entities not included in a tags and scan them.
-  /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
-     or in a tag ("<!--[...]-->").
-     Unescape the rest (text between tags) and pass it to ScanTXT. */
+  // Skip all tags ("<[...]>") and content in a link tag ("<a [...]</a>"),
+  // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
+  // Unescape the rest (text between tags) and pass it to ScanTXT.
  for (int32_t i = 0; i < lengthOfInString;)
  {
    if (aInString[i] == '<')  // html tag
    {
-      uint32_t start = uint32_t(i);
-      if (nsCRT::ToLower((char)aInString[uint32_t(i) + 1]) == 'a')
-           // if a tag, skip until </a>
+      int32_t start = i;
+      if (Substring(aInString, i + 1, 2).LowerCaseEqualsASCII("a "))
+           // if a tag, skip until </a>.
+           // Make sure there's a space after, not to match "abbr".
      {
        i = aInString.Find("</a>", true, i);
        if (i == kNotFound)
@@ -1240,16 +1241,45 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
        else
          i += 4;
      }
-      else if (aInString[uint32_t(i) + 1] == '!' && aInString[uint32_t(i) + 2] == '-' &&
-        aInString[uint32_t(i) + 3] == '-')
-          //if out-commended code, skip until -->
+      else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
+          // if commented-out code, skip until -->
      {
        i = aInString.Find("-->", false, i);
        if (i == kNotFound)
          i = lengthOfInString;
        else
          i += 3;
-
+      }
+      else if (Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
+               (aInString.CharAt(i + 6) == ' ' || aInString.CharAt(i + 6) == '>'))
+           // if style tag, skip until </style>
+      {
+        i = aInString.Find("</style>", true, i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i += 8;
+      }
+      else if (Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
+               (aInString.CharAt(i + 7) == ' ' || aInString.CharAt(i + 7) == '>'))
+           // if script tag, skip until </script>
+      {
+        i = aInString.Find("</script>", true, i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i += 9;
+      }
+      else if (Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
+               (aInString.CharAt(i + 5) == ' ' || aInString.CharAt(i + 5) == '>'))
+           // if head tag, skip until </head>
+           // Make sure not to match <header>.
+      {
+        i = aInString.Find("</head>", true, i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i += 7;
      }
      else  // just skip tag (attributes etc.)
      {
@@ -1259,7 +1289,7 @@ mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOu
        else
          i++;
      }
-      aOutString.Append(&uniBuffer[start], uint32_t(i) - start);
+      aOutString.Append(&uniBuffer[start], i - start);
    }
    else
    {
--- a/netwerk/test/unit/test_mozTXTToHTMLConv.js
+++ b/netwerk/test/unit/test_mozTXTToHTMLConv.js
@@ -13,7 +13,7 @@ function run_test() {
  let converter = Cc["@mozilla.org/txttohtmlconv;1"]
                     .getService(Ci.mozITXTToHTMLConv);

-  const tests = [
+  const scanTXTtests = [
    // -- RFC1738
    {
      input: "RFC1738: <URL:http://mozilla.org> then",
@@ -113,15 +113,83 @@ function run_test() {
    }
  ];

+  const scanHTMLtests = [
+    {
+      input: "http://foo.example.com",
+      shouldChange: true
+    },
+    {
+      input: " <a href='http://a.example.com/'>foo</a>",
+      shouldChange: false
+    },
+    {
+      input: "<abbr>see http://abbr.example.com</abbr>",
+      shouldChange: true
+    },
+    {
+      input: "<!-- see http://comment.example.com/ -->",
+      shouldChange: false
+    },
+    {
+      input: "<!-- greater > -->",
+      shouldChange: false
+    },
+    {
+      input: "<!-- lesser < -->",
+      shouldChange: false
+    },
+    {
+      input: "<style id='ex'>background-image: url(http://example.com/ex.png);</style>",
+      shouldChange: false
+    },
+    {
+      input: "<style>body > p, body > div { color:blue }</style>",
+      shouldChange: false
+    },
+    {
+      input: "<script>window.location='http://script.example.com/';</script>",
+      shouldChange: false
+    },
+    {
+      input: "<head><title>http://head.example.com/</title></head>",
+      shouldChange: false
+    },
+    {
+      input: "<header>see http://header.example.com</header>",
+      shouldChange: true
+    },
+    {
+      input: "<iframe src='http://iframe.example.com/' />",
+      shouldChange: false
+    },
+    {
+      input: "broken end <script",
+      shouldChange: false
+    },
+  ];
+
  function hrefLink(url) {
    return ' href="' + url + '"';
  }

-  for (let i = 0; i < tests.length; i++) {
-    let output = converter.scanTXT(tests[i].input, Ci.mozITXTToHTMLConv.kURLs);
-    let link = hrefLink(tests[i].url);
+  for (let i = 0; i < scanTXTtests.length; i++) {
+    let t = scanTXTtests[i];
+    let output = converter.scanTXT(t.input, Ci.mozITXTToHTMLConv.kURLs);
+    let link = hrefLink(t.url);
    if (output.indexOf(link) == -1)
-      do_throw("Unexpected conversion: input=" + tests[i].input +
+      do_throw("Unexpected conversion by scanTXT: input=" + t.input +
               ", output=" + output + ", link=" + link);
  }
+
+  for (let i = 0; i < scanHTMLtests.length; i++) {
+    let t = scanHTMLtests[i];
+    let output = converter.scanHTML(t.input, Ci.mozITXTToHTMLConv.kURLs);
+    let changed = (t.input != output);
+    if (changed != t.shouldChange) {
+      do_throw("Unexpected change by scanHTML: changed=" + changed +
+               ", shouldChange=" + t.shouldChange +
+               ", \ninput=" + t.input +
+               ", \noutput=" + output);
+    }
+  }
 }