gecko/toolkit/components/url-classifier/src/nsUrlClassifierUtils.cpp

/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Url Classifier code
 *
 * The Initial Developer of the Original Code is
 * Google Inc.
 * Portions created by the Initial Developer are Copyright (C) 2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "nsEscape.h"
#include "nsString.h"
#include "nsIURI.h"
#include "nsNetUtil.h"
#include "nsUrlClassifierUtils.h"
#include "nsVoidArray.h"
#include "prprf.h"

// Converts a nibble value (expected range 0..15) to its uppercase
// hexadecimal character: 0..9 -> '0'..'9', 10..15 -> 'A'..'F'.
// Values outside that range are flagged by the assertion.
static char int_to_hex_digit(PRInt32 i)
{
  NS_ASSERTION((i >= 0) && (i <= 15), "int too big in int_to_hex_digit");
  if (i < 10) {
    return static_cast<char>(i + '0');
  }
  return static_cast<char>((i - 10) + 'A');
}

// Returns PR_TRUE when every character of |num| is a decimal digit.
// An empty string contains no offending character and therefore also
// yields PR_TRUE; callers that need a non-empty number must check the
// length themselves.
static PRBool
IsDecimal(const nsACString & num)
{
  const PRUint32 length = num.Length();
  for (PRUint32 i = 0; i < length; i++) {
    // The <ctype.h> classifiers are only defined for values representable
    // as unsigned char (or EOF).  A plain char can be negative for bytes
    // >= 0x80, so convert before classifying.
    if (!isdigit(static_cast<unsigned char>(num[i]))) {
      return PR_FALSE;
    }
  }

  return PR_TRUE;
}

// Returns PR_TRUE when |num| is a "0x"/"0X" prefix followed by at least one
// character, all of which are hexadecimal digits.
static PRBool
IsHex(const nsACString & num)
{
  const PRUint32 length = num.Length();

  // Need the two-character prefix plus at least one digit.
  if (length < 3) {
    return PR_FALSE;
  }

  if (num[0] != '0' || !(num[1] == 'x' || num[1] == 'X')) {
    return PR_FALSE;
  }

  for (PRUint32 i = 2; i < length; i++) {
    // The <ctype.h> classifiers are only defined for values representable
    // as unsigned char (or EOF); convert so bytes >= 0x80 cannot be passed
    // as negative values.
    if (!isxdigit(static_cast<unsigned char>(num[i]))) {
      return PR_FALSE;
    }
  }

  return PR_TRUE;
}

// Returns PR_TRUE when |num| is a leading '0' followed by at least one
// character, all of which are octal digits ('0'..'7').
static PRBool
IsOctal(const nsACString & num)
{
  const PRUint32 length = num.Length();

  // Need the leading zero plus at least one digit.
  if (length < 2) {
    return PR_FALSE;
  }

  if (num[0] != '0') {
    return PR_FALSE;
  }

  for (PRUint32 i = 1; i < length; i++) {
    // Explicit range test instead of isdigit(): same result for '0'..'7',
    // but well defined for every char value (isdigit() on a negative char
    // is undefined behaviour).
    const char digit = num[i];
    if (digit < '0' || digit > '7') {
      return PR_FALSE;
    }
  }

  return PR_TRUE;
}

// Starts with no escape charmap; Init() is what allocates it.
nsUrlClassifierUtils::nsUrlClassifierUtils()
  : mEscapeCharmap(nsnull)
{
}

// Allocates the escape charmap.
// Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY if the allocation
// yielded a null pointer.
nsresult
nsUrlClassifierUtils::Init()
{
  // The eight 32-bit words mark everything but alpha numerics, - and .
  // NOTE(review): presumably bit i of word n stands for character 32*n + i;
  // confirm against the Charmap declaration.
  mEscapeCharmap = new Charmap(0xffffffff, 0xfc009fff, 0xf8000001, 0xf8000001,
                               0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);
  if (mEscapeCharmap) {
    return NS_OK;
  }
  return NS_ERROR_OUT_OF_MEMORY;
}

// XPCOM boilerplate: nsUrlClassifierUtils exposes the single interface
// nsIUrlClassifierUtils (in addition to nsISupports).
NS_IMPL_ISUPPORTS1(nsUrlClassifierUtils, nsIUrlClassifierUtils)

/////////////////////////////////////////////////////////////////////////////
// nsIUrlClassifierUtils

// Builds the lookup key for |uri|: the canonicalized host of the innermost
// URI followed by its canonicalized path (with any "#fragment" removed).
//
// @param uri      URI to build the key for; must not be null.
// @param _retval  Receives the key.  Any previous content is discarded.
// @return NS_OK on success, NS_ERROR_INVALID_POINTER (via
//         NS_ENSURE_ARG_POINTER) for a null |uri|, or the failure code of
//         the path lookup / canonicalization steps.
NS_IMETHODIMP
nsUrlClassifierUtils::GetKeyForURI(nsIURI * uri, nsACString & _retval)
{
  // Without this guard a null |uri| would be dereferenced below, because
  // the fallback assigns |uri| itself to innerURI.
  NS_ENSURE_ARG_POINTER(uri);

  // The helpers below append to _retval, so start from a clean slate
  // instead of silently prefixing the key with whatever the caller left in
  // the string.
  _retval.Truncate();

  nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(uri);
  if (!innerURI)
    innerURI = uri;

  // The result code is intentionally not checked: on failure |host| simply
  // stays empty and the key consists of the path only.
  // NOTE(review): presumably this is what lets host-less URIs through;
  // confirm that is the intended behaviour.
  nsCAutoString host;
  innerURI->GetAsciiHost(host);

  nsresult rv = CanonicalizeHostname(host, _retval);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCAutoString path;
  rv = innerURI->GetPath(path);
  NS_ENSURE_SUCCESS(rv, rv);

  // strip out anchors
  PRInt32 ref = path.FindChar('#');
  if (ref != kNotFound)
    path.SetLength(ref);

  nsCAutoString canonicalPath;
  rv = CanonicalizePath(path, canonicalPath);
  NS_ENSURE_SUCCESS(rv, rv);

  _retval.Append(canonicalPath);

  return NS_OK;
}

/////////////////////////////////////////////////////////////////////////////
// non-interface methods

// Canonicalizes a hostname: unescapes it once, removes leading, trailing
// and repeated dots, rewrites it as a dotted-decimal address when it parses
// as an IP address, lowercases it and finally %-escapes special characters.
//
// @param hostname  Host as taken from the URI.
// @param _retval   Receives the canonical host.  Any previous content is
//                  discarded, matching CanonicalizePath().
// @return NS_OK (no failure path exists in this function).
nsresult
nsUrlClassifierUtils::CanonicalizeHostname(const nsACString & hostname,
                                           nsACString & _retval)
{
  // SpecialEncode() only appends, so clear the output first; otherwise the
  // result would depend on what the caller left in the string.
  _retval.Truncate();

  // Flatten once and reuse it for both the data pointer and the length.
  const nsPromiseFlatCString& flatHost = PromiseFlatCString(hostname);

  nsCAutoString unescaped;
  if (!NS_UnescapeURL(flatHost.get(), flatHost.Length(), 0, unescaped)) {
    // Nothing was unescaped, so |unescaped| was not written; use the input.
    unescaped.Assign(hostname);
  }

  nsCAutoString cleaned;
  CleanupHostname(unescaped, cleaned);

  // ParseIPAddress() leaves its output empty when the host is not an IP
  // address, in which case the cleaned hostname is kept as is.
  nsCAutoString ipAddress;
  ParseIPAddress(cleaned, ipAddress);
  if (!ipAddress.IsEmpty()) {
    cleaned.Assign(ipAddress);
  }

  ToLowerCase(cleaned);
  SpecialEncode(cleaned, PR_FALSE, _retval);

  return NS_OK;
}


// Canonicalizes a URL path: unescapes it repeatedly until NS_UnescapeURL
// reports nothing left to unescape, then %-escapes special characters and
// folds runs of slashes.
//
// @param path     Path to canonicalize.
// @param _retval  Receives the canonical path; cleared first.
// @return NS_OK (no failure path exists in this function).
nsresult
nsUrlClassifierUtils::CanonicalizePath(const nsACString & path,
                                       nsACString & _retval)
{
  _retval.Truncate();

  nsCAutoString decodedPath(path);
  for (;;) {
    // Fresh buffer for every pass so each unescape starts from empty.
    nsCAutoString unescapedOnce;
    if (!NS_UnescapeURL(decodedPath.get(), decodedPath.Length(), 0,
                        unescapedOnce)) {
      break;
    }
    decodedPath.Assign(unescapedOnce);
  }

  SpecialEncode(decodedPath, PR_TRUE, _retval);
  // XXX: lowercase the path?

  return NS_OK;
}

void
nsUrlClassifierUtils::CleanupHostname(const nsACString & hostname,
                                      nsACString & _retval)
{
  _retval.Truncate();

  const char* curChar = hostname.BeginReading();
  const char* end = hostname.EndReading();
  char lastChar = '\0';
  while (curChar != end) {
    unsigned char c = static_cast<unsigned char>(*curChar);
    if (c == '.' && (lastChar == '\0' || lastChar == '.')) {
      // skip
    } else {
      _retval.Append(*curChar);
    }
    lastChar = c;
    ++curChar;
  }

  // cut off trailing dots
  while (_retval.Length() > 0 && _retval[_retval.Length() - 1] == '.') {
    _retval.SetLength(_retval.Length() - 1);
  }
}

// Tries to interpret |host| as an IPv4 address written with one to four
// dot-separated components, each in decimal, octal (leading 0) or
// hexadecimal (leading 0x/0X) notation.
//
// @param host     Cleaned-up hostname.
// @param _retval  Receives the dotted-decimal form on success; left empty
//                 when |host| is not recognised as an IP address.
void
nsUrlClassifierUtils::ParseIPAddress(const nsACString & host,
                                     nsACString & _retval)
{
  _retval.Truncate();
  nsACString::const_iterator iter, end;
  host.BeginReading(iter);
  host.EndReading(end);

  if (host.Length() <= 15) {
    // The Windows resolver allows a 4-part dotted decimal IP address to
    // have a space followed by any old rubbish, so long as the total length
    // of the string doesn't get above 15 characters. So, "10.192.95.89 xy"
    // is resolved to 10.192.95.89.
    // If the string length is greater than 15 characters, e.g.
    // "10.192.95.89 xy.wildcard.example.com", it will be resolved through
    // DNS.

    if (FindCharInReadable(' ', iter, end)) {
      end = iter;
    }
  }

  // Quick rejection: only hex digits, the hex marker and dots may appear.
  for (host.BeginReading(iter); iter != end; ++iter) {
    // The host may carry arbitrary bytes after unescaping.  isxdigit() is
    // only defined for unsigned char values, so convert before classifying.
    const unsigned char c = static_cast<unsigned char>(*iter);
    if (!(isxdigit(c) || c == 'x' || c == 'X' || c == '.')) {
      // not an IP
      return;
    }
  }

  host.BeginReading(iter);
  nsCStringArray parts;
  parts.ParseString(PromiseFlatCString(Substring(iter, end)).get(), ".");
  if (parts.Count() > 4) {
    return;
  }

  // If any potentially-octal numbers (start with 0 but not hex) have
  // non-octal digits, no part of the ip can be in octal
  // XXX: this came from the old javascript implementation, is it really
  // supposed to be like this?
  PRBool allowOctal = PR_TRUE;
  for (PRInt32 i = 0; i < parts.Count(); i++) {
    const nsCString& part = *parts[i];
    if (part[0] == '0') {
      for (PRUint32 j = 1; j < part.Length(); j++) {
        // A hex marker means this component is not octal.  Both cases are
        // recognised, matching IsHex() and CanonicalNum(); otherwise a
        // component such as "0X89" would be mistaken for a malformed octal
        // number and disable octal parsing for the whole address.
        if (part[j] == 'x' || part[j] == 'X') {
          break;
        }
        if (part[j] == '8' || part[j] == '9') {
          allowOctal = PR_FALSE;
          break;
        }
      }
    }
  }

  for (PRInt32 i = 0; i < parts.Count(); i++) {
    nsCAutoString canonical;

    if (i == parts.Count() - 1) {
      // The last component fills all remaining bytes of the 4-byte address.
      CanonicalNum(*parts[i], 5 - parts.Count(), allowOctal, canonical);
    } else {
      CanonicalNum(*parts[i], 1, allowOctal, canonical);
    }

    if (canonical.IsEmpty()) {
      // One bad component invalidates the whole address.
      _retval.Truncate();
      return;
    }

    if (_retval.IsEmpty()) {
      _retval.Assign(canonical);
    } else {
      _retval.Append('.');
      _retval.Append(canonical);
    }
  }
  return;
}

// Parses one IP-address component and renders its lowest |bytes| bytes as
// dot-separated decimal values, most significant byte first.
//
// @param num         Component text: octal, decimal or 0x/0X hexadecimal.
// @param bytes       Number of bytes to emit.
// @param allowOctal  Whether a leading-zero component may be read as octal.
// @param _retval     Receives the result; left empty when |num| is empty,
//                    is in none of the accepted notations, or fails to
//                    scan.
void
nsUrlClassifierUtils::CanonicalNum(const nsACString& num,
                                   PRUint32 bytes,
                                   PRBool allowOctal,
                                   nsACString& _retval)
{
  _retval.Truncate();

  if (num.Length() < 1) {
    return;
  }

  // Choose the scan format from the notation; octal is tested first so a
  // leading-zero number is only read as decimal when octal is not allowed
  // or does not apply.
  const char* scanFormat;
  if (allowOctal && IsOctal(num)) {
    scanFormat = "%o";
  } else if (IsDecimal(num)) {
    scanFormat = "%u";
  } else if (IsHex(num)) {
    scanFormat = (num[1] == 'X') ? "0X%x" : "0x%x";
  } else {
    return;
  }

  PRUint32 val;
  if (PR_sscanf(PromiseFlatCString(num).get(), scanFormat, &val) != 1) {
    return;
  }

  // Peel bytes off the low end and prepend each one, so the most
  // significant of the requested bytes ends up first.
  for (; bytes > 0; --bytes) {
    char buf[20];
    PR_snprintf(buf, sizeof(buf), "%u", val & 0xff);
    if (_retval.IsEmpty()) {
      _retval.Assign(buf);
    } else {
      _retval = nsDependentCString(buf) + NS_LITERAL_CSTRING(".") + _retval;
    }
    val >>= 8;
  }
}

// Appends |url| to |_retval|, writing every character for which
// ShouldURLEscape() is true as %HH (uppercase hex digits).  When
// |foldSlashes| is set, a '/' that directly follows another '/' is dropped.
// The output string is appended to, not cleared.
//
// Returns PR_TRUE if at least one character was escaped.
PRBool
nsUrlClassifierUtils::SpecialEncode(const nsACString & url,
                                    PRBool foldSlashes,
                                    nsACString & _retval)
{
  PRBool escapedAny = PR_FALSE;
  unsigned char previous = '\0';

  const char* const stop = url.EndReading();
  for (const char* pos = url.BeginReading(); pos != stop; pos++) {
    unsigned char current = static_cast<unsigned char>(*pos);

    if (ShouldURLEscape(current)) {
      // We don't want to deal with 0, as it can break certain strings, just
      // encode as one.
      if (current == 0) {
        current = 1;
      }

      _retval.Append('%');
      _retval.Append(int_to_hex_digit(current >> 4));
      _retval.Append(int_to_hex_digit(current & 0x0f));
      escapedAny = PR_TRUE;
    } else {
      const PRBool repeatedSlash =
        foldSlashes && current == '/' && previous == '/';
      if (!repeatedSlash) {
        _retval.Append(*pos);
      }
    }

    previous = current;
  }

  return escapedAny;
}

// Decides whether |c| must be %-escaped: true for everything up to and
// including space (<= 32), for DEL and all high bytes (>= 127), and for '%'
// itself.
PRBool
nsUrlClassifierUtils::ShouldURLEscape(const unsigned char c) const
{
  const PRBool insidePlainRange = (c > 32) && (c < 127);
  return !insidePlainRange || c == '%';
}
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`/* *** BEGIN LICENSE BLOCK ***`
			`* Version: MPL 1.1/GPL 2.0/LGPL 2.1`
			`*`
			`* The contents of this file are subject to the Mozilla Public License Version`
			`* 1.1 (the "License"); you may not use this file except in compliance with`
			`* the License. You may obtain a copy of the License at`
			`* http://www.mozilla.org/MPL/`
			`*`
			`* Software distributed under the License is distributed on an "AS IS" basis,`
			`* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License`
			`* for the specific language governing rights and limitations under the`
			`* License.`
			`*`
			`* The Original Code is Url Classifier code`
			`*`
			`* The Initial Developer of the Original Code is`
			`* Google Inc.`
			`* Portions created by the Initial Developer are Copyright (C) 2007`
			`* the Initial Developer. All Rights Reserved.`
			`*`
			`* Contributor(s):`
			`*`
			`* Alternatively, the contents of this file may be used under the terms of`
			`* either the GNU General Public License Version 2 or later (the "GPL"), or`
			`* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),`
			`* in which case the provisions of the GPL or the LGPL are applicable instead`
			`* of those above. If you wish to allow use of your version of this file only`
			`* under the terms of either the GPL or the LGPL, and not to allow others to`
			`* use your version of this file under the terms of the MPL, indicate your`
			`* decision by deleting the provisions above and replace them with the notice`
			`* and other provisions required by the GPL or the LGPL. If you do not delete`
			`* the provisions above, a recipient may use your version of this file under`
			`* the terms of any one of the MPL, the GPL or the LGPL.`
			`*`
			`* *** END LICENSE BLOCK *** */`

			`#include "nsEscape.h"`
			`#include "nsString.h"`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`#include "nsIURI.h"`
			`#include "nsNetUtil.h"`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`#include "nsUrlClassifierUtils.h"`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`#include "nsVoidArray.h"`
			`#include "prprf.h"`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00
			`static char int_to_hex_digit(PRInt32 i)`
			`{`
			`NS_ASSERTION((i >= 0) && (i <= 15), "int too big in int_to_hex_digit");`
Bug 348748 - Replace all instances of NS_STATIC_CAST and friends with C++ casts (and simultaneously bitrot nearly every patch in existence). r=bsmedberg on the script that did this. Tune in next time for Macro Wars: Episode II: Attack on the LL_* Macros. 2007-07-08 00:08:04 -07:00			`return static_cast<char>(((i < 10) ? (i + '0') : ((i - 10) + 'A')));`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`}`

try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`static PRBool`
			`IsDecimal(const nsACString & num)`
			`{`
			`for (PRUint32 i = 0; i < num.Length(); i++) {`
			`if (!isdigit(num[i])) {`
			`return PR_FALSE;`
			`}`
			`}`

			`return PR_TRUE;`
			`}`

			`static PRBool`
			`IsHex(const nsACString & num)`
			`{`
			`if (num.Length() < 3) {`
			`return PR_FALSE;`
			`}`

			`if (num[0] != '0' \|\| !(num[1] == 'x' \|\| num[1] == 'X')) {`
			`return PR_FALSE;`
			`}`

			`for (PRUint32 i = 2; i < num.Length(); i++) {`
			`if (!isxdigit(num[i])) {`
			`return PR_FALSE;`
			`}`
			`}`

			`return PR_TRUE;`
			`}`

			`static PRBool`
			`IsOctal(const nsACString & num)`
			`{`
			`if (num.Length() < 2) {`
			`return PR_FALSE;`
			`}`

			`if (num[0] != '0') {`
			`return PR_FALSE;`
			`}`

			`for (PRUint32 i = 1; i < num.Length(); i++) {`
			`if (!isdigit(num[i]) \|\| num[i] == '8' \|\| num[i] == '9') {`
			`return PR_FALSE;`
			`}`
			`}`

			`return PR_TRUE;`
			`}`

Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`nsUrlClassifierUtils::nsUrlClassifierUtils() : mEscapeCharmap(nsnull)`
			`{`
			`}`

			`nsresult`
			`nsUrlClassifierUtils::Init()`
			`{`
			`// Everything but alpha numerics, - and .`
			`mEscapeCharmap = new Charmap(0xffffffff, 0xfc009fff, 0xf8000001, 0xf8000001,`
			`0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);`
			`if (!mEscapeCharmap)`
			`return NS_ERROR_OUT_OF_MEMORY;`
			`return NS_OK;`
			`}`

			`NS_IMPL_ISUPPORTS1(nsUrlClassifierUtils, nsIUrlClassifierUtils)`

			`/////////////////////////////////////////////////////////////////////////////`
			`// nsIUrlClassifierUtils`

			`NS_IMETHODIMP`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`nsUrlClassifierUtils::GetKeyForURI(nsIURI * uri, nsACString & _retval)`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`{`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(uri);`
			`if (!innerURI)`
			`innerURI = uri;`

			`nsCAutoString host;`
			`innerURI->GetAsciiHost(host);`

			`nsresult rv = CanonicalizeHostname(host, _retval);`
			`NS_ENSURE_SUCCESS(rv, rv);`

			`nsCAutoString path;`
			`rv = innerURI->GetPath(path);`
			`NS_ENSURE_SUCCESS(rv, rv);`

handle query parameters and full-string queries as specified in the updated safebrowsing protocol. b=395377, r=tony, a=mconnor 2007-10-01 16:53:53 -07:00			`// strip out anchors`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`PRInt32 ref = path.FindChar('#');`
			`if (ref != kNotFound)`
			`path.SetLength(ref);`

Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`nsCAutoString temp;`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`rv = CanonicalizePath(path, temp);`
			`NS_ENSURE_SUCCESS(rv, rv);`

			`_retval.Append(temp);`

			`return NS_OK;`
			`}`

			`/////////////////////////////////////////////////////////////////////////////`
			`// non-interface methods`

			`nsresult`
			`nsUrlClassifierUtils::CanonicalizeHostname(const nsACString & hostname,`
			`nsACString & _retval)`
			`{`
			`nsCAutoString unescaped;`
			`if (!NS_UnescapeURL(PromiseFlatCString(hostname).get(),`
			`PromiseFlatCString(hostname).Length(),`
			`0, unescaped)) {`
			`unescaped.Assign(hostname);`
			`}`

			`nsCAutoString cleaned;`
			`CleanupHostname(unescaped, cleaned);`

			`nsCAutoString temp;`
			`ParseIPAddress(cleaned, temp);`
			`if (!temp.IsEmpty()) {`
			`cleaned.Assign(temp);`
			`}`

			`ToLowerCase(cleaned);`
			`SpecialEncode(cleaned, PR_FALSE, _retval);`

			`return NS_OK;`
			`}`


			`nsresult`
			`nsUrlClassifierUtils::CanonicalizePath(const nsACString & path,`
			`nsACString & _retval)`
			`{`
			`_retval.Truncate();`

			`nsCAutoString decodedPath(path);`
			`nsCAutoString temp;`
			`while (NS_UnescapeURL(decodedPath.get(), decodedPath.Length(), 0, temp)) {`
			`decodedPath.Assign(temp);`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`temp.Truncate();`
			`}`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00
			`SpecialEncode(decodedPath, PR_TRUE, _retval);`
			`// XXX: lowercase the path?`

Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`return NS_OK;`
			`}`

try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`void`
			`nsUrlClassifierUtils::CleanupHostname(const nsACString & hostname,`
			`nsACString & _retval)`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`{`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`_retval.Truncate();`

Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`const char* curChar = hostname.BeginReading();`
			`const char* end = hostname.EndReading();`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`char lastChar = '\0';`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`while (curChar != end) {`
Bug 348748 - Replace all instances of NS_STATIC_CAST and friends with C++ casts (and simultaneously bitrot nearly every patch in existence). r=bsmedberg on the script that did this. Tune in next time for Macro Wars: Episode II: Attack on the LL_* Macros. 2007-07-08 00:08:04 -07:00			`unsigned char c = static_cast<unsigned char>(*curChar);`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`if (c == '.' && (lastChar == '\0' \|\| lastChar == '.')) {`
			`// skip`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`} else {`
			`_retval.Append(*curChar);`
			`}`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`lastChar = c;`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`++curChar;`
			`}`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00
			`// cut off trailing dots`
fix assertion failure in url-classifier when canonicalizing a URI with no host. b=387196, r=tony, a=schrep 2007-07-26 17:52:33 -07:00			`while (_retval.Length() > 0 && _retval[_retval.Length() - 1] == '.') {`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`_retval.SetLength(_retval.Length() - 1);`
			`}`
implement the new google safebrowsing protocol. b=387196, r=tony 2007-07-25 18:49:20 -07:00			`}`

try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`void`
			`nsUrlClassifierUtils::ParseIPAddress(const nsACString & host,`
			`nsACString & _retval)`
			`{`
			`_retval.Truncate();`
			`nsACString::const_iterator iter, end;`
			`host.BeginReading(iter);`
			`host.EndReading(end);`

			`if (host.Length() <= 15) {`
			`// The Windows resolver allows a 4-part dotted decimal IP address to`
			`// have a space followed by any old rubbish, so long as the total length`
			`// of the string doesn't get above 15 characters. So, "10.192.95.89 xy"`
			`// is resolved to 10.192.95.89.`
			`// If the string length is greater than 15 characters, e.g.`
			`// "10.192.95.89 xy.wildcard.example.com", it will be resolved through`
			`// DNS.`

			`if (FindCharInReadable(' ', iter, end)) {`
			`end = iter;`
			`}`
			`}`

			`for (host.BeginReading(iter); iter != end; iter++) {`
			`if (!(isxdigit(iter) \|\| iter == 'x' \|\| iter == 'X' \|\| iter == '.')) {`
			`// not an IP`
			`return;`
			`}`
			`}`

			`host.BeginReading(iter);`
			`nsCStringArray parts;`
			`parts.ParseString(PromiseFlatCString(Substring(iter, end)).get(), ".");`
			`if (parts.Count() > 4) {`
			`return;`
			`}`

			`// If any potentially-octal numbers (start with 0 but not hex) have`
			`// non-octal digits, no part of the ip can be in octal`
			`// XXX: this came from the old javascript implementation, is it really`
			`// supposed to be like this?`
			`PRBool allowOctal = PR_TRUE;`
			`for (PRInt32 i = 0; i < parts.Count(); i++) {`
			`const nsCString& part = *parts[i];`
			`if (part[0] == '0') {`
			`for (PRUint32 j = 1; j < part.Length(); j++) {`
			`if (part[j] == 'x') {`
			`break;`
			`}`
			`if (part[j] == '8' \|\| part[j] == '9') {`
			`allowOctal = PR_FALSE;`
			`break;`
			`}`
			`}`
			`}`
			`}`

			`for (PRInt32 i = 0; i < parts.Count(); i++) {`
			`nsCAutoString canonical;`

			`if (i == parts.Count() - 1) {`
			`CanonicalNum(*parts[i], 5 - parts.Count(), allowOctal, canonical);`
			`} else {`
			`CanonicalNum(*parts[i], 1, allowOctal, canonical);`
			`}`

			`if (canonical.IsEmpty()) {`
			`_retval.Truncate();`
			`return;`
			`}`

			`if (_retval.IsEmpty()) {`
			`_retval.Assign(canonical);`
			`} else {`
			`_retval.Append('.');`
			`_retval.Append(canonical);`
			`}`
			`}`
			`return;`
			`}`

			`void`
			`nsUrlClassifierUtils::CanonicalNum(const nsACString& num,`
			`PRUint32 bytes,`
			`PRBool allowOctal,`
			`nsACString& _retval)`
			`{`
			`_retval.Truncate();`

			`if (num.Length() < 1) {`
			`return;`
			`}`

			`PRUint32 val;`
			`if (allowOctal && IsOctal(num)) {`
			`if (PR_sscanf(PromiseFlatCString(num).get(), "%o", &val) != 1) {`
			`return;`
			`}`
			`} else if (IsDecimal(num)) {`
			`if (PR_sscanf(PromiseFlatCString(num).get(), "%u", &val) != 1) {`
			`return;`
			`}`
			`} else if (IsHex(num)) {`
			`if (PR_sscanf(PromiseFlatCString(num).get(), num[1] == 'X' ? "0X%x" : "0x%x",`
			`&val) != 1) {`
			`return;`
			`}`
			`} else {`
			`return;`
			`}`

			`while (bytes--) {`
			`char buf[20];`
			`PR_snprintf(buf, sizeof(buf), "%u", val & 0xff);`
			`if (_retval.IsEmpty()) {`
			`_retval.Assign(buf);`
			`} else {`
			`_retval = nsDependentCString(buf) + NS_LITERAL_CSTRING(".") + _retval;`
			`}`
			`val >>= 8;`
			`}`
			`}`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00
			`// This function will encode all "special" characters in typical url`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`// encoding, that is %hh where h is a valid hex digit. It will also fold`
			`// any duplicated slashes.`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`PRBool`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`nsUrlClassifierUtils::SpecialEncode(const nsACString & url,`
			`PRBool foldSlashes,`
			`nsACString & _retval)`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`{`
			`PRBool changed = PR_FALSE;`
			`const char* curChar = url.BeginReading();`
			`const char* end = url.EndReading();`

try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`unsigned char lastChar = '\0';`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`while (curChar != end) {`
Bug 348748 - Replace all instances of NS_STATIC_CAST and friends with C++ casts (and simultaneously bitrot nearly every patch in existence). r=bsmedberg on the script that did this. Tune in next time for Macro Wars: Episode II: Attack on the LL_* Macros. 2007-07-08 00:08:04 -07:00			`unsigned char c = static_cast<unsigned char>(*curChar);`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`if (ShouldURLEscape(c)) {`
			`// We don't want to deal with 0, as it can break certain strings, just`
			`// encode as one.`
			`if (c == 0)`
			`c = 1;`

			`_retval.Append('%');`
			`_retval.Append(int_to_hex_digit(c / 16));`
			`_retval.Append(int_to_hex_digit(c % 16));`

			`changed = PR_TRUE;`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`} else if (foldSlashes && (c == '/' && lastChar == '/')) {`
			`// skip`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`} else {`
			`_retval.Append(*curChar);`
			`}`
try landing new safebrowsing protocol again. b=387196, r=tony, r=vlad (for new fixes) 2007-07-25 23:38:43 -07:00			`lastChar = c;`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`curChar++;`
			`}`
			`return changed;`
			`}`

			`PRBool`
			`nsUrlClassifierUtils::ShouldURLEscape(const unsigned char c) const`
			`{`
			`return c <= 32 \|\| c == '%' \|\| c >=127;`
			`}`