gecko/xpcom/io/nsEscape.cpp

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
 
//	First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.

#include "nsEscape.h"
#include "nsMemory.h"
#include "nsCRT.h"
#include "nsReadableUtils.h"

const int netCharType[256] =
/*	Per-character permission bits consulted by IS_OK(): a character may be
**	copied unescaped when (netCharType[c] & flags) is non-zero.
**
**	Bit 0 (value 1)	xalpha		-- the alphas
**	Bit 1 (value 2)	xpalpha		-- as xalpha but 
**                             converts spaces to plus and plus to %2B
**	Bit 2 (value 4)	path		-- as xalphas but doesn't escape '/'
**
**	Only indices 0x00..0x80 are listed explicitly; the remaining entries of
**	the 256-element array are zero-initialized, so every byte >= 0x80 is
**	always escaped.
*/
    /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
    {    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 0x */
		 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 1x */
		 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,	/* 2x   !"#$%&'()*+,-./	 */
         7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,	/* 3x  0123456789:;<=>?	 */
	     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 4x  @ABCDEFGHIJKLMNO  */
	     /* bits for '@' changed from 7 to 0 so '@' can be escaped   */
	     /* in usernames and passwords in publishing.                */
	     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,	/* 5X  PQRSTUVWXYZ[\]^_	 */
	     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 6x  `abcdefghijklmno	 */
	     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,	/* 7X  pqrstuvwxyz{|}~	DEL */
		 0, };								/* 0x80; 0x81..0xFF default to 0 */

/* decode % escaped hex codes into character values
 *
 * UNHEX(C) yields the numeric value (0..15) of the hex digit C, or 0 when C
 * is not a hex digit.  The argument is fully parenthesized so that an
 * expression argument (e.g. a conditional expression) is decoded correctly.
 * NOTE: C is still evaluated more than once, so it must not have side
 * effects (no UNHEX(*p++)).
 */
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
     (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
     (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))


/* Non-zero when character C may be emitted unescaped under the escape mask
 * held in a variable named `flags`, which must be in scope at the point of
 * use. */
#define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags))
#define HEX_ESCAPE '%'

//----------------------------------------------------------------------------------------
// Returns a newly allocated (nsMemory::Alloc), NUL-terminated copy of |str| in
// which every character not permitted by |flags| (per netCharType) is written
// as "%XX" with uppercase hex digits.  When |flags| is exactly url_XPAlphas a
// space is written as '+' instead.  If |out_len| is non-null it receives the
// length of the result, excluding the terminator.  Returns 0 when |str| is
// null, when the size computation overflows or exceeds PR_UINT32_MAX, or when
// the allocation fails.
static char* nsEscapeCount(
    const char * str,
    nsEscapeMask flags,
    size_t* out_len)
//----------------------------------------------------------------------------------------
{
    if (!str)
        return 0;

    static const char hexChars[] = "0123456789ABCDEF";

    // First pass: measure the input and count the characters that are not
    // allowed through unescaped.
    size_t len = 0;
    size_t charsToEscape = 0;
    for (const unsigned char* scan = (const unsigned char *) str; *scan; ++scan)
    {
        ++len;
        if (!IS_OK(*scan))
            ++charsToEscape;
    }

    // calculate how much memory should be allocated
    // original length + 2 bytes for each escaped character + terminating '\0'
    // do the sum in steps to check for overflow
    size_t dstSize = len + 1 + charsToEscape;
    if (dstSize <= len)
        return 0;
    dstSize += charsToEscape;
    if (dstSize < len)
        return 0;

    // fail if we need more than 4GB
    // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
    // calls NS_Alloc_P(size_t) which calls PR_Malloc(PRUint32), so there is
    // no chance to allocate more than 4GB using nsMemory::Alloc()
    if (dstSize > PR_UINT32_MAX)
        return 0;

    char* result = (char *)nsMemory::Alloc(dstSize);
    if (!result)
        return 0;

    // Second pass: emit the escaped text.  Only the url_XPAlphas mode turns
    // spaces into pluses; every other mode hex-escapes them.
    const bool spaceToPlus = (flags == url_XPAlphas);
    const unsigned char* in = (const unsigned char *) str;
    const unsigned char* const inEnd = in + len;
    unsigned char* out = (unsigned char *) result;

    for (; in != inEnd; ++in)
    {
        const unsigned char c = *in;
        if (IS_OK(c))
        {
            *out++ = c;
        }
        else if (spaceToPlus && c == ' ')
        {
            *out++ = '+'; /* convert spaces to pluses */
        }
        else
        {
            *out++ = HEX_ESCAPE;
            *out++ = hexChars[c >> 4];      /* high nibble */
            *out++ = hexChars[c & 0x0f];    /* low nibble */
        }
    }

    *out = '\0';     /* tack on eos */
    if (out_len)
        *out_len = out - (unsigned char *) result;
    return result;
}

//----------------------------------------------------------------------------------------
// Escapes |str| according to |flags| and returns the newly allocated result
// produced by nsEscapeCount(); a null |str| yields NULL.
NS_COM char* nsEscape(const char * str, nsEscapeMask flags)
//----------------------------------------------------------------------------------------
{
    return str ? nsEscapeCount(str, flags, NULL) : NULL;
}

//----------------------------------------------------------------------------------------
// Unescapes |str| in place via nsUnescapeCount() and hands the same pointer
// back to the caller.
NS_COM char* nsUnescape(char * str)
//----------------------------------------------------------------------------------------
{
    // The decoded length is not needed here; only the in-place rewrite is.
    (void) nsUnescapeCount(str);
    return str;
}

//----------------------------------------------------------------------------------------
// Decodes "%XX" escape sequences in |str| in place and returns the length of
// the decoded string (excluding the terminating NUL).
//
// A '%' is decoded only when it is followed by two hex digits (either case);
// any other '%' is copied through unchanged.  The result is never longer than
// the input, so it always fits in the caller's buffer.  A null |str| is
// treated as an empty string and 0 is returned.
NS_COM PRInt32 nsUnescapeCount(char * str)
//----------------------------------------------------------------------------------------
{
    if (!str)
        return 0;

    static const char hexChars[] = "0123456789ABCDEFabcdef";
    const size_t hexCount = sizeof(hexChars) - 1; // do not match the NUL

    char *src = str;
    char *dst = str;

    while (*src)
    {
        // The short-circuit keeps every read inside the string: src[1] is at
        // worst the terminator, and src[2] is only read once src[1] is a hex
        // digit (and therefore not the terminator).
        if (*src == HEX_ESCAPE &&
            memchr(hexChars, src[1], hexCount) &&
            memchr(hexChars, src[2], hexCount))
        {
            *dst++ = (UNHEX(src[1]) << 4) + UNHEX(src[2]);
            src += 3; /* walk over escape and both digits */
        }
        else
        {
            *dst++ = *src++;
        }
    }

    *dst = 0;
    return (int)(dst - str);

} /* NET_UnEscapeCnt */


// Returns a newly allocated (NS_Alloc) copy of |string| with the characters
// < > & " ' replaced by the entities &lt; &gt; &amp; &quot; &#39;.
// Returns nsnull when the input is too long for the worst-case buffer size to
// be computed in 32 bits, or when the allocation fails.
NS_COM char *
nsEscapeHTML(const char * string)
{
    /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
    PRUint32 len = PL_strlen(string);
    if (len >= (PR_UINT32_MAX / 6))
      return nsnull;

    char *rv = (char *)NS_Alloc( (6 * len) + 1 );
    if (!rv)
      return nsnull;

    char *out = rv;
    for (const char *in = string; *in != '\0'; ++in)
      {
        // Pick the replacement entity, if this character has one.
        const char *entity = 0;
        switch (*in)
          {
            case '<':  entity = "&lt;";   break;
            case '>':  entity = "&gt;";   break;
            case '&':  entity = "&amp;";  break;
            case '"':  entity = "&quot;"; break;
            case '\'': entity = "&#39;";  break;
            default:                      break;
          }

        if (entity)
          {
            while (*entity)
              *out++ = *entity++;
          }
        else
          {
            *out++ = *in;
          }
      }
    *out = '\0';

    return rv;
}

// Wide-character counterpart of nsEscapeHTML: returns a newly allocated
// (nsMemory::Alloc), zero-terminated copy of the first |aSourceBufferLen|
// characters of |aSourceBuffer| with < > & " ' replaced by
// &lt; &gt; &amp; &quot; &#39;.  A negative length means "compute it with
// nsCRT::strlen".  Returns nsnull when the input is too long for the
// worst-case buffer size to fit in 32 bits, or when the allocation fails.
NS_COM PRUnichar *
nsEscapeHTML2(const PRUnichar *aSourceBuffer, PRInt32 aSourceBufferLen)
{
  // if the caller didn't calculate the length, do it here
  if (aSourceBufferLen < 0) {
    aSourceBufferLen = nsCRT::strlen(aSourceBuffer);
  }

  /* XXX Hardcoded max entity len. */
  if (PRUint32(aSourceBufferLen) >=
      ((PR_UINT32_MAX - sizeof(PRUnichar)) / (6 * sizeof(PRUnichar))) )
    return nsnull;

  PRUnichar *resultBuffer = (PRUnichar *)nsMemory::Alloc(aSourceBufferLen *
                            6 * sizeof(PRUnichar) + sizeof(PRUnichar('\0')));
  if (!resultBuffer)
    return resultBuffer;

  PRUnichar *out = resultBuffer;
  for (PRInt32 idx = 0; idx < aSourceBufferLen; ++idx) {
    // Pick the replacement entity, if this character has one.
    const char *entity = 0;
    switch (aSourceBuffer[idx]) {
      case '<':  entity = "&lt;";   break;
      case '>':  entity = "&gt;";   break;
      case '&':  entity = "&amp;";  break;
      case '"':  entity = "&quot;"; break;
      case '\'': entity = "&#39;";  break;
      default:                      break;
    }

    if (entity) {
      // Entities are plain ASCII, so each byte widens to the same code unit.
      while (*entity)
        *out++ = *entity++;
    } else {
      *out++ = aSourceBuffer[idx];
    }
  }
  *out = 0;

  return resultBuffer;
}

//----------------------------------------------------------------------------------------

/* Per-character bitmask consulted by NO_NEED_ESC(): a character may be copied
   unescaped when (EscapeChars[c] & flags) is non-zero.  1023 has all ten low
   bits set; smaller values allow the character in only some cases.
   Only indices 0x00..0x80 are listed explicitly; the remaining entries of the
   256-element array are zero-initialized, so the table never marks a byte
   >= 0x80 as safe. */
const int EscapeChars[256] =
/*      0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F */
{
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,       /* 0x */
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  	    /* 1x */
        0,1023,   0, 512,1023,   0,1023,   0,1023,1023,1023,1023,1023,1023, 953, 784,       /* 2x   !"#$%&'()*+,-./	 */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008, 912,   0,1008,   0, 768,       /* 3x  0123456789:;<=>?	 */
     1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,       /* 4x  @ABCDEFGHIJKLMNO  */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023,       /* 5x  PQRSTUVWXYZ[\]^_	 */
        0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,       /* 6x  `abcdefghijklmno	 */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,       /* 7x  pqrstuvwxyz{|}~ DEL */
        0    /* 0x80 (first non-ASCII byte); 0x81..0xFF default to 0 */
};

/* Non-zero when character C needs no escaping under the mask held in a
   variable named `flags`, which must be in scope at the point of use. */
#define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags))

//----------------------------------------------------------------------------------------

/* returns an escaped string */

/* use the following flags to specify which 
   part of an URL you want to escape: 

   esc_Scheme        =     1
   esc_Username      =     2
   esc_Password      =     4
   esc_Host          =     8
   esc_Directory     =    16
   esc_FileBaseName  =    32
   esc_FileExtension =    64
   esc_Param         =   128
   esc_Query         =   256
   esc_Ref           =   512
*/

/* by default this function will not escape parts of a string
   that already look escaped, which means it already includes 
   a valid hexcode. This is done to avoid multiple escapes of
   a string. Use the following flags to force escaping of a 
   string:
 
   esc_Forced        =  1024
*/

/* Escapes the first |partLen| bytes of |part| (or the whole NUL-terminated
   string when |partLen| is negative) according to |flags|.

   Nothing is appended to |result| until the first character that needs
   escaping is met, unless esc_AlwaysCopy is set; from then on every output
   character is appended.  Returns PR_TRUE when anything was written to
   |result|, and PR_FALSE when |part| is null or needed no changes. */
NS_COM PRBool NS_EscapeURL(const char *part,
                           PRInt32 partLen,
                           PRUint32 flags,
                           nsACString &result)
{
    if (!part) {
        NS_NOTREACHED("null pointer");
        return PR_FALSE;
    }

    static const char hexChars[] = "0123456789ABCDEF";

    if (partLen < 0)
        partLen = strlen(part);

    const PRBool forced = !!(flags & esc_Forced);
    const PRBool ignoreNonAscii = !!(flags & esc_OnlyASCII);
    const PRBool ignoreAscii = !!(flags & esc_OnlyNonASCII);
    const PRBool colon = !!(flags & esc_Colon);
    PRBool writing = !!(flags & esc_AlwaysCopy);

    const unsigned char* bytes = (const unsigned char *) part;

    // Output is staged in a small stack buffer and flushed to |result| in
    // chunks to avoid one append per character.
    char chunk[100];
    unsigned int chunkLen = 0;

    PRBool prevWasNonAscii = PR_FALSE;
    for (int pos = 0; pos < partLen; ++pos)
    {
      const unsigned char c = bytes[pos];

      // A character may be copied as-is when the table allows it for the
      // requested URL part, or:
      //  - it is a '%' and escaping is not forced (see bugzilla bug 61269
      //    for details why we changed this),
      //  - it is non-ASCII and only ASCII is to be escaped,
      //  - it is printable ASCII (0x21..0x7e) and only non-ASCII is to be
      //    escaped.  Control characters (C0 and DEL) and the space (0x20,
      //    not legal in URLs) are not covered by this exemption.
      const PRBool mayCopy = NO_NEED_ESC(c)
                          || (c == HEX_ESCAPE && !forced)
                          || (c > 0x7f && ignoreNonAscii)
                          || (c > 0x20 && c < 0x7f && ignoreAscii);

      // ...but these overrides force an escape regardless:
      //  - the colon, on special request, even when not covered by the matrix,
      //  - a '|' right after a non-ASCII byte, as it may be part of a
      //    multi-byte character (unless non-ASCII is being left alone).
      const PRBool mustEscape = (c == ':' && colon)
                             || (prevWasNonAscii && c == '|' && !ignoreNonAscii);

      if (mayCopy && !mustEscape)
      {
        if (writing)
          chunk[chunkLen++] = c;
      }
      else /* do the escape magic */
      {
        if (!writing)
        {
          // First change: bring |result| up to date with the untouched prefix.
          result.Append(part, pos);
          writing = PR_TRUE;
        }
        chunk[chunkLen++] = HEX_ESCAPE;
        chunk[chunkLen++] = hexChars[c >> 4];   /* high nibble */
        chunk[chunkLen++] = hexChars[c & 0x0f]; /* low nibble */
      }

      // Flush while there is still room for one more escape plus the NUL.
      if (chunkLen >= sizeof(chunk) - 4)
      {
        NS_ASSERTION(writing, "should be writing");
        chunk[chunkLen] = '\0';
        result += chunk;
        chunkLen = 0;
      }

      prevWasNonAscii = (c > 0x7f);
    }

    if (writing) {
      chunk[chunkLen] = '\0';
      result += chunk;
    }
    return writing;
}

/* Non-null when c is a hex digit; relies on a local array named hexChars. */
#define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)

/* Decodes "%XX" sequences in the first |len| bytes of |str| (or the whole
   NUL-terminated string when |len| is negative), appending to |result|.

   A sequence is decoded only when both digits are hex and:
     - its value is ASCII (first digit < '8') and esc_OnlyNonASCII is not set,
       or non-ASCII and esc_OnlyASCII is not set; and
     - esc_SkipControl is not set, or the value is not a control character
       (%00..%1F, %7F).

   Nothing is appended until the first sequence is decoded, unless
   esc_AlwaysCopy is set.  Returns PR_TRUE when |result| was written to, and
   PR_FALSE when |str| is null or nothing had to be decoded. */
NS_COM PRBool NS_UnescapeURL(const char *str, PRInt32 len, PRUint32 flags, nsACString &result)
{
    if (!str) {
        NS_NOTREACHED("null pointer");
        return PR_FALSE;
    }

    if (len < 0)
        len = strlen(str);

    const PRBool ignoreNonAscii = !!(flags & esc_OnlyASCII);
    const PRBool ignoreAscii = !!(flags & esc_OnlyNonASCII);
    const PRBool skipControl = !!(flags & esc_SkipControl);
    PRBool writing = !!(flags & esc_AlwaysCopy);

    static const char hexChars[] = "0123456789ABCDEFabcdef";

    const char *const end = str + len;
    const char *pending = str; // start of the input not yet copied to |result|

    for (const char *cur = str; cur < end; ++cur) {
        // An escape needs the '%' plus two more bytes inside the range.
        if (*cur != HEX_ESCAPE || end - cur <= 2)
            continue;

        const unsigned char hi = (unsigned char) cur[1];
        const unsigned char lo = (unsigned char) cur[2];
        if (!ISHEX(hi) || !ISHEX(lo))
            continue;

        // Honor the "only ASCII" / "only non-ASCII" restrictions.
        if (hi < '8' ? ignoreAscii : ignoreNonAscii)
            continue;

        // Optionally leave escaped control characters (%00..%1F, %7F) alone.
        if (skipControl &&
            (hi < '2' || (hi == '7' && (lo == 'f' || lo == 'F'))))
            continue;

        writing = PR_TRUE;

        // Copy the literal run that precedes this escape.
        if (cur > pending)
            result.Append(pending, cur - pending);

        char u = (UNHEX(hi) << 4) + UNHEX(lo);
        result.Append(u);

        cur += 2;           // skip both digits; the loop steps past the rest
        pending = cur + 1;  // the next literal run starts after the escape
    }

    if (writing && pending < end)
        result.Append(pending, end - pending);

    return writing;
}
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`/* -- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -- */`
			`/* *** BEGIN LICENSE BLOCK ***`
			`* Version: MPL 1.1/GPL 2.0/LGPL 2.1`
			`*`
			`* The contents of this file are subject to the Mozilla Public License Version`
			`* 1.1 (the "License"); you may not use this file except in compliance with`
			`* the License. You may obtain a copy of the License at`
			`* http://www.mozilla.org/MPL/`
			`*`
			`* Software distributed under the License is distributed on an "AS IS" basis,`
			`* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License`
			`* for the specific language governing rights and limitations under the`
			`* License.`
			`*`
			`* The Original Code is mozilla.org code.`
			`*`
			`* The Initial Developer of the Original Code is`
			`* Netscape Communications Corporation.`
			`* Portions created by the Initial Developer are Copyright (C) 1998`
			`* the Initial Developer. All Rights Reserved.`
			`*`
			`* Contributor(s):`
			`*`
			`* Alternatively, the contents of this file may be used under the terms of`
			`* either of the GNU General Public License Version 2 or later (the "GPL"),`
			`* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),`
			`* in which case the provisions of the GPL or the LGPL are applicable instead`
			`* of those above. If you wish to allow use of your version of this file only`
			`* under the terms of either the GPL or the LGPL, and not to allow others to`
			`* use your version of this file under the terms of the MPL, indicate your`
			`* decision by deleting the provisions above and replace them with the notice`
			`* and other provisions required by the GPL or the LGPL. If you do not delete`
			`* the provisions above, a recipient may use your version of this file under`
			`* the terms of any one of the MPL, the GPL or the LGPL.`
			`*`
			`* *** END LICENSE BLOCK *** */`

			`// First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.`

			`#include "nsEscape.h"`
			`#include "nsMemory.h"`
			`#include "nsCRT.h"`
			`#include "nsReadableUtils.h"`

			`const int netCharType[256] =`
			`/* Bit 0 xalpha -- the alphas`
			`** Bit 1 xpalpha -- as xalpha but`
			`** converts spaces to plus and plus to %2B`
			`** Bit 3 ... path -- as xalphas but doesn't escape '/'`
			`*/`
			`/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */`
			`{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x */`
			`0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1x */`
			`0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4, /* 2x !"#$%&'()+,-./ /`
			`7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */`
			`0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */`
			`/* bits for '@' changed from 7 to 0 so '@' can be escaped */`
			`/* in usernames and passwords in publishing. */`
			`7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */`
			0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
			`7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0, /* 7X pqrstuvwxyz{\}~ DEL */`
			`0, };`

			`/* decode % escaped hex codes into character values`
			`*/`
			`#define UNHEX(C) \`
			`((C >= '0' && C <= '9') ? C - '0' : \`
			`((C >= 'A' && C <= 'F') ? C - 'A' + 10 : \`
			`((C >= 'a' && C <= 'f') ? C - 'a' + 10 : 0)))`


			`#define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags))`
			`#define HEX_ESCAPE '%'`

			`//----------------------------------------------------------------------------------------`
			`static char* nsEscapeCount(`
			`const char * str,`
			`nsEscapeMask flags,`
Bug 367736 - make nsEscape more efficient and clean up some signed/unsigned issues to prevent theoretical overflow, r=bsmedberg+dveditz 2008-08-19 06:57:12 -07:00			`size_t* out_len)`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`//----------------------------------------------------------------------------------------`
			`{`
			`if (!str)`
			`return 0;`

Bug 367736 - make nsEscape more efficient and clean up some signed/unsigned issues to prevent theoretical overflow, r=bsmedberg+dveditz 2008-08-19 06:57:12 -07:00			`size_t i, len = 0, charsToEscape = 0;`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`static const char hexChars[] = "0123456789ABCDEF";`

			`register const unsigned char* src = (const unsigned char *) str;`
Bug 367736 - make nsEscape more efficient and clean up some signed/unsigned issues to prevent theoretical overflow, r=bsmedberg+dveditz 2008-08-19 06:57:12 -07:00			`while (*src)`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`{`
Bug 367736 - make nsEscape more efficient and clean up some signed/unsigned issues to prevent theoretical overflow, r=bsmedberg+dveditz 2008-08-19 06:57:12 -07:00			`len++;`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`if (!IS_OK(*src++))`
Bug 367736 - make nsEscape more efficient and clean up some signed/unsigned issues to prevent theoretical overflow, r=bsmedberg+dveditz 2008-08-19 06:57:12 -07:00			`charsToEscape++;`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`}`

Bug 367736 - make nsEscape more efficient and clean up some signed/unsigned issues to prevent theoretical overflow, r=bsmedberg+dveditz 2008-08-19 06:57:12 -07:00			`// calculate how much memory should be allocated`
			`// original length + 2 bytes for each escaped character + terminating '\0'`
			`// do the sum in steps to check for overflow`
			`size_t dstSize = len + 1 + charsToEscape;`
			`if (dstSize <= len)`
			`return 0;`
			`dstSize += charsToEscape;`
			`if (dstSize < len)`
			`return 0;`

			`// fail if we need more than 4GB`
			`// size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)`
			`// calls NS_Alloc_P(size_t) which calls PR_Malloc(PRUint32), so there is`
			`// no chance to allocate more than 4GB using nsMemory::Alloc()`
			`if (dstSize > PR_UINT32_MAX)`
			`return 0;`

			`char* result = (char *)nsMemory::Alloc(dstSize);`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`if (!result)`
			`return 0;`

			`register unsigned char* dst = (unsigned char *) result;`
			`src = (const unsigned char *) str;`
			`if (flags == url_XPAlphas)`
			`{`
			`for (i = 0; i < len; i++)`
			`{`
			`unsigned char c = *src++;`
			`if (IS_OK(c))`
			`*dst++ = c;`
			`else if (c == ' ')`
			`*dst++ = '+'; /* convert spaces to pluses */`
			`else`
			`{`
			`*dst++ = HEX_ESCAPE;`
			`*dst++ = hexChars[c >> 4]; /* high nibble */`
			`*dst++ = hexChars[c & 0x0f]; /* low nibble */`
			`}`
			`}`
			`}`
			`else`
			`{`
			`for (i = 0; i < len; i++)`
			`{`
			`unsigned char c = *src++;`
			`if (IS_OK(c))`
			`*dst++ = c;`
			`else`
			`{`
			`*dst++ = HEX_ESCAPE;`
			`*dst++ = hexChars[c >> 4]; /* high nibble */`
			`*dst++ = hexChars[c & 0x0f]; /* low nibble */`
			`}`
			`}`
			`}`

			`*dst = '\0'; /* tack on eos */`
			`if(out_len)`
			`*out_len = dst - (unsigned char *) result;`
			`return result;`
			`}`

			`//----------------------------------------------------------------------------------------`
			`NS_COM char* nsEscape(const char * str, nsEscapeMask flags)`
			`//----------------------------------------------------------------------------------------`
			`{`
			`if(!str)`
			`return NULL;`
Bug 367736 - make nsEscape more efficient and clean up some signed/unsigned issues to prevent theoretical overflow, r=bsmedberg+dveditz 2008-08-19 06:57:12 -07:00			`return nsEscapeCount(str, flags, NULL);`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`}`

			`//----------------------------------------------------------------------------------------`
			`NS_COM char* nsUnescape(char * str)`
			`//----------------------------------------------------------------------------------------`
			`{`
			`nsUnescapeCount(str);`
			`return str;`
			`}`

			`//----------------------------------------------------------------------------------------`
			`NS_COM PRInt32 nsUnescapeCount(char * str)`
			`//----------------------------------------------------------------------------------------`
			`{`
			`register char *src = str;`
			`register char *dst = str;`
			`static const char hexChars[] = "0123456789ABCDEFabcdef";`

			`char c1[] = " ";`
			`char c2[] = " ";`
			`char* const pc1 = c1;`
			`char* const pc2 = c2;`

			`while (*src)`
			`{`
			`c1[0] = *(src+1);`
			`if (*(src+1) == '\0')`
			`c2[0] = '\0';`
			`else`
			`c2[0] = *(src+2);`

			`if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||`
			`PL_strpbrk(pc2, hexChars) == 0 )`
			`*dst++ = *src++;`
			`else`
			`{`
			`src++; /* walk over escape */`
			`if (*src)`
			`{`
			`*dst = UNHEX(*src) << 4;`
			`src++;`
			`}`
			`if (*src)`
			`{`
			`*dst = (*dst + UNHEX(*src));`
			`src++;`
			`}`
			`dst++;`
			`}`
			`}`

			`*dst = 0;`
			`return (int)(dst - str);`

			`} /* NET_UnEscapeCnt */`


			`NS_COM char *`
			`nsEscapeHTML(const char * string)`
			`{`
bug 455987 crash fix, r/sr=bsmedberg 2008-10-24 16:15:33 -07:00			`char *rv = nsnull;`
			`/* XXX Hardcoded max entity len. The +1 is for the trailing null. */`
			`PRUint32 len = PL_strlen(string);`
			`if (len >= (PR_UINT32_MAX / 6))`
			`return nsnull;`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00
bug 455987 crash fix, r/sr=bsmedberg 2008-10-24 16:15:33 -07:00			`rv = (char *)NS_Alloc( (6 * len) + 1 );`
			`char *ptr = rv;`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00
bug 455987 crash fix, r/sr=bsmedberg 2008-10-24 16:15:33 -07:00			`if(rv)`
			`{`
			`for(; *string != '\0'; string++)`
			`{`
			`if(*string == '<')`
			`{`
			`*ptr++ = '&';`
			`*ptr++ = 'l';`
			`*ptr++ = 't';`
			`*ptr++ = ';';`
			`}`
			`else if(*string == '>')`
			`{`
			`*ptr++ = '&';`
			`*ptr++ = 'g';`
			`*ptr++ = 't';`
			`*ptr++ = ';';`
			`}`
			`else if(*string == '&')`
			`{`
			`*ptr++ = '&';`
			`*ptr++ = 'a';`
			`*ptr++ = 'm';`
			`*ptr++ = 'p';`
			`*ptr++ = ';';`
			`}`
			`else if (*string == '"')`
			`{`
			`*ptr++ = '&';`
			`*ptr++ = 'q';`
			`*ptr++ = 'u';`
			`*ptr++ = 'o';`
			`*ptr++ = 't';`
			`*ptr++ = ';';`
			`}`
			`else if (*string == '\'')`
			`{`
			`*ptr++ = '&';`
			`*ptr++ = '#';`
			`*ptr++ = '3';`
			`*ptr++ = '9';`
			`*ptr++ = ';';`
			`}`
			`else`
			`{`
			`*ptr++ = *string;`
			`}`
			`}`
			`*ptr = '\0';`
			`}`

			`return(rv);`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`}`

			`NS_COM PRUnichar *`
			`nsEscapeHTML2(const PRUnichar *aSourceBuffer, PRInt32 aSourceBufferLen)`
			`{`
			`// if the caller didn't calculate the length`
Bug 562528 - comparison between signed and unsigned integer expressions in nsEscapeHTML2, r=bsmedberg --HG-- extra : rebase_source : 995cb88bc3bdb692ea154be2ebee597b0f777e58 2010-04-28 17:58:00 -07:00			`if (aSourceBufferLen < 0) {`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`aSourceBufferLen = nsCRT::strlen(aSourceBuffer); // ...then I will`
			`}`

			`/* XXX Hardcoded max entity len. */`
Bug 562528 - comparison between signed and unsigned integer expressions in nsEscapeHTML2, r=bsmedberg --HG-- extra : rebase_source : 995cb88bc3bdb692ea154be2ebee597b0f777e58 2010-04-28 17:58:00 -07:00			`if (PRUint32(aSourceBufferLen) >=`
bug 464998 - r+sr=bsmedberg, a=beltzner 2008-11-19 08:55:59 -08:00			`((PR_UINT32_MAX - sizeof(PRUnichar)) / (6 * sizeof(PRUnichar))) )`
bug 455987 crash fix, r/sr=bsmedberg 2008-10-24 16:15:33 -07:00			`return nsnull;`

Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`PRUnichar *resultBuffer = (PRUnichar *)nsMemory::Alloc(aSourceBufferLen *`
			`6 * sizeof(PRUnichar) + sizeof(PRUnichar('\0')));`
			`PRUnichar *ptr = resultBuffer;`

			`if (resultBuffer) {`
			`PRInt32 i;`

			`for(i = 0; i < aSourceBufferLen; i++) {`
			`if(aSourceBuffer[i] == '<') {`
			`*ptr++ = '&';`
			`*ptr++ = 'l';`
			`*ptr++ = 't';`
			`*ptr++ = ';';`
			`} else if(aSourceBuffer[i] == '>') {`
			`*ptr++ = '&';`
			`*ptr++ = 'g';`
			`*ptr++ = 't';`
			`*ptr++ = ';';`
			`} else if(aSourceBuffer[i] == '&') {`
			`*ptr++ = '&';`
			`*ptr++ = 'a';`
			`*ptr++ = 'm';`
			`*ptr++ = 'p';`
			`*ptr++ = ';';`
			`} else if (aSourceBuffer[i] == '"') {`
			`*ptr++ = '&';`
			`*ptr++ = 'q';`
			`*ptr++ = 'u';`
			`*ptr++ = 'o';`
			`*ptr++ = 't';`
			`*ptr++ = ';';`
			`} else if (aSourceBuffer[i] == '\'') {`
			`*ptr++ = '&';`
			`*ptr++ = '#';`
			`*ptr++ = '3';`
			`*ptr++ = '9';`
			`*ptr++ = ';';`
			`} else {`
			`*ptr++ = aSourceBuffer[i];`
			`}`
			`}`
			`*ptr = 0;`
			`}`

			`return resultBuffer;`
			`}`

			`//----------------------------------------------------------------------------------------`

			`const int EscapeChars[256] =`
			`/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */`
			`{`
			`0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */`
			`0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */`
Bug 376844 - Single quotes in URLs should be escaped. Patch by Wladimir Palant <trev.moz@adblockplus.org>. r=benjamin, sr=cbiesinger. 2007-06-17 05:29:08 -07:00			`0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, /* 2x !"#$%&'()+,-./ /`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008, 912, 0,1008, 0, 768, /* 3x 0123456789:;<=>? */`
			`1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 4x @ABCDEFGHIJKLMNO */`
			`1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023, /* 5x PQRSTUVWXYZ[\]^_ */`
			0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 6x `abcdefghijklmno */
			`1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, /* 7x pqrstuvwxyz{\|}~ */`
			`0 /* 8x DEL */`
			`};`

			`#define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags))`

			`//----------------------------------------------------------------------------------------`

			`/* returns an escaped string */`

			`/* use the following flags to specify which`
			`part of an URL you want to escape:`

			`esc_Scheme = 1`
			`esc_Username = 2`
			`esc_Password = 4`
			`esc_Host = 8`
			`esc_Directory = 16`
			`esc_FileBaseName = 32`
			`esc_FileExtension = 64`
			`esc_Param = 128`
			`esc_Query = 256`
			`esc_Ref = 512`
			`*/`

			`/* by default this function will not escape parts of a string`
			`that already look escaped, which means it already includes`
			`a valid hexcode. This is done to avoid multiple escapes of`
			`a string. Use the following flags to force escaping of a`
			`string:`

			`esc_Forced = 1024`
			`*/`

			`NS_COM PRBool NS_EscapeURL(const char *part,`
			`PRInt32 partLen,`
			`PRUint32 flags,`
			`nsACString &result)`
			`{`
			`if (!part) {`
			`NS_NOTREACHED("null pointer");`
			`return PR_FALSE;`
			`}`

			`int i = 0;`
			`static const char hexChars[] = "0123456789ABCDEF";`
			`if (partLen < 0)`
			`partLen = strlen(part);`
Bug 398435 - "PRBool misuse bugs in xpcom/" [p=taras r=bsmedberg a1.9=sayrer] 2007-10-19 16:26:52 -07:00			`PRBool forced = !!(flags & esc_Forced);`
			`PRBool ignoreNonAscii = !!(flags & esc_OnlyASCII);`
			`PRBool ignoreAscii = !!(flags & esc_OnlyNonASCII);`
			`PRBool writing = !!(flags & esc_AlwaysCopy);`
			`PRBool colon = !!(flags & esc_Colon);`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00
			`register const unsigned char* src = (const unsigned char *) part;`

			`char tempBuffer[100];`
			`unsigned int tempBufferPos = 0;`

			`PRBool previousIsNonASCII = PR_FALSE;`
			`for (i = 0; i < partLen; i++)`
			`{`
			`unsigned char c = *src++;`

			`// if the char has not to be escaped or whatever follows % is`
			`// a valid escaped string, just copy the char.`
			`//`
			`// Also the % will not be escaped until forced`
			`// See bugzilla bug 61269 for details why we changed this`
			`//`
			`// And, we will not escape non-ascii characters if requested.`
			`// On special request we will also escape the colon even when`
			`// not covered by the matrix.`
			`// ignoreAscii is not honored for control characters (C0 and DEL)`
			`//`
			`// And, we should escape the '\|' character when it occurs after any`
			`// non-ASCII character as it may be part of a multi-byte character.`
add a comment explaining the 0x20..0x7f range 2007-07-23 20:30:00 -07:00			`//`
			`// 0x20..0x7e are the valid ASCII characters. We also escape spaces`
			`// (0x20) since they are not legal in URLs.`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`if ((NO_NEED_ESC(c) \|\| (c == HEX_ESCAPE && !forced)`
			`\|\| (c > 0x7f && ignoreNonAscii)`
bug 389106 escape spaces even for the OnlyNonASCII flag r=bzbarsky sr=dveditz 2007-07-23 17:28:42 -07:00			`\|\| (c > 0x20 && c < 0x7f && ignoreAscii))`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00			`&& !(c == ':' && colon)`
			`&& !(previousIsNonASCII && c == '\|' && !ignoreNonAscii))`
			`{`
			`if (writing)`
			`tempBuffer[tempBufferPos++] = c;`
			`}`
			`else /* do the escape magic */`
			`{`
			`if (!writing)`
			`{`
			`result.Append(part, i);`
			`writing = PR_TRUE;`
			`}`
			`tempBuffer[tempBufferPos++] = HEX_ESCAPE;`
			`tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */`
			`tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */`
			`}`

			`if (tempBufferPos >= sizeof(tempBuffer) - 4)`
			`{`
			`NS_ASSERTION(writing, "should be writing");`
			`tempBuffer[tempBufferPos] = '\0';`
			`result += tempBuffer;`
			`tempBufferPos = 0;`
			`}`

			`previousIsNonASCII = (c > 0x7f);`
			`}`
			`if (writing) {`
			`tempBuffer[tempBufferPos] = '\0';`
			`result += tempBuffer;`
			`}`
			`return writing;`
			`}`

			`#define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)`

			`NS_COM PRBool NS_UnescapeURL(const char *str, PRInt32 len, PRUint32 flags, nsACString &result)`
			`{`
			`if (!str) {`
			`NS_NOTREACHED("null pointer");`
			`return PR_FALSE;`
			`}`

			`if (len < 0)`
			`len = strlen(str);`

Bug 398435 - "PRBool misuse bugs in xpcom/" [p=taras r=bsmedberg a1.9=sayrer] 2007-10-19 16:26:52 -07:00			`PRBool ignoreNonAscii = !!(flags & esc_OnlyASCII);`
			`PRBool ignoreAscii = !!(flags & esc_OnlyNonASCII);`
			`PRBool writing = !!(flags & esc_AlwaysCopy);`
			`PRBool skipControl = !!(flags & esc_SkipControl);`
Free the (distributed) Lizard! Automatic merge from CVS: Module mozilla: tag HG_REPO_INITIAL_IMPORT at 22 Mar 2007 10:30 PDT, 2007-03-22 10:30:00 -07:00
			`static const char hexChars[] = "0123456789ABCDEFabcdef";`

			`const char *last = str;`
			`const char *p = str;`

			`for (int i=0; i<len; ++i, ++p) {`
			`//printf("%c [i=%d of len=%d]\n", *p, i, len);`
			`if (*p == HEX_ESCAPE && i < len-2) {`
			`unsigned char p1 = ((unsigned char ) p) + 1;`
			`unsigned char p2 = ((unsigned char ) p) + 2;`
			`if (ISHEX(p1) && ISHEX(p2) &&`
			`((p1 < '8' && !ignoreAscii) \|\| (p1 >= '8' && !ignoreNonAscii)) &&`
			`!(skipControl &&`
			`(p1 < '2' \|\| (p1 == '7' && (p2 == 'f' \|\| p2 == 'F'))))) {`
			`//printf("- p1=%c p2=%c\n", p1, p2);`
			`writing = PR_TRUE;`
			`if (p > last) {`
			`//printf("- p=%p, last=%p\n", p, last);`
			`result.Append(last, p - last);`
			`last = p;`
			`}`
			`char u = (UNHEX(p1) << 4) + UNHEX(p2);`
			`//printf("- u=%c\n", u);`
			`result.Append(u);`
			`i += 2;`
			`p += 2;`
			`last += 3;`
			`}`
			`}`
			`}`
			`if (writing && last < str + len)`
			`result.Append(last, str + len - last);`

			`return writing;`
			`}`