//------------------------------------------------------------------------------
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//------------------------------------------------------------------------------

namespace System.Web.Security.AntiXss {
    using System;
    using System.Collections;
    using System.Text;
    using System.Threading;

    /// <summary>
    /// The type of space encoding to use.
    /// </summary>
    internal enum EncodingType {
        /// <summary>
        /// Encode spaces for use in query strings
        /// </summary>
        QueryString = 1,

        /// <summary>
        /// Encode spaces for use in form data
        /// </summary>
        HtmlForm = 2
    }

    /// <summary>
    /// Provides Html Parameter Encoding methods.
    /// </summary>
    internal static class HtmlParameterEncoder {
        /// <summary>
        /// The value to use when encoding a space for query strings.
        /// </summary>
        private static readonly char[] QueryStringSpace = "%20".ToCharArray();

        /// <summary>
        /// The value to use when encoding a space for form data.
        /// </summary>
        private static readonly char[] FormStringSpace = "+".ToCharArray();

        /// <summary>
        /// The values to output for each byte when encoding query string or form parameters.
        /// Built on first use by <see cref="InitialiseSafeList"/>. A null entry means the
        /// byte is emitted unchanged; a non-null entry is the replacement text.
        /// </summary>
        private static readonly Lazy<char[][]> characterValuesLazy = new Lazy<char[][]>(InitialiseSafeList);

        /// <summary>
        /// Encodes a string for query string encoding and returns the encoded string.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding for the text parameter.</param>
        /// <returns>The URL-encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
        /// <remarks>
        /// URL encoding ensures that all browsers will correctly transmit text in URL strings.
        /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers.
        /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser
        /// in a request string. Spaces are encoded as "%20".
        /// </remarks>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is neither null nor empty.
        /// </exception>
        internal static string QueryStringParameterEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.QueryString);
        }

        /// <summary>
        /// Encodes a string for form URL encoding and returns the encoded string.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding for the text parameter.</param>
        /// <returns>The URL-encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
        /// <remarks>
        /// URL encoding ensures that all browsers will correctly transmit text in URL strings.
        /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers.
        /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser
        /// in a request string. Spaces are encoded as "+".
        /// </remarks>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is neither null nor empty.
        /// </exception>
        internal static string FormStringParameterEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.HtmlForm);
        }

        /// <summary>
        /// Encodes a string for Query String or Form Data encoding using the parameter safe list.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding for the text parameter.</param>
        /// <param name="encodingType">The encoding type to use.</param>
        /// <returns>The encoded text.</returns>
        private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType) {
            return FormQueryEncode(s, encoding, encodingType, characterValuesLazy);
        }

        /// <summary>
        /// Encodes a string byte-by-byte using the supplied table of output values.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding used to convert <paramref name="s"/> to bytes.</param>
        /// <param name="encodingType">Selects how a space (0x20) is written: "%20" or "+".</param>
        /// <param name="characterValuesLazy">
        /// Lazily created table indexed by byte value; a null entry means the byte is written as-is.
        /// </param>
        /// <returns>The encoded text, or <paramref name="s"/> itself when it is null or empty.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is neither null nor empty.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown if a space is encountered and <paramref name="encodingType"/> is not a known value.
        /// </exception>
        private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType, Lazy<char[][]> characterValuesLazy) {
            // The null/empty check deliberately comes first: an empty input is returned
            // without validating the encoding.
            if (string.IsNullOrEmpty(s)) {
                return s;
            }

            if (encoding == null) {
                throw new ArgumentNullException("encoding");
            }

            var characterValues = characterValuesLazy.Value;

            // RFC 3986 states strings must be converted to their UTF8 value before URL encoding.
            // See http://tools.ietf.org/html/rfc3986
            // The bytes here come from the caller-supplied encoding, which is not necessarily UTF-8.
            // Conversion to char[] keeps null characters inline.
            byte[] encodedBytes = encoding.GetBytes(s.ToCharArray());
            char[] encodedInput = new char[encodedBytes.Length * 3]; // Each byte can potentially be encoded as %xx
            int outputLength = 0;

            for (int characterPosition = 0; characterPosition < encodedBytes.Length; characterPosition++) {
                byte currentCharacter = encodedBytes[characterPosition];

                // NOTE(review): the length comparison uses '>' rather than '>=', and a byte that
                // satisfied it would still be used as an index into the table below. Whether that
                // can ever happen depends on how many entries SafeList.Generate(255, ...) returns
                // - confirm against SafeList before changing.
                if (currentCharacter == 0x00 || currentCharacter == 0x20 || currentCharacter > characterValues.Length || characterValues[currentCharacter] != null) {
                    // character needs to be encoded
                    char[] encodedCharacter;

                    if (currentCharacter == 0x20) {
                        switch (encodingType) {
                            case EncodingType.QueryString:
                                encodedCharacter = QueryStringSpace;
                                break;

                            // Special case for Html Form data, from http://www.w3.org/TR/html401/appendix/notes.html#non-ascii-chars
                            case EncodingType.HtmlForm:
                                encodedCharacter = FormStringSpace;
                                break;

                            default:
                                throw new ArgumentOutOfRangeException("encodingType");
                        }
                    }
                    else {
                        encodedCharacter = characterValues[currentCharacter];
                    }

                    for (int j = 0; j < encodedCharacter.Length; j++) {
                        encodedInput[outputLength++] = encodedCharacter[j];
                    }
                }
                else {
                    // character does not need encoding
                    encodedInput[outputLength++] = (char)currentCharacter;
                }
            }

            return new string(encodedInput, 0, outputLength);
        }

        /// <summary>
        /// Builds the table of output values used for query string and form parameter encoding.
        /// </summary>
        /// <returns>
        /// The table produced by <c>SafeList.Generate</c> after <c>SafeList.PunchSafeList</c> has been
        /// applied with the values from <see cref="UrlParameterSafeList"/>.
        /// </returns>
        private static char[][] InitialiseSafeList() {
            char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
            SafeList.PunchSafeList(ref result, UrlParameterSafeList());
            return result;
        }

        /// <summary>
        /// Provides the safe characters for URL parameter encoding.
        /// </summary>
        /// <returns>The safe characters for URL parameter encoding.</returns>
        private static IEnumerable UrlParameterSafeList() {
            // Hyphen
            yield return 0x2D;

            // Full stop/period
            yield return 0x2E;

            // Digits
            for (int i = 0x30; i <= 0x39; i++) {
                yield return i;
            }

            // Upper case alphabet
            for (int i = 0x41; i <= 0x5A; i++) {
                yield return i;
            }

            // Underscore
            yield return 0x5F;

            // Lower case alphabet
            for (int i = 0x61; i <= 0x7A; i++) {
                yield return i;
            }

            // Tilde
            yield return 0x7E;
        }

        #region UrlPathEncode Helpers

        /// <summary>
        /// The values to output for each byte when encoding URL paths.
        /// Built on first use by <see cref="InitialisePathSafeList"/>.
        /// </summary>
        private static readonly Lazy<char[][]> pathCharacterValuesLazy = new Lazy<char[][]>(InitialisePathSafeList);

        /// <summary>
        /// Encodes a string for use in a URL path and returns the encoded string.
        /// </summary>
        /// <param name="s">The text to encode.</param>
        /// <param name="encoding">The encoding for the text parameter.</param>
        /// <returns>
        /// The encoded text, with spaces written as "%20". A null or empty <paramref name="s"/> is returned unchanged.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is neither null nor empty.
        /// </exception>
        internal static string UrlPathEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.QueryString, pathCharacterValuesLazy);
        }

        /// <summary>
        /// Builds the table of output values used for URL path encoding.
        /// </summary>
        /// <returns>
        /// The table produced by <c>SafeList.Generate</c> after <c>SafeList.PunchSafeList</c> has been
        /// applied with the values from <see cref="UrlPathSafeList"/>.
        /// </returns>
        private static char[][] InitialisePathSafeList() {
            char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
            SafeList.PunchSafeList(ref result, UrlPathSafeList());
            return result;
        }

        /// <summary>
        /// Provides the safe characters for URL path encoding: everything in
        /// <see cref="UrlParameterSafeList"/> plus a few path-specific characters.
        /// </summary>
        /// <returns>The safe characters for URL path encoding.</returns>
        private static IEnumerable UrlPathSafeList() {
            foreach (var c in UrlParameterSafeList()) {
                yield return c;
            }

            // Hash
            yield return 0x23;

            // Percent
            yield return 0x25;

            // Forward slash
            yield return 0x2F;

            // Backwards slash
            yield return 0x5C;

            // Left parenthesis
            yield return 0x28;

            // Right parenthesis
            yield return 0x29;
        }

        #endregion
    }
}