//------------------------------------------------------------------------------
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//------------------------------------------------------------------------------

namespace System.Web.Security.AntiXss {
    using System;
    using System.Text;
    using System.Threading;

    /// <summary>
    /// Provides HTML encoding methods.
    /// </summary>
    /// <remarks>
    /// Encoding is table driven: <c>characterValues</c> holds, for every UTF-16 code unit,
    /// either <c>null</c> (emit the character unchanged) or the entity body to emit between
    /// <c>&amp;</c> and <c>;</c>. The table is shared static state guarded by <c>SyncLock</c>.
    /// </remarks>
    internal static class UnicodeCharacterEncoder {
        /// <summary>
        /// A lock object to use when performing safe listing.
        /// </summary>
        private static readonly ReaderWriterLockSlim SyncLock = new ReaderWriterLockSlim();

        /// <summary>
        /// The HTML escaped value for a space, used in attribute encoding.
        /// </summary>
        /// <remarks>
        /// Tweak values are appended verbatim by <c>HtmlEncode</c>, so this must be the
        /// complete character reference including the leading <c>&amp;</c> and trailing <c>;</c>.
        /// </remarks>
        private static readonly char[] UnicodeSpace = "&#32;".ToCharArray();

        /// <summary>
        /// The XML named entity for an apostrophe, used in XML encoding.
        /// </summary>
        /// <remarks>
        /// Tweak values are appended verbatim by <c>HtmlEncode</c>, so this must be the
        /// complete entity reference including the leading <c>&amp;</c> and trailing <c>;</c>.
        /// </remarks>
        private static readonly char[] XmlApostrophe = "&apos;".ToCharArray();

        /// <summary>
        /// The values to output for HTML named entities, built on first use.
        /// </summary>
        private static readonly Lazy<char[][]> namedEntitiesLazy = new Lazy<char[][]>(InitialiseNamedEntityList);

        /// <summary>
        /// The current lower code chart settings.
        /// </summary>
        private static LowerCodeCharts currentLowerCodeChartSettings = LowerCodeCharts.None;

        /// <summary>
        /// The current lower middle code chart settings.
        /// </summary>
        private static LowerMidCodeCharts currentLowerMidCodeChartSettings = LowerMidCodeCharts.None;

        /// <summary>
        /// The current middle code chart settings.
        /// </summary>
        private static MidCodeCharts currentMidCodeChartSettings = MidCodeCharts.None;

        /// <summary>
        /// The current upper middle code chart settings.
        /// </summary>
        private static UpperMidCodeCharts currentUpperMidCodeChartSettings = UpperMidCodeCharts.None;

        /// <summary>
        /// The current upper code chart settings.
        /// </summary>
        private static UpperCodeCharts currentUpperCodeChartSettings = UpperCodeCharts.None;

        /// <summary>
        /// The values to output for each character. A <c>null</c> entry means the
        /// character is emitted unchanged.
        /// </summary>
        private static char[][] characterValues;

        /// <summary>
        /// Provides method specific encoding of characters.
        /// </summary>
        /// <param name="input">The character to encode</param>
        /// <param name="output">The encoded character, if it has been encoded.</param>
        /// <returns>True if the character has been encoded, otherwise false.</returns>
        private delegate bool MethodSpecificEncoder(char input, out char[] output);

        /// <summary>
        /// Marks characters from the specified languages as safe.
        /// </summary>
        /// <param name="lowerCodeCharts">The combination of lower code charts to use.</param>
        /// <param name="lowerMidCodeCharts">The combination of lower mid code charts to use.</param>
        /// <param name="midCodeCharts">The combination of mid code charts to use.</param>
        /// <param name="upperMidCodeCharts">The combination of upper mid code charts to use.</param>
        /// <param name="upperCodeCharts">The combination of upper code charts to use.</param>
        /// <remarks>The safe list affects all HTML and XML encoding functions.</remarks>
        public static void MarkAsSafe(
            LowerCodeCharts lowerCodeCharts,
            LowerMidCodeCharts lowerMidCodeCharts,
            MidCodeCharts midCodeCharts,
            UpperMidCodeCharts upperMidCodeCharts,
            UpperCodeCharts upperCodeCharts) {
            // Nothing to do when the requested settings are the ones already recorded.
            // NOTE(review): InitialiseSafeList applies LowerCodeCharts.Default without recording it
            // in the current* fields, so this comparison is against the last MarkAsSafe call only.
            if (lowerCodeCharts == currentLowerCodeChartSettings &&
                lowerMidCodeCharts == currentLowerMidCodeChartSettings &&
                midCodeCharts == currentMidCodeChartSettings &&
                upperMidCodeCharts == currentUpperMidCodeChartSettings &&
                upperCodeCharts == currentUpperCodeChartSettings) {
                return;
            }

            SyncLock.EnterWriteLock();
            try {
                if (characterValues == null) {
                    // NOTE(review): InitialiseSafeList passes 0xFFFF to the same generator while this
                    // call passes 65536 - confirm against SafeList.Generate which length is intended.
                    characterValues = SafeList.Generate(65536, SafeList.HashThenValueGenerator);
                }

                SafeList.PunchUnicodeThrough(
                    ref characterValues,
                    lowerCodeCharts,
                    lowerMidCodeCharts,
                    midCodeCharts,
                    upperMidCodeCharts,
                    upperCodeCharts);

                // Re-apply the mandatory HTML encodings after the code charts have been applied.
                ApplyHtmlSpecificValues();

                currentLowerCodeChartSettings = lowerCodeCharts;
                currentLowerMidCodeChartSettings = lowerMidCodeCharts;
                currentMidCodeChartSettings = midCodeCharts;
                currentUpperMidCodeChartSettings = upperMidCodeCharts;
                currentUpperCodeChartSettings = upperCodeCharts;
            }
            finally {
                SyncLock.ExitWriteLock();
            }
        }

        /// <summary>
        /// Encodes input strings for use in XML.
        /// </summary>
        /// <param name="input">String to be encoded</param>
        /// <returns>
        /// Encoded string for use in XML.
        /// </returns>
        internal static string XmlEncode(string input) {
            return HtmlEncode(input, false, XmlTweak);
        }

        /// <summary>
        /// Encodes input strings for use in XML attributes.
        /// </summary>
        /// <param name="input">String to be encoded</param>
        /// <returns>
        /// Encoded string for use in XML attributes.
        /// </returns>
        internal static string XmlAttributeEncode(string input) {
            return HtmlEncode(input, false, XmlAttributeTweak);
        }

        /// <summary>
        /// Encodes input strings for use in HTML attributes.
        /// </summary>
        /// <param name="input">String to be encoded</param>
        /// <returns>
        /// Encoded string for use in HTML attributes.
        /// </returns>
        internal static string HtmlAttributeEncode(string input) {
            return HtmlEncode(input, false, HtmlAttributeTweak);
        }

        /// <summary>
        /// Encodes input strings for use in HTML.
        /// </summary>
        /// <param name="input">String to be encoded</param>
        /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
        /// <returns>
        /// Encoded string for use in HTML.
        /// </returns>
        internal static string HtmlEncode(string input, bool useNamedEntities) {
            return HtmlEncode(input, useNamedEntities, null);
        }

        /// <summary>
        /// HTML Attribute Encoding specific tweaks.
        /// </summary>
        /// <param name="input">The character to potentially encode.</param>
        /// <param name="output">The encoded character, if any.</param>
        /// <returns>True if encoding took place, otherwise false.</returns>
        private static bool HtmlAttributeTweak(char input, out char[] output) {
            if (input == ' ') {
                output = UnicodeSpace;
                return true;
            }

            output = null;
            return false;
        }

        /// <summary>
        /// XML specific tweaks.
        /// </summary>
        /// <param name="input">The character to potentially encode.</param>
        /// <param name="output">The encoded character, if any.</param>
        /// <returns>True if encoding took place, otherwise false.</returns>
        private static bool XmlTweak(char input, out char[] output) {
            if (input == '\'') {
                output = XmlApostrophe;
                return true;
            }

            output = null;
            return false;
        }

        /// <summary>
        /// XML Attribute Encoding specific tweaks.
        /// </summary>
        /// <param name="input">The character to potentially encode.</param>
        /// <param name="output">The encoded character, if any.</param>
        /// <returns>True if encoding took place, otherwise false.</returns>
        private static bool XmlAttributeTweak(char input, out char[] output) {
            if (input == '\'') {
                output = XmlApostrophe;
                return true;
            }

            if (input == ' ') {
                output = UnicodeSpace;
                return true;
            }

            output = null;
            return false;
        }

        /// <summary>
        /// Encodes input strings for use in HTML.
        /// </summary>
        /// <param name="input">String to be encoded</param>
        /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
        /// <param name="encoderTweak">A method specific encoder that is consulted first for every
        /// character in the Basic Multilingual Plane, or null if none is needed.</param>
        /// <returns>
        /// Encoded string for use in HTML. A null or empty <paramref name="input"/> is returned unchanged.
        /// </returns>
        private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak) {
            if (string.IsNullOrEmpty(input)) {
                return input;
            }

            if (characterValues == null) {
                InitialiseSafeList();
            }

            char[][] namedEntities = null;
            if (useNamedEntities) {
                namedEntities = namedEntitiesLazy.Value;
            }

            // Setup a new StringBuilder for output.
            // Worst case scenario - the longest entity name, thetasym is 10 characters, including the & and ;.
            StringBuilder builder = EncoderUtil.GetOutputStringBuilder(input.Length, 10 /* worstCaseOutputCharsPerInputChar */);

            // Hold the read lock for the whole pass so MarkAsSafe cannot change the table mid-string.
            SyncLock.EnterReadLock();
            try {
                Utf16StringReader stringReader = new Utf16StringReader(input);
                while (true) {
                    int currentCodePoint = stringReader.ReadNextScalarValue();
                    if (currentCodePoint < 0) {
                        break; // EOF
                    }

                    if (currentCodePoint > Char.MaxValue) {
                        // We don't have a pre-generated mapping of characters beyond the Basic Multilingual
                        // Plane (BMP), so we need to generate these encodings on-the-fly. We should encode
                        // the code point rather than the surrogate code units that make up this code point.
                        // See: http://www.w3.org/International/questions/qa-escapes#bytheway
                        AppendEntity(builder, SafeList.HashThenValueGenerator(currentCodePoint));
                        continue;
                    }

                    // If we reached this point, the code point is within the BMP.
                    char currentCharacter = (char)currentCodePoint;
                    char[] tweakedValue;

                    if (encoderTweak != null && encoderTweak(currentCharacter, out tweakedValue)) {
                        // Tweak values are complete references and are written as-is.
                        builder.Append(tweakedValue);
                    }
                    else if (useNamedEntities && namedEntities[currentCodePoint] != null) {
                        AppendEntity(builder, namedEntities[currentCodePoint]);
                    }
                    else if (characterValues[currentCodePoint] != null) {
                        // character needs to be encoded
                        AppendEntity(builder, characterValues[currentCodePoint]);
                    }
                    else {
                        // character does not need encoding
                        builder.Append(currentCharacter);
                    }
                }
            }
            finally {
                SyncLock.ExitReadLock();
            }

            return builder.ToString();
        }

        /// <summary>
        /// Appends an entity or character reference, wrapping the supplied body in &amp; and ;.
        /// </summary>
        /// <param name="builder">The output builder to append to.</param>
        /// <param name="entityBody">The text that goes between the ampersand and the semicolon.</param>
        private static void AppendEntity(StringBuilder builder, char[] entityBody) {
            builder.Append('&');
            builder.Append(entityBody);
            builder.Append(';');
        }

        /// <summary>
        /// Initializes the HTML safe list.
        /// </summary>
        /// <remarks>
        /// Does nothing if the list has already been created. Otherwise the list is generated,
        /// <c>LowerCodeCharts.Default</c> is applied and the HTML specific values are set.
        /// </remarks>
        private static void InitialiseSafeList() {
            SyncLock.EnterWriteLock();
            try {
                // Re-check under the write lock; another thread may have initialised the list
                // between the caller's check and this point.
                if (characterValues == null) {
                    // NOTE(review): MarkAsSafe passes 65536 to the same generator while this call
                    // passes 0xFFFF - confirm against SafeList.Generate which length is intended.
                    characterValues = SafeList.Generate(0xFFFF, SafeList.HashThenValueGenerator);
                    SafeList.PunchUnicodeThrough(
                        ref characterValues,
                        LowerCodeCharts.Default,
                        LowerMidCodeCharts.None,
                        MidCodeCharts.None,
                        UpperMidCodeCharts.None,
                        UpperCodeCharts.None);
                    ApplyHtmlSpecificValues();
                }
            }
            finally {
                SyncLock.ExitWriteLock();
            }
        }

        /// <summary>
        /// Applies Html specific values to the internal value list.
        /// </summary>
        /// <remarks>
        /// ASP.NET 4 and Razor introduced a new syntax &lt;%: %&gt; and @ which are used to HTML-encode values.
        /// For example, &lt;%: foo %&gt; is shorthand for &lt;%= HttpUtility.HtmlEncode(foo) %&gt;. Since these could
        /// occur inside an attribute, e.g. &lt;a href="@Foo"&gt;, ASP.NET mandates that HtmlEncode also encode
        /// characters that are meaningful inside HTML attributes, like the single quote. Encoding spaces
        /// isn't mandatory since it's expected that users will surround such variables with quotes.
        /// </remarks>
        private static void ApplyHtmlSpecificValues() {
            characterValues['<'] = "lt".ToCharArray();
            characterValues['>'] = "gt".ToCharArray();
            characterValues['&'] = "amp".ToCharArray();
            characterValues['"'] = "quot".ToCharArray();
            characterValues['\''] = "#39".ToCharArray();
        }

        /// <summary>
        /// Initialises the HTML named entities list.
        /// </summary>
        /// <returns>
        /// An array indexed by UTF-16 code unit; an entry is the entity name (without
        /// &amp; and ;) for that character, or null if it has no named entity here.
        /// </returns>
        [System.Diagnostics.CodeAnalysis.SuppressMessage(
            "Microsoft.Maintainability",
            "CA1505:AvoidUnmaintainableCode",
            Justification = "Splitting or initialising via lookups has too large a performance increase.")]
        private static char[][] InitialiseNamedEntityList() {
            char[][] namedEntities = new char[65536][];

            // Latin-1 block.
            namedEntities[160] = "nbsp".ToCharArray();
            namedEntities[161] = "iexcl".ToCharArray();
            namedEntities[162] = "cent".ToCharArray();
            namedEntities[163] = "pound".ToCharArray();
            namedEntities[164] = "curren".ToCharArray();
            namedEntities[165] = "yen".ToCharArray();
            namedEntities[166] = "brvbar".ToCharArray();
            namedEntities[167] = "sect".ToCharArray();
            namedEntities[168] = "uml".ToCharArray();
            namedEntities[169] = "copy".ToCharArray();
            namedEntities[170] = "ordf".ToCharArray();
            namedEntities[171] = "laquo".ToCharArray();
            namedEntities[172] = "not".ToCharArray();
            namedEntities[173] = "shy".ToCharArray();
            namedEntities[174] = "reg".ToCharArray();
            namedEntities[175] = "macr".ToCharArray();
            namedEntities[176] = "deg".ToCharArray();
            namedEntities[177] = "plusmn".ToCharArray();
            namedEntities[178] = "sup2".ToCharArray();
            namedEntities[179] = "sup3".ToCharArray();
            namedEntities[180] = "acute".ToCharArray();
            namedEntities[181] = "micro".ToCharArray();
            namedEntities[182] = "para".ToCharArray();
            namedEntities[183] = "middot".ToCharArray();
            namedEntities[184] = "cedil".ToCharArray();
            namedEntities[185] = "sup1".ToCharArray();
            namedEntities[186] = "ordm".ToCharArray();
            namedEntities[187] = "raquo".ToCharArray();
            namedEntities[188] = "frac14".ToCharArray();
            namedEntities[189] = "frac12".ToCharArray();
            namedEntities[190] = "frac34".ToCharArray();
            namedEntities[191] = "iquest".ToCharArray();
            namedEntities[192] = "Agrave".ToCharArray();
            namedEntities[193] = "Aacute".ToCharArray();
            namedEntities[194] = "Acirc".ToCharArray();
            namedEntities[195] = "Atilde".ToCharArray();
            namedEntities[196] = "Auml".ToCharArray();
            namedEntities[197] = "Aring".ToCharArray();
            namedEntities[198] = "AElig".ToCharArray();
            namedEntities[199] = "Ccedil".ToCharArray();
            namedEntities[200] = "Egrave".ToCharArray();
            namedEntities[201] = "Eacute".ToCharArray();
            namedEntities[202] = "Ecirc".ToCharArray();
            namedEntities[203] = "Euml".ToCharArray();
            namedEntities[204] = "Igrave".ToCharArray();
            namedEntities[205] = "Iacute".ToCharArray();
            namedEntities[206] = "Icirc".ToCharArray();
            namedEntities[207] = "Iuml".ToCharArray();
            namedEntities[208] = "ETH".ToCharArray();
            namedEntities[209] = "Ntilde".ToCharArray();
            namedEntities[210] = "Ograve".ToCharArray();
            namedEntities[211] = "Oacute".ToCharArray();
            namedEntities[212] = "Ocirc".ToCharArray();
            namedEntities[213] = "Otilde".ToCharArray();
            namedEntities[214] = "Ouml".ToCharArray();
            namedEntities[215] = "times".ToCharArray();
            namedEntities[216] = "Oslash".ToCharArray();
            namedEntities[217] = "Ugrave".ToCharArray();
            namedEntities[218] = "Uacute".ToCharArray();
            namedEntities[219] = "Ucirc".ToCharArray();
            namedEntities[220] = "Uuml".ToCharArray();
            namedEntities[221] = "Yacute".ToCharArray();
            namedEntities[222] = "THORN".ToCharArray();
            namedEntities[223] = "szlig".ToCharArray();
            namedEntities[224] = "agrave".ToCharArray();
            namedEntities[225] = "aacute".ToCharArray();
            namedEntities[226] = "acirc".ToCharArray();
            namedEntities[227] = "atilde".ToCharArray();
            namedEntities[228] = "auml".ToCharArray();
            namedEntities[229] = "aring".ToCharArray();
            namedEntities[230] = "aelig".ToCharArray();
            namedEntities[231] = "ccedil".ToCharArray();
            namedEntities[232] = "egrave".ToCharArray();
            namedEntities[233] = "eacute".ToCharArray();
            namedEntities[234] = "ecirc".ToCharArray();
            namedEntities[235] = "euml".ToCharArray();
            namedEntities[236] = "igrave".ToCharArray();
            namedEntities[237] = "iacute".ToCharArray();
            namedEntities[238] = "icirc".ToCharArray();
            namedEntities[239] = "iuml".ToCharArray();
            namedEntities[240] = "eth".ToCharArray();
            namedEntities[241] = "ntilde".ToCharArray();
            namedEntities[242] = "ograve".ToCharArray();
            namedEntities[243] = "oacute".ToCharArray();
            namedEntities[244] = "ocirc".ToCharArray();
            namedEntities[245] = "otilde".ToCharArray();
            namedEntities[246] = "ouml".ToCharArray();
            namedEntities[247] = "divide".ToCharArray();
            namedEntities[248] = "oslash".ToCharArray();
            namedEntities[249] = "ugrave".ToCharArray();
            namedEntities[250] = "uacute".ToCharArray();
            namedEntities[251] = "ucirc".ToCharArray();
            namedEntities[252] = "uuml".ToCharArray();
            namedEntities[253] = "yacute".ToCharArray();
            namedEntities[254] = "thorn".ToCharArray();
            namedEntities[255] = "yuml".ToCharArray();

            // Latin extended and spacing modifier letters.
            namedEntities[338] = "OElig".ToCharArray();
            namedEntities[339] = "oelig".ToCharArray();
            namedEntities[352] = "Scaron".ToCharArray();
            namedEntities[353] = "scaron".ToCharArray();
            namedEntities[376] = "Yuml".ToCharArray();
            namedEntities[402] = "fnof".ToCharArray();
            namedEntities[710] = "circ".ToCharArray();
            namedEntities[732] = "tilde".ToCharArray();

            // Greek.
            namedEntities[913] = "Alpha".ToCharArray();
            namedEntities[914] = "Beta".ToCharArray();
            namedEntities[915] = "Gamma".ToCharArray();
            namedEntities[916] = "Delta".ToCharArray();
            namedEntities[917] = "Epsilon".ToCharArray();
            namedEntities[918] = "Zeta".ToCharArray();
            namedEntities[919] = "Eta".ToCharArray();
            namedEntities[920] = "Theta".ToCharArray();
            namedEntities[921] = "Iota".ToCharArray();
            namedEntities[922] = "Kappa".ToCharArray();
            namedEntities[923] = "Lambda".ToCharArray();
            namedEntities[924] = "Mu".ToCharArray();
            namedEntities[925] = "Nu".ToCharArray();
            namedEntities[926] = "Xi".ToCharArray();
            namedEntities[927] = "Omicron".ToCharArray();
            namedEntities[928] = "Pi".ToCharArray();
            namedEntities[929] = "Rho".ToCharArray();
            namedEntities[931] = "Sigma".ToCharArray();
            namedEntities[932] = "Tau".ToCharArray();
            namedEntities[933] = "Upsilon".ToCharArray();
            namedEntities[934] = "Phi".ToCharArray();
            namedEntities[935] = "Chi".ToCharArray();
            namedEntities[936] = "Psi".ToCharArray();
            namedEntities[937] = "Omega".ToCharArray();
            namedEntities[945] = "alpha".ToCharArray();
            namedEntities[946] = "beta".ToCharArray();
            namedEntities[947] = "gamma".ToCharArray();
            namedEntities[948] = "delta".ToCharArray();
            namedEntities[949] = "epsilon".ToCharArray();
            namedEntities[950] = "zeta".ToCharArray();
            namedEntities[951] = "eta".ToCharArray();
            namedEntities[952] = "theta".ToCharArray();
            namedEntities[953] = "iota".ToCharArray();
            namedEntities[954] = "kappa".ToCharArray();
            namedEntities[955] = "lambda".ToCharArray();
            namedEntities[956] = "mu".ToCharArray();
            namedEntities[957] = "nu".ToCharArray();
            namedEntities[958] = "xi".ToCharArray();
            namedEntities[959] = "omicron".ToCharArray();
            namedEntities[960] = "pi".ToCharArray();
            namedEntities[961] = "rho".ToCharArray();
            namedEntities[962] = "sigmaf".ToCharArray();
            namedEntities[963] = "sigma".ToCharArray();
            namedEntities[964] = "tau".ToCharArray();
            namedEntities[965] = "upsilon".ToCharArray();
            namedEntities[966] = "phi".ToCharArray();
            namedEntities[967] = "chi".ToCharArray();
            namedEntities[968] = "psi".ToCharArray();
            namedEntities[969] = "omega".ToCharArray();
            namedEntities[977] = "thetasym".ToCharArray();
            namedEntities[978] = "upsih".ToCharArray();
            namedEntities[982] = "piv".ToCharArray();

            // General punctuation and currency.
            namedEntities[0x2002] = "ensp".ToCharArray();
            namedEntities[0x2003] = "emsp".ToCharArray();
            namedEntities[0x2009] = "thinsp".ToCharArray();
            namedEntities[0x200C] = "zwnj".ToCharArray();
            namedEntities[0x200D] = "zwj".ToCharArray();
            namedEntities[0x200E] = "lrm".ToCharArray();
            namedEntities[0x200F] = "rlm".ToCharArray();
            namedEntities[0x2013] = "ndash".ToCharArray();
            namedEntities[0x2014] = "mdash".ToCharArray();
            namedEntities[0x2018] = "lsquo".ToCharArray();
            namedEntities[0x2019] = "rsquo".ToCharArray();
            namedEntities[0x201A] = "sbquo".ToCharArray();
            namedEntities[0x201C] = "ldquo".ToCharArray();
            namedEntities[0x201D] = "rdquo".ToCharArray();
            namedEntities[0x201E] = "bdquo".ToCharArray();
            namedEntities[0x2020] = "dagger".ToCharArray();
            namedEntities[0x2021] = "Dagger".ToCharArray();
            namedEntities[0x2022] = "bull".ToCharArray();
            namedEntities[0x2026] = "hellip".ToCharArray();
            namedEntities[0x2030] = "permil".ToCharArray();
            namedEntities[0x2032] = "prime".ToCharArray();
            namedEntities[0x2033] = "Prime".ToCharArray();
            namedEntities[0x2039] = "lsaquo".ToCharArray();
            namedEntities[0x203A] = "rsaquo".ToCharArray();
            namedEntities[0x203E] = "oline".ToCharArray();
            namedEntities[0x2044] = "frasl".ToCharArray();
            namedEntities[0x20AC] = "euro".ToCharArray();

            // Letterlike symbols and arrows.
            namedEntities[0x2111] = "image".ToCharArray();
            namedEntities[0x2118] = "weierp".ToCharArray();
            namedEntities[0x211C] = "real".ToCharArray();
            namedEntities[0x2122] = "trade".ToCharArray();
            namedEntities[0x2135] = "alefsym".ToCharArray();
            namedEntities[0x2190] = "larr".ToCharArray();
            namedEntities[0x2191] = "uarr".ToCharArray();
            namedEntities[0x2192] = "rarr".ToCharArray();
            namedEntities[0x2193] = "darr".ToCharArray();
            namedEntities[0x2194] = "harr".ToCharArray();
            namedEntities[0x21B5] = "crarr".ToCharArray();
            namedEntities[0x21D0] = "lArr".ToCharArray();
            namedEntities[0x21D1] = "uArr".ToCharArray();
            namedEntities[0x21D2] = "rArr".ToCharArray();
            namedEntities[0x21D3] = "dArr".ToCharArray();
            namedEntities[0x21D4] = "hArr".ToCharArray();

            // Mathematical operators.
            namedEntities[0x2200] = "forall".ToCharArray();
            namedEntities[0x2202] = "part".ToCharArray();
            namedEntities[0x2203] = "exist".ToCharArray();
            namedEntities[0x2205] = "empty".ToCharArray();
            namedEntities[0x2207] = "nabla".ToCharArray();
            namedEntities[0x2208] = "isin".ToCharArray();
            namedEntities[0x2209] = "notin".ToCharArray();
            namedEntities[0x220B] = "ni".ToCharArray();
            namedEntities[0x220F] = "prod".ToCharArray();
            namedEntities[0x2211] = "sum".ToCharArray();
            namedEntities[0x2212] = "minus".ToCharArray();
            namedEntities[0x2217] = "lowast".ToCharArray();
            namedEntities[0x221A] = "radic".ToCharArray();
            namedEntities[0x221D] = "prop".ToCharArray();
            namedEntities[0x221E] = "infin".ToCharArray();
            namedEntities[0x2220] = "ang".ToCharArray();
            namedEntities[0x2227] = "and".ToCharArray();
            namedEntities[0x2228] = "or".ToCharArray();
            namedEntities[0x2229] = "cap".ToCharArray();
            namedEntities[0x222A] = "cup".ToCharArray();
            namedEntities[0x222B] = "int".ToCharArray();
            namedEntities[0x2234] = "there4".ToCharArray();
            namedEntities[0x223C] = "sim".ToCharArray();
            namedEntities[0x2245] = "cong".ToCharArray();
            namedEntities[0x2248] = "asymp".ToCharArray();
            namedEntities[0x2260] = "ne".ToCharArray();
            namedEntities[0x2261] = "equiv".ToCharArray();
            namedEntities[0x2264] = "le".ToCharArray();
            namedEntities[0x2265] = "ge".ToCharArray();
            namedEntities[0x2282] = "sub".ToCharArray();
            namedEntities[0x2283] = "sup".ToCharArray();
            namedEntities[0x2284] = "nsub".ToCharArray();
            namedEntities[0x2286] = "sube".ToCharArray();
            namedEntities[0x2287] = "supe".ToCharArray();
            namedEntities[0x2295] = "oplus".ToCharArray();
            namedEntities[0x2297] = "otimes".ToCharArray();
            namedEntities[0x22A5] = "perp".ToCharArray();
            namedEntities[0x22C5] = "sdot".ToCharArray();

            // Miscellaneous technical, geometric shapes and symbols.
            namedEntities[0x2308] = "lceil".ToCharArray();
            namedEntities[0x2309] = "rceil".ToCharArray();
            namedEntities[0x230A] = "lfloor".ToCharArray();
            namedEntities[0x230B] = "rfloor".ToCharArray();
            namedEntities[0x2329] = "lang".ToCharArray();
            namedEntities[0x232A] = "rang".ToCharArray();
            namedEntities[0x25CA] = "loz".ToCharArray();
            namedEntities[0x2660] = "spades".ToCharArray();
            namedEntities[0x2663] = "clubs".ToCharArray();
            namedEntities[0x2665] = "hearts".ToCharArray();
            namedEntities[0x2666] = "diams".ToCharArray();

            return namedEntities;
        }
    }
}