You've already forked linux-packaging-mono
Imported Upstream version 4.6.0.125
Former-commit-id: a2155e9bd80020e49e72e86c44da02a8ac0e57a4
This commit is contained in:
parent
a569aebcfd
commit
e79aa3c0ed
@@ -0,0 +1,126 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// <copyright file="Utf16StringReader.cs" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace System.Web.Security.AntiXss {
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
/// <summary>
|
||||
/// Reads individual scalar values from a UTF-16 input string.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// For performance reasons, this is a mutable struct. Use caution when capturing instances of this type.
|
||||
/// </remarks>
|
||||
internal struct Utf16StringReader {
|
||||
|
||||
/// <summary>
|
||||
/// Starting code point for the UTF-16 leading surrogates.
|
||||
/// </summary>
|
||||
private const char LeadingSurrogateStart = '\uD800';
|
||||
|
||||
/// <summary>
|
||||
/// Starting code point for the UTF-16 trailing surrogates.
|
||||
/// </summary>
|
||||
private const char TrailingSurrogateStart = '\uDC00';
|
||||
|
||||
/// <summary>
|
||||
/// The Unicode replacement character U+FFFD.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// For more info, see http://www.unicode.org/charts/PDF/UFFF0.pdf.
|
||||
/// </remarks>
|
||||
private const int UnicodeReplacementCharacterCodePoint = '\uFFFD';
|
||||
|
||||
/// <summary>
|
||||
/// The current offset into '_input'.
|
||||
/// </summary>
|
||||
private int _currentOffset;
|
||||
|
||||
/// <summary>
|
||||
/// The input string we're iterating on.
|
||||
/// </summary>
|
||||
private readonly string _input;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes the reader with the given UTF-16 input string.
|
||||
/// </summary>
|
||||
/// <param name="input">The input string to decompose into scalar values.</param>
|
||||
public Utf16StringReader(string input) {
|
||||
Debug.Assert(input != null);
|
||||
|
||||
_input = input;
|
||||
_currentOffset = 0;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Similar to Char.ConvertToUtf32, but slightly faster in tight loops since parameter checks are not done.
|
||||
/// </summary>
|
||||
/// <param name="leadingSurrogate">The UTF-16 leading surrogate character.</param>
|
||||
/// <param name="trailingSurrogate">The UTF-16 trailing surrogate character.</param>
|
||||
/// <returns>The scalar value resulting from combining these two surrogate characters.</returns>
|
||||
/// <remarks>The caller must ensure that the inputs are valid surrogate characters. If not,
|
||||
/// the output of this routine is undefined.</remarks>
|
||||
private static int ConvertToUtf32(char leadingSurrogate, char trailingSurrogate) {
|
||||
Debug.Assert(Char.IsHighSurrogate(leadingSurrogate), "'leadingSurrogate' was not a high surrogate.");
|
||||
Debug.Assert(Char.IsLowSurrogate(trailingSurrogate), "'trailingSurrogate' was not a low surrogate.");
|
||||
|
||||
return (int)((leadingSurrogate - LeadingSurrogateStart) * 0x400 + (trailingSurrogate - TrailingSurrogateStart)) + 0x10000;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Determines whether a given code point is a valid Unicode scalar value.
|
||||
/// </summary>
|
||||
/// <param name="codePoint">The code point whose validity is to be checked.</param>
|
||||
/// <returns>True if the input is a valid Unicode scalar value, false otherwise.</returns>
|
||||
private static bool IsValidUnicodeScalarValue(int codePoint) {
|
||||
// Valid scalar values are U+0000 .. U+D7FF and U+E000 .. U+10FFFF.
|
||||
// See: http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf, D76
|
||||
return (0 <= codePoint && codePoint <= 0xD7FF)
|
||||
|| (0xE000 <= codePoint && codePoint <= 0x10FFFF);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reads the next scalar value from the input string.
|
||||
/// </summary>
|
||||
/// <returns>The next scalar value. If the input string contains invalid UTF-16, the
|
||||
/// return value is the Unicode replacement character U+FFFD. If the end of the string
|
||||
/// is reached, returns -1.</returns>
|
||||
public int ReadNextScalarValue() {
|
||||
if (_currentOffset >= _input.Length) {
|
||||
return -1; // EOF
|
||||
}
|
||||
|
||||
char thisCodeUnit = _input[_currentOffset++];
|
||||
int thisCodePoint = thisCodeUnit;
|
||||
|
||||
if (Char.IsHighSurrogate(thisCodeUnit)) {
|
||||
if (_currentOffset < _input.Length) {
|
||||
char nextCodeUnit = _input[_currentOffset];
|
||||
if (Char.IsLowSurrogate(nextCodeUnit)) {
|
||||
// We encountered a high (leading) surrogate followed by a low
|
||||
// (trailing) surrogate. Bump '_currentOffset' up by one more
|
||||
// since we're consuming both code units.
|
||||
_currentOffset++;
|
||||
thisCodePoint = ConvertToUtf32(thisCodeUnit, nextCodeUnit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IsValidUnicodeScalarValue(thisCodePoint)) {
|
||||
return thisCodePoint;
|
||||
}
|
||||
else {
|
||||
// ERROR: This code point was either an unmatched high (leading)
|
||||
// surrogate or an unmatched low (trailing) surrogate, neither of
|
||||
// which maps to a valid Unicode scalar value. Per the Unicode
|
||||
// specification (Ch. 3, C10), we replace with U+FFFD.
|
||||
return UnicodeReplacementCharacterCodePoint;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user