You've already forked linux-packaging-mono
							
							
		
			
				
	
	
		
			371 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			371 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
namespace System
{
    using System.Diagnostics;
    using System.Runtime.InteropServices;
    using System.Text;

    //
    // Helpers used while normalizing IRIs (RFC 3987): range checks for characters that may
    // appear unescaped, per-component reserved-character checks, and the routine that
    // escapes/unescapes a span of characters for a given Uri component.
    //
    internal static class IriHelper
    {
        //
        // Checks whether the provided non-surrogate char lies in the IRI range.
        //
        // unicode - the UTF-16 code unit to test.
        // isQuery - when true, the private-use range U+E000..U+F8FF is accepted as well.
        //
        // Returns true when the char falls in one of the accepted ranges.
        //
        internal static bool CheckIriUnicodeRange(char unicode, bool isQuery)
        {
            return ((unicode >= '\u00A0' && unicode <= '\uD7FF') ||
               (unicode >= '\uF900' && unicode <= '\uFDCF') ||
               (unicode >= '\uFDF0' && unicode <= '\uFFEF') ||
               (isQuery && unicode >= '\uE000' && unicode <= '\uF8FF'));
        }

        //
        // Checks whether highSurr and lowSurr form a surrogate pair and, if so, whether the
        // combined code point lies in the IRI range.
        //
        // highSurr      - first UTF-16 code unit; expected to be a high surrogate (asserted in debug).
        // lowSurr       - second UTF-16 code unit.
        // surrogatePair - always overwritten: true when the two chars form a valid surrogate pair.
        // isQuery       - the IRI restrictions for the query component are different: planes 15
        //                 and 16 (U+F0000..U+FFFFD, U+100000..U+10FFFD) are accepted only for a query.
        //
        // Returns true when the chars form a pair whose code point is in an accepted range.
        //
        internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, ref bool surrogatePair, bool isQuery)
        {
            surrogatePair = false;

            Debug.Assert(Char.IsHighSurrogate(highSurr));

            if (!Char.IsSurrogatePair(highSurr, lowSurr))
            {
                return false;
            }

            surrogatePair = true;

            // Work on the scalar value instead of building a temporary string and comparing it
            // ordinally against every range boundary. A valid pair always yields a code point
            // in U+10000..U+10FFFF, so plane is in 1..16.
            int codePoint = Char.ConvertToUtf32(highSurr, lowSurr);
            int plane = codePoint >> 16;
            int offsetInPlane = codePoint & 0xFFFF;

            // The last two code points of every plane (xFFFE, xFFFF) are never accepted.
            if (offsetInPlane > 0xFFFD)
            {
                return false;
            }

            // Planes 1..13: U+x0000..U+xFFFD.
            if (plane <= 0xD)
            {
                return true;
            }

            // Plane 14: only U+E1000..U+EFFFD.
            if (plane == 0xE)
            {
                return offsetInPlane >= 0x1000;
            }

            // Planes 15 and 16 are accepted for the query component only.
            return isQuery;
        }

        //
        // Checks reserved chars according to RFC 3987 in a specific component.
        //
        // ch        - the character to test.
        // component - the single Uri component being processed. The value 0 selects the
        //             Uri.IsGenDelim check; Scheme, Port and any other value (including
        //             combinations of flags) have no reserved characters here.
        //
        // Returns true when ch must not be unescaped inside the given component.
        //
        internal static bool CheckIsReserved(char ch, UriComponents component)
        {
            switch (component)
            {
                case UriComponents.UserInfo:
                    return ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']' || ch == '@';

                case UriComponents.Host:
                    return ch == ':' || ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']' || ch == '@';

                case UriComponents.Path:
                    return ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']';

                case UriComponents.Query:
                case UriComponents.Fragment:
                    return ch == '#' || ch == '[' || ch == ']';

                case (UriComponents)0:
                    return Uri.IsGenDelim(ch);

                default:
                    return false;
            }
        }

        //
        // IRI normalization for strings containing characters that are not allowed or
        // escaped characters that should be unescaped in the context of the specified Uri component.
        //
        // pInput    - pointer to the input characters.
        // start     - index of the first character to process.
        // end       - index one past the last character to process.
        // component - the Uri component the span belongs to; selects the reserved set and whether
        //             the query-only ranges are accepted.
        //
        // Returns the normalized string.
        //
        // Processing per input character:
        //   '%XX'      - left as is when invalid, reserved or not safe to unescape; unescaped when it
        //                is plain ASCII; otherwise the run of consecutive non-ASCII '%XX' escapes is
        //                decoded as UTF-8 and handed to UriHelper.MatchUTF8Sequence.
        //   non-ASCII  - copied when in the IRI range (bidi control characters are dropped),
        //                percent-encoded as UTF-8 otherwise.
        //   ASCII      - copied.
        //
        internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, UriComponents component)
        {
            const int percentEncodingLen = 3; // Escaped UTF-8 will take 3 chars: %AB.
            const int bufferCapacityIncrease = 30 * percentEncodingLen;
            const int maxNumberOfBytesEncoded = 4;

            char[] dest = new char[end - start];
            byte[] bytes = null;                 // scratch for escaped UTF-8 sequences, created on demand
            byte[] encodedBytes = null;          // scratch for percent-encoding one character, created on demand
            Encoding noFallbackCharUTF8 = null;  // UTF-8 with empty replacement fall-backs, created on demand

            int bufferRemaining = 0;
            int next = start;
            int destOffset = 0;
            char ch;

            // Pin the array to do pointer accesses. The handle is released in the finally block
            // so that an exception thrown while processing cannot leak the pinned handle.
            GCHandle destHandle = GCHandle.Alloc(dest, GCHandleType.Pinned);
            try
            {
                char* pDest = (char*)destHandle.AddrOfPinnedObject();

                for (; next < end; ++next)
                {
                    bool escape = false;
                    bool surrogatePair = false;

                    if ((ch = pInput[next]) == '%')
                    {
                        if (next + 2 < end)
                        {
                            ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                            // Do not unescape a reserved char
                            if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                            {
                                // keep as is
                                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                pDest[destOffset++] = pInput[next++];
                                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                pDest[destOffset++] = pInput[next++];
                                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                pDest[destOffset++] = pInput[next];
                                continue;
                            }
                            else if (ch <= '\x7F')
                            {
                                Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                //ASCII
                                pDest[destOffset++] = ch;
                                next += 2;
                                continue;
                            }
                            else
                            {
                                // possibly utf8 encoded sequence of unicode

                                // check if safe to unescape according to Iri rules

                                Debug.Assert(ch < 0xFF, "Expecting ASCII character.");

                                int startSeq = next;
                                int byteCount = 1;

                                // lazy initialization of max size, will reuse the array for next sequences
                                if (bytes == null)
                                {
                                    bytes = new byte[end - next];
                                }

                                bytes[0] = (byte)ch;
                                next += 3;
                                while (next < end)
                                {
                                    // Check on exit criterion
                                    if ((ch = pInput[next]) != '%' || next + 2 >= end)
                                    {
                                        break;
                                    }

                                    // already made sure we have 3 characters in str
                                    ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                                    // invalid hex sequence ?
                                    if (ch == Uri.c_DummyChar)
                                    {
                                        break;
                                    }

                                    // character is not part of a UTF-8 sequence ?
                                    if (ch < '\x80')
                                    {
                                        break;
                                    }

                                    // a UTF-8 sequence
                                    bytes[byteCount++] = (byte)ch;
                                    next += 3;

                                    Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                                }
                                next--; // for loop will increment

                                // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                                // The configured clone does not depend on the input, so build it once and reuse it.
                                if (noFallbackCharUTF8 == null)
                                {
                                    noFallbackCharUTF8 = (Encoding)Encoding.UTF8.Clone();
                                    noFallbackCharUTF8.EncoderFallback = new EncoderReplacementFallback("");
                                    noFallbackCharUTF8.DecoderFallback = new DecoderReplacementFallback("");
                                }

                                char[] unescapedChars = new char[bytes.Length];
                                int charCount = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                                if (charCount != 0)
                                {
                                    // If invalid sequences were present in the original escaped string, we need to
                                    // copy the escaped versions of those sequences.
                                    // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                                    // rules.
                                    UriHelper.MatchUTF8Sequence(pDest, dest, ref destOffset, unescapedChars, charCount, bytes,
                                        byteCount, component == UriComponents.Query, true);
                                }
                                else
                                {
                                    // copy escaped sequence as is
                                    for (int i = startSeq; i <= next; ++i)
                                    {
                                        Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                        pDest[destOffset++] = pInput[i];
                                    }
                                }
                            }
                        }
                        else
                        {
                            // '%' without two following characters: copy it unchanged
                            Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                            pDest[destOffset++] = pInput[next];
                        }
                    }
                    else if (ch > '\x7f')
                    {
                        // unicode

                        if (Char.IsHighSurrogate(ch) && (next + 1 < end))
                        {
                            char ch2 = pInput[next + 1];
                            escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
                            if (!escape)
                            {
                                // copy the two chars
                                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                pDest[destOffset++] = pInput[next++];
                                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                pDest[destOffset++] = pInput[next];
                            }
                        }
                        else if (CheckIriUnicodeRange(ch, component == UriComponents.Query))
                        {
                            // Bidi control characters are neither copied nor escaped: they are dropped.
                            if (!Uri.IsBidiControlCharacter(ch))
                            {
                                // copy it
                                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                pDest[destOffset++] = pInput[next];
                            }
                        }
                        else
                        {
                            // escape it
                            escape = true;
                        }
                    }
                    else
                    {
                        // just copy the character
                        Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                        pDest[destOffset++] = pInput[next];
                    }

                    if (escape)
                    {
                        if (bufferRemaining < maxNumberOfBytesEncoded * percentEncodingLen)
                        {
                            int newBufferLength;

                            checked
                            {
                                // may need more memory since we didn't anticipate escaping
                                newBufferLength = dest.Length + bufferCapacityIncrease;
                                bufferRemaining += bufferCapacityIncrease;
                            }

                            char[] newDest = new char[newBufferLength];
                            Array.Copy(dest, newDest, destOffset);

                            if (destHandle.IsAllocated)
                            {
                                destHandle.Free();
                            }

                            dest = newDest;

                            // re-pin new dest[] array
                            destHandle = GCHandle.Alloc(dest, GCHandleType.Pinned);
                            pDest = (char*)destHandle.AddrOfPinnedObject();
                        }

                        if (encodedBytes == null)
                        {
                            encodedBytes = new byte[maxNumberOfBytesEncoded];
                        }

                        fixed (byte* pEncodedBytes = encodedBytes)
                        {
                            int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, maxNumberOfBytesEncoded);
                            Debug.Assert(encodedBytesCount <= maxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount");

                            bufferRemaining -= encodedBytesCount * percentEncodingLen;

                            for (int count = 0; count < encodedBytesCount; ++count)
                            {
                                UriHelper.EscapeAsciiChar((char)encodedBytes[count], dest, ref destOffset);
                            }
                        }

                        // Both halves of the pair were encoded above. Step over the low surrogate so
                        // the next iteration does not see it as a lone surrogate and escape it again.
                        if (surrogatePair)
                        {
                            ++next;
                        }
                    }
                }
            }
            finally
            {
                if (destHandle.IsAllocated)
                {
                    destHandle.Free();
                }
            }

            Debug.Assert(destOffset <= dest.Length, "Buffer overrun detected");
            return new string(dest, 0, destOffset);
        }
    }
}