661 lines
20 KiB
C#
661 lines
20 KiB
C#
//
|
|
// assembly: System
|
|
// namespace: System.Text.RegularExpressions
|
|
// file: category.cs
|
|
//
|
|
// author: Dan Lewis (dlewis@gmx.co.uk)
|
|
// (c) 2002
|
|
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining
|
|
// a copy of this software and associated documentation files (the
|
|
// "Software"), to deal in the Software without restriction, including
|
|
// without limitation the rights to use, copy, modify, merge, publish,
|
|
// distribute, sublicense, and/or sell copies of the Software, and to
|
|
// permit persons to whom the Software is furnished to do so, subject to
|
|
// the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be
|
|
// included in all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
//
|
|
|
|
using System;
|
|
using System.Globalization;
|
|
|
|
namespace System.Text.RegularExpressions {
|
|
|
|
// Character classes that CategoryUtils.IsCategory can test a char against.
//
// Members carry no explicit values apart from None: each one is numbered
// implicitly by its position, so inserting or reordering members changes
// the numeric value of everything that follows.
enum Category : ushort {
	None = 0,

	// ---- canonical classes ----

	Any,				// .   any character except newline
	AnySingleline,			// .   any character (s option)
	Word,				// \w  any word character
	Digit,				// \d  any digit character
	WhiteSpace,			// \s  any whitespace character

	// ---- ECMAScript classes ----

	EcmaAny,
	EcmaAnySingleline,
	EcmaWord,			// [a-zA-Z_0-9]
	EcmaDigit,			// [0-9]
	EcmaWhiteSpace,			// [ \f\n\r\t\v]

	// ---- unicode categories (major classes) ----

	UnicodeL,			// Letter
	UnicodeM,			// Mark
	UnicodeN,			// Number
	UnicodeZ,			// Separator
	UnicodeP,			// Punctuation
	UnicodeS,			// Symbol
	UnicodeC,			// Other

	// ---- unicode categories (subclasses) ----

	UnicodeLu,			// UppercaseLetter
	UnicodeLl,			// LowercaseLetter
	UnicodeLt,			// TitlecaseLetter
	UnicodeLm,			// ModifierLetter
	UnicodeLo,			// OtherLetter
	UnicodeMn,			// NonspacingMark
	UnicodeMe,			// EnclosingMark
	UnicodeMc,			// SpacingMark
	UnicodeNd,			// DecimalNumber
	UnicodeNl,			// LetterNumber
	UnicodeNo,			// OtherNumber
	UnicodeZs,			// SpaceSeparator
	UnicodeZl,			// LineSeparator
	UnicodeZp,			// ParagraphSeparator
	UnicodePd,			// DashPunctuation
	UnicodePs,			// OpenPunctuation
	UnicodePi,			// InitialPunctuation
	UnicodePe,			// ClosePunctuation
	UnicodePf,			// FinalPunctuation
	UnicodePc,			// ConnectorPunctuation
	UnicodePo,			// OtherPunctuation
	UnicodeSm,			// MathSymbol
	UnicodeSc,			// CurrencySymbol
	UnicodeSk,			// ModifierSymbol
	UnicodeSo,			// OtherSymbol
	UnicodeCc,			// Control
	UnicodeCf,			// Format
	UnicodeCo,			// PrivateUse
	UnicodeCs,			// Surrogate
	UnicodeCn,			// Unassigned

	// ---- unicode block ranges ----
	//
	// Note: members marked with a star are valid unicode block ranges,
	// but do not seem to be accepted by the MS parser when written in
	// the \p{...} format. The reason is unknown.

	UnicodeBasicLatin,
	UnicodeLatin1Supplement,	// *
	UnicodeLatinExtendedA,		// *
	UnicodeLatinExtendedB,		// *
	UnicodeIPAExtensions,
	UnicodeSpacingModifierLetters,
	UnicodeCombiningDiacriticalMarks,
	UnicodeGreek,
	UnicodeCyrillic,
	UnicodeArmenian,
	UnicodeHebrew,
	UnicodeArabic,
	UnicodeSyriac,
	UnicodeThaana,
	UnicodeDevanagari,
	UnicodeBengali,
	UnicodeGurmukhi,
	UnicodeGujarati,
	UnicodeOriya,
	UnicodeTamil,
	UnicodeTelugu,
	UnicodeKannada,
	UnicodeMalayalam,
	UnicodeSinhala,
	UnicodeThai,
	UnicodeLao,
	UnicodeTibetan,
	UnicodeMyanmar,
	UnicodeGeorgian,
	UnicodeHangulJamo,
	UnicodeEthiopic,
	UnicodeCherokee,
	UnicodeUnifiedCanadianAboriginalSyllabics,
	UnicodeOgham,
	UnicodeRunic,
	UnicodeKhmer,
	UnicodeMongolian,
	UnicodeLatinExtendedAdditional,
	UnicodeGreekExtended,
	UnicodeGeneralPunctuation,
	UnicodeSuperscriptsandSubscripts,
	UnicodeCurrencySymbols,
	UnicodeCombiningMarksforSymbols,
	UnicodeLetterlikeSymbols,
	UnicodeNumberForms,
	UnicodeArrows,
	UnicodeMathematicalOperators,
	UnicodeMiscellaneousTechnical,
	UnicodeControlPictures,
	UnicodeOpticalCharacterRecognition,
	UnicodeEnclosedAlphanumerics,
	UnicodeBoxDrawing,
	UnicodeBlockElements,
	UnicodeGeometricShapes,
	UnicodeMiscellaneousSymbols,
	UnicodeDingbats,
	UnicodeBraillePatterns,
	UnicodeCJKRadicalsSupplement,
	UnicodeKangxiRadicals,
	UnicodeIdeographicDescriptionCharacters,
	UnicodeCJKSymbolsandPunctuation,
	UnicodeHiragana,
	UnicodeKatakana,
	UnicodeBopomofo,
	UnicodeHangulCompatibilityJamo,
	UnicodeKanbun,
	UnicodeBopomofoExtended,
	UnicodeEnclosedCJKLettersandMonths,
	UnicodeCJKCompatibility,
	UnicodeCJKUnifiedIdeographsExtensionA,
	UnicodeCJKUnifiedIdeographs,
	UnicodeYiSyllables,
	UnicodeYiRadicals,
	UnicodeHangulSyllables,
	UnicodeHighSurrogates,
	UnicodeHighPrivateUseSurrogates,
	UnicodeLowSurrogates,
	UnicodePrivateUse,
	UnicodeCJKCompatibilityIdeographs,
	UnicodeAlphabeticPresentationForms,
	UnicodeArabicPresentationFormsA,	// *
	UnicodeCombiningHalfMarks,
	UnicodeCJKCompatibilityForms,
	UnicodeSmallFormVariants,
	UnicodeArabicPresentationFormsB,	// *
	UnicodeSpecials,
	UnicodeHalfwidthandFullwidthForms,

	UnicodeOldItalic,
	UnicodeGothic,
	UnicodeDeseret,
	UnicodeByzantineMusicalSymbols,
	UnicodeMusicalSymbols,
	UnicodeMathematicalAlphanumericSymbols,
	UnicodeCJKUnifiedIdeographsExtensionB,
	UnicodeCJKCompatibilityIdeographsSupplement,
	UnicodeTags,

	// Sentinel: keep this as the final member so that it always holds
	// the highest value in the enumeration.
	LastValue
}
|
|
|
|
// Helpers that resolve a category name to a Category member and that test
// whether a single char belongs to a given Category.
class CategoryUtils {
	/// <summary>
	/// Looks up the Category member for a Unicode category or block name.
	/// A leading "Is" (used for block ranges) is stripped, "Unicode" is
	/// prepended, and the result must exactly match the name of a Category
	/// member (case-sensitive).
	/// </summary>
	/// <param name="name">Category or block name, e.g. "Lu" or "IsGreek".</param>
	/// <returns>
	/// The matching Category member, or Category.None when name is null or
	/// does not name a member.
	/// </returns>
	public static Category CategoryFromName (string name) {
		// A null name cannot match anything; report "no category" rather
		// than failing with a NullReferenceException.
		if (name == null)
			return Category.None;

		// Remove the prefix from a block range. The comparison is ordinal so
		// the result does not depend on the current culture.
		if (String.CompareOrdinal (name, 0, "Is", 0, 2) == 0)
			name = name.Substring (2);

		string member = "Unicode" + name;

		// Enum.Parse on its own is too lenient: it trims whitespace and
		// accepts comma-separated lists, OR-ing the members together, which
		// would turn a name such as "L,UnicodeM" into an unrelated category.
		// Enum.IsDefined only accepts the exact name of a single member, and
		// checking first also avoids using an exception for the common
		// "unknown name" case.
		if (!Enum.IsDefined (typeof (Category), member))
			return Category.None;

		return (Category) Enum.Parse (typeof (Category), member, false);
	}

	/// <summary>
	/// Tests whether a character belongs to the given category.
	/// </summary>
	/// <param name="cat">The category to test against.</param>
	/// <param name="c">The character (a single UTF-16 code unit) to classify.</param>
	/// <returns>
	/// true if c is a member of cat; false otherwise. Category.None, the
	/// block ranges that begin above 0x10000, and any value without a case
	/// of its own never match.
	/// </returns>
	public static bool IsCategory (Category cat, char c) {
		switch (cat) {
		case Category.None:
			return false;

		// canonical classes

		case Category.Any:
			return c != '\n';

		case Category.AnySingleline:
			return true;

		case Category.Word:
			return Char.IsLetterOrDigit (c) ||
				IsCategory (UnicodeCategory.ConnectorPunctuation, c);

		case Category.Digit:
			return Char.IsDigit (c);

		case Category.WhiteSpace:
			return Char.IsWhiteSpace (c);

		// ECMA categories

		case Category.EcmaAny:
			return c != '\n';

		case Category.EcmaAnySingleline:
			return true;

		case Category.EcmaWord:
			return InRange (c, 'a', 'z') ||
				InRange (c, 'A', 'Z') ||
				InRange (c, '0', '9') ||
				c == '_';

		case Category.EcmaDigit:
			return InRange (c, '0', '9');

		case Category.EcmaWhiteSpace:
			return c == ' ' || c == '\f' || c == '\n' ||
				c == '\r' || c == '\t' || c == '\v';

		// Unicode categories...

		// letter

		case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);
		case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);
		case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);
		case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);
		case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);

		// mark

		case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);
		case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);
		case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);

		// number

		case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);
		case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);
		case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);

		// separator

		case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);
		case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);
		case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);

		// punctuation

		case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);
		case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);
		case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);
		case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);
		case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);
		case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);
		case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);

		// symbol

		case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);
		case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);
		case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);
		case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);

		// other

		case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);
		case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);
		case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);
		case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);
		case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c);

		// Major classes: the character is classified once and the result
		// is matched against every subclass of the major class.

		case Category.UnicodeL:	// letter
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.UppercaseLetter:
			case UnicodeCategory.LowercaseLetter:
			case UnicodeCategory.TitlecaseLetter:
			case UnicodeCategory.ModifierLetter:
			case UnicodeCategory.OtherLetter:
				return true;
			default:
				return false;
			}

		case Category.UnicodeM:	// mark
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.NonSpacingMark:
			case UnicodeCategory.EnclosingMark:
			case UnicodeCategory.SpacingCombiningMark:
				return true;
			default:
				return false;
			}

		case Category.UnicodeN:	// number
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.DecimalDigitNumber:
			case UnicodeCategory.LetterNumber:
			case UnicodeCategory.OtherNumber:
				return true;
			default:
				return false;
			}

		case Category.UnicodeZ:	// separator
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.SpaceSeparator:
			case UnicodeCategory.LineSeparator:
			case UnicodeCategory.ParagraphSeparator:
				return true;
			default:
				return false;
			}

		case Category.UnicodeP:	// punctuation
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.DashPunctuation:
			case UnicodeCategory.OpenPunctuation:
			case UnicodeCategory.InitialQuotePunctuation:
			case UnicodeCategory.ClosePunctuation:
			case UnicodeCategory.FinalQuotePunctuation:
			case UnicodeCategory.ConnectorPunctuation:
			case UnicodeCategory.OtherPunctuation:
				return true;
			default:
				return false;
			}

		case Category.UnicodeS:	// symbol
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.MathSymbol:
			case UnicodeCategory.CurrencySymbol:
			case UnicodeCategory.ModifierSymbol:
			case UnicodeCategory.OtherSymbol:
				return true;
			default:
				return false;
			}

		case Category.UnicodeC:	// other
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.Control:
			case UnicodeCategory.Format:
			case UnicodeCategory.PrivateUse:
			case UnicodeCategory.Surrogate:
			case UnicodeCategory.OtherNotAssigned:
				return true;
			default:
				return false;
			}

		// Unicode block ranges (both bounds inclusive)...

		case Category.UnicodeBasicLatin:
			return InRange (c, '\u0000', '\u007F');
		case Category.UnicodeLatin1Supplement:
			return InRange (c, '\u0080', '\u00FF');
		case Category.UnicodeLatinExtendedA:
			return InRange (c, '\u0100', '\u017F');
		case Category.UnicodeLatinExtendedB:
			return InRange (c, '\u0180', '\u024F');
		case Category.UnicodeIPAExtensions:
			return InRange (c, '\u0250', '\u02AF');
		case Category.UnicodeSpacingModifierLetters:
			return InRange (c, '\u02B0', '\u02FF');
		case Category.UnicodeCombiningDiacriticalMarks:
			return InRange (c, '\u0300', '\u036F');
		case Category.UnicodeGreek:
			return InRange (c, '\u0370', '\u03FF');
		case Category.UnicodeCyrillic:
			return InRange (c, '\u0400', '\u04FF');
		case Category.UnicodeArmenian:
			return InRange (c, '\u0530', '\u058F');
		case Category.UnicodeHebrew:
			return InRange (c, '\u0590', '\u05FF');
		case Category.UnicodeArabic:
			return InRange (c, '\u0600', '\u06FF');
		case Category.UnicodeSyriac:
			return InRange (c, '\u0700', '\u074F');
		case Category.UnicodeThaana:
			return InRange (c, '\u0780', '\u07BF');
		case Category.UnicodeDevanagari:
			return InRange (c, '\u0900', '\u097F');
		case Category.UnicodeBengali:
			return InRange (c, '\u0980', '\u09FF');
		case Category.UnicodeGurmukhi:
			return InRange (c, '\u0A00', '\u0A7F');
		case Category.UnicodeGujarati:
			return InRange (c, '\u0A80', '\u0AFF');
		case Category.UnicodeOriya:
			return InRange (c, '\u0B00', '\u0B7F');
		case Category.UnicodeTamil:
			return InRange (c, '\u0B80', '\u0BFF');
		case Category.UnicodeTelugu:
			return InRange (c, '\u0C00', '\u0C7F');
		case Category.UnicodeKannada:
			return InRange (c, '\u0C80', '\u0CFF');
		case Category.UnicodeMalayalam:
			return InRange (c, '\u0D00', '\u0D7F');
		case Category.UnicodeSinhala:
			return InRange (c, '\u0D80', '\u0DFF');
		case Category.UnicodeThai:
			return InRange (c, '\u0E00', '\u0E7F');
		case Category.UnicodeLao:
			return InRange (c, '\u0E80', '\u0EFF');
		case Category.UnicodeTibetan:
			return InRange (c, '\u0F00', '\u0FFF');
		case Category.UnicodeMyanmar:
			return InRange (c, '\u1000', '\u109F');
		case Category.UnicodeGeorgian:
			return InRange (c, '\u10A0', '\u10FF');
		case Category.UnicodeHangulJamo:
			return InRange (c, '\u1100', '\u11FF');
		case Category.UnicodeEthiopic:
			return InRange (c, '\u1200', '\u137F');
		case Category.UnicodeCherokee:
			return InRange (c, '\u13A0', '\u13FF');
		case Category.UnicodeUnifiedCanadianAboriginalSyllabics:
			return InRange (c, '\u1400', '\u167F');
		case Category.UnicodeOgham:
			return InRange (c, '\u1680', '\u169F');
		case Category.UnicodeRunic:
			return InRange (c, '\u16A0', '\u16FF');
		case Category.UnicodeKhmer:
			return InRange (c, '\u1780', '\u17FF');
		case Category.UnicodeMongolian:
			return InRange (c, '\u1800', '\u18AF');
		case Category.UnicodeLatinExtendedAdditional:
			return InRange (c, '\u1E00', '\u1EFF');
		case Category.UnicodeGreekExtended:
			return InRange (c, '\u1F00', '\u1FFF');
		case Category.UnicodeGeneralPunctuation:
			return InRange (c, '\u2000', '\u206F');
		case Category.UnicodeSuperscriptsandSubscripts:
			return InRange (c, '\u2070', '\u209F');
		case Category.UnicodeCurrencySymbols:
			return InRange (c, '\u20A0', '\u20CF');
		case Category.UnicodeCombiningMarksforSymbols:
			return InRange (c, '\u20D0', '\u20FF');
		case Category.UnicodeLetterlikeSymbols:
			return InRange (c, '\u2100', '\u214F');
		case Category.UnicodeNumberForms:
			return InRange (c, '\u2150', '\u218F');
		case Category.UnicodeArrows:
			return InRange (c, '\u2190', '\u21FF');
		case Category.UnicodeMathematicalOperators:
			return InRange (c, '\u2200', '\u22FF');
		case Category.UnicodeMiscellaneousTechnical:
			return InRange (c, '\u2300', '\u23FF');
		case Category.UnicodeControlPictures:
			return InRange (c, '\u2400', '\u243F');
		case Category.UnicodeOpticalCharacterRecognition:
			return InRange (c, '\u2440', '\u245F');
		case Category.UnicodeEnclosedAlphanumerics:
			return InRange (c, '\u2460', '\u24FF');
		case Category.UnicodeBoxDrawing:
			return InRange (c, '\u2500', '\u257F');
		case Category.UnicodeBlockElements:
			return InRange (c, '\u2580', '\u259F');
		case Category.UnicodeGeometricShapes:
			return InRange (c, '\u25A0', '\u25FF');
		case Category.UnicodeMiscellaneousSymbols:
			return InRange (c, '\u2600', '\u26FF');
		case Category.UnicodeDingbats:
			return InRange (c, '\u2700', '\u27BF');
		case Category.UnicodeBraillePatterns:
			return InRange (c, '\u2800', '\u28FF');
		case Category.UnicodeCJKRadicalsSupplement:
			return InRange (c, '\u2E80', '\u2EFF');
		case Category.UnicodeKangxiRadicals:
			return InRange (c, '\u2F00', '\u2FDF');
		case Category.UnicodeIdeographicDescriptionCharacters:
			return InRange (c, '\u2FF0', '\u2FFF');
		case Category.UnicodeCJKSymbolsandPunctuation:
			return InRange (c, '\u3000', '\u303F');
		case Category.UnicodeHiragana:
			return InRange (c, '\u3040', '\u309F');
		case Category.UnicodeKatakana:
			return InRange (c, '\u30A0', '\u30FF');
		case Category.UnicodeBopomofo:
			return InRange (c, '\u3100', '\u312F');
		case Category.UnicodeHangulCompatibilityJamo:
			return InRange (c, '\u3130', '\u318F');
		case Category.UnicodeKanbun:
			return InRange (c, '\u3190', '\u319F');
		case Category.UnicodeBopomofoExtended:
			return InRange (c, '\u31A0', '\u31BF');
		case Category.UnicodeEnclosedCJKLettersandMonths:
			return InRange (c, '\u3200', '\u32FF');
		case Category.UnicodeCJKCompatibility:
			return InRange (c, '\u3300', '\u33FF');
		case Category.UnicodeCJKUnifiedIdeographsExtensionA:
			return InRange (c, '\u3400', '\u4DB5');
		case Category.UnicodeCJKUnifiedIdeographs:
			return InRange (c, '\u4E00', '\u9FFF');
		case Category.UnicodeYiSyllables:
			return InRange (c, '\uA000', '\uA48F');
		case Category.UnicodeYiRadicals:
			return InRange (c, '\uA490', '\uA4CF');
		case Category.UnicodeHangulSyllables:
			return InRange (c, '\uAC00', '\uD7A3');
		case Category.UnicodeHighSurrogates:
			return InRange (c, '\uD800', '\uDB7F');
		case Category.UnicodeHighPrivateUseSurrogates:
			return InRange (c, '\uDB80', '\uDBFF');
		case Category.UnicodeLowSurrogates:
			return InRange (c, '\uDC00', '\uDFFF');
		case Category.UnicodePrivateUse:
			return InRange (c, '\uE000', '\uF8FF');
		case Category.UnicodeCJKCompatibilityIdeographs:
			return InRange (c, '\uF900', '\uFAFF');
		case Category.UnicodeAlphabeticPresentationForms:
			return InRange (c, '\uFB00', '\uFB4F');
		case Category.UnicodeArabicPresentationFormsA:
			return InRange (c, '\uFB50', '\uFDFF');
		case Category.UnicodeCombiningHalfMarks:
			return InRange (c, '\uFE20', '\uFE2F');
		case Category.UnicodeCJKCompatibilityForms:
			return InRange (c, '\uFE30', '\uFE4F');
		case Category.UnicodeSmallFormVariants:
			return InRange (c, '\uFE50', '\uFE6F');
		case Category.UnicodeArabicPresentationFormsB:
			return InRange (c, '\uFE70', '\uFEFE');
		case Category.UnicodeHalfwidthandFullwidthForms:
			return InRange (c, '\uFF00', '\uFFEF');

		case Category.UnicodeSpecials:
			// Two disjoint pieces: the single code point U+FEFF and
			// the range U+FFF0..U+FFFD.
			return c == '\uFEFF' || InRange (c, '\uFFF0', '\uFFFD');

		// These block ranges begin above 0x10000, which is beyond the
		// largest value a char can hold, so no char can match them.

		case Category.UnicodeOldItalic:
		case Category.UnicodeGothic:
		case Category.UnicodeDeseret:
		case Category.UnicodeByzantineMusicalSymbols:
		case Category.UnicodeMusicalSymbols:
		case Category.UnicodeMathematicalAlphanumericSymbols:
		case Category.UnicodeCJKUnifiedIdeographsExtensionB:
		case Category.UnicodeCJKCompatibilityIdeographsSupplement:
		case Category.UnicodeTags:
			return false;

		default:
			return false;
		}
	}

	// Returns true when c has exactly the Unicode general category uc.
	private static bool IsCategory (UnicodeCategory uc, char c) {
		return Char.GetUnicodeCategory (c) == uc;
	}

	// Returns true when c lies between first and last, both inclusive.
	private static bool InRange (char c, char first, char last) {
		return first <= c && c <= last;
	}
}
|
|
}
|