239 lines
5.5 KiB
C#
239 lines
5.5 KiB
C#
|
|
namespace System.Text.RegularExpressions {
|
|
|
|
// for the IgnoreCase opcodes, the char data is stored lowercased
|
|
// two-byte integers are in little endian format
|
|
enum RxOp : byte {
|
|
// followed by count, min, max integers
|
|
Info,
|
|
|
|
False,
|
|
True,
|
|
|
|
// position anchors
|
|
AnyPosition,
|
|
StartOfString,
|
|
StartOfLine,
|
|
StartOfScan,
|
|
EndOfString,
|
|
EndOfLine,
|
|
End,
|
|
WordBoundary,
|
|
NoWordBoundary,
|
|
|
|
// latin1 strings
|
|
// followed by single byte length and latin1 bytes
|
|
// keep the order, see EmitString ()
|
|
String,
|
|
StringIgnoreCase,
|
|
StringReverse,
|
|
StringIgnoreCaseReverse,
|
|
|
|
// followed by two byte length and unicode chars (two bytes per char)
|
|
// a better setup may be to reference the chars in the patterns string
|
|
// (offset, length) pairs, at least when the pattern contains them,
|
|
// but this means we can't lowercase before hand: consider using a separate
|
|
// string/array
|
|
// keep the order, see EmitString ()
|
|
UnicodeString,
|
|
UnicodeStringIgnoreCase,
|
|
UnicodeStringReverse,
|
|
UnicodeStringIgnoreCaseReverse,
|
|
|
|
// latin1 single char
|
|
// followed by a latin1 byte
|
|
// keep the order, see EmitCharacter ()
|
|
Char,
|
|
NoChar,
|
|
CharIgnoreCase,
|
|
NoCharIgnoreCase,
|
|
CharReverse,
|
|
NoCharReverse,
|
|
CharIgnoreCaseReverse,
|
|
NoCharIgnoreCaseReverse,
|
|
|
|
// followed by latin1 min and max bytes
|
|
// keep the order, see EmitRange ()
|
|
Range,
|
|
NoRange,
|
|
RangeIgnoreCase,
|
|
NoRangeIgnoreCase,
|
|
RangeReverse,
|
|
NoRangeReverse,
|
|
RangeIgnoreCaseReverse,
|
|
NoRangeIgnoreCaseReverse,
|
|
|
|
// followed by lowbyte and length of the bitmap and by the bitmap
|
|
// keep the order, see EmitSet ()
|
|
Bitmap,
|
|
NoBitmap,
|
|
BitmapIgnoreCase,
|
|
NoBitmapIgnoreCase,
|
|
BitmapReverse,
|
|
NoBitmapReverse,
|
|
BitmapIgnoreCaseReverse,
|
|
NoBitmapIgnoreCaseReverse,
|
|
|
|
// unicode chars
|
|
// followed by a unicode char
|
|
// keep the order, see EmitCharacter ()
|
|
UnicodeChar,
|
|
NoUnicodeChar,
|
|
UnicodeCharIgnoreCase,
|
|
NoUnicodeCharIgnoreCase,
|
|
UnicodeCharReverse,
|
|
NoUnicodeCharReverse,
|
|
UnicodeCharIgnoreCaseReverse,
|
|
NoUnicodeCharIgnoreCaseReverse,
|
|
|
|
// followed by unicode char min and max chars
|
|
// keep the order, see EmitRange ()
|
|
UnicodeRange,
|
|
NoUnicodeRange,
|
|
UnicodeRangeIgnoreCase,
|
|
NoUnicodeRangeIgnoreCase,
|
|
UnicodeRangeReverse,
|
|
NoUnicodeRangeReverse,
|
|
UnicodeRangeIgnoreCaseReverse,
|
|
NoUnicodeRangeIgnoreCaseReverse,
|
|
|
|
// followed by lowchar and length of the bitmap and by the bitmap
|
|
UnicodeBitmap,
|
|
NoUnicodeBitmap,
|
|
UnicodeBitmapIgnoreCase,
|
|
NoUnicodeBitmapIgnoreCase,
|
|
UnicodeBitmapReverse,
|
|
NoUnicodeBitmapReverse,
|
|
UnicodeBitmapIgnoreCaseReverse,
|
|
NoUnicodeBitmapIgnoreCaseReverse,
|
|
|
|
// add reverse and negate versions of the categories
|
|
CategoryAny,
|
|
NoCategoryAny,
|
|
CategoryAnyReverse,
|
|
NoCategoryAnyReverse,
|
|
CategoryAnySingleline,
|
|
NoCategoryAnySingleline,
|
|
CategoryAnySinglelineReverse,
|
|
NoCategoryAnySinglelineReverse,
|
|
CategoryDigit,
|
|
NoCategoryDigit,
|
|
CategoryDigitReverse,
|
|
NoCategoryDigitReverse,
|
|
CategoryWord,
|
|
NoCategoryWord,
|
|
CategoryWordReverse,
|
|
NoCategoryWordReverse,
|
|
CategoryWhiteSpace,
|
|
NoCategoryWhiteSpace,
|
|
CategoryWhiteSpaceReverse,
|
|
NoCategoryWhiteSpaceReverse,
|
|
CategoryEcmaWord,
|
|
NoCategoryEcmaWord,
|
|
CategoryEcmaWordReverse,
|
|
NoCategoryEcmaWordReverse,
|
|
CategoryEcmaWhiteSpace,
|
|
NoCategoryEcmaWhiteSpace,
|
|
CategoryEcmaWhiteSpaceReverse,
|
|
NoCategoryEcmaWhiteSpaceReverse,
|
|
|
|
// followed by a unicode category value (byte)
|
|
CategoryUnicode,
|
|
NoCategoryUnicode,
|
|
CategoryUnicodeReverse,
|
|
NoCategoryUnicodeReverse,
|
|
|
|
CategoryUnicodeLetter,
|
|
NoCategoryUnicodeLetter,
|
|
CategoryUnicodeLetterReverse,
|
|
NoCategoryUnicodeLetterReverse,
|
|
CategoryUnicodeMark,
|
|
NoCategoryUnicodeMark,
|
|
CategoryUnicodeMarkReverse,
|
|
NoCategoryUnicodeMarkReverse,
|
|
CategoryUnicodeNumber,
|
|
NoCategoryUnicodeNumber,
|
|
CategoryUnicodeNumberReverse,
|
|
NoCategoryUnicodeNumberReverse,
|
|
CategoryUnicodeSeparator,
|
|
NoCategoryUnicodeSeparator,
|
|
CategoryUnicodeSeparatorReverse,
|
|
NoCategoryUnicodeSeparatorReverse,
|
|
CategoryUnicodePunctuation,
|
|
NoCategoryUnicodePunctuation,
|
|
CategoryUnicodePunctuationReverse,
|
|
NoCategoryUnicodePunctuationReverse,
|
|
CategoryUnicodeSymbol,
|
|
NoCategoryUnicodeSymbol,
|
|
CategoryUnicodeSymbolReverse,
|
|
NoCategoryUnicodeSymbolReverse,
|
|
CategoryUnicodeSpecials,
|
|
NoCategoryUnicodeSpecials,
|
|
CategoryUnicodeSpecialsReverse,
|
|
NoCategoryUnicodeSpecialsReverse,
|
|
CategoryUnicodeOther,
|
|
NoCategoryUnicodeOther,
|
|
CategoryUnicodeOtherReverse,
|
|
NoCategoryUnicodeOtherReverse,
|
|
// add more categories
|
|
|
|
// followed by Category value (byte)
|
|
CategoryGeneral,
|
|
NoCategoryGeneral,
|
|
CategoryGeneralReverse,
|
|
NoCategoryGeneralReverse,
|
|
|
|
// backreferences
|
|
// followed by two-byte reference number
|
|
// keep the order, see EmitReference ()
|
|
Reference,
|
|
ReferenceIgnoreCase,
|
|
ReferenceReverse,
|
|
ReferenceIgnoreCaseReverse,
|
|
|
|
// group/capture support
|
|
// followed by two-byte group id
|
|
OpenGroup,
|
|
CloseGroup,
|
|
|
|
BalanceStart,
|
|
Balance,
|
|
|
|
// followed by offset and two-byte group id
|
|
IfDefined,
|
|
|
|
// skip ahead num bytes
|
|
// followed by two-byte offset
|
|
Jump,
|
|
|
|
// followed by two-byte offset
|
|
SubExpression,
|
|
|
|
// followed by true and false two-byte offsets
|
|
Test,
|
|
|
|
// followed by two-byte offset
|
|
Branch,
|
|
|
|
// followed by two-byte offset
|
|
TestCharGroup,
|
|
|
|
// anchoring expression
|
|
// followed by offset of tail and offset
|
|
Anchor,
|
|
AnchorReverse,
|
|
|
|
// repetition support
|
|
// followed by min, max ints
|
|
Repeat,
|
|
RepeatLazy,
|
|
Until,
|
|
FastRepeat,
|
|
FastRepeatLazy,
|
|
// followed by min byte
|
|
RepeatInfinite,
|
|
RepeatInfiniteLazy,
|
|
}
|
|
}
|
|
|