You've already forked linux-packaging-mono
Imported Upstream version 4.6.0.125
Former-commit-id: a2155e9bd80020e49e72e86c44da02a8ac0e57a4
This commit is contained in:
parent
a569aebcfd
commit
e79aa3c0ed
@ -0,0 +1,393 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// <copyright file="RegexCode.cs" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// This RegexCode class is internal to the regular expression package.
|
||||
// It provides operator constants for use by the Builder and the Machine.
|
||||
|
||||
// Implementation notes:
|
||||
//
|
||||
// Regexps are built into RegexCodes, which contain an operation array,
|
||||
// a string table, and some constants.
|
||||
//
|
||||
// Each operation is one of the codes below, followed by the integer
|
||||
// operands specified for each op.
|
||||
//
|
||||
// Strings and sets are indices into a string table.
|
||||
|
||||
|
||||
namespace System.Text.RegularExpressions {
|
||||
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
|
||||
internal sealed class RegexCode {
|
||||
// the following primitive operations come directly from the parser
|
||||
|
||||
// lef/back operands description
|
||||
|
||||
internal const int Onerep = 0; // lef,back char,min,max a {n}
|
||||
internal const int Notonerep = 1; // lef,back char,min,max .{n}
|
||||
internal const int Setrep = 2; // lef,back set,min,max [\d]{n}
|
||||
|
||||
internal const int Oneloop = 3; // lef,back char,min,max a {,n}
|
||||
internal const int Notoneloop = 4; // lef,back char,min,max .{,n}
|
||||
internal const int Setloop = 5; // lef,back set,min,max [\d]{,n}
|
||||
|
||||
internal const int Onelazy = 6; // lef,back char,min,max a {,n}?
|
||||
internal const int Notonelazy = 7; // lef,back char,min,max .{,n}?
|
||||
internal const int Setlazy = 8; // lef,back set,min,max [\d]{,n}?
|
||||
|
||||
internal const int One = 9; // lef char a
|
||||
internal const int Notone = 10; // lef char [^a]
|
||||
internal const int Set = 11; // lef set [a-z\s] \w \s \d
|
||||
|
||||
internal const int Multi = 12; // lef string abcd
|
||||
internal const int Ref = 13; // lef group \#
|
||||
|
||||
internal const int Bol = 14; // ^
|
||||
internal const int Eol = 15; // $
|
||||
internal const int Boundary = 16; // \b
|
||||
internal const int Nonboundary = 17; // \B
|
||||
internal const int Beginning = 18; // \A
|
||||
internal const int Start = 19; // \G
|
||||
internal const int EndZ = 20; // \Z
|
||||
internal const int End = 21; // \Z
|
||||
|
||||
internal const int Nothing = 22; // Reject!
|
||||
|
||||
// primitive control structures
|
||||
|
||||
internal const int Lazybranch = 23; // back jump straight first
|
||||
internal const int Branchmark = 24; // back jump branch first for loop
|
||||
internal const int Lazybranchmark = 25; // back jump straight first for loop
|
||||
internal const int Nullcount = 26; // back val set counter, null mark
|
||||
internal const int Setcount = 27; // back val set counter, make mark
|
||||
internal const int Branchcount = 28; // back jump,limit branch++ if zero<=c<limit
|
||||
internal const int Lazybranchcount= 29; // back jump,limit same, but straight first
|
||||
internal const int Nullmark = 30; // back save position
|
||||
internal const int Setmark = 31; // back save position
|
||||
internal const int Capturemark = 32; // back group define group
|
||||
internal const int Getmark = 33; // back recall position
|
||||
internal const int Setjump = 34; // back save backtrack state
|
||||
internal const int Backjump = 35; // zap back to saved state
|
||||
internal const int Forejump = 36; // zap backtracking state
|
||||
internal const int Testref = 37; // backtrack if ref undefined
|
||||
internal const int Goto = 38; // jump just go
|
||||
|
||||
internal const int Prune = 39; // prune it baby
|
||||
internal const int Stop = 40; // done!
|
||||
|
||||
internal const int ECMABoundary = 41; // \b
|
||||
internal const int NonECMABoundary= 42; // \B
|
||||
|
||||
// modifiers for alternate modes
|
||||
|
||||
internal const int Mask = 63; // Mask to get unmodified ordinary operator
|
||||
internal const int Rtl = 64; // bit to indicate that we're reverse scanning.
|
||||
internal const int Back = 128; // bit to indicate that we're backtracking.
|
||||
internal const int Back2 = 256; // bit to indicate that we're backtracking on a second branch.
|
||||
internal const int Ci = 512; // bit to indicate that we're case-insensitive.
|
||||
|
||||
// the code
|
||||
|
||||
internal int[] _codes; // the code
|
||||
internal String[] _strings; // the string/set table
|
||||
// not used! internal int[] _sparseIndex; // a list of the groups that are used
|
||||
internal int _trackcount; // how many instructions use backtracking
|
||||
#if SILVERLIGHT
|
||||
internal Dictionary<Int32, Int32> _caps; // mapping of user group numbers -> impl group slots
|
||||
#else
|
||||
internal Hashtable _caps; // mapping of user group numbers -> impl group slots
|
||||
#endif
|
||||
internal int _capsize; // number of impl group slots
|
||||
internal RegexPrefix _fcPrefix; // the set of candidate first characters (may be null)
|
||||
internal RegexBoyerMoore _bmPrefix; // the fixed prefix string as a Boyer-Moore machine (may be null)
|
||||
internal int _anchors; // the set of zero-length start anchors (RegexFCD.Bol, etc)
|
||||
internal bool _rightToLeft; // true if right to left
|
||||
|
||||
// optimizations
|
||||
|
||||
// constructor
|
||||
|
||||
internal RegexCode(int [] codes, List<String> stringlist, int trackcount,
|
||||
#if SILVERLIGHT
|
||||
Dictionary<Int32, Int32> caps, int capsize,
|
||||
#else
|
||||
Hashtable caps, int capsize,
|
||||
#endif
|
||||
RegexBoyerMoore bmPrefix, RegexPrefix fcPrefix,
|
||||
int anchors, bool rightToLeft) {
|
||||
_codes = codes;
|
||||
_strings = new String[stringlist.Count];
|
||||
_trackcount = trackcount;
|
||||
_caps = caps;
|
||||
_capsize = capsize;
|
||||
_bmPrefix = bmPrefix;
|
||||
_fcPrefix = fcPrefix;
|
||||
_anchors = anchors;
|
||||
_rightToLeft = rightToLeft;
|
||||
stringlist.CopyTo(0, _strings, 0, stringlist.Count);
|
||||
}
|
||||
|
||||
internal static bool OpcodeBacktracks(int Op) {
|
||||
Op &= Mask;
|
||||
|
||||
switch (Op) {
|
||||
case Oneloop:
|
||||
case Notoneloop:
|
||||
case Setloop:
|
||||
case Onelazy:
|
||||
case Notonelazy:
|
||||
case Setlazy:
|
||||
case Lazybranch:
|
||||
case Branchmark:
|
||||
case Lazybranchmark:
|
||||
case Nullcount:
|
||||
case Setcount:
|
||||
case Branchcount:
|
||||
case Lazybranchcount:
|
||||
case Setmark:
|
||||
case Capturemark:
|
||||
case Getmark:
|
||||
case Setjump:
|
||||
case Backjump:
|
||||
case Forejump:
|
||||
case Goto:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
internal static int OpcodeSize(int Opcode) {
|
||||
Opcode &= Mask;
|
||||
|
||||
switch (Opcode) {
|
||||
case Nothing:
|
||||
case Bol:
|
||||
case Eol:
|
||||
case Boundary:
|
||||
case Nonboundary:
|
||||
case ECMABoundary:
|
||||
case NonECMABoundary:
|
||||
case Beginning:
|
||||
case Start:
|
||||
case EndZ:
|
||||
case End:
|
||||
|
||||
case Nullmark:
|
||||
case Setmark:
|
||||
case Getmark:
|
||||
case Setjump:
|
||||
case Backjump:
|
||||
case Forejump:
|
||||
case Stop:
|
||||
|
||||
return 1;
|
||||
|
||||
case One:
|
||||
case Notone:
|
||||
case Multi:
|
||||
case Ref:
|
||||
case Testref:
|
||||
|
||||
|
||||
case Goto:
|
||||
case Nullcount:
|
||||
case Setcount:
|
||||
case Lazybranch:
|
||||
case Branchmark:
|
||||
case Lazybranchmark:
|
||||
case Prune:
|
||||
case Set:
|
||||
|
||||
return 2;
|
||||
|
||||
case Capturemark:
|
||||
case Branchcount:
|
||||
case Lazybranchcount:
|
||||
|
||||
case Onerep:
|
||||
case Notonerep:
|
||||
case Oneloop:
|
||||
case Notoneloop:
|
||||
case Onelazy:
|
||||
case Notonelazy:
|
||||
case Setlazy:
|
||||
case Setrep:
|
||||
case Setloop:
|
||||
|
||||
return 3;
|
||||
|
||||
default:
|
||||
|
||||
throw MakeException(SR.GetString(SR.UnexpectedOpcode, Opcode.ToString(CultureInfo.CurrentCulture)));
|
||||
}
|
||||
}
|
||||
|
||||
internal static ArgumentException MakeException(String message) {
|
||||
return new ArgumentException(message);
|
||||
}
|
||||
|
||||
// Debug only code below
|
||||
|
||||
#if DBG
|
||||
internal static String[] CodeStr = new String[]
|
||||
{
|
||||
"Onerep", "Notonerep", "Setrep",
|
||||
"Oneloop", "Notoneloop", "Setloop",
|
||||
"Onelazy", "Notonelazy", "Setlazy",
|
||||
"One", "Notone", "Set",
|
||||
"Multi", "Ref",
|
||||
"Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
|
||||
"Nothing",
|
||||
"Lazybranch", "Branchmark", "Lazybranchmark",
|
||||
"Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
|
||||
"Nullmark", "Setmark", "Capturemark", "Getmark",
|
||||
"Setjump", "Backjump", "Forejump", "Testref", "Goto",
|
||||
"Prune", "Stop",
|
||||
#if ECMA
|
||||
"ECMABoundary", "NonECMABoundary",
|
||||
#endif
|
||||
};
|
||||
|
||||
internal static String OperatorDescription(int Opcode) {
|
||||
bool isCi = ((Opcode & Ci) != 0);
|
||||
bool isRtl = ((Opcode & Rtl) != 0);
|
||||
bool isBack = ((Opcode & Back) != 0);
|
||||
bool isBack2 = ((Opcode & Back2) != 0);
|
||||
|
||||
return CodeStr[Opcode & Mask] +
|
||||
(isCi ? "-Ci" : "") + (isRtl ? "-Rtl" : "") + (isBack ? "-Back" : "") + (isBack2 ? "-Back2" : "");
|
||||
}
|
||||
|
||||
internal String OpcodeDescription(int offset) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int opcode = _codes[offset];
|
||||
|
||||
sb.AppendFormat("{0:D6} ", offset);
|
||||
sb.Append(OpcodeBacktracks(opcode & Mask) ? '*' : ' ');
|
||||
sb.Append(OperatorDescription(opcode));
|
||||
sb.Append('(');
|
||||
|
||||
opcode &= Mask;
|
||||
|
||||
switch (opcode) {
|
||||
case One:
|
||||
case Notone:
|
||||
case Onerep:
|
||||
case Notonerep:
|
||||
case Oneloop:
|
||||
case Notoneloop:
|
||||
case Onelazy:
|
||||
case Notonelazy:
|
||||
sb.Append("Ch = ");
|
||||
sb.Append(RegexCharClass.CharDescription((char)_codes[offset+1]));
|
||||
break;
|
||||
|
||||
case Set:
|
||||
case Setrep:
|
||||
case Setloop:
|
||||
case Setlazy:
|
||||
sb.Append("Set = ");
|
||||
sb.Append(RegexCharClass.SetDescription(_strings[_codes[offset+1]]));
|
||||
break;
|
||||
|
||||
case Multi:
|
||||
sb.Append("String = ");
|
||||
sb.Append(_strings[_codes[offset+1]]);
|
||||
break;
|
||||
|
||||
case Ref:
|
||||
case Testref:
|
||||
sb.Append("Index = ");
|
||||
sb.Append(_codes[offset+1]);
|
||||
break;
|
||||
|
||||
case Capturemark:
|
||||
sb.Append("Index = ");
|
||||
sb.Append(_codes[offset+1]);
|
||||
if (_codes[offset+2] != -1) {
|
||||
sb.Append(", Unindex = ");
|
||||
sb.Append(_codes[offset+2]);
|
||||
}
|
||||
break;
|
||||
|
||||
case Nullcount:
|
||||
case Setcount:
|
||||
sb.Append("Value = ");
|
||||
sb.Append(_codes[offset+1]);
|
||||
break;
|
||||
|
||||
case Goto:
|
||||
case Lazybranch:
|
||||
case Branchmark:
|
||||
case Lazybranchmark:
|
||||
case Branchcount:
|
||||
case Lazybranchcount:
|
||||
sb.Append("Addr = ");
|
||||
sb.Append(_codes[offset+1]);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (opcode) {
|
||||
case Onerep:
|
||||
case Notonerep:
|
||||
case Oneloop:
|
||||
case Notoneloop:
|
||||
case Onelazy:
|
||||
case Notonelazy:
|
||||
case Setrep:
|
||||
case Setloop:
|
||||
case Setlazy:
|
||||
sb.Append(", Rep = ");
|
||||
if (_codes[offset + 2] == Int32.MaxValue)
|
||||
sb.Append("inf");
|
||||
else
|
||||
sb.Append(_codes[offset + 2]);
|
||||
break;
|
||||
|
||||
case Branchcount:
|
||||
case Lazybranchcount:
|
||||
sb.Append(", Limit = ");
|
||||
if (_codes[offset + 2] == Int32.MaxValue)
|
||||
sb.Append("inf");
|
||||
else
|
||||
sb.Append(_codes[offset + 2]);
|
||||
break;
|
||||
}
|
||||
|
||||
sb.Append(")");
|
||||
|
||||
return sb.ToString();
|
||||
}
|
||||
|
||||
internal void Dump() {
|
||||
int i;
|
||||
|
||||
Debug.WriteLine("Direction: " + (_rightToLeft ? "right-to-left" : "left-to-right"));
|
||||
Debug.WriteLine("Firstchars: " + (_fcPrefix == null ? "n/a" : RegexCharClass.SetDescription(_fcPrefix.Prefix)));
|
||||
Debug.WriteLine("Prefix: " + (_bmPrefix == null ? "n/a" : Regex.Escape(_bmPrefix.ToString())));
|
||||
Debug.WriteLine("Anchors: " + RegexFCD.AnchorDescription(_anchors));
|
||||
Debug.WriteLine("");
|
||||
if (_bmPrefix != null) {
|
||||
Debug.WriteLine("BoyerMoore:");
|
||||
Debug.WriteLine(_bmPrefix.Dump(" "));
|
||||
}
|
||||
for (i = 0; i < _codes.Length;) {
|
||||
Debug.WriteLine(OpcodeDescription(i));
|
||||
i += OpcodeSize(_codes[i]);
|
||||
}
|
||||
|
||||
Debug.WriteLine("");
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user