Jo Shields a575963da9 Imported Upstream version 3.6.0
Former-commit-id: da6be194a6b1221998fc28233f2503bd61dd9d14
2014-08-13 10:39:27 +01:00

2448 lines
67 KiB
C#

// Based upon interpreter.cs, written by Dan Lewis (dlewis@gmx.co.uk)
//
// A couple of sections are flagged with DEAD_CODE; they do not appear to
// have been completed.
//
using System;
using System.Collections;
using System.Globalization;
using System.Diagnostics;
namespace System.Text.RegularExpressions {
// Signature of an alternative matcher that Scan invokes in place of
// EvalByteCode when one was passed to the RxInterpreter constructor.
// Scan passes itself as interp and the scan start as strpos, and stores the
// value left in strpos_result as the end of group 0. A true return value
// makes Scan produce a Match; false makes it return Match.Empty.
internal delegate bool EvalDelegate (RxInterpreter interp, int strpos, ref int strpos_result);
sealed class RxInterpreter: BaseMachine {
// Bytecode executed by EvalByteCode; the constructor reads a 16-bit
// little-endian value from bytes 1-2 to size the groups array.
byte[] program;
// Text being scanned, assigned by Scan on every call.
string str;
// Start and end positions passed to Scan.
int string_start;
int string_end;
// One more than the 16-bit value stored in program [1..2].
int group_count;
// int match_start;
// groups [gid] is the index into marks of the current mark for group gid.
int[] groups;
EvalDelegate eval_del; // optimized EvalByteCode method created by the CILCompiler
Mark[] marks = null; // mark stack
int mark_start; // start of current checkpoint
int mark_end; // end of checkpoint/next free mark
IntStack stack; // utility stack
RepeatContext repeat; // current repeat context
RepeatContext deep; // points to the most-nested repeat context
/* The readonly ensures the JIT can optimize out if (trace_rx) statements */
// True when the MONO_TRACE_RX environment variable is set; always false
// when NET_2_1 is defined.
public static readonly bool trace_rx =
#if !NET_2_1
Environment.GetEnvironmentVariable ("MONO_TRACE_RX") != null;
#else
false;
#endif
// private classes
// Growable stack of ints. The backing array is allocated lazily on the
// first Push (8 slots) and grows by half of its length whenever it is full.
internal struct IntStack {
    int [] values;
    int count;

    // Number of live entries. Assigning a smaller (or equal) value truncates
    // the stack; assigning a larger one throws SystemException.
    public int Count {
        get { return count; }
        set {
            if (value > count)
                throw new SystemException ("can only truncate the stack");
            count = value;
        }
    }

    // Most recently pushed value; the stack itself is left untouched.
    public int Top {
        get { return values [count - 1]; }
    }

    // Appends value on top of the stack, growing the storage when needed.
    public void Push (int value)
    {
        if (values == null) {
            values = new int [8];
        } else if (count == values.Length) {
            int [] grown = new int [values.Length + (values.Length >> 1)];
            Array.Copy (values, grown, count);
            values = grown;
        }
        values [count++] = value;
    }

    // Removes and returns the top value.
    public int Pop ()
    {
        return values [--count];
    }
}
// State for one repeat construct. Each instance keeps a reference to the
// context that was passed as 'previous' when it was created.
private class RepeatContext {
    private readonly RepeatContext previous;
    private readonly int min, max;
    private readonly bool lazy;
    private readonly int expr_pc;
    private int start;
    private int count;

    // A new context starts with Count == 0 and Start == -1.
    public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
        this.previous = previous;
        this.min = min;
        this.max = max;
        this.lazy = lazy;
        this.expr_pc = expr_pc;
        start = -1;
        count = 0;
    }

    // Context supplied at construction time (may be null).
    public RepeatContext Previous {
        get { return previous; }
    }

    // Value of expr_pc supplied at construction time.
    public int Expression {
        get { return expr_pc; }
    }

    public bool IsLazy {
        get { return lazy; }
    }

    // True once Count has reached min.
    public bool IsMinimum {
        get { return count >= min; }
    }

    // True once Count has reached max.
    public bool IsMaximum {
        get { return count >= max; }
    }

    public int Start {
        get { return start; }
        set { start = value; }
    }

    public int Count {
        get { return count; }
        set { count = value; }
    }
}
// Decodes the 32-bit little-endian integer stored in code [pc .. pc + 3].
static int ReadInt (byte[] code, int pc)
{
    return code [pc]
        | (code [pc + 1] << 8)
        | (code [pc + 2] << 16)
        | (code [pc + 3] << 24);
}
// Creates an interpreter for the given bytecode. eval_del may be null, in
// which case Scan runs EvalByteCode instead of the delegate.
public RxInterpreter (byte[] program, EvalDelegate eval_del)
{
    this.program = program;
    this.eval_del = eval_del;

    // Bytes 1-2 hold a 16-bit little-endian value; one extra slot is added
    // to it to obtain the number of group slots.
    int encoded = program [1] | (program [2] << 8);
    group_count = encoded + 1;
    groups = new int [group_count];

    stack = new IntStack ();
    ResetGroups ();
}
// Runs the matcher over text between start and end. Uses the delegate when
// one was supplied, otherwise interprets the bytecode from offset 11.
// Returns the generated Match on success and Match.Empty on failure.
public override Match Scan (Regex regex, string text, int start, int end) {
    str = text;
    string_start = start;
    string_end = end;

    int match_end = 0;
    bool matched = eval_del != null
        ? eval_del (this, start, ref match_end)
        : EvalByteCode (11, start, ref match_end);

    // The end of group 0 is recorded whether or not the match succeeded.
    marks [groups [0]].End = match_end;

    return matched ? GenerateMatch (regex) : Match.Empty;
}
// capture management
// Records ptr as the start of group gid. The group's current mark is reused
// only if it belongs to the current checkpoint (index >= mark_start) and is
// not yet defined; otherwise a new mark chained to the old one is created.
private void Open (int gid, int ptr) {
    int current = groups [gid];
    bool reusable = current >= mark_start && !marks [current].IsDefined;
    if (!reusable) {
        current = CreateMark (current);
        groups [gid] = current;
    }
    marks [current].Start = ptr;
}
// Records ptr as the end of the current mark of group gid.
private void Close (int gid, int ptr) {
    int current = groups [gid];
    marks [current].End = ptr;
}
// Pops the current mark of balance_gid. Returns false when that group has
// no mark or its mark has a negative Index. When capture is set and gid is
// positive, group gid is opened at the end of the popped mark and closed at
// ptr before the pop takes effect.
private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
    int top = groups [balance_gid];

    // Group not previously matched
    if (top == -1 || marks [top].Index < 0)
        return false;

    Debug.Assert (marks [top].IsDefined, "Regex", "Balancng group not closed");

    bool record = capture && gid > 0;
    if (record) {
        Open (gid, marks [top].Index + marks [top].Length);
        Close (gid, ptr);
    }

    groups [balance_gid] = marks [top].Previous;
    return true;
}
// Starts a new checkpoint at the next free mark and returns its index.
private int Checkpoint () {
    int cp = mark_end;
    mark_start = cp;
    return cp;
}
// Rewinds every group to its most recent mark whose index is below cp,
// following the Previous links of the marks.
private void Backtrack (int cp) {
    for (int gid = 0; gid < groups.Length; gid++) {
        int mark = groups [gid];
        while (mark >= cp)
            mark = marks [mark].Previous;
        groups [gid] = mark;
    }
}
// Resets capture state: group i points at mark i, the first groups.Length
// marks are cleared to -1, and the checkpoint window becomes [0, groups.Length).
// The marks array is allocated on first use.
private void ResetGroups () {
    int total = groups.Length;
    if (marks == null)
        marks = new Mark [total];

    for (int gid = 0; gid < total; gid++) {
        groups [gid] = gid;
        marks [gid].Start = marks [gid].End = marks [gid].Previous = -1;
    }

    mark_start = 0;
    mark_end = total;
}
// Walks the mark chain of group gid and returns the index of the first
// defined mark, or a negative value when the chain has none.
private int GetLastDefined (int gid) {
    int mark = groups [gid];
    for (; mark >= 0; mark = marks [mark].Previous) {
        if (marks [mark].IsDefined)
            break;
    }
    return mark;
}
// Allocates the next free mark, doubling the marks array when it is full.
// The new mark has Start and End set to -1 and is linked to 'previous'.
// Returns the index of the new mark.
private int CreateMark (int previous) {
    if (mark_end == marks.Length) {
        Mark [] grown = new Mark [marks.Length * 2];
        Array.Copy (marks, grown, marks.Length);
        marks = grown;
    }

    int slot = mark_end;
    mark_end = slot + 1;

    marks [slot].Start = -1;
    marks [slot].End = -1;
    marks [slot].Previous = previous;
    return slot;
}
// Scans the mark chain of group gid. first_mark_index receives the index of
// the first defined mark met (or -1 if none) and n_caps the number of
// defined marks in the chain.
private void GetGroupInfo (int gid, out int first_mark_index, out int n_caps)
{
    first_mark_index = -1;
    n_caps = 0;

    int mark = groups [gid];
    while (mark >= 0) {
        if (marks [mark].IsDefined) {
            if (first_mark_index < 0)
                first_mark_index = mark;
            n_caps++;
        }
        mark = marks [mark].Previous;
    }
}
// Fills g.Captures from the defined marks that follow first_mark_index in
// the chain. The walk starts at the Previous link of first_mark_index, and
// captures are stored from slot n_caps - 2 downwards.
private void PopulateGroup (Group g, int first_mark_index, int n_caps)
{
    int slot = n_caps - 2;
    int mark = marks [first_mark_index].Previous;
    while (mark >= 0) {
        if (marks [mark].IsDefined) {
            Capture cap = new Capture (str, marks [mark].Index, marks [mark].Length);
            g.Captures.SetValue (cap, slot);
            slot--;
        }
        mark = marks [mark].Previous;
    }
}
// Builds the Match for the current capture state. When groups and captures
// are not needed, a Match with zero groups is returned. Otherwise group 0
// and every other group are populated; groups without a defined mark are
// set to Group.Fail.
private Match GenerateMatch (Regex regex)
{
    int first_mark_index, n_caps;
    GetGroupInfo (0, out first_mark_index, out n_caps);

    int match_index = marks [first_mark_index].Index;
    int match_length = marks [first_mark_index].Length;

    // Avoid fully populating the Match instance if not needed
    if (!needs_groups_or_captures)
        return new Match (regex, this, str, string_end, 0, match_index, match_length);

    Match result = new Match (regex, this, str, string_end, groups.Length,
        match_index, match_length, n_caps);
    PopulateGroup (result, first_mark_index, n_caps);

    for (int gid = 1; gid < groups.Length; gid++) {
        GetGroupInfo (gid, out first_mark_index, out n_caps);

        Group g;
        if (first_mark_index >= 0) {
            g = new Group (str, marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
            PopulateGroup (g, first_mark_index, n_caps);
        } else {
            g = Group.Fail;
        }
        result.Groups.SetValue (g, gid);
    }
    return result;
}
// used by the IL backend
// Stores pos as the Start of the current mark of group 0.
internal void SetStartOfMatch (int pos)
{
    int whole_match = groups [0];
    marks [whole_match].Start = pos;
}
// True for letters, digits and characters in the ConnectorPunctuation
// Unicode category (such as '_').
static bool IsWordChar (char c)
{
    if (Char.IsLetterOrDigit (c))
        return true;
    return Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
}
bool EvalByteCode (int pc, int strpos, ref int strpos_result)
{
// luckily the IL engine can deal with char_group_end at compile time
// this code offset needs to be checked only in opcodes that handle
// a single char and that are included in a TestCharGroup expression:
// the engine is supposed to jump to this offset as soon as the
// first opcode in the expression matches
// The code pattern becomes:
// on successful match: check if char_group_end is nonzero and jump to
// test_char_group_passed after adjusting strpos
// on failure: try the next expression by simply advancing pc
int char_group_end = 0;
int length, start, end;
while (true) {
if (trace_rx) {
Console.WriteLine ("evaluating: {0} at pc: {1}, strpos: {2}, cge: {3}", (RxOp)program [pc], pc, strpos, char_group_end);
//Console.WriteLine ("deep: " + (deep == null ? 0 : deep.GetHashCode ()) + " repeat: " + (this.repeat == null ? 0 : this.repeat.GetHashCode ()));
}
switch ((RxOp)program [pc]) {
case RxOp.True:
if (char_group_end != 0) {
pc = char_group_end;
char_group_end = 0;
continue;
}
strpos_result = strpos;
return true;
case RxOp.False:
return false;
case RxOp.AnyPosition:
pc++;
continue;
case RxOp.StartOfString:
if (strpos != 0)
return false;
pc++;
continue;
case RxOp.StartOfLine:
if (strpos == 0 || str [strpos - 1] == '\n') {
pc++;
continue;
}
return false;
case RxOp.StartOfScan:
if (strpos != string_start)
return false;
pc++;
continue;
case RxOp.End:
if (strpos == string_end || (strpos == string_end - 1 && str [strpos] == '\n')) {
pc++;
continue;
}
return false;
case RxOp.EndOfString:
if (strpos != string_end)
return false;
pc++;
continue;
case RxOp.EndOfLine:
if (strpos == string_end || str [strpos] == '\n') {
pc++;
continue;
}
return false;
case RxOp.WordBoundary:
if (string_end == 0)
return false;
if (strpos == 0) {
if (IsWordChar (str [strpos])) {
pc++;
continue;
}
} else if (strpos == string_end) {
if (IsWordChar (str [strpos - 1])) {
pc++;
continue;
}
} else {
if (IsWordChar (str [strpos]) != IsWordChar (str [strpos - 1])) {
pc++;
continue;
}
}
return false;
case RxOp.NoWordBoundary:
if (string_end == 0)
return false;
if (strpos == 0) {
if (!IsWordChar (str [strpos])) {
pc++;
continue;
}
} else if (strpos == string_end) {
if (!IsWordChar (str [strpos - 1])) {
pc++;
continue;
}
} else {
if (IsWordChar (str [strpos]) == IsWordChar (str [strpos - 1])) {
pc++;
continue;
}
}
return false;
case RxOp.Anchor:
int skip = program [pc + 1] | ((int)program [pc + 2] << 8);
int anch_offset = program [pc + 3] | ((int)program [pc + 4] << 8);
/*
* In the general case, we have to evaluate the bytecode
* starting at pc + skip, however the optimizer emits some
* special cases, whose bytecode begins at pc + 5.
*/
int anch_pc = pc + 5;
RxOp anch_op = (RxOp)(program[anch_pc] & 0x00ff);
bool spec_anch = false;
// FIXME: Add more special cases from interpreter.cs
if (anch_op == RxOp.String || anch_op == RxOp.StringIgnoreCase) {
if (pc + skip == anch_pc + 2 + program [anch_pc + 1] + 1) {
// Anchor
// String
// True
spec_anch = true;
if (trace_rx)
Console.WriteLine (" string anchor at {0}, offset {1}", anch_pc, anch_offset);
}
}
pc += skip;
if ((RxOp)program [pc] == RxOp.StartOfString) {
if (strpos == 0) {
int res = strpos;
if (groups.Length > 1) {
ResetGroups ();
marks [groups [0]].Start = strpos;
}
if (EvalByteCode (pc + 1, strpos, ref res)) {
marks [groups [0]].Start = strpos;
if (groups.Length > 1)
marks [groups [0]].End = res;
strpos_result = res;
return true;
}
}
return false;
}
// it's important to test also the end of the string
// position for things like: "" =~ /$/
end = string_end + 1;
while (strpos < end) {
if (spec_anch) {
if (anch_op == RxOp.String || anch_op == RxOp.StringIgnoreCase) {
/*
* This means the match must contain a given
* string at a constant position, so we can skip
* forward until the string matches. This is a win if
* the rest of the regex
* has a complex positive lookbehind for example.
*/
int tmp_res = strpos;
if (!EvalByteCode (anch_pc, strpos + anch_offset, ref tmp_res)) {
strpos ++;
continue;
}
}
}
int res = strpos;
if (groups.Length > 1) {
ResetGroups ();
marks [groups [0]].Start = strpos;
}
if (EvalByteCode (pc, strpos, ref res)) {
// match_start = strpos;
marks [groups [0]].Start = strpos;
if (groups.Length > 1)
marks [groups [0]].End = res;
strpos_result = res;
return true;
}
strpos++;
}
return false;
case RxOp.AnchorReverse:
length = program [pc + 3] | ((int)program [pc + 4] << 8);
pc += program [pc + 1] | ((int)program [pc + 2] << 8);
// it's important to test also the end of the string
// position for things like: "" =~ /$/
end = 0;
while (strpos >= 0) {
int res = strpos;
if (groups.Length > 1) {
ResetGroups ();
marks [groups [0]].Start = strpos;
}
if (EvalByteCode (pc, strpos, ref res)) {
// match_start = strpos;
marks [groups [0]].Start = strpos;
if (groups.Length > 1)
marks [groups [0]].End = res;
strpos_result = res;
return true;
}
strpos--;
}
return false;
case RxOp.Reference:
length = GetLastDefined (program [pc + 1] | ((int)program [pc + 2] << 8));
if (length < 0)
return false;
start = marks [length].Index;
length = marks [length].Length;
if (strpos + length > string_end)
return false;
for (end = start + length; start < end; ++start) {
if (str [strpos] != str [start])
return false;
strpos++;
}
pc += 3;
continue;
case RxOp.ReferenceIgnoreCase:
length = GetLastDefined (program [pc + 1] | ((int)program [pc + 2] << 8));
if (length < 0)
return false;
start = marks [length].Index;
length = marks [length].Length;
if (strpos + length > string_end)
return false;
for (end = start + length; start < end; ++start) {
if (str [strpos] != str [start] && Char.ToLower (str [strpos]) != Char.ToLower (str [start]))
return false;
strpos++;
}
pc += 3;
continue;
case RxOp.ReferenceReverse: {
length = GetLastDefined (program [pc + 1] | ((int)program [pc + 2] << 8));
if (length < 0)
return false;
start = marks [length].Index;
length = marks [length].Length;
if (strpos - length < 0)
return false;
int p = strpos - length;
for (end = start + length; start < end; ++start, ++p) {
if (str [p] != str [start])
return false;
}
strpos -= length;
pc += 3;
continue;
}
case RxOp.IfDefined:
if (GetLastDefined (program [pc + 3] | ((int)program [pc + 4] << 8)) >= 0)
pc += 5;
else
pc += program [pc + 1] | ((int)program [pc + 2] << 8);
continue;
case RxOp.SubExpression: {
int res = 0;
if (EvalByteCode (pc + 3, strpos, ref res)) {
pc += program [pc + 1] | ((int)program [pc + 2] << 8);
strpos = res;
continue;
}
return false;
}
case RxOp.Test: {
int res = 0;
// FIXME: checkpoint
if (EvalByteCode (pc + 5, strpos, ref res)) {
pc += program [pc + 1] | ((int)program [pc + 2] << 8);
} else {
pc += program [pc + 3] | ((int)program [pc + 4] << 8);
}
continue;
}
case RxOp.OpenGroup:
Open (program [pc + 1] | ((int)program [pc + 2] << 8), strpos);
pc += 3;
continue;
case RxOp.CloseGroup:
Close (program [pc + 1] | ((int)program [pc + 2] << 8), strpos);
pc += 3;
continue;
case RxOp.BalanceStart: {
int res = 0;
if (!EvalByteCode (pc + 8, strpos, ref res))
goto Fail;
int gid = program [pc + 1] | ((int)program [pc + 2] << 8);
int balance_gid = program [pc + 3] | ((int)program [pc + 4] << 8);
bool capture = program [pc + 5] > 0;
if (!Balance (gid, balance_gid, capture, strpos))
goto Fail;
strpos = res;
pc += program[pc + 6] | ((int)program [pc + 7] << 8);
break;
}
case RxOp.Balance: {
goto Pass;
}
case RxOp.Jump:
pc += program [pc + 1] | ((int)program [pc + 2] << 8);
continue;
case RxOp.TestCharGroup:
char_group_end = pc + (program [pc + 1] | ((int)program [pc + 2] << 8));
pc += 3;
continue;
case RxOp.String:
start = pc + 2;
length = program [pc + 1];
if (strpos + length > string_end)
return false;
end = start + length;
for (; start < end; ++start) {
if (str [strpos] != program [start])
return false;
strpos++;
}
pc = end;
continue;
case RxOp.StringIgnoreCase:
start = pc + 2;
length = program [pc + 1];
if (strpos + length > string_end)
return false;
end = start + length;
for (; start < end; ++start) {
if (str [strpos] != program [start] && Char.ToLower (str [strpos]) != program [start])
return false;
strpos++;
}
pc = end;
continue;
case RxOp.StringReverse: {
start = pc + 2;
length = program [pc + 1];
if (strpos < length)
return false;
int p = strpos - length;
end = start + length;
for (; start < end; ++start, ++p) {
if (str [p] != program [start])
return false;
}
strpos -= length;
pc = end;
continue;
}
case RxOp.StringIgnoreCaseReverse: {
start = pc + 2;
length = program [pc + 1];
if (strpos < length)
return false;
int p = strpos - length;
end = start + length;
for (; start < end; ++start, ++p) {
if (str [p] != program [start] && Char.ToLower (str [p]) != program [start])
return false;
}
strpos -= length;
pc = end;
continue;
}
case RxOp.UnicodeString: {
start = pc + 3;
length = program [pc + 1] | ((int)program [pc + 2] << 8);
if (strpos + length > string_end)
return false;
end = start + length * 2;
for (; start < end; start += 2) {
int c = program [start] | ((int)program [start + 1] << 8);
if (str [strpos] != c)
return false;
strpos++;
}
pc = end;
continue;
}
case RxOp.UnicodeStringIgnoreCase: {
start = pc + 3;
length = program [pc + 1] | ((int)program [pc + 2] << 8);
if (strpos + length > string_end)
return false;
end = start + length * 2;
for (; start < end; start += 2) {
int c = program [start] | ((int)program [start + 1] << 8);
if (str [strpos] != c && Char.ToLower (str [strpos]) != c)
return false;
strpos++;
}
pc = end;
continue;
}
case RxOp.UnicodeStringReverse: {
start = pc + 3;
length = program [pc + 1] | ((int)program [pc + 2] << 8);
if (strpos < length)
return false;
int p = strpos - length;
end = start + length * 2;
for (; start < end; start += 2, p += 2) {
int c = program [start] | ((int)program [start + 1] << 8);
if (str [p] != c)
return false;
}
strpos -= length;
pc = end;
continue;
}
case RxOp.UnicodeStringIgnoreCaseReverse: {
start = pc + 3;
length = program [pc + 1] | ((int)program [pc + 2] << 8);
if (strpos < length)
return false;
int p = strpos - length;
end = start + length * 2;
for (; start < end; start += 2, p += 2) {
int c = program [start] | ((int)program [start + 1] << 8);
if (str [p] != c && Char.ToLower (str [p]) != c)
return false;
}
strpos -= length;
pc = end;
continue;
}
/*
* The opcodes below are basically specialized versions of one
* generic opcode, which has three parameters:
* - reverse (Reverse), revert (No), ignore-case (IgnoreCase)
* Thus each opcode has 8 variants.
* FIXME: Maybe move all unusual variations
* (Reverse+IgnoreCase+Unicode) into a generic GenericChar opcode
* like in the old interpreter.
* FIXME: Move all the Reverse opcodes to a separate method.
*/
#if FALSE
if (!reverse) {
if (strpos < string_end && (COND (str [strpos]))) {
if (!revert) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += ins_len;
continue;
} else {
/*
* If we are inside a char group, the cases are ANDed
* together, so we have to continue checking the
* other cases, and we need to increase strpos after
* the final check.
* The char group is termined by a True, hence the
* + 1 below.
* FIXME: Optimize this.
*/
pc += ins_len;
if (char_group_end == 0 || (pc + 1 == char_group_end))
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
continue;
}
} else {
if (!revert) {
if (char_group_end == 0)
return false;
pc += ins_len;
continue;
} else {
/* Fail both inside and outside a char group */
return false;
}
}
} else {
// Same as above, but use:
// - strpos > 0 instead of strpos < string_len
// - COND (str [strpos - 1]) instead of COND (str [strpos])
// - strpos -- instead of strpos ++
}
#endif
// GENERATED BY gen-interp.cs, DO NOT MODIFY
/* Char */
case RxOp.Char:
if (strpos < string_end) {
char c = str [strpos];
if (((c == program [pc + 1]))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 2;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 2;
continue;
/* Range */
case RxOp.Range:
if (strpos < string_end) {
char c = str [strpos];
if (((c >= program [pc + 1] && c <= program [pc + 2]))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3;
continue;
/* UnicodeRange */
case RxOp.UnicodeRange:
if (strpos < string_end) {
char c = str [strpos];
if (((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 5;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 5;
continue;
/* UnicodeChar */
case RxOp.UnicodeChar:
if (strpos < string_end) {
char c = str [strpos];
if (((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3;
continue;
/* CategoryAny */
case RxOp.CategoryAny:
if (strpos < string_end) {
char c = str [strpos];
if (((c != '\n'))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
/* CategoryAnySingleline */
case RxOp.CategoryAnySingleline:
if (strpos < string_end) {
// char c = str [strpos];
if ((true)) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
/* CategoryWord */
case RxOp.CategoryWord:
if (strpos < string_end) {
char c = str [strpos];
if (((Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
/* CategoryDigit */
case RxOp.CategoryDigit:
if (strpos < string_end) {
char c = str [strpos];
if (((Char.IsDigit (c)))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
/* CategoryWhiteSpace */
case RxOp.CategoryWhiteSpace:
if (strpos < string_end) {
char c = str [strpos];
if (((Char.IsWhiteSpace (c)))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
/* CategoryEcmaWord */
case RxOp.CategoryEcmaWord:
if (strpos < string_end) {
char c = str [strpos];
if ((('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_'))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
/* CategoryEcmaWhiteSpace */
case RxOp.CategoryEcmaWhiteSpace:
if (strpos < string_end) {
char c = str [strpos];
if (((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
/* CategoryUnicodeSpecials */
case RxOp.CategoryUnicodeSpecials:
if (strpos < string_end) {
char c = str [strpos];
if ((('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD'))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
/* CategoryUnicode */
case RxOp.CategoryUnicode:
if (strpos < string_end) {
char c = str [strpos];
if (((Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 2;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 2;
continue;
/* CategoryGeneral */
case RxOp.CategoryGeneral:
if (strpos < string_end) {
char c = str [strpos];
if (((CategoryUtils.IsCategory ((Category)program [pc + 1], c)))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 2;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 2;
continue;
/* Bitmap */
case RxOp.Bitmap:
if (strpos < string_end) {
char c = str [strpos];
int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3 + program [pc + 2];
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3 + program [pc + 2];
continue;
/* UnicodeBitmap */
case RxOp.UnicodeBitmap:
if (strpos < string_end) {
char c = str [strpos];
int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
continue;
}
}
if (char_group_end == 0)
return false;
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
continue;
case RxOp.CharIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
if (((c == program [pc + 1]))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 2;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 2;
continue;
case RxOp.RangeIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
if (((c >= program [pc + 1] && c <= program [pc + 2]))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3;
continue;
case RxOp.UnicodeRangeIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
if (((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 5;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 5;
continue;
case RxOp.UnicodeCharIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
if (((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3;
continue;
case RxOp.BitmapIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3 + program [pc + 2];
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3 + program [pc + 2];
continue;
case RxOp.UnicodeBitmapIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
strpos ++;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
continue;
}
}
if (char_group_end == 0)
return false;
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
continue;
case RxOp.NoChar:
if (strpos < string_end) {
char c = str [strpos];
if (!((c == program [pc + 1]))) {
pc += 2;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoRange:
if (strpos < string_end) {
char c = str [strpos];
if (!((c >= program [pc + 1] && c <= program [pc + 2]))) {
pc += 3;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeRange:
if (strpos < string_end) {
char c = str [strpos];
if (!((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
pc += 5;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeChar:
if (strpos < string_end) {
char c = str [strpos];
if (!((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
pc += 3;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryAny:
if (strpos < string_end) {
char c = str [strpos];
if (!((c != '\n'))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryAnySingleline:
if (strpos < string_end) {
#if DEAD_CODE
char c = str [strpos];
if (!(true)) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
#endif
}
return false;
case RxOp.NoCategoryWord:
if (strpos < string_end) {
char c = str [strpos];
if (!((Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryDigit:
if (strpos < string_end) {
char c = str [strpos];
if (!((Char.IsDigit (c)))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryWhiteSpace:
if (strpos < string_end) {
char c = str [strpos];
if (!((Char.IsWhiteSpace (c)))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryEcmaWord:
if (strpos < string_end) {
char c = str [strpos];
if (!(('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_'))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryEcmaWhiteSpace:
if (strpos < string_end) {
char c = str [strpos];
if (!((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryUnicodeSpecials:
if (strpos < string_end) {
char c = str [strpos];
if (!(('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD'))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryUnicode:
if (strpos < string_end) {
char c = str [strpos];
if (!((Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]))) {
pc += 2;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryGeneral:
if (strpos < string_end) {
char c = str [strpos];
if (!((CategoryUtils.IsCategory ((Category)program [pc + 1], c)))) {
pc += 2;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoBitmap:
if (strpos < string_end) {
char c = str [strpos];
int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
pc += 3 + program [pc + 2];
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeBitmap:
if (strpos < string_end) {
char c = str [strpos];
int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCharIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
if (!((c == program [pc + 1]))) {
pc += 2;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoRangeIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
if (!((c >= program [pc + 1] && c <= program [pc + 2]))) {
pc += 3;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeRangeIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
if (!((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
pc += 5;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeCharIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
if (!((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
pc += 3;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoBitmapIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
pc += 3 + program [pc + 2];
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeBitmapIgnoreCase:
if (strpos < string_end) {
char c = Char.ToLower (str [strpos]);
int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos ++;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.CharReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((c == program [pc + 1]))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 2;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 2;
continue;
case RxOp.RangeReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((c >= program [pc + 1] && c <= program [pc + 2]))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3;
continue;
case RxOp.UnicodeRangeReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 5;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 5;
continue;
case RxOp.UnicodeCharReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3;
continue;
case RxOp.CategoryAnyReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((c != '\n'))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
case RxOp.CategoryAnySinglelineReverse:
if (strpos > 0) {
//char c = str [strpos - 1];
if ((true)) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
case RxOp.CategoryWordReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
case RxOp.CategoryDigitReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((Char.IsDigit (c)))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
case RxOp.CategoryWhiteSpaceReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((Char.IsWhiteSpace (c)))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
case RxOp.CategoryEcmaWordReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if ((('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_'))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
case RxOp.CategoryEcmaWhiteSpaceReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
case RxOp.CategoryUnicodeSpecialsReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if ((('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD'))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 1;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 1;
continue;
case RxOp.CategoryUnicodeReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 2;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 2;
continue;
case RxOp.CategoryGeneralReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (((CategoryUtils.IsCategory ((Category)program [pc + 1], c)))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 2;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 2;
continue;
case RxOp.BitmapReverse:
if (strpos > 0) {
char c = str [strpos - 1];
int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3 + program [pc + 2];
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3 + program [pc + 2];
continue;
case RxOp.UnicodeBitmapReverse:
if (strpos > 0) {
char c = str [strpos - 1];
int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
continue;
}
}
if (char_group_end == 0)
return false;
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
continue;
case RxOp.CharIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
if (((c == program [pc + 1]))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 2;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 2;
continue;
case RxOp.RangeIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
if (((c >= program [pc + 1] && c <= program [pc + 2]))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3;
continue;
case RxOp.UnicodeRangeIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
if (((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 5;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 5;
continue;
case RxOp.UnicodeCharIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
if (((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3;
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3;
continue;
case RxOp.BitmapIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 3 + program [pc + 2];
continue;
}
}
if (char_group_end == 0)
return false;
pc += 3 + program [pc + 2];
continue;
case RxOp.UnicodeBitmapIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
strpos --;
if (char_group_end != 0)
goto test_char_group_passed;
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
continue;
}
}
if (char_group_end == 0)
return false;
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
continue;
case RxOp.NoCharReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((c == program [pc + 1]))) {
pc += 2;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoRangeReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((c >= program [pc + 1] && c <= program [pc + 2]))) {
pc += 3;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeRangeReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
pc += 5;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeCharReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
pc += 3;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryAnyReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((c != '\n'))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryAnySinglelineReverse:
if (strpos > 0) {
#if DEAD_CODe
char c = str [strpos - 1];
if (!(true)) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
#endif
}
return false;
case RxOp.NoCategoryWordReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryDigitReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((Char.IsDigit (c)))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryWhiteSpaceReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((Char.IsWhiteSpace (c)))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryEcmaWordReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!(('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_'))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryEcmaWhiteSpaceReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryUnicodeSpecialsReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!(('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD'))) {
pc += 1;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryUnicodeReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]))) {
pc += 2;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCategoryGeneralReverse:
if (strpos > 0) {
char c = str [strpos - 1];
if (!((CategoryUtils.IsCategory ((Category)program [pc + 1], c)))) {
pc += 2;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoBitmapReverse:
if (strpos > 0) {
char c = str [strpos - 1];
int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
pc += 3 + program [pc + 2];
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeBitmapReverse:
if (strpos > 0) {
char c = str [strpos - 1];
int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoCharIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
if (!((c == program [pc + 1]))) {
pc += 2;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoRangeIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
if (!((c >= program [pc + 1] && c <= program [pc + 2]))) {
pc += 3;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeRangeIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
if (!((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
pc += 5;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeCharIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
if (!((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
pc += 3;
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoBitmapIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
pc += 3 + program [pc + 2];
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
case RxOp.NoUnicodeBitmapIgnoreCaseReverse:
if (strpos > 0) {
char c = Char.ToLower (str [strpos - 1]);
int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
if (char_group_end == 0 || (pc + 1 == char_group_end)) {
strpos --;
if (pc + 1 == char_group_end)
goto test_char_group_passed;
}
continue;
}
}
return false;
// END OF GENERATED CODE
case RxOp.Branch: {
int res = 0;
if (EvalByteCode (pc + 3, strpos, ref res)) {
strpos_result = res;
return true;
}
//Console.WriteLine ("branch offset: {0}", program [pc + 1] | ((int)program [pc + 2] << 8));
pc += program [pc + 1] | ((int)program [pc + 2] << 8);
continue;
}
case RxOp.Repeat:
case RxOp.RepeatLazy: {
/*
* Repetition is modelled by two opcodes, Repeat and Until, which
* enclose the quantified regex between them, i.e.:
* Repeat, <bytecode for the inner regex>, Until, <Tail expr>
* It is processed as follows:
* Repeat, [Until, <inner expr>]*, <Tail>
* This means that nested quantifiers are processed a bit
* strangely: when the inner quantifier fails to match, its
* tail is processed, which includes the outer Until.
*
* This code is from the old interpreter.cs.
*
* FIXME: Rethink this.
*/
int res = 0;
this.repeat = new RepeatContext (
this.repeat, // previous context
ReadInt (program, pc + 3), // minimum
ReadInt (program, pc + 7), // maximum
(RxOp)program [pc] == RxOp.RepeatLazy, // lazy
pc + 11 // subexpression
);
int until = pc + (program [pc + 1] | ((int)program [pc + 2] << 8));
if (!EvalByteCode (until, strpos, ref res)) {
this.repeat = this.repeat.Previous;
return false;
}
strpos = res;
strpos_result = strpos;
return true;
}
case RxOp.Until: {
RepeatContext current = this.repeat;
int res = 0;
//
// Can we avoid recursion?
//
// Backtracking can be forced in nested quantifiers from the tail of this quantifier.
// Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
// quantifiers.
//
// If 'deep' was unmolested, that implies that there was no nested quantifiers.
// Thus, we can safely avoid recursion.
//
if (deep == current)
goto Pass;
start = current.Start;
int start_count = current.Count;
// First match items until the minimum count is reached (current.IsMinimum), without backtracking
while (!current.IsMinimum) {
++ current.Count;
current.Start = strpos;
deep = current;
if (!EvalByteCode (current.Expression, strpos, ref res)) {
current.Start = start;
current.Count = start_count;
goto Fail;
}
strpos = res;
if (deep != current) // recursive mode
goto Pass;
}
if (strpos == current.Start) {
// degenerate match ... match tail or fail
this.repeat = current.Previous;
deep = null;
if (EvalByteCode (pc + 1, strpos, ref res)) {
strpos = res;
goto Pass;
}
this.repeat = current;
goto Fail;
}
if (current.IsLazy) {
for (;;) {
// match tail first ...
this.repeat = current.Previous;
deep = null;
int cp = Checkpoint ();
if (EvalByteCode (pc + 1, strpos, ref res)) {
strpos = res;
goto Pass;
}
Backtrack (cp);
// ... then match more
this.repeat = current;
if (current.IsMaximum)
goto Fail;
++ current.Count;
current.Start = strpos;
deep = current;
if (!EvalByteCode (current.Expression, strpos, ref res)) {
current.Start = start;
current.Count = start_count;
goto Fail;
}
strpos = res;
if (deep != current) // recursive mode
goto Pass;
// Degenerate match: ptr has not moved since the last (failed) tail match.
// So, next and subsequent tail matches will fail.
if (strpos == current.Start)
goto Fail;
}
} else {
int stack_size = stack.Count;
// match greedily as much as possible
while (!current.IsMaximum) {
int cp = Checkpoint ();
int old_ptr = strpos;
int old_start = current.Start;
++ current.Count;
if (trace_rx)
Console.WriteLine ("recurse with count {0}.", current.Count);
current.Start = strpos;
deep = current;
if (!EvalByteCode (current.Expression, strpos, ref res)) {
-- current.Count;
current.Start = old_start;
Backtrack (cp);
break;
}
strpos = res;
if (deep != current) {
// recursive mode: no more backtracking, truncate the stack
stack.Count = stack_size;
goto Pass;
}
stack.Push (cp);
stack.Push (old_ptr);
// Degenerate match: no point going on
if (strpos == current.Start)
break;
}
if (trace_rx)
Console.WriteLine ("matching tail: {0} pc={1}", strpos, pc + 1);
// then, match the tail, backtracking as necessary.
this.repeat = current.Previous;
for (;;) {
deep = null;
if (EvalByteCode (pc + 1, strpos, ref res)) {
strpos = res;
stack.Count = stack_size;
goto Pass;
}
if (stack.Count == stack_size) {
this.repeat = current;
goto Fail;
}
--current.Count;
strpos = stack.Pop ();
Backtrack (stack.Pop ());
if (trace_rx)
Console.WriteLine ("backtracking to {0} expr={1} pc={2}", strpos, current.Expression, pc);
}
}
}
case RxOp.FastRepeat:
case RxOp.FastRepeatLazy: {
/*
* A FastRepeat is a simplified version of Repeat which does
* not contain another repeat inside, so backtracking is
* easier.
*/
bool lazy = program [pc] == (byte)RxOp.FastRepeatLazy;
int res = 0;
int tail = pc + (program [pc + 1] | ((int)program [pc + 2] << 8));
start = ReadInt (program, pc + 3);
end = ReadInt (program, pc + 7);
//Console.WriteLine ("min: {0}, max: {1} tail: {2}", start, end, tail);
length = 0;
deep = null;
// First match at least 'start' items
while (length < start) {
if (!EvalByteCode (pc + 11, strpos, ref res))
return false;
strpos = res;
length++;
}
if (lazy) {
while (true) {
// Match the tail
int cp = Checkpoint ();
if (EvalByteCode (tail, strpos, ref res)) {
strpos = res;
goto repeat_success;
}
Backtrack (cp);
if (length >= end)
return false;
// Match an item
if (!EvalByteCode (pc + 11, strpos, ref res))
return false;
strpos = res;
length ++;
}
} else {
// Then match as many items as possible, recording
// backtracking information
int old_stack_size = stack.Count;
while (length < end) {
int cp = Checkpoint ();
if (!EvalByteCode (pc + 11, strpos, ref res)) {
Backtrack (cp);
break;
}
stack.Push (cp);
stack.Push (strpos);
strpos = res;
length++;
}
if (tail <= pc)
throw new Exception ();
// Then, match the tail, backtracking as necessary.
while (true) {
if (EvalByteCode (tail, strpos, ref res)) {
strpos = res;
stack.Count = old_stack_size;
goto repeat_success;
}
if (stack.Count == old_stack_size)
return false;
// Backtrack
strpos = stack.Pop ();
Backtrack (stack.Pop ());
if (trace_rx)
Console.WriteLine ("backtracking to: {0}", strpos);
}
}
repeat_success:
// We matched the tail too so just return
goto Pass;
}
default:
Console.WriteLine ("evaluating: {0} at pc: {1}, strpos: {2}", (RxOp)program [pc], pc, strpos);
throw new NotSupportedException ();
}
continue;
Pass:
strpos_result = strpos;
return true;
Fail:
return false;
test_char_group_passed:
pc = char_group_end;
char_group_end = 0;
continue;
} // end of while (true)
}
}
}