469 lines
16 KiB
C#
Raw Normal View History

//------------------------------------------------------------------------------
// <copyright file="RegexMatch.cs" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//------------------------------------------------------------------------------
// Match is the result class for a regex search.
// It returns the location, length, and substring for
// the entire match as well as every captured group.
// Match is also used during the search to keep track of each capture for each group. This is
// done using the "_matches" array. _matches[x] represents an array of the captures for group x.
// This array consists of start and length pairs, and may have empty entries at the end. _matchcount[x]
// stores how many captures a group has. Note that _matchcount[x]*2 is the length of all the valid
// values in _matches. _matchcount[x]*2-2 is the Start of the last capture, and _matchcount[x]*2-1 is the
// Length of the last capture
//
// For example, if group 2 has one capture starting at position 4 with length 6,
// _matchcount[2] == 1
// _matches[2][0] == 4
// _matches[2][1] == 6
//
// Values in the _matches array can also be negative. This happens when using the balanced match
// construct, "(?<start-end>...)". When the "end" group matches, a capture is added for both the "start"
// and "end" groups. The capture added for "start" receives the negative values, and these values point to
// the next capture to be balanced. They do NOT point to the capture that "end" just balanced out. The negative
// values are indices into the _matches array transformed by the formula -3-x. This formula also untransforms.
//
namespace System.Text.RegularExpressions {
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Security.Permissions;
using System.Globalization;
/// <devdoc>
/// <para>
/// Represents
/// the results from a single regular expression match.
/// </para>
/// </devdoc>
#if !SILVERLIGHT
[ Serializable() ]
#endif
public class Match : Group {
internal static Match _empty = new Match(null, 1, String.Empty, 0, 0, 0);
internal GroupCollection _groupcoll;
// input to the match
internal Regex _regex;
internal int _textbeg;
internal int _textpos;
internal int _textend;
internal int _textstart;
// output from the match
internal int[][] _matches;
internal int[] _matchcount;
internal bool _balancing; // whether we've done any balancing with this match. If we
// have done balancing, we'll need to do extra work in Tidy().
/// <devdoc>
/// <para>
/// Returns an empty Match object.
/// </para>
/// </devdoc>
public static Match Empty {
get {
return _empty;
}
}
/*
* Nonpublic constructor
*/
internal Match(Regex regex, int capcount, String text, int begpos, int len, int startpos)
: base(text, new int[2], 0) {
_regex = regex;
_matchcount = new int[capcount];
_matches = new int[capcount][];
_matches[0] = _caps;
_textbeg = begpos;
_textend = begpos + len;
_textstart = startpos;
_balancing = false;
// No need for an exception here. This is only called internally, so we'll use an Assert instead
System.Diagnostics.Debug.Assert(!(_textbeg < 0 || _textstart < _textbeg || _textend < _textstart || _text.Length < _textend),
"The parameters are out of range.");
}
/*
* Nonpublic set-text method
*/
internal virtual void Reset(Regex regex, String text, int textbeg, int textend, int textstart) {
_regex = regex;
_text = text;
_textbeg = textbeg;
_textend = textend;
_textstart = textstart;
for (int i = 0; i < _matchcount.Length; i++) {
_matchcount[i] = 0;
}
_balancing = false;
}
/// <devdoc>
/// <para>[To be supplied.]</para>
/// </devdoc>
public virtual GroupCollection Groups {
get {
if (_groupcoll == null)
_groupcoll = new GroupCollection(this, null);
return _groupcoll;
}
}
/*
* Returns the next match
*/
/// <devdoc>
/// <para>Returns a new Match with the results for the next match, starting
/// at the position at which the last match ended (at the character beyond the last
/// matched character).</para>
/// </devdoc>
public Match NextMatch() {
if (_regex == null)
return this;
return _regex.Run(false, _length, _text, _textbeg, _textend - _textbeg, _textpos);
}
/*
* Return the result string (using the replacement pattern)
*/
/// <devdoc>
/// <para>
/// Returns the expansion of the passed replacement pattern. For
/// example, if the replacement pattern is ?$1$2?, Result returns the concatenation
/// of Group(1).ToString() and Group(2).ToString().
/// </para>
/// </devdoc>
public virtual String Result(String replacement) {
RegexReplacement repl;
if (replacement == null)
throw new ArgumentNullException("replacement");
if (_regex == null)
throw new NotSupportedException(SR.GetString(SR.NoResultOnFailed));
repl = (RegexReplacement)_regex.replref.Get();
if (repl == null || !repl.Pattern.Equals(replacement)) {
repl = RegexParser.ParseReplacement(replacement, _regex.caps, _regex.capsize, _regex.capnames, _regex.roptions);
_regex.replref.Cache(repl);
}
return repl.Replacement(this);
}
/*
* Used by the replacement code
*/
internal virtual String GroupToStringImpl(int groupnum) {
int c = _matchcount[groupnum];
if (c == 0)
return String.Empty;
int [] matches = _matches[groupnum];
return _text.Substring(matches[(c - 1) * 2], matches[(c * 2) - 1]);
}
/*
* Used by the replacement code
*/
internal String LastGroupToStringImpl() {
return GroupToStringImpl(_matchcount.Length - 1);
}
/*
* Convert to a thread-safe object by precomputing cache contents
*/
/// <devdoc>
/// <para>
/// Returns a Match instance equivalent to the one supplied that is safe to share
/// between multiple threads.
/// </para>
/// </devdoc>
#if !SILVERLIGHT
#if FEATURE_MONO_CAS
[HostProtection(Synchronization=true)]
#endif
static public Match Synchronized(Match inner) {
#else
static internal Match Synchronized(Match inner) {
#endif
if (inner == null)
throw new ArgumentNullException("inner");
int numgroups = inner._matchcount.Length;
// Populate all groups by looking at each one
for (int i = 0; i < numgroups; i++) {
Group group = inner.Groups[i];
// Depends on the fact that Group.Synchronized just
// operates on and returns the same instance
System.Text.RegularExpressions.Group.Synchronized(group);
}
return inner;
}
/*
* Nonpublic builder: add a capture to the group specified by "cap"
*/
internal virtual void AddMatch(int cap, int start, int len) {
int capcount;
if (_matches[cap] == null)
_matches[cap] = new int[2];
capcount = _matchcount[cap];
if (capcount * 2 + 2 > _matches[cap].Length) {
int[] oldmatches = _matches[cap];
int[] newmatches = new int[capcount * 8];
for (int j = 0; j < capcount * 2; j++)
newmatches[j] = oldmatches[j];
_matches[cap] = newmatches;
}
_matches[cap][capcount * 2] = start;
_matches[cap][capcount * 2 + 1] = len;
_matchcount[cap] = capcount + 1;
}
/*
* Nonpublic builder: Add a capture to balance the specified group. This is used by the
balanced match construct. (?<foo-foo2>...)
If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(cap).
However, since we have backtracking, we need to keep track of everything.
*/
internal virtual void BalanceMatch(int cap) {
int capcount;
int target;
_balancing = true;
// we'll look at the last capture first
capcount = _matchcount[cap];
target = capcount * 2 - 2;
// first see if it is negative, and therefore is a reference to the next available
// capture group for balancing. If it is, we'll reset target to point to that capture.
if (_matches[cap][target] < 0)
target = -3 - _matches[cap][target];
// move back to the previous capture
target -= 2;
// if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
if (target >= 0 && _matches[cap][target] < 0)
AddMatch(cap, _matches[cap][target], _matches[cap][target+1]);
else
AddMatch(cap, -3 - target, -4 - target /* == -3 - (target + 1) */ );
}
/*
* Nonpublic builder: removes a group match by capnum
*/
internal virtual void RemoveMatch(int cap) {
_matchcount[cap]--;
}
/*
* Nonpublic: tells if a group was matched by capnum
*/
internal virtual bool IsMatched(int cap) {
return cap < _matchcount.Length && _matchcount[cap] > 0 && _matches[cap][_matchcount[cap] * 2 - 1] != (-3 + 1);
}
/*
* Nonpublic: returns the index of the last specified matched group by capnum
*/
internal virtual int MatchIndex(int cap) {
int i = _matches[cap][_matchcount[cap] * 2 - 2];
if (i >= 0)
return i;
return _matches[cap][-3 - i];
}
/*
* Nonpublic: returns the length of the last specified matched group by capnum
*/
internal virtual int MatchLength(int cap) {
int i = _matches[cap][_matchcount[cap] * 2 - 1];
if (i >= 0)
return i;
return _matches[cap][-3 - i];
}
/*
* Nonpublic: tidy the match so that it can be used as an immutable result
*/
internal virtual void Tidy(int textpos) {
int[] interval;
interval = _matches[0];
_index = interval[0];
_length = interval[1];
_textpos = textpos;
_capcount = _matchcount[0];
if (_balancing) {
// The idea here is that we want to compact all of our unbalanced captures. To do that we
// use j basically as a count of how many unbalanced captures we have at any given time
// (really j is an index, but j/2 is the count). First we skip past all of the real captures
// until we find a balance captures. Then we check each subsequent entry. If it's a balance
// capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
// it down to the last free position.
for (int cap = 0; cap < _matchcount.Length; cap++) {
int limit;
int[] matcharray;
limit = _matchcount[cap] * 2;
matcharray = _matches[cap];
int i = 0;
int j;
for (i = 0; i < limit; i++) {
if (matcharray[i] < 0)
break;
}
for (j = i; i < limit; i++) {
if (matcharray[i] < 0) {
// skip negative values
j--;
}
else {
// but if we find something positive (an actual capture), copy it back to the last
// unbalanced position.
if (i != j)
matcharray[j] = matcharray[i];
j++;
}
}
_matchcount[cap] = j / 2;
}
_balancing = false;
}
}
#if DBG
/// <internalonly/>
/// <devdoc>
/// </devdoc>
public bool Debug {
get {
if (_regex == null)
return false;
return _regex.Debug;
}
}
/// <internalonly/>
/// <devdoc>
/// </devdoc>
internal virtual void Dump() {
int i,j;
for (i = 0; i < _matchcount.Length; i++) {
System.Diagnostics.Debug.WriteLine("Capnum " + i.ToString(CultureInfo.InvariantCulture) + ":");
for (j = 0; j < _matchcount[i]; j++) {
String text = "";
if (_matches[i][j * 2] >= 0)
text = _text.Substring(_matches[i][j * 2], _matches[i][j * 2 + 1]);
System.Diagnostics.Debug.WriteLine(" (" + _matches[i][j * 2].ToString(CultureInfo.InvariantCulture) + "," + _matches[i][j * 2 + 1].ToString(CultureInfo.InvariantCulture) + ") " + text);
}
}
}
#endif
}
/*
* MatchSparse is for handling the case where slots are
* sparsely arranged (e.g., if somebody says use slot 100000)
*/
internal class MatchSparse : Match {
// the lookup hashtable
#if SILVERLIGHT
new internal Dictionary<Int32, Int32> _caps;
#else
new internal Hashtable _caps;
#endif
/*
* Nonpublic constructor
*/
#if SILVERLIGHT
internal MatchSparse(Regex regex, Dictionary<Int32, Int32> caps, int capcount,
#else
internal MatchSparse(Regex regex, Hashtable caps, int capcount,
#endif
String text, int begpos, int len, int startpos)
: base(regex, capcount, text, begpos, len, startpos) {
_caps = caps;
}
public override GroupCollection Groups {
get {
if (_groupcoll == null)
_groupcoll = new GroupCollection(this, _caps);
return _groupcoll;
}
}
#if DBG
internal override void Dump() {
if (_caps != null) {
#if SILVERLIGHT
IEnumerator<Int32> e = _caps.Keys.GetEnumerator();
#else
IEnumerator e = _caps.Keys.GetEnumerator();
#endif
while (e.MoveNext()) {
System.Diagnostics.Debug.WriteLine("Slot " + e.Current.ToString() + " -> " + _caps[e.Current].ToString());
}
}
base.Dump();
}
#endif
}
}