427 lines
14 KiB
C#
427 lines
14 KiB
C#
|
//------------------------------------------------------------------------------
|
||
|
// <copyright file="RegexReplacement.cs" company="Microsoft">
|
||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||
|
// </copyright>
|
||
|
//------------------------------------------------------------------------------
|
||
|
|
||
|
// The RegexReplacement class represents a substitution string for
// use when using regexes to search/replace, etc. It's logically
// a sequence of intermixed (1) constant strings and (2) group numbers.
|
||
|
|
||
|
namespace System.Text.RegularExpressions {
|
||
|
|
||
|
using System.Collections;
|
||
|
using System.Collections.Generic;
|
||
|
|
||
|
internal sealed class RegexReplacement {
|
||
|
/*
 * Builds a replacement from an already-parsed replacement pattern.
 *
 * RegexReplacement shares its parser with Regex, so "concat" has to be
 * a Concatenate node whose children are literal text (One / Multi) or
 * references (Ref). Any other node type, at either level, is rejected
 * with an ArgumentException.
 *
 * Consecutive literal children are merged into a single entry of
 * _strings. Every entry of _rules is either an index into _strings
 * (>= 0) or an encoded reference (-Specials - 1 - slot). When _caps is
 * non-null, non-negative slot numbers are translated through it before
 * being encoded.
 */
#if SILVERLIGHT
internal RegexReplacement(String rep, RegexNode concat, Dictionary<Int32, Int32> _caps) {
#else
internal RegexReplacement(String rep, RegexNode concat, Hashtable _caps) {
#endif
    _rep = rep;

    if (concat.Type() != RegexNode.Concatenate)
        throw new ArgumentException(SR.GetString(SR.ReplacementError));

    StringBuilder literal = new StringBuilder();
    List<String> constants = new List<String>();
    List<Int32> ruleList = new List<Int32>();

    for (int index = 0; index < concat.ChildCount(); index++) {
        RegexNode node = concat.Child(index);
        int nodeType = node.Type();

        if (nodeType == RegexNode.Multi) {
            // run of literal characters
            literal.Append(node._str);
        }
        else if (nodeType == RegexNode.One) {
            // single literal character
            literal.Append(node._ch);
        }
        else if (nodeType == RegexNode.Ref) {
            // flush the literal text collected so far as its own rule
            if (literal.Length > 0) {
                ruleList.Add(constants.Count);
                constants.Add(literal.ToString());
                literal.Length = 0;
            }

            int slot = node._m;

            // negative slots bypass the capture map
            if (_caps != null && slot >= 0)
                slot = (int)_caps[slot];

            ruleList.Add(-Specials - 1 - slot);
        }
        else {
            throw new ArgumentException(SR.GetString(SR.ReplacementError));
        }
    }

    // trailing literal text after the last reference
    if (literal.Length > 0) {
        ruleList.Add(constants.Count);
        constants.Add(literal.ToString());
    }

    _strings = constants;
    _rules = ruleList;
}
|
||
|
|
||
|
// replacement pattern text exactly as handed to the constructor
internal string _rep;

// table of string constants
internal List<string> _strings;

// evaluation program: a value >= 0 is an index into _strings, a
// negative value is an encoded slot (-Specials - 1 - slot)
internal List<int> _rules;

// constants for special insertion patterns

// number of special insertion patterns defined below
internal const int Specials = 4;

// emits match.GetLeftSubstring()
internal const int LeftPortion = -1;

// emits match.GetRightSubstring()
internal const int RightPortion = -2;

// emits match.LastGroupToStringImpl()
internal const int LastGroup = -3;

// emits match.GetOriginalString()
internal const int WholeString = -4;
|
||
|
|
||
|
/*
 * Evaluates the substitution pattern against the given Match and
 * appends the resulting text to the StringBuilder, walking the rules
 * from first to last.
 */
private void ReplacementImpl(StringBuilder sb, Match match) {
    foreach (int rule in _rules) {
        if (rule >= 0) {
            // string lookup
            sb.Append(_strings[rule]);
            continue;
        }

        // undo the (-Specials - 1 - slot) encoding applied by the constructor
        int slot = -Specials - 1 - rule;

        if (rule < -Specials) {
            // group lookup
            sb.Append(match.GroupToStringImpl(slot));
            continue;
        }

        // special insertion patterns; an unrecognized slot emits nothing
        if (slot == LeftPortion)
            sb.Append(match.GetLeftSubstring());
        else if (slot == RightPortion)
            sb.Append(match.GetRightSubstring());
        else if (slot == LastGroup)
            sb.Append(match.LastGroupToStringImpl());
        else if (slot == WholeString)
            sb.Append(match.GetOriginalString());
    }
}
|
||
|
|
||
|
/*
 * Right-to-left counterpart of ReplacementImpl: evaluates the
 * substitution pattern against the given Match, walking the rules
 * from last to first, and adds each produced piece to the list.
 */
private void ReplacementImplRTL(List<String> al, Match match) {
    for (int index = _rules.Count; index-- > 0; ) {
        int rule = _rules[index];

        if (rule >= 0) {
            // string lookup
            al.Add(_strings[rule]);
            continue;
        }

        // undo the (-Specials - 1 - slot) encoding applied by the constructor
        int slot = -Specials - 1 - rule;

        if (rule < -Specials) {
            // group lookup
            al.Add(match.GroupToStringImpl(slot));
            continue;
        }

        // special insertion patterns; an unrecognized slot adds nothing
        if (slot == LeftPortion)
            al.Add(match.GetLeftSubstring());
        else if (slot == RightPortion)
            al.Add(match.GetRightSubstring());
        else if (slot == LastGroup)
            al.Add(match.LastGroupToStringImpl());
        else if (slot == WholeString)
            al.Add(match.GetOriginalString());
    }
}
|
||
|
|
||
|
/*
 * The replacement pattern string this instance was constructed with.
 */
internal String Pattern {
    get { return _rep; }
}
|
||
|
|
||
|
/*
 * Evaluates the replacement pattern for a single match and returns
 * the resulting text.
 */
internal String Replacement(Match match) {
    StringBuilder output = new StringBuilder();
    ReplacementImpl(output, match);
    return output.ToString();
}
|
||
|
|
||
|
/*
|
||
|
* Three very similar algorithms appear below: replace (pattern),
|
||
|
* replace (evaluator), and split.
|
||
|
*/
|
||
|
|
||
|
|
||
|
/*
 * Replaces occurrences of the regex in the input string with the
 * evaluated replacement pattern, searching from "startat".
 *
 * "count" limits the number of replacements: 0 returns the input
 * untouched, and -1 never reaches zero while being decremented, so
 * every match is replaced. Values below -1 and a "startat" outside
 * [0, input.Length] raise ArgumentOutOfRangeException.
 *
 * When nothing matches, the input string itself is returned.
 *
 * The right-to-left case collects its pieces in a list and emits them
 * in reverse afterwards, because StringBuilder doesn't handle
 * right-to-left string building directly very well.
 */
internal String Replace(Regex regex, String input, int count, int startat) {
    if (count < -1)
        throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));
    if (startat < 0 || startat > input.Length)
        throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));

    if (count == 0)
        return input;

    Match match = regex.Match(input, startat);

    if (!match.Success)
        return input;

    StringBuilder result = new StringBuilder();

    if (regex.RightToLeft) {
        List<String> pieces = new List<String>();
        int tailStart = input.Length;   // start of the text already accounted for

        while (true) {
            // untouched text between this match and the previously handled one
            int matchEnd = match.Index + match.Length;
            if (matchEnd != tailStart)
                pieces.Add(input.Substring(matchEnd, tailStart - matchEnd));

            tailStart = match.Index;
            ReplacementImplRTL(pieces, match);

            count--;
            if (count == 0)
                break;

            match = match.NextMatch();
            if (!match.Success)
                break;
        }

        // text in front of the leftmost processed match
        if (tailStart > 0)
            result.Append(input, 0, tailStart);

        // pieces were gathered back-to-front; flip them before emitting
        pieces.Reverse();
        foreach (String piece in pieces) {
            result.Append(piece);
        }
    }
    else {
        int copiedUpTo = 0;             // end of the text already accounted for

        while (true) {
            // untouched text between the previous match and this one
            int gap = match.Index - copiedUpTo;
            if (gap != 0)
                result.Append(input, copiedUpTo, gap);

            copiedUpTo = match.Index + match.Length;
            ReplacementImpl(result, match);

            count--;
            if (count == 0)
                break;

            match = match.NextMatch();
            if (!match.Success)
                break;
        }

        // text after the last processed match
        if (copiedUpTo < input.Length)
            result.Append(input, copiedUpTo, input.Length - copiedUpTo);
    }

    return result.ToString();
}
|
||
|
|
||
|
/*
 * Replaces occurrences of the regex in the input string with whatever
 * the replacement evaluator returns for each match, searching from
 * "startat".
 *
 * A null evaluator raises ArgumentNullException. "count" limits the
 * number of replacements: 0 returns the input untouched, and -1 never
 * reaches zero while being decremented, so every match is replaced.
 * Values below -1 and a "startat" outside [0, input.Length] raise
 * ArgumentOutOfRangeException.
 *
 * When nothing matches, the input string itself is returned.
 *
 * The right-to-left case collects its pieces in a list and emits them
 * in reverse afterwards, because StringBuilder doesn't handle
 * right-to-left string building directly very well.
 */
internal static String Replace(MatchEvaluator evaluator, Regex regex,
                               String input, int count, int startat) {
    if (evaluator == null)
        throw new ArgumentNullException("evaluator");
    if (count < -1)
        throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));
    if (startat < 0 || startat > input.Length)
        throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));

    if (count == 0)
        return input;

    Match match = regex.Match(input, startat);

    if (!match.Success)
        return input;

    StringBuilder result = new StringBuilder();

    if (regex.RightToLeft) {
        List<String> pieces = new List<String>();
        int tailStart = input.Length;   // start of the text already accounted for

        while (true) {
            // untouched text between this match and the previously handled one
            int matchEnd = match.Index + match.Length;
            if (matchEnd != tailStart)
                pieces.Add(input.Substring(matchEnd, tailStart - matchEnd));

            tailStart = match.Index;

            pieces.Add(evaluator(match));

            count--;
            if (count == 0)
                break;

            match = match.NextMatch();
            if (!match.Success)
                break;
        }

        // text in front of the leftmost processed match
        if (tailStart > 0)
            result.Append(input, 0, tailStart);

        // pieces were gathered back-to-front; flip them before emitting
        pieces.Reverse();
        foreach (String piece in pieces) {
            result.Append(piece);
        }
    }
    else {
        int copiedUpTo = 0;             // end of the text already accounted for

        while (true) {
            // untouched text between the previous match and this one
            int gap = match.Index - copiedUpTo;
            if (gap != 0)
                result.Append(input, copiedUpTo, gap);

            copiedUpTo = match.Index + match.Length;

            result.Append(evaluator(match));

            count--;
            if (count == 0)
                break;

            match = match.NextMatch();
            if (!match.Success)
                break;
        }

        // text after the last processed match
        if (copiedUpTo < input.Length)
            result.Append(input, copiedUpTo, input.Length - copiedUpTo);
    }

    return result.ToString();
}
|
||
|
|
||
|
/*
 * Splits the input string around matches of the regex, searching from
 * "startat". The text of every matched capture group (group 1 and up)
 * is added to the output after the piece that precedes its match.
 *
 * "count" of 1, or an input with no match, yields a one-element array
 * holding the input. Otherwise at most count - 1 matches are
 * processed; a count of 0 becomes -1 after the decrement and so never
 * stops the loop early. A negative count and a "startat" outside
 * [0, input.Length] raise ArgumentOutOfRangeException.
 *
 * In the right-to-left case the pieces are collected back-to-front
 * and the list is reversed at the end so the array reads forwards.
 */
internal static String[] Split(Regex regex, String input, int count, int startat) {
    if (count < 0)
        throw new ArgumentOutOfRangeException("count", SR.GetString(SR.CountTooSmall));

    if (startat < 0 || startat > input.Length)
        throw new ArgumentOutOfRangeException("startat", SR.GetString(SR.BeginIndexNotNegative));

    if (count == 1)
        return new String[] { input };

    // from here on "count" is the number of matches still allowed
    count -= 1;

    Match match = regex.Match(input, startat);

    if (!match.Success)
        return new String[] { input };

    List<String> parts = new List<String>();

    if (regex.RightToLeft) {
        int tailStart = input.Length;   // start of the text already emitted

        do {
            int matchEnd = match.Index + match.Length;
            parts.Add(input.Substring(matchEnd, tailStart - matchEnd));

            tailStart = match.Index;

            // add all matched capture groups to the list.
            for (int group = 1; group < match.Groups.Count; group++) {
                if (match.IsMatched(group))
                    parts.Add(match.Groups[group].ToString());
            }

            if (--count == 0)
                break;

            match = match.NextMatch();
        } while (match.Success);

        parts.Add(input.Substring(0, tailStart));

        // collected back-to-front; put the list into forward order
        parts.Reverse();
    }
    else {
        int cursor = 0;                 // end of the text already emitted

        do {
            parts.Add(input.Substring(cursor, match.Index - cursor));

            cursor = match.Index + match.Length;

            // add all matched capture groups to the list.
            for (int group = 1; group < match.Groups.Count; group++) {
                if (match.IsMatched(group))
                    parts.Add(match.Groups[group].ToString());
            }

            if (--count == 0)
                break;

            match = match.NextMatch();
        } while (match.Success);

        parts.Add(input.Substring(cursor, input.Length - cursor));
    }

    return parts.ToArray();
}
|
||
|
}
|
||
|
|
||
|
}
|