You've already forked linux-packaging-mono
							
							
		
			
				
	
	
		
			592 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			592 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| //------------------------------------------------------------------------------
 | |
| // <copyright file="RegexFCD.cs" company="Microsoft">
 | |
| //     Copyright (c) Microsoft Corporation.  All rights reserved.
 | |
| // </copyright>                                                                
 | |
| //------------------------------------------------------------------------------
 | |
| 
 | |
| // This RegexFCD class is internal to the Regex package.
 | |
| // It builds a bunch of FC information (RegexFC) about
 | |
| // the regex for optimization purposes.
 | |
| 
 | |
| // Implementation notes:
 | |
| // 
 | |
| // This step is as simple as walking the tree and emitting
 | |
| // sequences of codes.
 | |
| 
 | |
| namespace System.Text.RegularExpressions {
 | |
| 
 | |
|     using System.Collections;
 | |
|     using System.Globalization;
 | |
|     
 | |
|     internal sealed class RegexFCD {
 | |
|         private int[]      _intStack;
 | |
|         private int        _intDepth;    
 | |
|         private RegexFC[]  _fcStack;
 | |
|         private int        _fcDepth;
 | |
|         private bool    _skipAllChildren;      // don't process any more children at the current level
 | |
|         private bool    _skipchild;            // don't process the current child. 
 | |
|         private bool    _failed = false;
 | |
|         
 | |
|         private const int BeforeChild = 64;
 | |
|         private const int AfterChild = 128;
 | |
| 
 | |
|         // where the regex can be pegged
 | |
| 
 | |
|         internal  const int Beginning  = 0x0001;
 | |
|         internal  const int Bol        = 0x0002;
 | |
|         internal  const int Start      = 0x0004;
 | |
|         internal  const int Eol        = 0x0008;
 | |
|         internal  const int EndZ       = 0x0010;
 | |
|         internal  const int End        = 0x0020;
 | |
|         internal  const int Boundary   = 0x0040;
 | |
|         internal  const int ECMABoundary = 0x0080;
 | |
| 
 | |
|         /*
 | |
|          * This is the one of the only two functions that should be called from outside.
 | |
|          * It takes a RegexTree and computes the set of chars that can start it.
 | |
|          */
 | |
|         internal static RegexPrefix FirstChars(RegexTree t) {
 | |
|             RegexFCD s = new RegexFCD();
 | |
|             RegexFC fc = s.RegexFCFromRegexTree(t);
 | |
| 
 | |
|             if (fc == null || fc._nullable)
 | |
|                 return null;
 | |
|             
 | |
|             CultureInfo culture = ((t._options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
 | |
|             return new RegexPrefix(fc.GetFirstChars(culture), fc.IsCaseInsensitive());
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * This is a related computation: it takes a RegexTree and computes the
 | |
|          * leading substring if it see one. It's quite trivial and gives up easily.
 | |
|          */
 | |
|         internal static RegexPrefix Prefix(RegexTree tree) {
 | |
|             RegexNode curNode;
 | |
|             RegexNode concatNode = null;
 | |
|             int nextChild = 0;
 | |
| 
 | |
|             curNode = tree._root;
 | |
| 
 | |
|             for (;;) {
 | |
|                 switch (curNode._type) {
 | |
|                     case RegexNode.Concatenate:
 | |
|                         if (curNode.ChildCount() > 0) {
 | |
|                             concatNode = curNode;
 | |
|                             nextChild = 0;
 | |
|                         }
 | |
|                         break;
 | |
| 
 | |
|                     case RegexNode.Greedy:
 | |
|                     case RegexNode.Capture:
 | |
|                         curNode = curNode.Child(0);
 | |
|                         concatNode = null;
 | |
|                         continue;
 | |
| 
 | |
|                     case RegexNode.Oneloop:
 | |
|                     case RegexNode.Onelazy:
 | |
|                         if (curNode._m > 0) {
 | |
|                             string pref = String.Empty.PadRight(curNode._m, curNode._ch);
 | |
|                             return new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase));
 | |
|                         }
 | |
|                         else
 | |
|                             return RegexPrefix.Empty;
 | |
|                         
 | |
|                     case RegexNode.One:
 | |
|                         return new RegexPrefix(curNode._ch.ToString(CultureInfo.InvariantCulture), 0 != (curNode._options & RegexOptions.IgnoreCase));
 | |
|                             
 | |
|                     case RegexNode.Multi:
 | |
|                         return new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase));
 | |
| 
 | |
|                     case RegexNode.Bol:
 | |
|                     case RegexNode.Eol:
 | |
|                     case RegexNode.Boundary:
 | |
|                     case RegexNode.ECMABoundary:
 | |
|                     case RegexNode.Beginning:
 | |
|                     case RegexNode.Start:
 | |
|                     case RegexNode.EndZ:
 | |
|                     case RegexNode.End:
 | |
|                     case RegexNode.Empty:
 | |
|                     case RegexNode.Require:
 | |
|                     case RegexNode.Prevent:
 | |
|                         break;
 | |
| 
 | |
|                     default:
 | |
|                         return RegexPrefix.Empty;
 | |
|                 }
 | |
| 
 | |
|                 if (concatNode == null || nextChild >= concatNode.ChildCount())
 | |
|                     return RegexPrefix.Empty;
 | |
| 
 | |
|                 curNode = concatNode.Child(nextChild++);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * Yet another related computation: it takes a RegexTree and computes the
 | |
|          * leading anchors that it encounters.
 | |
|          */
 | |
|         internal static int Anchors(RegexTree tree) {
 | |
|             RegexNode curNode;
 | |
|             RegexNode concatNode = null;
 | |
|             int nextChild = 0;
 | |
|             int result = 0;
 | |
| 
 | |
|             curNode = tree._root;
 | |
| 
 | |
|             for (;;) {
 | |
|                 switch (curNode._type) {
 | |
|                     case RegexNode.Concatenate:
 | |
|                         if (curNode.ChildCount() > 0) {
 | |
|                             concatNode = curNode;
 | |
|                             nextChild = 0;
 | |
|                         }
 | |
|                         break;
 | |
| 
 | |
|                     case RegexNode.Greedy:
 | |
|                     case RegexNode.Capture:
 | |
|                         curNode = curNode.Child(0);
 | |
|                         concatNode = null;
 | |
|                         continue;
 | |
| 
 | |
|                     case RegexNode.Bol:
 | |
|                     case RegexNode.Eol:
 | |
|                     case RegexNode.Boundary:
 | |
|                     case RegexNode.ECMABoundary:
 | |
|                     case RegexNode.Beginning:
 | |
|                     case RegexNode.Start:
 | |
|                     case RegexNode.EndZ:
 | |
|                     case RegexNode.End:
 | |
|                         return result | AnchorFromType(curNode._type);
 | |
| 
 | |
|                     case RegexNode.Empty:
 | |
|                     case RegexNode.Require:
 | |
|                     case RegexNode.Prevent:
 | |
|                         break;
 | |
| 
 | |
|                     default:
 | |
|                         return result;
 | |
|                 }
 | |
| 
 | |
|                 if (concatNode == null || nextChild >= concatNode.ChildCount())
 | |
|                     return result;
 | |
| 
 | |
|                 curNode = concatNode.Child(nextChild++);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * Convert anchor type to anchor bit.
 | |
|          */
 | |
|         private static int AnchorFromType(int type) {
 | |
|             switch (type) {
 | |
|                 case RegexNode.Bol:             return Bol;         
 | |
|                 case RegexNode.Eol:             return Eol;         
 | |
|                 case RegexNode.Boundary:        return Boundary;    
 | |
|                 case RegexNode.ECMABoundary:    return ECMABoundary;
 | |
|                 case RegexNode.Beginning:       return Beginning;   
 | |
|                 case RegexNode.Start:           return Start;       
 | |
|                 case RegexNode.EndZ:            return EndZ;        
 | |
|                 case RegexNode.End:             return End;         
 | |
|                 default:                        return 0;
 | |
|             }
 | |
|         }
 | |
| 
 | |
| #if DBG
 | |
|         internal static String AnchorDescription(int anchors) {
 | |
|             StringBuilder sb = new StringBuilder();
 | |
| 
 | |
|             if (0 != (anchors & Beginning))     sb.Append(", Beginning");
 | |
|             if (0 != (anchors & Start))         sb.Append(", Start");
 | |
|             if (0 != (anchors & Bol))           sb.Append(", Bol");
 | |
|             if (0 != (anchors & Boundary))      sb.Append(", Boundary");
 | |
|             if (0 != (anchors & ECMABoundary))  sb.Append(", ECMABoundary");
 | |
|             if (0 != (anchors & Eol))           sb.Append(", Eol");
 | |
|             if (0 != (anchors & End))           sb.Append(", End");
 | |
|             if (0 != (anchors & EndZ))          sb.Append(", EndZ");
 | |
| 
 | |
|             if (sb.Length >= 2)
 | |
|                 return(sb.ToString(2, sb.Length - 2));
 | |
| 
 | |
|             return "None";
 | |
|         }
 | |
| #endif
 | |
| 
 | |
|         /*
 | |
|          * private constructor; can't be created outside
 | |
|          */
 | |
|         private RegexFCD() {
 | |
|             _fcStack = new RegexFC[32];
 | |
|             _intStack = new int[32];
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * To avoid recursion, we use a simple integer stack.
 | |
|          * This is the push.
 | |
|          */
 | |
|         private void PushInt(int I) {
 | |
|             if (_intDepth >= _intStack.Length) {
 | |
|                 int [] expanded = new int[_intDepth * 2];
 | |
| 
 | |
|                 System.Array.Copy(_intStack, 0, expanded, 0, _intDepth);
 | |
| 
 | |
|                 _intStack = expanded;
 | |
|             }
 | |
| 
 | |
|             _intStack[_intDepth++] = I;
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * True if the stack is empty.
 | |
|          */
 | |
|         private bool IntIsEmpty() {
 | |
|             return _intDepth == 0;
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * This is the pop.
 | |
|          */
 | |
|         private int PopInt() {
 | |
|             return _intStack[--_intDepth];
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|           * We also use a stack of RegexFC objects.
 | |
|           * This is the push.
 | |
|           */
 | |
|         private void PushFC(RegexFC fc) {
 | |
|             if (_fcDepth >= _fcStack.Length) {
 | |
|                 RegexFC[] expanded = new RegexFC[_fcDepth * 2];
 | |
| 
 | |
|                 System.Array.Copy(_fcStack, 0, expanded, 0, _fcDepth);
 | |
|                 _fcStack = expanded;
 | |
|             }
 | |
| 
 | |
|             _fcStack[_fcDepth++] = fc;
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * True if the stack is empty.
 | |
|          */
 | |
|         private bool FCIsEmpty() {
 | |
|             return _fcDepth == 0;
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * This is the pop.
 | |
|          */
 | |
|         private RegexFC PopFC() {
 | |
|             return _fcStack[--_fcDepth];
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * This is the top.
 | |
|          */
 | |
|         private RegexFC TopFC() {
 | |
|             return _fcStack[_fcDepth - 1];
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * The main FC computation. It does a shortcutted depth-first walk
 | |
|          * through the tree and calls CalculateFC to emits code before
 | |
|          * and after each child of an interior node, and at each leaf.
 | |
|          */
 | |
|         private RegexFC RegexFCFromRegexTree(RegexTree tree) {
 | |
|             RegexNode curNode;
 | |
|             int curChild;
 | |
| 
 | |
|             curNode = tree._root;
 | |
|             curChild = 0;
 | |
| 
 | |
|             for (;;) {
 | |
|                 if (curNode._children == null) {
 | |
|                     // This is a leaf node
 | |
|                     CalculateFC(curNode._type, curNode, 0);
 | |
|                 }
 | |
|                 else if (curChild < curNode._children.Count && !_skipAllChildren) {
 | |
|                     // This is an interior node, and we have more children to analyze
 | |
|                     CalculateFC(curNode._type | BeforeChild, curNode, curChild);
 | |
| 
 | |
|                     if (!_skipchild) {
 | |
|                         curNode = (RegexNode)curNode._children[curChild];
 | |
|                         // this stack is how we get a depth first walk of the tree. 
 | |
|                         PushInt(curChild);
 | |
|                         curChild = 0;
 | |
|                     }
 | |
|                     else {
 | |
|                         curChild++;
 | |
|                         _skipchild = false;
 | |
|                     }
 | |
|                     continue;
 | |
|                 }
 | |
|                 
 | |
|                 // This is an interior node where we've finished analyzing all the children, or
 | |
|                 // the end of a leaf node. 
 | |
|                 _skipAllChildren = false;
 | |
| 
 | |
|                 if (IntIsEmpty())
 | |
|                     break;
 | |
| 
 | |
|                 curChild = PopInt();
 | |
|                 curNode = curNode._next;
 | |
| 
 | |
|                 CalculateFC(curNode._type | AfterChild, curNode, curChild);
 | |
|                 if (_failed)
 | |
|                     return null;
 | |
|                 
 | |
|                 curChild++;
 | |
|             }
 | |
| 
 | |
|             if (FCIsEmpty())
 | |
|                 return null; 
 | |
| 
 | |
|             return PopFC();
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * Called in Beforechild to prevent further processing of the current child
 | |
|          */
 | |
|         private void SkipChild() {
 | |
|             _skipchild = true;
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * FC computation and shortcut cases for each node type
 | |
|          */
 | |
|         private void CalculateFC(int NodeType, RegexNode node, int CurIndex) {
 | |
|             bool ci = false;
 | |
|             bool rtl = false;
 | |
| 
 | |
|             if (NodeType <= RegexNode.Ref) {
 | |
|                 if ((node._options & RegexOptions.IgnoreCase) != 0)
 | |
|                     ci = true;
 | |
|                 if ((node._options & RegexOptions.RightToLeft) != 0)
 | |
|                     rtl = true;
 | |
|             }
 | |
| 
 | |
|             switch (NodeType) {
 | |
|                 case RegexNode.Concatenate | BeforeChild:
 | |
|                 case RegexNode.Alternate | BeforeChild:
 | |
|                 case RegexNode.Testref | BeforeChild:
 | |
|                 case RegexNode.Loop | BeforeChild:
 | |
|                 case RegexNode.Lazyloop | BeforeChild:
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Testgroup | BeforeChild:
 | |
|                     if (CurIndex == 0)
 | |
|                         SkipChild();
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Empty:
 | |
|                     PushFC(new RegexFC(true));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Concatenate | AfterChild:
 | |
|                     if (CurIndex != 0) {
 | |
|                         RegexFC child = PopFC();
 | |
|                         RegexFC cumul = TopFC();
 | |
| 
 | |
|                         _failed = !cumul.AddFC(child, true);
 | |
|                     }
 | |
| 
 | |
|                     if (!TopFC()._nullable)
 | |
|                         _skipAllChildren = true;
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Testgroup | AfterChild:
 | |
|                     if (CurIndex > 1) {
 | |
|                         RegexFC child = PopFC();
 | |
|                         RegexFC cumul = TopFC();
 | |
| 
 | |
|                         _failed = !cumul.AddFC(child, false);
 | |
|                     }
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Alternate | AfterChild:
 | |
|                 case RegexNode.Testref | AfterChild:
 | |
|                     if (CurIndex != 0) {
 | |
|                         RegexFC child = PopFC();
 | |
|                         RegexFC cumul = TopFC();
 | |
| 
 | |
|                         _failed = !cumul.AddFC(child, false);
 | |
|                     }
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Loop | AfterChild:
 | |
|                 case RegexNode.Lazyloop | AfterChild:
 | |
|                     if (node._m == 0)
 | |
|                         TopFC()._nullable = true;
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Group | BeforeChild:
 | |
|                 case RegexNode.Group | AfterChild:
 | |
|                 case RegexNode.Capture | BeforeChild:
 | |
|                 case RegexNode.Capture | AfterChild:
 | |
|                 case RegexNode.Greedy | BeforeChild:
 | |
|                 case RegexNode.Greedy | AfterChild:
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Require | BeforeChild:
 | |
|                 case RegexNode.Prevent | BeforeChild:
 | |
|                     SkipChild();
 | |
|                     PushFC(new RegexFC(true));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Require | AfterChild:
 | |
|                 case RegexNode.Prevent | AfterChild:
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.One:
 | |
|                 case RegexNode.Notone:
 | |
|                     PushFC(new RegexFC(node._ch, NodeType == RegexNode.Notone, false, ci));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Oneloop:
 | |
|                 case RegexNode.Onelazy:
 | |
|                     PushFC(new RegexFC(node._ch, false, node._m == 0, ci));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Notoneloop:
 | |
|                 case RegexNode.Notonelazy:
 | |
|                     PushFC(new RegexFC(node._ch, true, node._m == 0, ci));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Multi:
 | |
|                     if (node._str.Length == 0)
 | |
|                         PushFC(new RegexFC(true));
 | |
|                     else if (!rtl)
 | |
|                         PushFC(new RegexFC(node._str[0], false, false, ci));
 | |
|                     else
 | |
|                         PushFC(new RegexFC(node._str[node._str.Length - 1], false, false, ci));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Set:
 | |
|                     PushFC(new RegexFC(node._str, false, ci));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Setloop:
 | |
|                 case RegexNode.Setlazy:
 | |
|                     PushFC(new RegexFC(node._str, node._m == 0, ci));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Ref:
 | |
|                     PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
 | |
|                     break;
 | |
| 
 | |
|                 case RegexNode.Nothing:
 | |
|                 case RegexNode.Bol:
 | |
|                 case RegexNode.Eol:
 | |
|                 case RegexNode.Boundary:
 | |
|                 case RegexNode.Nonboundary:
 | |
|                 case RegexNode.ECMABoundary:
 | |
|                 case RegexNode.NonECMABoundary:
 | |
|                 case RegexNode.Beginning:
 | |
|                 case RegexNode.Start:
 | |
|                 case RegexNode.EndZ:
 | |
|                 case RegexNode.End:
 | |
|                     PushFC(new RegexFC(true));
 | |
|                     break;
 | |
| 
 | |
|                 default:
 | |
|                     throw new ArgumentException(SR.GetString(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     internal sealed class RegexFC {
 | |
|         internal RegexCharClass _cc;
 | |
|         internal bool _nullable;
 | |
|         internal bool _caseInsensitive;
 | |
| 
 | |
|         internal RegexFC(bool nullable) {
 | |
|             _cc = new RegexCharClass();
 | |
|             _nullable = nullable;
 | |
|         }
 | |
| 
 | |
|         internal RegexFC(char ch, bool not, bool nullable, bool caseInsensitive) {
 | |
|             _cc = new RegexCharClass();
 | |
| 
 | |
|             if (not) {
 | |
|                 if (ch > 0)
 | |
|                     _cc.AddRange('\0', (char)(ch - 1));
 | |
|                 if (ch < 0xFFFF)
 | |
|                     _cc.AddRange((char)(ch + 1), '\uFFFF');
 | |
|             }
 | |
|             else {
 | |
|                 _cc.AddRange(ch, ch);
 | |
|             }
 | |
| 
 | |
|             _caseInsensitive = caseInsensitive;
 | |
|             _nullable = nullable;
 | |
|         }
 | |
| 
 | |
|         internal RegexFC(String charClass, bool nullable, bool caseInsensitive) {
 | |
|             _cc = RegexCharClass.Parse(charClass);
 | |
| 
 | |
|             _nullable = nullable;
 | |
|             _caseInsensitive = caseInsensitive;
 | |
|         }
 | |
| 
 | |
|         internal bool AddFC(RegexFC fc, bool concatenate) {
 | |
|             if (!_cc.CanMerge || !fc._cc.CanMerge) {
 | |
|                 return false;
 | |
|             }
 | |
|             
 | |
|             if (concatenate) {
 | |
|                 if (!_nullable)
 | |
|                     return true;
 | |
| 
 | |
|                 if (!fc._nullable)
 | |
|                     _nullable = false;
 | |
|             }
 | |
|             else {
 | |
|                 if (fc._nullable)
 | |
|                     _nullable = true;
 | |
|             }
 | |
| 
 | |
|             _caseInsensitive |= fc._caseInsensitive;
 | |
|             _cc.AddCharClass(fc._cc);
 | |
|             return true;
 | |
|         }
 | |
| 
 | |
|         internal String GetFirstChars(CultureInfo culture) {
 | |
|             if (_caseInsensitive)
 | |
|                 _cc.AddLowercase(culture);
 | |
| 
 | |
|             return _cc.ToStringClass();
 | |
|         }
 | |
|         
 | |
|         internal bool IsCaseInsensitive() {
 | |
|             return _caseInsensitive;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     internal sealed class RegexPrefix {
 | |
|         internal String _prefix;
 | |
|         internal bool _caseInsensitive;
 | |
| 
 | |
|         internal static RegexPrefix _empty = new RegexPrefix(String.Empty, false);
 | |
| 
 | |
|         internal RegexPrefix(String prefix, bool ci) {
 | |
|             _prefix = prefix;
 | |
|             _caseInsensitive = ci;
 | |
|         }
 | |
| 
 | |
|         internal String Prefix {
 | |
|             get {
 | |
|                 return _prefix;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         internal bool CaseInsensitive {
 | |
|             get {
 | |
|                 return _caseInsensitive;
 | |
|             }
 | |
|         }
 | |
|         internal static RegexPrefix Empty {
 | |
|             get {
 | |
|                 return _empty;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 |