You've already forked linux-packaging-mono
							
							
		
			
				
	
	
		
			518 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			518 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| //------------------------------------------------------------------------------
 | |
| // <copyright file="XPathScanner.cs" company="Microsoft">
 | |
| //     Copyright (c) Microsoft Corporation.  All rights reserved.
 | |
| // </copyright>
 | |
| // <owner current="true" primary="true">Microsoft</owner>
 | |
| // <spec>http://www.w3.org/TR/xpath#exprlex</spec>
 | |
| //------------------------------------------------------------------------------
 | |
| 
 | |
| using System.Diagnostics;
 | |
| 
 | |
| namespace System.Xml.Xsl.XPath {
 | |
|     using Res = System.Xml.Utils.Res;
 | |
| 
 | |
// Kinds of lexemes produced by XPathScanner.
// Extends the XPathOperator enumeration: the operator members are declared first
// (presumably in the same order as XPathOperator -- confirm before reordering them).
internal enum LexKind {
    // ---- Operators ----
    Unknown,                    // Unknown lexeme
    Or,                         // Operator 'or'
    And,                        // Operator 'and'
    Eq,                         // Operator '='
    Ne,                         // Operator '!='
    Lt,                         // Operator '<'
    Le,                         // Operator '<='
    Gt,                         // Operator '>'
    Ge,                         // Operator '>='
    Plus,                       // Operator '+'
    Minus,                      // Operator '-'
    Multiply,                   // Operator '*'
    Divide,                     // Operator 'div'
    Modulo,                     // Operator 'mod'
    UnaryMinus,                 // Not used
    Union,                      // Operator '|'
    LastOperator = Union,       // Every kind <= LastOperator is treated as an operator by the scanner

    // ---- Multi-character lexemes that are not single-character punctuation ----
    DotDot,                     // '..'
    ColonColon,                 // '::'
    SlashSlash,                 // Operator '//'
    Number,                     // Number (numeric literal)
    Axis,                       // AxisName

    // ---- Kinds that can be converted to text by XPathScanner.LexKindToString ----
    Name,                       // NameTest, NodeType, FunctionName, AxisName, second part of VariableReference
    String,                     // Literal (string literal)
    Eof,                        // End of the expression

    FirstStringable = Name,     // Lowest kind accepted by CheckToken / LexKindToString
    LastNonChar = Eof,          // Every kind above this one has the value of its own character

    // ---- Single-character lexemes: the enum value is the character code ----
    LParens = '(',
    RParens = ')',
    LBracket = '[',
    RBracket = ']',
    Dot = '.',
    At = '@',
    Comma = ',',

    Star = '*',                 // NameTest
    Slash = '/',                // Operator '/'
    Dollar = '$',               // First part of VariableReference
    RBrace = '}',               // Used for AVTs
}
 | |
| 
 | |
|     internal sealed class XPathScanner {
 | |
|         private string  xpathExpr;
 | |
|         private int     curIndex;
 | |
|         private char    curChar;
 | |
|         private LexKind kind;
 | |
|         private string  name;
 | |
|         private string  prefix;
 | |
|         private string  stringValue;
 | |
|         private bool    canBeFunction;
 | |
|         private int     lexStart;
 | |
|         private int     prevLexEnd;
 | |
|         private LexKind prevKind;
 | |
|         private XPathAxis axis;
 | |
| 
 | |
|         private XmlCharType xmlCharType = XmlCharType.Instance;
 | |
| 
 | |
// Creates a scanner positioned at the first lexeme of the whole expression.
public XPathScanner(string xpathExpr)
    : this(xpathExpr, 0) {
}

// Creates a scanner that starts reading xpathExpr at character index startFrom
// and immediately scans the first lexeme, so Kind is valid right after construction.
public XPathScanner(string xpathExpr, int startFrom) {
    Debug.Assert(xpathExpr != null);

    // No lexeme has been read yet; NextLex() copies this value into prevKind.
    this.kind = LexKind.Unknown;
    this.xpathExpr = xpathExpr;

    SetSourceIndex(startFrom);
    NextLex();
}
 | |
| 
 | |
// The expression text being scanned.
public string Source {
    get { return xpathExpr; }
}

// Kind of the current lexeme.
public LexKind Kind {
    get { return kind; }
}

// Index in Source at which the current lexeme starts.
public int LexStart {
    get { return lexStart; }
}

// Number of characters between LexStart and the current scan position.
public int LexSize {
    get {
        int scanned = curIndex - lexStart;
        return scanned;
    }
}

// Scan position recorded at the moment NextLex() was last entered,
// i.e. where the previous lexeme ended.
public int PrevLexEnd {
    get { return prevLexEnd; }
}
 | |
| 
 | |
// Moves the scan position to 'index' and loads the character found there into curChar
// ('\0' when index is the end of the expression).
private void SetSourceIndex(int index) {
    Debug.Assert(index >= 0 && index <= xpathExpr.Length);

    // NextChar() pre-increments, so step back one position first.
    curIndex = index - 1;
    NextChar();
}
 | |
| 
 | |
// Advances the scan position by one character and refreshes curChar.
// Past the last character curChar becomes '\0', which the scanner uses as the end marker.
private void NextChar() {
    Debug.Assert(curIndex >= -1 && curIndex < xpathExpr.Length);

    curIndex++;

    // The position may reach, but never pass, the end of the expression.
    Debug.Assert(curIndex <= xpathExpr.Length);
    curChar = (curIndex < xpathExpr.Length) ? xpathExpr[curIndex] : '\0';
}
 | |
| 
 | |
#if XML10_FIFTH_EDITION
// Returns the character that follows curChar without moving the scan position,
// or '\0' when there is no such character.
private char PeekNextChar() {
    Debug.Assert(curIndex >= -1 && curIndex <= xpathExpr.Length);

    int nextIndex = curIndex + 1;
    return (nextIndex < xpathExpr.Length) ? xpathExpr[nextIndex] : '\0';
}
#endif
 | |
| 
 | |
// Local part of the current Name lexeme ("*" for a "prefix:*" name test).
// Only meaningful while Kind == LexKind.Name.
public string Name {
    get {
        Debug.Assert(kind == LexKind.Name);
        string localName = name;
        Debug.Assert(localName != null);
        return localName;
    }
}
 | |
| 
 | |
// Prefix of the current Name lexeme; the empty string when the name has no prefix.
// Only meaningful while Kind == LexKind.Name.
public string Prefix {
    get {
        Debug.Assert(kind == LexKind.Name);
        string namePrefix = prefix;
        Debug.Assert(namePrefix != null);
        return namePrefix;
    }
}
 | |
| 
 | |
// Text of the current lexeme exactly as it appears in the expression
// (from LexStart up to the current scan position). At the end of the
// expression there is no text, so the "<eof>" placeholder is returned instead.
public string RawValue {
    get {
        if (kind != LexKind.Eof) {
            int length = curIndex - lexStart;
            return xpathExpr.Substring(lexStart, length);
        }
        return LexKindToString(kind);
    }
}
 | |
| 
 | |
// Contents of the current string literal, without the surrounding quotes.
// Only meaningful while Kind == LexKind.String.
public string StringValue {
    get {
        Debug.Assert(kind == LexKind.String);
        string literal = stringValue;
        Debug.Assert(literal != null);
        return literal;
    }
}
 | |
| 
 | |
// Returns true if the character following a QName (possibly after intervening
// ExprWhitespace) is '('. In that case the token must be recognized as a NodeType
// or a FunctionName unless it is an OperatorName. The scanner cannot settle this
// without knowing the previous lexeme: for example, "or" in "... or (1 != 0)" may
// be an OperatorName or a FunctionName.
// Only meaningful while Kind == LexKind.Name.
public bool CanBeFunction {
    get {
        Debug.Assert(kind == LexKind.Name);
        bool followedByParen = canBeFunction;
        return followedByParen;
    }
}
 | |
| 
 | |
// Axis named by the current lexeme. Only meaningful while Kind == LexKind.Axis,
// in which case the axis has been recognized and is never XPathAxis.Unknown.
public XPathAxis Axis {
    get {
        Debug.Assert(kind == LexKind.Axis);
        XPathAxis recognized = axis;
        Debug.Assert(recognized != XPathAxis.Unknown);
        return recognized;
    }
}
 | |
| 
 | |
// Advances the scan position past any run of whitespace characters.
// Stops at the end of the expression because '\0' is not whitespace for xmlCharType
// (assumed from the loop terminating on the end marker -- see NextChar).
private void SkipSpace() {
    for (;;) {
        if (!xmlCharType.IsWhiteSpace(curChar)) {
            return;
        }
        NextChar();
    }
}
 | |
| 
 | |
// Returns true only for the ten ASCII digits '0'..'9'
// (digits from other scripts are not accepted).
private static bool IsAsciiDigit(char ch) {
    return '0' <= ch && ch <= '9';
}
 | |
| 
 | |
// Scans the next lexeme of the expression.
// The current scan position and kind are first remembered as PrevLexEnd / prevKind,
// leading whitespace is skipped, and LexStart is set to the first character of the
// new lexeme. Kind (and, depending on the kind, Name/Prefix/CanBeFunction/Axis/StringValue)
// describe the lexeme on return.
// Throws the exception built by CreateException for an unclosed string literal or
// a number written in scientific notation (see ScanString / ScanNumber).
public void NextLex() {
    prevLexEnd = curIndex;
    prevKind = kind;
    SkipSpace();
    lexStart = curIndex;

    switch (curChar) {
        case '\0':
            // End of the expression: the scan position is intentionally not advanced.
            kind = LexKind.Eof;
            return;

        // Punctuation whose LexKind value is the character code itself.
        case '(': case ')': case '[': case ']':
        case '@': case ',': case '$': case '}':
            kind = (LexKind)curChar;
            NextChar();
            return;

        case '.':
            NextChar();
            if (curChar == '.') {
                kind = LexKind.DotDot;
                NextChar();
            } else if (IsAsciiDigit(curChar)) {
                // ".5" is a number: rewind to the dot and scan it as a numeric literal.
                SetSourceIndex(lexStart);
                kind = LexKind.Number;
                ScanNumber();
            } else {
                kind = LexKind.Dot;
            }
            return;

        case ':':
            // A lone ':' is not a lexeme on its own.
            kind = ScanOneOrTwoCharLexeme(':', LexKind.ColonColon, LexKind.Unknown);
            return;

        case '*':
            kind = LexKind.Star;
            NextChar();
            // May reinterpret the star as the multiply operator, depending on the previous lexeme.
            CheckOperator(true);
            return;

        case '/':
            kind = ScanOneOrTwoCharLexeme('/', LexKind.SlashSlash, LexKind.Slash);
            return;

        case '|': kind = LexKind.Union; NextChar(); return;
        case '+': kind = LexKind.Plus;  NextChar(); return;
        case '-': kind = LexKind.Minus; NextChar(); return;
        case '=': kind = LexKind.Eq;    NextChar(); return;

        case '!':
            // '!' is only valid as the first half of '!='.
            kind = ScanOneOrTwoCharLexeme('=', LexKind.Ne, LexKind.Unknown);
            return;

        case '<':
            kind = ScanOneOrTwoCharLexeme('=', LexKind.Le, LexKind.Lt);
            return;

        case '>':
            kind = ScanOneOrTwoCharLexeme('=', LexKind.Ge, LexKind.Gt);
            return;

        case '"':
        case '\'':
            // Kind is set before scanning so it is already String if ScanString throws.
            kind = LexKind.String;
            ScanString();
            return;

        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
        case '8': case '9':
            // Kind is set before scanning so it is already Number if ScanNumber throws.
            kind = LexKind.Number;
            ScanNumber();
            return;

        default:
            if (IsNameStartChar(curChar)) {
                ScanNameLexeme();
            } else {
                kind = LexKind.Unknown;
                NextChar();
            }
            return;
    }
}

// Consumes the current character and, if the character after it equals 'second',
// consumes that one too. Returns twoCharKind in the latter case, oneCharKind otherwise.
private LexKind ScanOneOrTwoCharLexeme(char second, LexKind twoCharKind, LexKind oneCharKind) {
    NextChar();
    if (curChar != second) {
        return oneCharKind;
    }
    NextChar();
    return twoCharKind;
}

// Returns true if ch can start an NCName.
private bool IsNameStartChar(char ch) {
    return xmlCharType.IsStartNCNameSingleChar(ch)
#if XML10_FIFTH_EDITION
        || xmlCharType.IsNCNameHighSurrogateChar(ch)
#endif
        ;
}

// Scans a lexeme that begins with an NCName and classifies it as a Name,
// an operator name ('or', 'and', 'div', 'mod') or an Axis.
//   "foo:bar" or "foo:*" -- one lexeme (no spaces allowed)
//   "foo::" or "foo ::"  -- two lexemes; only "foo" is consumed here, but it is
//                           reported as an AxisName when it names a known axis
//   "foo:?" or "foo :?"  -- lexeme "foo" reported
private void ScanNameLexeme() {
    kind = LexKind.Name;
    name = ScanNCName();
    prefix = string.Empty;
    canBeFunction = false;
    axis = XPathAxis.Unknown;

    int nameEnd = curIndex;
    bool followedByColonColon = false;

    if (curChar == ':') {
        NextChar();
        if (curChar == ':') {
            // "foo::" -> OperatorName, AxisName. Leave "::" for the next call.
            followedByColonColon = true;
            SetSourceIndex(nameEnd);
        } else if (curChar == '*') {
            // "foo:*"
            NextChar();
            prefix = name;
            name = "*";
        } else if (IsNameStartChar(curChar)) {
            // "foo:bar"
            prefix = name;
            name = ScanNCName();

            // Look ahead for '(' to determine whether the QName can be a FunctionName,
            // then return to the end of the QName.
            int qnameEnd = curIndex;
            SkipSpace();
            canBeFunction = (curChar == '(');
            SetSourceIndex(qnameEnd);
        } else {
            // "foo:?" -> OperatorName, NameTest.
            // Return "foo" and leave ":" to be reported later as an unknown lexeme.
            SetSourceIndex(nameEnd);
        }
    } else {
        SkipSpace();
        if (curChar == ':') {
            // "foo ::" or "foo :?"
            NextChar();
            followedByColonColon = (curChar == ':');
            SetSourceIndex(nameEnd);
        } else {
            // Note: on this path the scan position stays after the skipped whitespace,
            // so LexSize/RawValue include any whitespace that follows the name.
            canBeFunction = (curChar == '(');
        }
    }

    // An operator name takes precedence; otherwise a name followed by "::" may be an axis.
    if (!CheckOperator(false) && followedByColonColon) {
        axis = CheckAxis();
    }
}
 | |
| 
 | |
// Decides whether the lexeme just scanned is an operator and, if so, changes Kind accordingly.
//   star == true  : the lexeme is '*', candidate operator is Multiply.
//   star == false : the lexeme is a name; it is a candidate only if it has no prefix
//                   and is exactly "or", "and", "div" or "mod".
// Returns true when Kind was changed to the operator kind, false when it was left untouched.
private bool CheckOperator(bool star) {
    LexKind operatorKind;

    if (star) {
        operatorKind = LexKind.Multiply;
    } else if (prefix.Length != 0 || name.Length > 3) {
        // Prefixed names and names longer than any operator name cannot be operators.
        return false;
    } else if (name == "or") {
        operatorKind = LexKind.Or;
    } else if (name == "and") {
        operatorKind = LexKind.And;
    } else if (name == "div") {
        operatorKind = LexKind.Divide;
    } else if (name == "mod") {
        operatorKind = LexKind.Modulo;
    } else {
        return false;
    }

    // If there is a preceding token and the preceding token is not one of '@', '::', '(', '[', ','
    // or an Operator, then a '*' must be recognized as a MultiplyOperator and an NCName must be
    // recognized as an OperatorName. LexKind.Unknown (no preceding token) and every operator
    // kind are <= LastOperator.
    if (prevKind <= LexKind.LastOperator) {
        return false;
    }

    // '/', '//' and '$' are handled here as well because their kinds lie above LastOperator.
    bool operandExpected =
        prevKind == LexKind.Slash      ||
        prevKind == LexKind.SlashSlash ||
        prevKind == LexKind.At         ||
        prevKind == LexKind.ColonColon ||
        prevKind == LexKind.LParens    ||
        prevKind == LexKind.LBracket   ||
        prevKind == LexKind.Comma      ||
        prevKind == LexKind.Dollar;
    if (operandExpected) {
        return false;
    }

    kind = operatorKind;
    return true;
}
 | |
| 
 | |
// Maps the current name to an XPath axis.
// When the name is one of the thirteen axis names, Kind becomes LexKind.Axis and the
// matching axis is returned; otherwise Kind is set to LexKind.Name and
// XPathAxis.Unknown is returned.
private XPathAxis CheckAxis() {
    XPathAxis recognized;

    switch (name) {
        case "ancestor":           recognized = XPathAxis.Ancestor;         break;
        case "ancestor-or-self":   recognized = XPathAxis.AncestorOrSelf;   break;
        case "attribute":          recognized = XPathAxis.Attribute;        break;
        case "child":              recognized = XPathAxis.Child;            break;
        case "descendant":         recognized = XPathAxis.Descendant;       break;
        case "descendant-or-self": recognized = XPathAxis.DescendantOrSelf; break;
        case "following":          recognized = XPathAxis.Following;        break;
        case "following-sibling":  recognized = XPathAxis.FollowingSibling; break;
        case "namespace":          recognized = XPathAxis.Namespace;        break;
        case "parent":             recognized = XPathAxis.Parent;           break;
        case "preceding":          recognized = XPathAxis.Preceding;        break;
        case "preceding-sibling":  recognized = XPathAxis.PrecedingSibling; break;
        case "self":               recognized = XPathAxis.Self;             break;
        default:
            // Not an axis name: report the lexeme as an ordinary name.
            this.kind = LexKind.Name;
            return XPathAxis.Unknown;
    }

    this.kind = LexKind.Axis;
    return recognized;
}
 | |
| 
 | |
// Consumes a numeric literal: digits, optionally followed by '.' and more digits
// (the literal may also start with the '.').
// If an exponent part ('e' or 'E', optional sign, digits) follows, it is consumed too
// and then rejected: the exception built by CreateException(Res.XPath_ScientificNotation)
// is thrown, so its position range covers the whole offending literal.
private void ScanNumber() {
    Debug.Assert(curChar == '.' || IsAsciiDigit(curChar));

    SkipAsciiDigits();
    if (curChar == '.') {
        NextChar();
        SkipAsciiDigits();
    }

    bool hasExponent = (curChar == 'e' || curChar == 'E');
    if (hasExponent) {
        NextChar();
        if (curChar == '+' || curChar == '-') {
            NextChar();
        }
        SkipAsciiDigits();
        throw CreateException(Res.XPath_ScientificNotation);
    }
}

// Advances the scan position past a (possibly empty) run of ASCII digits.
private void SkipAsciiDigits() {
    for (; IsAsciiDigit(curChar); NextChar()) {
    }
}
 | |
| 
 | |
// Consumes a string literal. On entry curChar is the opening quote (' or "); the literal
// ends at the next occurrence of that same character. The text between the quotes is
// stored in stringValue and the scan position is left just after the closing quote.
// If there is no closing quote, the scan position is moved to the end of the expression
// and the exception built by CreateException(Res.XPath_UnclosedString) is thrown.
private void ScanString() {
    char quote = curChar;
    int contentStart = curIndex + 1;
    int closingQuote = xpathExpr.IndexOf(quote, contentStart);

    if (closingQuote >= 0) {
        this.stringValue = xpathExpr.Substring(contentStart, closingQuote - contentStart);
        SetSourceIndex(closingQuote + 1);
        return;
    }

    // Move to the end first so that the reported range spans the rest of the expression.
    SetSourceIndex(xpathExpr.Length);
    throw CreateException(Res.XPath_UnclosedString);
}
 | |
| 
 | |
// Consumes an NCName starting at the current character and returns its text.
// On return the scan position is at the first character that is not part of the name.
private string ScanNCName() {
    Debug.Assert(xmlCharType.IsStartNCNameSingleChar(curChar)
#if XML10_FIFTH_EDITION
        || xmlCharType.IsNCNameHighSurrogateChar(curChar)
#endif
        );

    int nameStart = curIndex;

    while (true) {
        if (xmlCharType.IsNCNameSingleChar(curChar)) {
            NextChar();
            continue;
        }
#if XML10_FIFTH_EDITION
        // A surrogate pair that forms a name character occupies two chars.
        if (xmlCharType.IsNCNameSurrogateChar(PeekNextChar(), curChar)) {
            NextChar();
            NextChar();
            continue;
        }
#endif
        break;
    }

    int nameLength = curIndex - nameStart;
    return xpathExpr.Substring(nameStart, nameLength);
}
 | |
| 
 | |
// Requires the current lexeme to be of kind t (throwing as CheckToken does otherwise)
// and then moves on to the next lexeme.
public void PassToken(LexKind t) {
    this.CheckToken(t);
    this.NextLex();
}
 | |
| 
 | |
// Verifies that the current lexeme is of kind t without consuming it.
// On mismatch throws the exception built by CreateException: XPath_EofExpected when the
// end of the expression was required, XPath_TokenExpected otherwise. Both messages
// receive the raw text of the lexeme that was found instead.
// t must be a kind that LexKindToString can render (FirstStringable or above).
public void CheckToken(LexKind t) {
    Debug.Assert(LexKind.FirstStringable <= t);

    if (kind == t) {
        return;
    }

    if (t == LexKind.Eof) {
        throw CreateException(Res.XPath_EofExpected, RawValue);
    }
    throw CreateException(Res.XPath_TokenExpected, LexKindToString(t), RawValue);
}
 | |
| 
 | |
// Returns the text used for a lexeme kind in error messages.
// May be called for the following tokens: Name, String, Eof, Comma, LParens, RParens,
// LBracket, RBracket, RBrace. Single-character kinds render as that character; Name,
// String and Eof render as "<name>", "<string literal>" and "<eof>".
// Any other kind is a programming error: it trips Debug.Fail and yields an empty string.
private string LexKindToString(LexKind t) {
    Debug.Assert(t >= LexKind.FirstStringable);

    if (t > LexKind.LastNonChar) {
        // For these kinds the enum value is the character code itself.
        char ch = (char)t;
        Debug.Assert("()[].@,*/$}".IndexOf(ch) >= 0);
        return new string(ch, 1);
    }

    if (t == LexKind.Name) {
        return "<name>";
    }
    if (t == LexKind.String) {
        return "<string literal>";
    }
    if (t == LexKind.Eof) {
        return "<eof>";
    }

    Debug.Fail("Unexpected LexKind: " + t.ToString());
    return string.Empty;
}
 | |
| 
 | |
// Builds (but does not throw) a compile exception for the expression being scanned.
// The exception receives the expression text, the start of the current lexeme and the
// current scan position, together with the resource id of the message and its arguments.
public XPathCompileException CreateException(string resId, params string[] args) {
    XPathCompileException error =
        new XPathCompileException(xpathExpr, lexStart, curIndex, resId, args);
    return error;
}
 | |
|     }
 | |
| }
 |