//------------------------------------------------------------------------------ // // Copyright (c) Microsoft Corporation. All rights reserved. // // [....] //------------------------------------------------------------------------------ namespace MS.Internal.Xml.XPath { using System; using System.Xml; using System.Xml.XPath; using System.Diagnostics; using System.Globalization; using System.Text; using System.Collections; internal sealed class XPathScanner { private string xpathExpr; private int xpathExprIndex; private LexKind kind; private char currentChar; private string name; private string prefix; private string stringValue; private double numberValue = double.NaN; private bool canBeFunction; private XmlCharType xmlCharType = XmlCharType.Instance; public XPathScanner(string xpathExpr) { if (xpathExpr == null) { throw XPathException.Create(Res.Xp_ExprExpected, string.Empty); } this.xpathExpr = xpathExpr; NextChar(); NextLex(); } public string SourceText { get { return this.xpathExpr; } } private char CurerntChar { get { return currentChar; } } private bool NextChar() { Debug.Assert(0 <= xpathExprIndex && xpathExprIndex <= xpathExpr.Length); if (xpathExprIndex < xpathExpr.Length) { currentChar = xpathExpr[xpathExprIndex ++]; return true; } else { currentChar = '\0'; return false; } } #if XML10_FIFTH_EDITION private char PeekNextChar() { Debug.Assert(0 <= xpathExprIndex && xpathExprIndex <= xpathExpr.Length); if (xpathExprIndex < xpathExpr.Length) { return xpathExpr[xpathExprIndex]; } else { Debug.Assert(xpathExprIndex == xpathExpr.Length); return '\0'; } } #endif public LexKind Kind { get { return this.kind; } } public string Name { get { Debug.Assert(this.kind == LexKind.Name || this.kind == LexKind.Axe); Debug.Assert(this.name != null); return this.name; } } public string Prefix { get { Debug.Assert(this.kind == LexKind.Name); Debug.Assert(this.prefix != null); return this.prefix; } } public string StringValue { get { Debug.Assert(this.kind == LexKind.String); Debug.Assert(this.stringValue != null); return this.stringValue; } } public double NumberValue { get { Debug.Assert(this.kind == LexKind.Number); Debug.Assert(this.numberValue != double.NaN); return this.numberValue; } } // To parse PathExpr we need a way to distinct name from function. // THis distinction can't be done without context: "or (1 != 0)" this this a function or 'or' in OrExp public bool CanBeFunction { get { Debug.Assert(this.kind == LexKind.Name); return this.canBeFunction; } } void SkipSpace() { while (xmlCharType.IsWhiteSpace(this.CurerntChar) && NextChar()) ; } public bool NextLex() { SkipSpace(); switch (this.CurerntChar) { case '\0' : kind = LexKind.Eof; return false; case ',': case '@': case '(': case ')': case '|': case '*': case '[': case ']': case '+': case '-': case '=': case '#': case '$': kind = (LexKind) Convert.ToInt32(this.CurerntChar, CultureInfo.InvariantCulture); NextChar(); break; case '<': kind = LexKind.Lt; NextChar(); if (this.CurerntChar == '=') { kind = LexKind.Le; NextChar(); } break; case '>': kind = LexKind.Gt; NextChar(); if (this.CurerntChar == '=') { kind = LexKind.Ge; NextChar(); } break; case '!': kind = LexKind.Bang; NextChar(); if (this.CurerntChar == '=') { kind = LexKind.Ne; NextChar(); } break; case '.': kind = LexKind.Dot; NextChar(); if (this.CurerntChar == '.') { kind = LexKind.DotDot; NextChar(); } else if (XmlCharType.IsDigit(this.CurerntChar)) { kind = LexKind.Number; numberValue = ScanFraction(); } break; case '/': kind = LexKind.Slash; NextChar(); if (this.CurerntChar == '/') { kind = LexKind.SlashSlash; NextChar(); } break; case '"': case '\'': this.kind = LexKind.String; this.stringValue = ScanString(); break; default: if (XmlCharType.IsDigit(this.CurerntChar)) { kind = LexKind.Number; numberValue = ScanNumber(); } else if (xmlCharType.IsStartNCNameSingleChar(this.CurerntChar) #if XML10_FIFTH_EDITION || xmlCharType.IsNCNameHighSurrogateChar(this.CurerntChar) #endif ) { kind = LexKind.Name; this.name = ScanName(); this.prefix = string.Empty; // "foo:bar" is one lexem not three because it doesn't allow spaces in between // We should distinct it from "foo::" and need process "foo ::" as well if (this.CurerntChar == ':') { NextChar(); // can be "foo:bar" or "foo::" if (this.CurerntChar == ':') { // "foo::" NextChar(); kind = LexKind.Axe; } else { // "foo:*", "foo:bar" or "foo: " this.prefix = this.name; if (this.CurerntChar == '*') { NextChar(); this.name = "*"; } else if (xmlCharType.IsStartNCNameSingleChar(this.CurerntChar) #if XML10_FIFTH_EDITION || xmlCharType.IsNCNameHighSurrogateChar(this.CurerntChar) #endif ) { this.name = ScanName(); } else { throw XPathException.Create(Res.Xp_InvalidName, SourceText); } } } else { SkipSpace(); if (this.CurerntChar == ':') { NextChar(); // it can be "foo ::" or just "foo :" if (this.CurerntChar == ':') { NextChar(); kind = LexKind.Axe; } else { throw XPathException.Create(Res.Xp_InvalidName, SourceText); } } } SkipSpace(); this.canBeFunction = (this.CurerntChar == '('); } else { throw XPathException.Create(Res.Xp_InvalidToken, SourceText); } break; } return true; } private double ScanNumber() { Debug.Assert(this.CurerntChar == '.' || XmlCharType.IsDigit(this.CurerntChar)); int start = xpathExprIndex - 1; int len = 0; while (XmlCharType.IsDigit(this.CurerntChar)) { NextChar(); len ++; } if (this.CurerntChar == '.') { NextChar(); len ++; while (XmlCharType.IsDigit(this.CurerntChar)) { NextChar(); len ++; } } return XmlConvert.ToXPathDouble(this.xpathExpr.Substring(start, len)); } private double ScanFraction() { Debug.Assert(XmlCharType.IsDigit(this.CurerntChar)); int start = xpathExprIndex - 2; Debug.Assert(0 <= start && this.xpathExpr[start] == '.'); int len = 1; // '.' while (XmlCharType.IsDigit(this.CurerntChar)) { NextChar(); len ++; } return XmlConvert.ToXPathDouble(this.xpathExpr.Substring(start, len)); } private string ScanString() { char endChar = this.CurerntChar; NextChar(); int start = xpathExprIndex - 1; int len = 0; while(this.CurerntChar != endChar) { if (! NextChar()) { throw XPathException.Create(Res.Xp_UnclosedString); } len ++; } Debug.Assert(this.CurerntChar == endChar); NextChar(); return this.xpathExpr.Substring(start, len); } private string ScanName() { Debug.Assert(xmlCharType.IsStartNCNameSingleChar(this.CurerntChar) #if XML10_FIFTH_EDITION || xmlCharType.IsNCNameHighSurrogateChar(this.CurerntChar) #endif ); int start = xpathExprIndex - 1; int len = 0; for (;;) { if (xmlCharType.IsNCNameSingleChar(this.CurerntChar)) { NextChar(); len ++; } #if XML10_FIFTH_EDITION else if (xmlCharType.IsNCNameSurrogateChar(this.PeekNextChar(), this.CurerntChar)) { NextChar(); NextChar(); len += 2; } #endif else { break; } } return this.xpathExpr.Substring(start, len); } public enum LexKind { Comma = ',', Slash = '/', At = '@', Dot = '.', LParens = '(', RParens = ')', LBracket = '[', RBracket = ']', Star = '*', Plus = '+', Minus = '-', Eq = '=', Lt = '<', Gt = '>', Bang = '!', Dollar = '$', Apos = '\'', Quote = '"', Union = '|', Ne = 'N', // != Le = 'L', // <= Ge = 'G', // >= And = 'A', // && Or = 'O', // || DotDot = 'D', // .. SlashSlash = 'S', // // Name = 'n', // XML _Name String = 's', // Quoted string constant Number = 'd', // _Number constant Axe = 'a', // Axe (like child::) Eof = 'E', }; } }