536cd135cc
Former-commit-id: 5624ac747d633e885131e8349322922b6a59baaa
533 lines
21 KiB
C#
533 lines
21 KiB
C#
//------------------------------------------------------------------------------
|
|
// <copyright file="XPathParser.cs" company="Microsoft">
|
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// </copyright>
|
|
// <owner current="true" primary="true">Microsoft</owner>
|
|
//------------------------------------------------------------------------------
|
|
|
|
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
|
|
namespace System.Xml.Xsl.XPath {
|
|
using Res = System.Xml.Utils.Res;
|
|
using XPathNodeType = System.Xml.XPath.XPathNodeType;
|
|
|
|
internal class XPathParser<Node> {
|
|
private XPathScanner scanner;
|
|
private IXPathBuilder<Node> builder;
|
|
private Stack<int> posInfo = new Stack<int>();
|
|
|
|
// Six possible causes of exceptions in the builder:
|
|
// 1. Undefined prefix in a node test.
|
|
// 2. Undefined prefix in a variable reference, or unknown variable.
|
|
// 3. Undefined prefix in a function call, or unknown function, or wrong number/types of arguments.
|
|
// 4. Argument of Union operator is not a node-set.
|
|
// 5. First argument of Predicate is not a node-set.
|
|
// 6. Argument of Axis is not a node-set.
|
|
|
|
public Node Parse(XPathScanner scanner, IXPathBuilder<Node> builder, LexKind endLex) {
|
|
Debug.Assert(this.scanner == null && this.builder == null);
|
|
Debug.Assert(scanner != null && builder != null);
|
|
|
|
Node result = default(Node);
|
|
this.scanner = scanner;
|
|
this.builder = builder;
|
|
this.posInfo.Clear();
|
|
|
|
try {
|
|
builder.StartBuild();
|
|
result = ParseExpr();
|
|
scanner.CheckToken(endLex);
|
|
}
|
|
catch (XPathCompileException e) {
|
|
if (e.queryString == null) {
|
|
e.queryString = scanner.Source;
|
|
PopPosInfo(out e.startChar, out e.endChar);
|
|
}
|
|
throw;
|
|
}
|
|
finally {
|
|
result = builder.EndBuild(result);
|
|
#if DEBUG
|
|
this.builder = null;
|
|
this.scanner = null;
|
|
#endif
|
|
}
|
|
Debug.Assert(posInfo.Count == 0, "PushPosInfo() and PopPosInfo() calls have been unbalanced");
|
|
return result;
|
|
}
|
|
|
|
#region Location paths and node tests
|
|
/**************************************************************************************************/
|
|
/* Location paths and node tests */
|
|
/**************************************************************************************************/
|
|
|
|
internal static bool IsStep(LexKind lexKind) {
|
|
return (
|
|
lexKind == LexKind.Dot ||
|
|
lexKind == LexKind.DotDot ||
|
|
lexKind == LexKind.At ||
|
|
lexKind == LexKind.Axis ||
|
|
lexKind == LexKind.Star ||
|
|
lexKind == LexKind.Name // NodeTest is also Name
|
|
);
|
|
}
|
|
|
|
/*
|
|
* LocationPath ::= RelativeLocationPath | '/' RelativeLocationPath? | '//' RelativeLocationPath
|
|
*/
|
|
private Node ParseLocationPath() {
|
|
if (scanner.Kind == LexKind.Slash) {
|
|
scanner.NextLex();
|
|
Node opnd = builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null);
|
|
|
|
if (IsStep(scanner.Kind)) {
|
|
opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
|
|
}
|
|
return opnd;
|
|
} else if (scanner.Kind == LexKind.SlashSlash) {
|
|
scanner.NextLex();
|
|
return builder.JoinStep(
|
|
builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null),
|
|
builder.JoinStep(
|
|
builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
|
|
ParseRelativeLocationPath()
|
|
)
|
|
);
|
|
} else {
|
|
return ParseRelativeLocationPath();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* RelativeLocationPath ::= Step (('/' | '//') Step)*
|
|
*/
|
|
//Max depth to avoid StackOverflow
|
|
const int MaxParseRelativePathDepth = 1024;
|
|
private int parseRelativePath = 0;
|
|
private Node ParseRelativeLocationPath() {
|
|
if (++parseRelativePath > MaxParseRelativePathDepth) {
|
|
if (System.Xml.XmlConfiguration.XsltConfigSection.LimitXPathComplexity) {
|
|
throw scanner.CreateException(System.Xml.Utils.Res.Xslt_InputTooComplex);
|
|
}
|
|
}
|
|
Node opnd = ParseStep();
|
|
if (scanner.Kind == LexKind.Slash) {
|
|
scanner.NextLex();
|
|
opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
|
|
} else if (scanner.Kind == LexKind.SlashSlash) {
|
|
scanner.NextLex();
|
|
opnd = builder.JoinStep(opnd,
|
|
builder.JoinStep(
|
|
builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
|
|
ParseRelativeLocationPath()
|
|
)
|
|
);
|
|
}
|
|
--parseRelativePath;
|
|
return opnd;
|
|
}
|
|
|
|
/*
|
|
* Step ::= '.' | '..' | (AxisName '::' | '@')? NodeTest Predicate*
|
|
*/
|
|
private Node ParseStep() {
|
|
Node opnd;
|
|
if (LexKind.Dot == scanner.Kind) { // '.'
|
|
scanner.NextLex();
|
|
opnd = builder.Axis(XPathAxis.Self, XPathNodeType.All, null, null);
|
|
if (LexKind.LBracket == scanner.Kind) {
|
|
throw scanner.CreateException(Res.XPath_PredicateAfterDot);
|
|
}
|
|
} else if (LexKind.DotDot == scanner.Kind) { // '..'
|
|
scanner.NextLex();
|
|
opnd = builder.Axis(XPathAxis.Parent, XPathNodeType.All, null, null);
|
|
if (LexKind.LBracket == scanner.Kind) {
|
|
throw scanner.CreateException(Res.XPath_PredicateAfterDotDot);
|
|
}
|
|
} else { // (AxisName '::' | '@')? NodeTest Predicate*
|
|
XPathAxis axis;
|
|
switch (scanner.Kind) {
|
|
case LexKind.Axis: // AxisName '::'
|
|
axis = scanner.Axis;
|
|
scanner.NextLex();
|
|
scanner.NextLex();
|
|
break;
|
|
case LexKind.At: // '@'
|
|
axis = XPathAxis.Attribute;
|
|
scanner.NextLex();
|
|
break;
|
|
case LexKind.Name:
|
|
case LexKind.Star:
|
|
// NodeTest must start with Name or '*'
|
|
axis = XPathAxis.Child;
|
|
break;
|
|
default:
|
|
throw scanner.CreateException(Res.XPath_UnexpectedToken, scanner.RawValue);
|
|
}
|
|
|
|
opnd = ParseNodeTest(axis);
|
|
|
|
while (LexKind.LBracket == scanner.Kind) {
|
|
opnd = builder.Predicate(opnd, ParsePredicate(), IsReverseAxis(axis));
|
|
}
|
|
}
|
|
return opnd;
|
|
}
|
|
|
|
private static bool IsReverseAxis(XPathAxis axis) {
|
|
return (
|
|
axis == XPathAxis.Ancestor || axis == XPathAxis.Preceding ||
|
|
axis == XPathAxis.AncestorOrSelf || axis == XPathAxis.PrecedingSibling
|
|
);
|
|
}
|
|
|
|
/*
|
|
* NodeTest ::= NameTest | ('comment' | 'text' | 'node') '(' ')' | 'processing-instruction' '(' Literal? ')'
|
|
* NameTest ::= '*' | NCName ':' '*' | QName
|
|
*/
|
|
private Node ParseNodeTest(XPathAxis axis) {
|
|
XPathNodeType nodeType;
|
|
string nodePrefix, nodeName;
|
|
|
|
int startChar = scanner.LexStart;
|
|
InternalParseNodeTest(scanner, axis, out nodeType, out nodePrefix, out nodeName);
|
|
PushPosInfo(startChar, scanner.PrevLexEnd);
|
|
Node result = builder.Axis(axis, nodeType, nodePrefix, nodeName);
|
|
PopPosInfo();
|
|
return result;
|
|
}
|
|
|
|
private static bool IsNodeType(XPathScanner scanner) {
|
|
return scanner.Prefix.Length == 0 && (
|
|
scanner.Name == "node" ||
|
|
scanner.Name == "text" ||
|
|
scanner.Name == "processing-instruction" ||
|
|
scanner.Name == "comment"
|
|
);
|
|
}
|
|
|
|
private static XPathNodeType PrincipalNodeType(XPathAxis axis) {
|
|
return (
|
|
axis == XPathAxis.Attribute ? XPathNodeType.Attribute :
|
|
axis == XPathAxis.Namespace ? XPathNodeType.Namespace :
|
|
/*else*/ XPathNodeType.Element
|
|
);
|
|
}
|
|
|
|
internal static void InternalParseNodeTest(XPathScanner scanner, XPathAxis axis, out XPathNodeType nodeType, out string nodePrefix, out string nodeName) {
|
|
switch (scanner.Kind) {
|
|
case LexKind.Name :
|
|
if (scanner.CanBeFunction && IsNodeType(scanner)) {
|
|
nodePrefix = null;
|
|
nodeName = null;
|
|
switch (scanner.Name) {
|
|
case "comment": nodeType = XPathNodeType.Comment; break;
|
|
case "text" : nodeType = XPathNodeType.Text; break;
|
|
case "node" : nodeType = XPathNodeType.All; break;
|
|
default:
|
|
Debug.Assert(scanner.Name == "processing-instruction");
|
|
nodeType = XPathNodeType.ProcessingInstruction;
|
|
break;
|
|
}
|
|
|
|
scanner.NextLex();
|
|
scanner.PassToken(LexKind.LParens);
|
|
|
|
if (nodeType == XPathNodeType.ProcessingInstruction) {
|
|
if (scanner.Kind != LexKind.RParens) { // 'processing-instruction' '(' Literal ')'
|
|
scanner.CheckToken(LexKind.String);
|
|
// It is not needed to set nodePrefix here, but for our current implementation
|
|
// comparing whole QNames is faster than comparing just local names
|
|
nodePrefix = string.Empty;
|
|
nodeName = scanner.StringValue;
|
|
scanner.NextLex();
|
|
}
|
|
}
|
|
|
|
scanner.PassToken(LexKind.RParens);
|
|
} else {
|
|
nodePrefix = scanner.Prefix;
|
|
nodeName = scanner.Name;
|
|
nodeType = PrincipalNodeType(axis);
|
|
scanner.NextLex();
|
|
if (nodeName == "*") {
|
|
nodeName = null;
|
|
}
|
|
}
|
|
break;
|
|
case LexKind.Star :
|
|
nodePrefix = null;
|
|
nodeName = null;
|
|
nodeType = PrincipalNodeType(axis);
|
|
scanner.NextLex();
|
|
break;
|
|
default :
|
|
throw scanner.CreateException(Res.XPath_NodeTestExpected, scanner.RawValue);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Predicate ::= '[' Expr ']'
|
|
*/
|
|
private Node ParsePredicate() {
|
|
scanner.PassToken(LexKind.LBracket);
|
|
Node opnd = ParseExpr();
|
|
scanner.PassToken(LexKind.RBracket);
|
|
return opnd;
|
|
}
|
|
#endregion
|
|
|
|
#region Expressions
|
|
/**************************************************************************************************/
|
|
/* Expressions */
|
|
/**************************************************************************************************/
|
|
|
|
/*
|
|
* Expr ::= OrExpr
|
|
* OrExpr ::= AndExpr ('or' AndExpr)*
|
|
* AndExpr ::= EqualityExpr ('and' EqualityExpr)*
|
|
* EqualityExpr ::= RelationalExpr (('=' | '!=') RelationalExpr)*
|
|
* RelationalExpr ::= AdditiveExpr (('<' | '>' | '<=' | '>=') AdditiveExpr)*
|
|
* AdditiveExpr ::= MultiplicativeExpr (('+' | '-') MultiplicativeExpr)*
|
|
* MultiplicativeExpr ::= UnaryExpr (('*' | 'div' | 'mod') UnaryExpr)*
|
|
* UnaryExpr ::= ('-')* UnionExpr
|
|
*/
|
|
private Node ParseExpr() {
|
|
return ParseSubExpr(/*callerPrec:*/0);
|
|
}
|
|
|
|
//Max depth to avoid StackOverflow
|
|
//limit the recursive call from ParseSubExpr -> ParseSubExpr
|
|
//and also ParseSubExpr->ParseUnionExpr->ParsePathExpr->...->ParseExpr->ParseSubExpr
|
|
const int MaxParseSubExprDepth = 1024;
|
|
private int parseSubExprDepth = 0;
|
|
private Node ParseSubExpr(int callerPrec) {
|
|
if (++parseSubExprDepth > MaxParseSubExprDepth) {
|
|
if (System.Xml.XmlConfiguration.XsltConfigSection.LimitXPathComplexity) {
|
|
throw scanner.CreateException(System.Xml.Utils.Res.Xslt_InputTooComplex);
|
|
}
|
|
}
|
|
|
|
XPathOperator op;
|
|
Node opnd;
|
|
|
|
// Check for unary operators
|
|
if (scanner.Kind == LexKind.Minus) {
|
|
op = XPathOperator.UnaryMinus;
|
|
int opPrec = XPathOperatorPrecedence[(int)op];
|
|
scanner.NextLex();
|
|
opnd = builder.Operator(op, ParseSubExpr(opPrec), default(Node));
|
|
} else {
|
|
opnd = ParseUnionExpr();
|
|
}
|
|
|
|
// Process binary operators
|
|
while (true) {
|
|
op = (scanner.Kind <= LexKind.LastOperator) ? (XPathOperator)scanner.Kind : XPathOperator.Unknown;
|
|
int opPrec = XPathOperatorPrecedence[(int)op];
|
|
if (opPrec <= callerPrec) {
|
|
break;
|
|
}
|
|
|
|
// Operator's precedence is greater than the one of our caller, so process it here
|
|
scanner.NextLex();
|
|
opnd = builder.Operator(op, opnd, ParseSubExpr(/*callerPrec:*/opPrec));
|
|
}
|
|
--parseSubExprDepth;
|
|
return opnd;
|
|
}
|
|
|
|
private static int[] XPathOperatorPrecedence = {
|
|
/*Unknown */ 0,
|
|
/*Or */ 1,
|
|
/*And */ 2,
|
|
/*Eq */ 3,
|
|
/*Ne */ 3,
|
|
/*Lt */ 4,
|
|
/*Le */ 4,
|
|
/*Gt */ 4,
|
|
/*Ge */ 4,
|
|
/*Plus */ 5,
|
|
/*Minus */ 5,
|
|
/*Multiply */ 6,
|
|
/*Divide */ 6,
|
|
/*Modulo */ 6,
|
|
/*UnaryMinus */ 7,
|
|
/*Union */ 8, // Not used
|
|
};
|
|
|
|
/*
|
|
* UnionExpr ::= PathExpr ('|' PathExpr)*
|
|
*/
|
|
private Node ParseUnionExpr() {
|
|
int startChar = scanner.LexStart;
|
|
Node opnd1 = ParsePathExpr();
|
|
|
|
if (scanner.Kind == LexKind.Union) {
|
|
PushPosInfo(startChar, scanner.PrevLexEnd);
|
|
opnd1 = builder.Operator(XPathOperator.Union, default(Node), opnd1);
|
|
PopPosInfo();
|
|
|
|
while (scanner.Kind == LexKind.Union) {
|
|
scanner.NextLex();
|
|
startChar = scanner.LexStart;
|
|
Node opnd2 = ParsePathExpr();
|
|
PushPosInfo(startChar, scanner.PrevLexEnd);
|
|
opnd1 = builder.Operator(XPathOperator.Union, opnd1, opnd2);
|
|
PopPosInfo();
|
|
}
|
|
}
|
|
return opnd1;
|
|
}
|
|
|
|
/*
|
|
* PathExpr ::= LocationPath | FilterExpr (('/' | '//') RelativeLocationPath )?
|
|
*/
|
|
private Node ParsePathExpr() {
|
|
// Here we distinguish FilterExpr from LocationPath - the former starts with PrimaryExpr
|
|
if (IsPrimaryExpr()) {
|
|
int startChar = scanner.LexStart;
|
|
Node opnd = ParseFilterExpr();
|
|
int endChar = scanner.PrevLexEnd;
|
|
|
|
if (scanner.Kind == LexKind.Slash) {
|
|
scanner.NextLex();
|
|
PushPosInfo(startChar, endChar);
|
|
opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
|
|
PopPosInfo();
|
|
} else if (scanner.Kind == LexKind.SlashSlash) {
|
|
scanner.NextLex();
|
|
PushPosInfo(startChar, endChar);
|
|
opnd = builder.JoinStep(opnd,
|
|
builder.JoinStep(
|
|
builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
|
|
ParseRelativeLocationPath()
|
|
)
|
|
);
|
|
PopPosInfo();
|
|
}
|
|
return opnd;
|
|
} else {
|
|
return ParseLocationPath();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* FilterExpr ::= PrimaryExpr Predicate*
|
|
*/
|
|
private Node ParseFilterExpr() {
|
|
int startChar = scanner.LexStart;
|
|
Node opnd = ParsePrimaryExpr();
|
|
int endChar = scanner.PrevLexEnd;
|
|
|
|
while (scanner.Kind == LexKind.LBracket) {
|
|
PushPosInfo(startChar, endChar);
|
|
opnd = builder.Predicate(opnd, ParsePredicate(), /*reverseStep:*/false);
|
|
PopPosInfo();
|
|
}
|
|
return opnd;
|
|
}
|
|
|
|
private bool IsPrimaryExpr() {
|
|
return (
|
|
scanner.Kind == LexKind.String ||
|
|
scanner.Kind == LexKind.Number ||
|
|
scanner.Kind == LexKind.Dollar ||
|
|
scanner.Kind == LexKind.LParens ||
|
|
scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner)
|
|
);
|
|
}
|
|
|
|
/*
|
|
* PrimaryExpr ::= Literal | Number | VariableReference | '(' Expr ')' | FunctionCall
|
|
*/
|
|
private Node ParsePrimaryExpr() {
|
|
Debug.Assert(IsPrimaryExpr());
|
|
Node opnd;
|
|
switch (scanner.Kind) {
|
|
case LexKind.String:
|
|
opnd = builder.String(scanner.StringValue);
|
|
scanner.NextLex();
|
|
break;
|
|
case LexKind.Number:
|
|
opnd = builder.Number(XPathConvert.StringToDouble(scanner.RawValue));
|
|
scanner.NextLex();
|
|
break;
|
|
case LexKind.Dollar:
|
|
int startChar = scanner.LexStart;
|
|
scanner.NextLex();
|
|
scanner.CheckToken(LexKind.Name);
|
|
PushPosInfo(startChar, scanner.LexStart + scanner.LexSize);
|
|
opnd = builder.Variable(scanner.Prefix, scanner.Name);
|
|
PopPosInfo();
|
|
scanner.NextLex();
|
|
break;
|
|
case LexKind.LParens:
|
|
scanner.NextLex();
|
|
opnd = ParseExpr();
|
|
scanner.PassToken(LexKind.RParens);
|
|
break;
|
|
default:
|
|
Debug.Assert(
|
|
scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner),
|
|
"IsPrimaryExpr() returned true, but the lexeme is not recognized"
|
|
);
|
|
opnd = ParseFunctionCall();
|
|
break;
|
|
}
|
|
return opnd;
|
|
}
|
|
|
|
/*
|
|
* FunctionCall ::= FunctionName '(' (Expr (',' Expr)* )? ')'
|
|
*/
|
|
private Node ParseFunctionCall() {
|
|
List<Node> argList = new List<Node>();
|
|
string name = scanner.Name;
|
|
string prefix = scanner.Prefix;
|
|
int startChar = scanner.LexStart;
|
|
|
|
scanner.PassToken(LexKind.Name);
|
|
scanner.PassToken(LexKind.LParens);
|
|
|
|
if (scanner.Kind != LexKind.RParens) {
|
|
while (true) {
|
|
argList.Add(ParseExpr());
|
|
if (scanner.Kind != LexKind.Comma) {
|
|
scanner.CheckToken(LexKind.RParens);
|
|
break;
|
|
}
|
|
scanner.NextLex(); // move off the ','
|
|
}
|
|
}
|
|
|
|
scanner.NextLex(); // move off the ')'
|
|
PushPosInfo(startChar, scanner.PrevLexEnd);
|
|
Node result = builder.Function(prefix, name, argList);
|
|
PopPosInfo();
|
|
return result;
|
|
}
|
|
#endregion
|
|
|
|
/**************************************************************************************************/
|
|
/* Helper methods */
|
|
/**************************************************************************************************/
|
|
|
|
private void PushPosInfo(int startChar, int endChar) {
|
|
posInfo.Push(startChar);
|
|
posInfo.Push(endChar);
|
|
}
|
|
|
|
private void PopPosInfo() {
|
|
posInfo.Pop();
|
|
posInfo.Pop();
|
|
}
|
|
|
|
private void PopPosInfo(out int startChar, out int endChar) {
|
|
endChar = posInfo.Pop();
|
|
startChar = posInfo.Pop();
|
|
}
|
|
}
|
|
}
|