Jo Shields a575963da9 Imported Upstream version 3.6.0
Former-commit-id: da6be194a6b1221998fc28233f2503bd61dd9d14
2014-08-13 10:39:27 +01:00

522 lines
12 KiB
C#

//
// System.Xml.XPath.Tokenizer.cs / Mono.Xml.Xsl/PatternTokenizer.cs
//
// Author:
// Piers Haken (piersh@friskit.com)
// Atsushi Enomoto (atsushi@ximian.com)
//
// (C) 2002 Piers Haken
// (C) 2005 Novell Inc,
//
// IMPORTANT:
//
// Do not edit PatternTokenizer.cs. It is autogenerated.
//
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.Globalization;
using System.IO;
using System.Text;
using System.Collections;
using System.Xml;
using System.Xml.XPath;
using Mono.Xml.XPath;
#if XSLT_PATTERN
namespace Mono.Xml.Xsl
#else
namespace Mono.Xml.XPath
#endif
{
internal class Tokenizer : yyParser.yyInput
{
// The expression text being tokenized (set once by the constructor).
private string m_rgchInput;
// Index into m_rgchInput of the next character to be read.
private int m_ich;
// Length of m_rgchInput; positions at or beyond it read as EOL.
private int m_cch;
// Code of the current token, as produced by the latest advance ().
private int m_iToken;
// Code of the token produced by the previous advance (). The initial
// Token.EOF value is what IsFirstToken tests for.
private int m_iTokenPrev = Token.EOF;
// Value attached to the current token (number, string or qualified name),
// or null when the token carries no value.
private Object m_objToken;
// Operator flag of the previous token; copied from m_fThisIsOperator at
// the end of advance ().
private bool m_fPrevWasOperator = false;
// Set by ParseToken when the token being scanned puts the tokenizer in
// "operator" context for the token that follows.
private bool m_fThisIsOperator = false;
// Keyword text -> token code lookup, filled by the static constructor
// from s_rgTokenMap.
private static readonly Hashtable s_mapTokens = new Hashtable ();
// Flat list of (token code, keyword text) pairs: even indexes hold the
// code, odd indexes hold the matching keyword.
private static readonly Object [] s_rgTokenMap =
{
// operator names
Token.AND, "and",
Token.OR, "or",
Token.DIV, "div",
Token.MOD, "mod",
// axis names
Token.ANCESTOR, "ancestor",
Token.ANCESTOR_OR_SELF, "ancestor-or-self",
Token.ATTRIBUTE, "attribute",
Token.CHILD, "child",
Token.DESCENDANT, "descendant",
Token.DESCENDANT_OR_SELF, "descendant-or-self",
Token.FOLLOWING, "following",
Token.FOLLOWING_SIBLING, "following-sibling",
Token.NAMESPACE, "namespace",
Token.PARENT, "parent",
Token.PRECEDING, "preceding",
Token.PRECEDING_SIBLING, "preceding-sibling",
Token.SELF, "self",
// node types
Token.COMMENT, "comment",
Token.TEXT, "text",
Token.PROCESSING_INSTRUCTION, "processing-instruction",
Token.NODE, "node",
};
// Sentinel character returned by Peek () / GetChar () once the read
// position is at or past the end of the input.
private const char EOL = '\0';
// Fills s_mapTokens from the flat s_rgTokenMap array, whose entries come
// in (token code, keyword text) pairs; the keyword becomes the key.
static Tokenizer ()
{
	int iPair = 0;
	while (iPair < s_rgTokenMap.Length)
	{
		Object objCode = s_rgTokenMap [iPair];
		Object objKeyword = s_rgTokenMap [iPair + 1];
		s_mapTokens.Add (objKeyword, objCode);
		iPair += 2;
	}
}
// Creates a tokenizer over strInput and positions it on the first
// non-whitespace character. No token is produced until advance () is called.
public Tokenizer (string strInput)
{
	m_cch = strInput.Length;
	m_rgchInput = strInput;
	m_ich = 0;

	// Leading whitespace is never part of a token.
	SkipWhitespace ();
}
// Returns the character iOffset positions ahead of the read position
// without consuming anything, or EOL when that position is at or past
// the end of the input.
private char Peek (int iOffset)
{
	int iPos = m_ich + iOffset;
	return (iPos < m_cch) ? m_rgchInput [iPos] : EOL;
}
// Returns the character at the read position without consuming it,
// or EOL when the input is exhausted.
private char Peek ()
{
	if (m_ich < m_cch)
		return m_rgchInput [m_ich];
	return EOL;
}
// Consumes and returns the character at the read position. At the end
// of the input it returns EOL and leaves the position unchanged.
private char GetChar ()
{
	if (m_ich < m_cch)
	{
		char chRead = m_rgchInput [m_ich];
		m_ich++;
		return chRead;
	}
	return EOL;
}
// Steps the read position back by one character and returns the
// character now under it. Throws XPathException when already at the
// start of the input.
private char PutBack ()
{
	if (m_ich != 0)
	{
		m_ich--;
		return m_rgchInput [m_ich];
	}
	throw new XPathException ("XPath parser returned an error status: invalid tokenizer state.");
}
// Consumes any run of whitespace at the read position.
// Returns true if at least one whitespace character was skipped.
private bool SkipWhitespace ()
{
	bool fSkipped = false;
	while (IsWhitespace (Peek ()))
	{
		GetChar ();
		fSkipped = true;
	}
	return fSkipped;
}
// Scans a number of the form Digits, Digits '.', Digits '.' Digits or
// '.' Digits, stores its value (a double parsed with the invariant
// culture) in m_objToken and returns Token.NUMBER.
private int ParseNumber ()
{
	StringBuilder sbNumber = new StringBuilder ();

	// Integer part (may be empty when the number starts with '.').
	for (char ch = Peek (); IsDigit (ch); ch = Peek ())
		sbNumber.Append (GetChar ());

	// A trailing '.' with no fraction digits ("3.") is deliberately
	// accepted; it is valid per XPath section 3.7, production [30].
	if (Peek () == '.')
	{
		sbNumber.Append (GetChar ());
		for (char ch = Peek (); IsDigit (ch); ch = Peek ())
			sbNumber.Append (GetChar ());
	}

	string strNumber = sbNumber.ToString ();
	m_objToken = Double.Parse (strNumber, NumberFormatInfo.InvariantInfo);
	return Token.NUMBER;
}
// Scans a quoted string literal. The character at the read position is
// taken as the delimiter; everything up to the next occurrence of that
// same character becomes the token value. Returns Token.LITERAL, or
// throws XPathException when the closing delimiter is missing.
private int ParseLiteral ()
{
	char chQuote = GetChar ();
	StringBuilder sbText = new StringBuilder ();

	for (;;)
	{
		char chNext = Peek ();
		if (chNext == chQuote)
			break;
		if (chNext == EOL)
			throw new XPathException ("unmatched "+chQuote+" in expression");
		sbText.Append (GetChar ());
	}

	// Consume the closing delimiter.
	GetChar ();

	m_objToken = sbText.ToString ();
	return Token.LITERAL;
}
// Reads a name starting at the read position: a letter or '_' followed
// by any run of letters, digits, '_', '-' and '.'. Whitespace following
// the name is consumed as well. Returns null, consuming nothing, when
// the current character cannot start a name.
private string ReadIdentifier ()
{
	char chFirst = Peek ();
	if (chFirst != '_' && !Char.IsLetter (chFirst))
		return null;

	StringBuilder sbName = new StringBuilder ();
	sbName.Append (GetChar ());

	for (;;)
	{
		char chNext = Peek ();
		bool fNameChar = chNext == '_' || chNext == '-' || chNext == '.' || Char.IsLetterOrDigit (chNext);
		if (!fNameChar)
			break;
		sbName.Append (GetChar ());
	}

	SkipWhitespace ();
	return sbName.ToString ();
}
// Scans a name at the read position and decides, from what follows it and
// from the previous token, which kind of token it is:
//   name '::'          -> the axis token for that name (the '::' is left unread)
//   prefix ':' '*'     -> Token.QName
//   prefix ':' name    -> Token.FUNCTION_NAME if followed by '(', else Token.QName
//   name in operator position (after a non-operator token)
//                      -> the operator token (and / or / div / mod)
//   name '('           -> a node-type token, or Token.FUNCTION_NAME
//   anything else      -> Token.QName
// The token value is left in m_objToken. Throws XPathException when the
// name is not valid for the position it appears in.
private int ParseIdentifier ()
{
// ReadIdentifier also consumes whitespace after the name, so the Peek
// calls below look at the next significant character.
string strToken = ReadIdentifier ();
// Keyword lookup: null when the name is not one of the reserved words.
Object objToken = s_mapTokens [strToken];
int iToken = (objToken != null) ? (int) objToken : Token.QName;
m_objToken = strToken;
char ch = Peek ();
if (ch == ':')
{
if (Peek (1) == ':')
{
// If the two characters following an NCName (possibly
// after intervening ExprWhitespace) are ::, then the
// token must be recognized as an AxisName.
if (objToken == null || !IsAxisName (iToken))
throw new XPathException ("invalid axis name: '"+strToken+"'");
// The '::' is not consumed here; it is scanned as its own token by
// the next ParseToken call.
return iToken;
}
// Single ':' -- strToken is a prefix. Consume the colon and any
// whitespace after it.
GetChar ();
SkipWhitespace ();
ch = Peek ();
if (ch == '*')
{
// "prefix:*" name test: empty name, with the prefix passed as the
// second XmlQualifiedName argument.
GetChar ();
m_objToken = new XmlQualifiedName ("", strToken);
return Token.QName;
}
string strToken2 = ReadIdentifier ();
if (strToken2 == null)
throw new XPathException ("invalid QName: "+strToken+":"+(char)ch);
ch = Peek ();
// Name is the part after the colon; the prefix goes in the second argument.
m_objToken = new XmlQualifiedName (strToken2, strToken);
if (ch == '(')
return Token.FUNCTION_NAME;
return Token.QName;
}
// If there is a preceding token and the preceding
// token is not one of @, ::, (, [, , or an Operator,
// then a * must be recognized as a MultiplyOperator
// and an NCName must be recognized as an OperatorName.
if (!IsFirstToken && !m_fPrevWasOperator)
{
if (objToken == null || !IsOperatorName (iToken))
throw new XPathException ("invalid operator name: '"+strToken+"'");
return iToken;
}
if (ch == '(')
{
// If the character following an NCName (possibly
// after intervening ExprWhitespace) is (, then the
// token must be recognized as a NodeType or a FunctionName.
if (objToken == null)
{
m_objToken = new XmlQualifiedName (strToken, "");
return Token.FUNCTION_NAME;
}
if (IsNodeType (iToken))
return iToken;
// A reserved word that is not a node type cannot be called as a function.
throw new XPathException ("invalid function name: '"+strToken+"'");
}
// Plain unprefixed name; reserved words are ordinary names in this position.
m_objToken = new XmlQualifiedName (strToken, "");
return Token.QName;
}
// True for the four whitespace characters recognised between tokens:
// space, tab, line feed and carriage return. Char.IsWhiteSpace is not
// used, so other Unicode whitespace is not treated as a separator.
private static bool IsWhitespace (char ch)
{
	switch (ch)
	{
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
// True only for the ASCII digits '0'..'9'. Char.IsDigit is not used,
// so digits from other scripts are not accepted.
private static bool IsDigit (char ch)
{
	if (ch < '0')
		return false;
	return ch <= '9';
}
// Scans the token that starts at the read position and returns its code.
// Sets m_fThisIsOperator for tokens after which a name or '*' must be
// read as an operand rather than as an operator. Numbers, literals and
// names are delegated to ParseNumber, ParseLiteral and ParseIdentifier.
// Throws XPathException for a character that cannot start a token.
int ParseToken ()
{
	char ch = Peek ();
	switch (ch)
	{
	case EOL:
		return Token.EOF;

	// Single-character tokens that switch on operator context.
	case ',':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.COMMA;
	case '@':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.AT;
	case '[':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.BRACKET_OPEN;
	case '(':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.PAREN_OPEN;
	case '+':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.PLUS;
	case '-':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.MINUS;
	case '$':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.DOLLAR;
	case '|':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.BAR;
	case '=':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.EQ;

	// Closing brackets leave operator context off.
	case ']':
		GetChar ();
		return Token.BRACKET_CLOSE;
	case ')':
		GetChar ();
		return Token.PAREN_CLOSE;

	// "/" or "//"
	case '/':
		GetChar ();
		m_fThisIsOperator = true;
		if (Peek () != '/')
			return Token.SLASH;
		GetChar ();
		return Token.SLASH2;

	// ".", ".." or a number that starts with its decimal point
	case '.':
		GetChar ();
		if (Peek () == '.')
		{
			GetChar ();
			return Token.DOT2;
		}
		if (!IsDigit (Peek ()))
			return Token.DOT;
		// Give the '.' back so ParseNumber sees the whole number.
		PutBack ();
		return ParseNumber ();

	// "::"; a lone ':' at the start of a token is an error token
	case ':':
		GetChar ();
		if (Peek () != ':')
			return Token.ERROR;
		GetChar ();
		m_fThisIsOperator = true;
		return Token.COLON2;

	// '*' is a name test at the start of the expression or after an
	// operator-context token, and the multiply operator otherwise.
	case '*':
		GetChar ();
		if (IsFirstToken || m_fPrevWasOperator)
			return Token.ASTERISK;
		m_fThisIsOperator = true;
		return Token.MULTIPLY;

	// "!=" only; a lone '!' falls out of the switch to the error below
	case '!':
		GetChar ();
		if (Peek () != '=')
			break;
		GetChar ();
		m_fThisIsOperator = true;
		return Token.NE;

	// ">" or ">="
	case '>':
		GetChar ();
		m_fThisIsOperator = true;
		if (Peek () != '=')
			return Token.GT;
		GetChar ();
		return Token.GE;

	// "<" or "<="
	case '<':
		GetChar ();
		m_fThisIsOperator = true;
		if (Peek () != '=')
			return Token.LT;
		GetChar ();
		return Token.LE;

	// String literal in either quote style.
	case '\'':
	case '\"':
		return ParseLiteral ();

	default:
		if (IsDigit (ch))
			return ParseNumber ();
		if (ch != '_' && !Char.IsLetter (ch))
			break;
		// NCName
		int iName = ParseIdentifier ();
		if (IsOperatorName (iName))
			m_fThisIsOperator = true;
		return iName;
	}
	throw new XPathException ("invalid token: '"+ch+"'");
}
///////////////////////////
// yyParser.yyInput methods
///////////////////////////
/** Scans the next token and makes it the current one, then skips any
    whitespace that follows it.
    @return false once the end of the input has been reached (the
            current token is Token.EOF), true otherwise.
    @throws XPathException when the input at the current position
            cannot be tokenized.
  */
public bool advance ()
{
	// Reset the per-token state before scanning.
	m_objToken = null;
	m_fThisIsOperator = false;

	int iNext = ParseToken ();
	m_iToken = iNext;

	// Remember this token for the context-sensitive decisions made
	// while scanning the next one.
	m_iTokenPrev = iNext;
	m_fPrevWasOperator = m_fThisIsOperator;

	SkipWhitespace ();
	return iNext != Token.EOF;
}
/** Code of the current token, as stored by the last advance () call.
    Should not be called if advance () returned false.
    @return the current token code.
  */
public int token ()
{
	return this.m_iToken;
}
/** Value attached to the current token by the last advance () call;
    null when the token carries no value.
    Should not be called if advance () returned false.
    @return the value belonging to token ().
  */
public Object value ()
{
	return this.m_objToken;
}
// True while the recorded previous token is Token.EOF, which is the
// field's initial value, i.e. before any token has been scanned.
private bool IsFirstToken
{
	get
	{
		return Token.EOF == m_iTokenPrev;
	}
}
// True when iToken is one of the node-type tokens:
// comment, text, processing-instruction or node.
private bool IsNodeType (int iToken)
{
	return iToken == Token.COMMENT
		|| iToken == Token.TEXT
		|| iToken == Token.PROCESSING_INSTRUCTION
		|| iToken == Token.NODE;
}
// True when iToken is one of the named operators: and, or, mod or div.
private bool IsOperatorName (int iToken)
{
	return iToken == Token.AND
		|| iToken == Token.OR
		|| iToken == Token.MOD
		|| iToken == Token.DIV;
}
// True when iToken is one of the thirteen axis-name tokens.
private bool IsAxisName (int iToken)
{
	return iToken == Token.ATTRIBUTE
		|| iToken == Token.ANCESTOR
		|| iToken == Token.ANCESTOR_OR_SELF
		|| iToken == Token.CHILD
		|| iToken == Token.DESCENDANT
		|| iToken == Token.DESCENDANT_OR_SELF
		|| iToken == Token.FOLLOWING
		|| iToken == Token.FOLLOWING_SIBLING
		|| iToken == Token.NAMESPACE
		|| iToken == Token.PARENT
		|| iToken == Token.PRECEDING
		|| iToken == Token.PRECEDING_SIBLING
		|| iToken == Token.SELF;
}
}
}