// ILTokenizer.cs
// Author: Sergey Chaban (serge@wildwestsoftware.com)
using System;
using System.IO;
using System.Text;
using System.Collections;
using System.Globalization;
namespace Mono.ILASM {
/// <summary>
/// Signature of handlers for <see cref="ILTokenizer.NewTokenEvent"/>.
/// </summary>
/// <param name="sender">The object raising the event.</param>
/// <param name="args">Event data carrying the token.</param>
public delegate void NewTokenEvent (object sender, NewTokenEventArgs args);
/// <summary>
/// Event data passed to <see cref="NewTokenEvent"/> handlers.
/// </summary>
public class NewTokenEventArgs : EventArgs {
    /// <summary>The token handed to the constructor.</summary>
    public readonly ILToken Token;

    /// <summary>
    /// Creates event data wrapping <paramref name="token"/>.
    /// </summary>
    /// <param name="token">Token to expose through <see cref="Token"/>.</param>
    public NewTokenEventArgs (ILToken token)
    {
        this.Token = token;
    }
}
/// <summary>
/// Tokenizer for IL assembler input; implements <see cref="ITokenStream"/>.
/// </summary>
public class ILTokenizer : ITokenStream {
// Non-alphanumeric characters that IsIdStartChar/IsIdChar accept in identifiers.
private static readonly string idchars = "_$@?.`";
// Keyword table; assigned from ILTables.Keywords by the static constructor.
private static Hashtable keywords;
// Directive table; assigned from ILTables.Directives by the static constructor.
private static Hashtable directives;
// Copy of the token most recently produced by GetNextToken
// (starts out as a clone of ILToken.Invalid).
private ILToken lastToken;
// Character source that all scanning goes through.
private ILReader reader;
// Helper that GetNextToken uses for identifiers, strings and directive bodies.
private StringHelper strBuilder;
// Helper that GetNextToken uses for numeric tokens.
private NumberHelper numBuilder;
// When true, GetNextToken scans the input as hex bytes; exposed via InByteArray.
private bool in_byte_array;
/// <summary>Raised (through OnNewToken) for each token GetNextToken scans.</summary>
public event NewTokenEvent NewTokenEvent;
// Caches the keyword and directive lookup tables exposed by ILTables.
static ILTokenizer()
{
    ILTokenizer.keywords = ILTables.Keywords;
    ILTokenizer.directives = ILTables.Directives;
}
/// <summary>
/// Creates a tokenizer that reads its characters from <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Stream reader that gets wrapped in an <see cref="ILReader"/>.</param>
public ILTokenizer (StreamReader reader)
{
    // The reader field must be set before the helpers are created,
    // because they receive this tokenizer instance.
    ILReader wrapped = new ILReader (reader);
    this.reader = wrapped;
    this.strBuilder = new StringHelper (this);
    this.numBuilder = new NumberHelper (this);

    // Private copy, so later CopyFrom calls never modify ILToken.Invalid itself.
    object initial = ILToken.Invalid.Clone ();
    this.lastToken = initial as ILToken;
}
/// <summary>The underlying character reader used by this tokenizer.</summary>
public ILReader Reader {
    get { return this.reader; }
}
/// <summary>The location currently reported by the underlying reader.</summary>
public Location Location {
    get {
        Location current = this.reader.Location;
        return current;
    }
}
/// <summary>
/// Gets or sets the flag that makes <see cref="GetNextToken"/> scan hex bytes
/// instead of regular tokens.
/// </summary>
public bool InByteArray {
    get {
        return this.in_byte_array;
    }
    set {
        this.in_byte_array = value;
    }
}
/// <summary>
/// Scans the input and returns the next token.
/// </summary>
/// <remarks>
/// Comments (<c>//</c> and <c>/* */</c>) are skipped, as is every character
/// that none of the scanning branches recognizes. While
/// <see cref="InByteArray"/> is set, the input is scanned as one- or
/// two-digit hex bytes terminated by <c>)</c>.
/// The token is passed to <see cref="NewTokenEvent"/> handlers and copied
/// into <see cref="LastToken"/> before it is returned.
/// </remarks>
/// <returns>
/// The scanned token. At end of input this is a clone of
/// <c>ILToken.EOF</c>; once the last token equals <c>ILToken.EOF</c>,
/// <c>ILToken.EOF</c> itself is returned without raising the event.
/// </returns>
/// <exception cref="ILTokenizingException">
/// In byte-array mode, when a character is neither whitespace, <c>)</c>
/// nor a hex digit, or when a hex digit is followed by something other
/// than a hex digit, whitespace or <c>)</c>.
/// </exception>
public ILToken GetNextToken ()
{
    if (lastToken == ILToken.EOF) return ILToken.EOF;

    int ch;
    int next;
    // Work on a private copy: the branches below that fill in token/val
    // must not modify the shared ILToken.EOF instance.
    ILToken res = ILToken.EOF.Clone () as ILToken;

    while ((ch = reader.Read ()) != -1) {
        // Comments
        if (ch == '/') {
            next = reader.Peek ();
            if (next == '/') {
                // double-slash comment, skip to the end of the line.
                reader.Read (); // the second '/'
                do {
                    next = reader.Read ();
                } while (next != -1 && next != '\n');
                continue;
            } else if (next == '*') {
                // Block comment: skip up to and including the closing "*/".
                // An unterminated comment simply runs to end of input.
                reader.Read (); // the '*'
                while ((next = reader.Read ()) != -1) {
                    if (next == '*' && reader.Peek () == '/') {
                        reader.Read ();
                        break;
                    }
                }
                continue;
            }
        }

        // HEXBYTES are flagged by the parser otherwise it is
        // impossible to figure them out
        if (in_byte_array) {
            string hx = String.Empty;

            if (Char.IsWhiteSpace ((char) ch))
                continue;

            if (ch == ')') {
                res = ILToken.CloseParens;
                break;
            }

            if (!is_hex (ch))
                throw new ILTokenizingException (reader.Location, ((char) ch).ToString ());
            hx += (char) ch;

            // A byte is one or two hex digits; after it only whitespace
            // or the closing parenthesis may follow.
            if (is_hex (reader.Peek ()))
                hx += (char) reader.Read ();
            else if (!Char.IsWhiteSpace ((char) reader.Peek ()) && reader.Peek () != ')')
                throw new ILTokenizingException (reader.Location,
                        ((char) reader.Peek ()).ToString ());

            res.token = Token.HEXBYTE;
            // Source text is machine-readable: parse culture-independently.
            res.val = Byte.Parse (hx, NumberStyles.HexNumber, CultureInfo.InvariantCulture);

            // Swallow trailing whitespace so the next call starts on a
            // significant character.
            while (Char.IsWhiteSpace ((char) reader.Peek ()))
                reader.Read ();
            break;
        }

        // Ellipsis
        if (ch == '.' && reader.Peek () == '.') {
            reader.MarkLocation ();
            int ch2 = reader.Read ();
            if (reader.Peek () == '.') {
                res = ILToken.Ellipsis;
                reader.Read ();
                break;
            }
            // Only two dots: push the second one back.
            reader.Unread (ch2);
            reader.RestoreLocation ();
        }

        if (ch == '.' || ch == '#') {
            next = reader.Peek ();
            if (ch == '.' && Char.IsDigit ((char) next)) {
                // Looks like a number starting with a dot.
                numBuilder.Start (ch);
                reader.Unread (ch);
                numBuilder.Build ();
                if (numBuilder.ResultToken != ILToken.Invalid) {
                    res.CopyFrom (numBuilder.ResultToken);
                    break;
                }
            } else {
                if (strBuilder.Start (next) && strBuilder.TokenId == Token.ID) {
                    reader.MarkLocation ();
                    string dirBody = strBuilder.Build ();
                    string dir = new string ((char) ch, 1) + dirBody;
                    if (IsDirective (dir)) {
                        res = ILTables.Directives [dir] as ILToken;
                    } else {
                        // Not a directive: give the body back and report
                        // only the leading character as a Dot.
                        reader.Unread (dirBody.ToCharArray ());
                        reader.RestoreLocation ();
                        res = ILToken.Dot;
                    }
                } else {
                    res = ILToken.Dot;
                }
                break;
            }
        }

        // Numbers and Hexbytes
        if (numBuilder.Start (ch)) {
            if ((ch == '-') && !(Char.IsDigit ((char) reader.Peek ()))) {
                res = ILToken.Dash;
                break;
            } else {
                reader.Unread (ch);
                numBuilder.Build ();
                if (numBuilder.ResultToken != ILToken.Invalid) {
                    res.CopyFrom (numBuilder.ResultToken);
                    break;
                }
            }
        }

        // Punctuation
        ILToken punct = ILToken.GetPunctuation (ch);
        if (punct != null) {
            if (punct == ILToken.Colon && reader.Peek () == ':') {
                reader.Read ();
                res = ILToken.DoubleColon;
            } else {
                res = punct;
            }
            break;
        }

        // ID | QSTRING | SQSTRING | INSTR_* | KEYWORD
        if (strBuilder.Start (ch)) {
            reader.Unread (ch);
            string val = strBuilder.Build ();
            if (strBuilder.TokenId == Token.ID) {
                ILToken opcode;
                next = reader.Peek ();
                if (next == '.') {
                    reader.MarkLocation ();
                    reader.Read ();
                    next = reader.Peek ();
                    if (IsIdChar ((char) next)) {
                        // Dotted name: either a multi-part opcode or a
                        // composite name.
                        string opTail = BuildId ();
                        string full_str = String.Format ("{0}.{1}", val, opTail);
                        opcode = InstrTable.GetToken (full_str);

                        if (opcode == null) {
                            // NOTE(review): TokenId was checked to be ID just
                            // above and nothing visible here touches
                            // strBuilder since, so the first branch looks
                            // unreachable - confirm before removing.
                            if (strBuilder.TokenId != Token.ID) {
                                reader.Unread (opTail.ToCharArray ());
                                reader.Unread ('.');
                                reader.RestoreLocation ();
                                res.val = val;
                            } else {
                                res.token = Token.COMP_NAME;
                                res.val = full_str;
                            }
                            break;
                        } else {
                            res = opcode;
                            break;
                        }
                    }

                    if (Char.IsWhiteSpace ((char) next)) {
                        // Handle 'tail.' and 'unaligned.'
                        opcode = InstrTable.GetToken (val + ".");
                        if (opcode != null) {
                            res = opcode;
                            break;
                        }
                    }

                    // Let the parser handle the dot. This must happen for
                    // every case in which the dot was not consumed above
                    // (not only before whitespace), otherwise the dot would
                    // silently vanish from the token stream.
                    reader.Unread ('.');
                    reader.RestoreLocation ();
                }
                opcode = InstrTable.GetToken (val);
                if (opcode != null) {
                    res = opcode;
                    break;
                }
                if (IsKeyword (val)) {
                    res = ILTables.Keywords [val] as ILToken;
                    break;
                }
            }

            res.token = strBuilder.TokenId;
            res.val = val;
            break;
        }
    }

    OnNewToken (res);
    lastToken.CopyFrom (res);
    return res;
}
/// <summary>
/// Scans and returns the next token; every read of this property advances
/// the tokenizer by calling <see cref="GetNextToken"/>.
/// </summary>
public ILToken NextToken {
    get {
        ILToken scanned = GetNextToken ();
        return scanned;
    }
}
/// <summary>
/// The tokenizer's record of the token most recently produced by
/// <see cref="GetNextToken"/>.
/// </summary>
public ILToken LastToken {
    get { return this.lastToken; }
}
// Returns true when e is the character code of a hexadecimal digit
// (0-9, A-F or a-f); any other value, including -1, yields false.
bool is_hex (int e)
{
    if (e >= '0' && e <= '9')
        return true;
    if (e >= 'A' && e <= 'F')
        return true;
    return e >= 'a' && e <= 'f';
}
// True when ch is a letter or one of the extra characters listed in idchars.
private static bool IsIdStartChar (char ch)
{
    if (Char.IsLetter (ch))
        return true;
    return idchars.IndexOf (ch) >= 0;
}
// True when ch is a letter, a digit, or one of the extra characters listed in idchars.
private static bool IsIdChar (char ch)
{
    if (Char.IsLetterOrDigit (ch))
        return true;
    return idchars.IndexOf (ch) >= 0;
}
/// <summary>
/// Reports whether <paramref name="name"/> is known to the instruction table.
/// </summary>
/// <param name="name">Candidate instruction name.</param>
/// <returns>The result of <c>InstrTable.IsInstr</c> for <paramref name="name"/>.</returns>
public static bool IsOpcode (string name)
{
    bool known = InstrTable.IsInstr (name);
    return known;
}
/// <summary>
/// Reports whether <paramref name="name"/> is a known directive.
/// </summary>
/// <param name="name">
/// Candidate text including its leading <c>.</c> or <c>#</c>.
/// </param>
/// <returns>
/// <c>true</c> when <paramref name="name"/> starts with <c>.</c> or
/// <c>#</c> and is present in the directive table; <c>false</c>
/// otherwise, including for <c>null</c> and the empty string.
/// </returns>
public static bool IsDirective (string name)
{
    // Indexing name [0] below would throw on null or empty input.
    if (name == null || name.Length == 0)
        return false;

    char ch = name [0];
    if (ch != '.' && ch != '#')
        return false;

    return directives.Contains (name);
}
// Reads a run of identifier characters and dots from the reader and returns
// it. The character that ends the run is pushed back. A trailing dot is
// never kept: it is pushed back as well and removed from the result, both
// when a non-id character ends the run and when the input ends right after
// the dot.
private string BuildId ()
{
    StringBuilder idsb = new StringBuilder ();
    int ch;
    int last = -1;

    while ((ch = reader.Read ()) != -1) {
        if (!IsIdChar ((char) ch) && ch != '.') {
            reader.Unread (ch);
            break;
        }
        idsb.Append ((char) ch);
        last = ch;
    }

    // Never end an id on a DOT. Doing this after the loop covers the
    // end-of-input exit too; the terminator (if any) was pushed back first,
    // so the dot is re-read before it.
    if (last == '.') {
        reader.Unread (last);
        idsb.Length -= 1;
    }

    return idsb.ToString ();
}
/// <summary>
/// Reports whether <paramref name="name"/> is present in the keyword table.
/// </summary>
/// <param name="name">Candidate keyword text.</param>
/// <returns><c>true</c> when the keyword table has <paramref name="name"/> as a key.</returns>
public static bool IsKeyword (string name)
{
    bool found = keywords.ContainsKey (name);
    return found;
}
// Raises NewTokenEvent for the given token; does nothing when nobody is subscribed.
private void OnNewToken (ILToken token)
{
    if (NewTokenEvent == null)
        return;

    NewTokenEventArgs args = new NewTokenEventArgs (token);
    NewTokenEvent (this, args);
}
}
}