From 2421d2c3d11c9c934c0efe5f47dd19cb985ad300 Mon Sep 17 00:00:00 2001
From: Jason Orendorff <jorendorff@mozilla.com>
Date: Fri, 17 Jul 2009 00:48:39 -0500
Subject: [PATCH] Bug 505249 - lirasm - Style cleanup and new tokenizer.
 r=graydon.

---
 js/src/lirasm/lirasm.cpp | 1067 ++++++++++++++++++++------------------
 1 file changed, 575 insertions(+), 492 deletions(-)
diff --git a/js/src/lirasm/lirasm.cpp b/js/src/lirasm/lirasm.cpp
index 060460898f6..f3c22fc77ed 100644
--- a/js/src/lirasm/lirasm.cpp
+++ b/js/src/lirasm/lirasm.cpp
@@ -44,7 +44,6 @@
 #include <iostream>
 #include <sstream>
 #include <fstream>
-#include <cassert>
 
 #ifdef AVMPLUS_UNIX
 #include <sys/types.h>
@@ -54,6 +53,8 @@
 
 #include <stdlib.h>
 #include <math.h>
+#include <ctype.h>
+#include <assert.h>
 
 #include "nanojit/nanojit.h"
 #include "jstracer.h"
@@ -106,6 +107,88 @@ const int PTRRET =
 #endif
     ;
 
+enum LirTokenType {
+    NAME, NUMBER, PUNCT, NEWLINE
+};
+
+struct LirToken {
+    LirTokenType type;
+    string data;
+    int lineno;
+};
+
+inline bool
+startsWith(const string &s, const string &prefix)
+{
+    return prefix.size() <= s.size() && s.compare(0, prefix.size(), prefix) == 0;
+}
+
+// Tokenizer for LIR source text. LIR files must be ASCII, for simplicity.
+class LirTokenStream {
+public:
+    LirTokenStream(istream &in) : mIn(in), mLineno(0) {}
+    // Fetch the next token into |token|; false at end of input or on a bad character.
+    bool get(LirToken &token) {
+        if (mLine.empty()) {
+            if (!getline(mIn, mLine))
+                return false;
+            mLine += '\n';  // sentinel: keeps mLine non-empty until a NEWLINE token is produced
+            mLineno++;
+        }
+        mLine.erase(0, mLine.find_first_not_of(" \t\v\r"));
+        char c = mLine[0];
+        size_t e = mLine.find_first_not_of("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$.+-");
+        if (startsWith(mLine, "->")) {
+            mLine.erase(0, 2);
+            token.type = PUNCT;
+            token.data = "->";
+        } else if (e > 0) {
+            string s = mLine.substr(0, e);
+            mLine.erase(0, e);
+            if (e > 1 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
+                token.type = NUMBER;
+            else if (isdigit(s[0]) || (e > 1 && s[0] == '.' && isdigit(s[1])))
+                token.type = NUMBER;
+            else
+                token.type = NAME;
+            token.data = s;
+        } else if (c != '\0' && strchr(":,=[]()", c)) {  // strchr also matches the terminator
+            token.type = PUNCT;
+            token.data = c;
+            mLine.erase(0, 1);
+        } else if (c == ';' || c == '\n') {
+            token.type = NEWLINE;
+            token.data.clear();
+            mLine.clear();  // for ';' this discards the rest of the line
+        } else {
+            cerr << mLineno << ": error: Unrecognized character in file." << endl;
+            return false;
+        }
+
+        token.lineno = mLineno;
+        return true;
+    }
+    // Consume one token; true iff it has |type| and, if |exact| is non-NULL, that text.
+    bool eat(LirTokenType type, const char *exact = NULL) {
+        LirToken token;
+        return (get(token) && token.type == type && (exact == NULL || token.data == exact));
+    }
+    // Consume one token; if it is a NAME, store its text in |name| and return true.
+    bool getName(string &name) {
+        LirToken t;
+        if (get(t) && t.type == NAME) {
+            name = t.data;
+            return true;
+        }
+        return false;
+    }
+
+private:
+    istream &mIn;
+    string mLine;   // unconsumed remainder of the current line
+    int mLineno;    // 1-based number of the line held in mLine
+};
+
 class LirasmFragment {
 public:
     union {
@@ -122,23 +205,46 @@ typedef map<string, LirasmFragment> Fragments;
 
 class Lirasm {
 public:
+    Lirasm(bool verbose);
+    ~Lirasm();
+
+    void assemble(istream &in);
+    void lookupFunction(const string &name, CallInfo *&ci);
+
     Fragmento *mFragmento;
     LirBuffer *mLirbuf;
     LogControl mLogc;
     bool mVerbose;
-    avmplus::AvmCore s_core;
     Fragments mFragments;
-    vector<CallInfo*> mCallInfos;
+    map<string, pair<LOpcode, size_t> > mOpMap;
 
-    Lirasm(bool verbose);
-    ~Lirasm();
+    void bad(const string &msg) {
+        cerr << "error: " << msg << endl;
+        exit(1);
+    }
+
+private:
+    void handlePatch(LirTokenStream &in);
+
+    avmplus::AvmCore mCore;
 };
 
-class LirasmAssembler {
+class FragmentAssembler {
+public:
+    FragmentAssembler(Lirasm &parent, const string &fragmentName);
+    ~FragmentAssembler();
+
+    void assembleFragment(LirTokenStream &in, bool implied, const LirToken *firstToken);
+
 private:
+    // Prohibit copying.
+    FragmentAssembler(const FragmentAssembler &);
+    FragmentAssembler & operator=(const FragmentAssembler &);
+
     Lirasm *mParent;
+    const string mFragName;
     Fragment *mFragment;
-    string mFragName;
+    vector<CallInfo*> mCallInfos;
     map<string, LIns*> mLabels;
     LirWriter *mLir;
     LirBufWriter *mBufWriter;
@@ -146,37 +252,26 @@ private:
     LirWriter *mExprFilter;
     LirWriter *mVerboseWriter;
     multimap<string, LIns *> mFwdJumps;
-    map<string,pair<LOpcode,size_t> > op_map;
 
     size_t mLineno;
     LOpcode mOpcode;
     size_t mOpcount;
 
-    bool mInFrag;
-
     char mReturnTypeBits;
     vector<string> mTokens;
 
-    void lookupFunction(const char*, CallInfo *&);
+    void tokenizeLine(LirTokenStream &in, LirToken &token);
     void need(size_t);
-    istream& read_and_tokenize_line(istream&);
-    void tokenize(string const &tok_sep);
-    LIns *ref(string const &);
+    LIns *ref(const string &);
     LIns *do_skip(size_t);
-    LIns *assemble_call(string &);
+    LIns *assemble_call(const string &);
     LIns *assemble_general();
     LIns *assemble_guard();
     LIns *assemble_jump();
     LIns *assemble_load();
-    void bad(string const &msg);
-    void beginFragment();
+    void bad(const string &msg);
+    void extract_any_label(string &lab, char lab_delim);
     void endFragment();
-    void extract_any_label(string &op, string &lab, char lab_delim);
-    void patch();
-
-public:
-    LirasmAssembler(Lirasm &);
-    void assemble(istream &);
 };
 
 Function functions[] = {
@@ -186,66 +281,6 @@ Function functions[] = {
     FN(free, I32 | (PTRARG<<2))
 };
 
-void
-LirasmAssembler::lookupFunction(const char *name, CallInfo *&ci)
-{
-    const size_t nfuns = sizeof(functions) / sizeof(functions[0]);
-    for (size_t i = 0; i < nfuns; i++)
-        if (strcmp(name, functions[i].name) == 0) {
-            *ci = functions[i].callInfo;
-            return;
-        }
-
-    Fragments::const_iterator func = mParent->mFragments.find(name);
-    if (func != mParent->mFragments.end()) {
-        if (func->second.mReturnType == RT_FLOAT) {
-            CallInfo target = {(uintptr_t) func->second.rfloat, ARGSIZE_F, 0,
-                               0, nanojit::ABI_FASTCALL, func->first.c_str()};
-            *ci = target;
-
-        } else {
-            CallInfo target = {(uintptr_t) func->second.rint, ARGSIZE_LO, 0,
-                               0, nanojit::ABI_FASTCALL, func->first.c_str()};
-            *ci = target;
-        }
-    } else {
-    ci = NULL;
-    }
-}
-
-istream &
-LirasmAssembler::read_and_tokenize_line(istream &in)
-{
-    char buf[1024];
-    string tok_sep(" \n\t");
-
-    mTokens.clear();
-
-    if (in.getline(buf,sizeof(buf))) {
-        ++mLineno;
-        string line(buf);
-
-        size_t comment = line.find("//");
-        if (comment != string::npos)
-            line.resize(comment);
-
-        line += '\n';
-
-        size_t start = 0;
-        size_t end = 0;
-        while((start = line.find_first_not_of(tok_sep, end)) != string::npos &&
-              (end = line.find_first_of(tok_sep, start)) != string::npos) {
-            string ss = line.substr(start, (end-start));
-            if (ss == "=") {
-                mTokens[mTokens.size()-1] += ss;
-                continue;
-            }
-            mTokens.push_back(ss);
-        }
-    }
-    return in;
-}
-
 template<typename out, typename in> out
 lexical_cast(in arg)
 {
@@ -258,18 +293,19 @@ lexical_cast(in arg)
 }
 
 int32_t
-imm(string const &s)
+imm(const string &s)
 {
     stringstream tmp(s);
     int32_t ret;
     if ((s.find("0x") == 0 || s.find("0X") == 0) &&
-        (tmp >> hex >> ret && tmp.eof()))
+        (tmp >> hex >> ret && tmp.eof())) {
         return ret;
+    }
     return lexical_cast<int32_t>(s);
 }
 
 uint64_t
-quad(string const &s)
+quad(const string &s)
 {
     stringstream tmp1(s), tmp2(s);
     union {
@@ -292,223 +328,9 @@ pop_front(vector<t> &vec)
         cerr << "pop_front of empty vector" << endl;
         exit(1);
     }
-    t tmp = vec[0];
-    vec.erase(vec.begin());
-    return tmp;
-}
-
-void
-LirasmAssembler::bad(string const &msg)
-{
-    cerr << "instruction " << mLineno << ": " <<  msg << endl;
-    exit(1);
-}
-
-void
-LirasmAssembler::need(size_t n)
-{
-    if (mTokens.size() != n)
-        bad("need " + lexical_cast<string>(n)
-            + " tokens, have " + lexical_cast<string>(mTokens.size()));
-}
-
-LIns*
-LirasmAssembler::ref(string const &lab)
-{
-    if (mLabels.find(lab) == mLabels.end())
-        bad("unknown label '" + lab + "'");
-    return mLabels.find(lab)->second;
-}
-
-LIns*
-LirasmAssembler::do_skip(size_t i)
-{
-    LIns *s = mLir->insSkip(i);
-    memset(s->payload(), 0xba, i);
-    return s;
-}
-
-LIns*
-LirasmAssembler::assemble_jump()
-{
-    LIns *target = NULL;
-    LIns *condition = NULL;
-
-    if (mOpcode == LIR_j) {
-        need(1);
-    } else {
-        need(2);
-        string cond = pop_front(mTokens);
-        condition = ref(cond);
-    }
-    string name = pop_front(mTokens);
-    if (mLabels.find(name) != mLabels.end()) {
-        target = ref(name);
-        return mLir->insBranch(mOpcode, condition, target);
-    } else {
-        LIns *ins = mLir->insBranch(mOpcode, condition, target);
-        mFwdJumps.insert(make_pair(name, ins));
-        return ins;
-    }
-}
-
-LIns*
-LirasmAssembler::assemble_load()
-{
-    // Support implicit immediate-as-second-operand modes
-    // since, unlike sti/stqi, no immediate-displacement
-    // load opcodes were defined in LIR.
-    need(2);
-    if (mTokens[1].find("0x") == 0 ||
-        mTokens[1].find("0x") == 0 ||
-        mTokens[1].find_first_of("0123456789") == 0) {
-        return mLir->insLoad(mOpcode,
-                            ref(mTokens[0]),
-                            imm(mTokens[1]));
-    }
-    bad("immediate offset required for load");
-    return NULL;  // not reached
-}
-
-LIns*
-LirasmAssembler::assemble_call(string &op)
-{
-    CallInfo *ci = new CallInfo();
-    mParent->mCallInfos.push_back(ci);
-    LIns* args[MAXARGS];
-
-    // Assembler syntax for a call:
-    //
-    //   call 0x1234 fastcall a b c
-    //
-    // requires at least 2 args,
-    // fn address immediate and ABI token.
-
-    if (mTokens.size() < 2)
-        bad("need at least address and ABI code for " + op);
-
-    string func = pop_front(mTokens);
-    string abi = pop_front(mTokens);
-
-    AbiKind _abi;
-    if (abi == "fastcall")
-        _abi = ABI_FASTCALL;
-    else if (abi == "stdcall")
-        _abi = ABI_STDCALL;
-    else if (abi == "thiscall")
-        _abi = ABI_THISCALL;
-    else if (abi == "cdecl")
-        _abi = ABI_CDECL;
-    else
-        bad("call abi name '" + abi + "'");
-    ci->_abi = _abi;
-
-    if (mTokens.size() > MAXARGS)
-    bad("too many args to " + op);
-
-    if (func.find("0x") == 0) {
-        ci->_address = imm(func);
-
-        ci->_cse = 0;
-        ci->_fold = 0;
-
-#ifdef DEBUG
-        ci->_name = "fn";
-#endif
-
-    } else {
-        lookupFunction(func.c_str(), ci);
-        if (ci == NULL)
-            bad("invalid function reference " + func);
-        if (_abi != ci->_abi)
-            bad("invalid calling convention for " + func);
-    }
-
-    ci->_argtypes = 0;
-
-    for (size_t i = 0; i < mTokens.size(); ++i) {
-        args[i] = ref(mTokens[mTokens.size() - (i+1)]);
-        ci->_argtypes |= args[i]->isQuad() ? ARGSIZE_F : ARGSIZE_LO;
-        ci->_argtypes <<= 2;
-    }
-
-    // Select return type from opcode.
-    // FIXME: callh needs special treatment currently
-    // missing from here.
-    if (mOpcode == LIR_call)
-        ci->_argtypes |= ARGSIZE_LO;
-    else
-        ci->_argtypes |= ARGSIZE_F;
-
-    return mLir->insCall(ci, args);
-}
-
-LIns*
-LirasmAssembler::assemble_guard()
-{
-    LIns *exitIns = do_skip(sizeof(LasmSideExit));
-    LasmSideExit* exit = (LasmSideExit*) exitIns->payload();
-    memset(exit, 0, sizeof(LasmSideExit));
-    exit->from = mFragment;
-    exit->target = NULL;
-    exit->line = mLineno;
-
-    LIns *guardRec = do_skip(sizeof(GuardRecord));
-    GuardRecord *rec = (GuardRecord*) guardRec->payload();
-    memset(rec, 0, sizeof(GuardRecord));
-    rec->exit = exit;
-    exit->addGuard(rec);
-
-    need(mOpcount);
-
-    if (mOpcode != LIR_loop)
-        mReturnTypeBits |= RT_GUARD;
-
-    LIns *ins_cond;
-    if (mOpcode == LIR_xt || mOpcode == LIR_xf)
-        ins_cond = ref(pop_front(mTokens));
-    else
-        ins_cond = NULL;
-
-    if (!mTokens.empty())
-        bad("too many arguments");
-
-    return mLir->insGuard(mOpcode, ins_cond, guardRec);
-}
-
-LIns*
-LirasmAssembler::assemble_general()
-{
-    if (mOpcount == 0) {
-        // 0-ary ops may, or may not, have an immediate
-        // thing wedged in them; depends on the op. We
-        // are lax and set it if it's provided.
-        LIns *ins = mLir->ins0(mOpcode);
-        if (mTokens.size() > 0) {
-            assert(mTokens.size() == 1);
-            ins->initLInsI(mOpcode, imm(mTokens[0]));
-        }
-        return ins;
-    } else {
-        need(mOpcount);
-        if (mOpcount == 1) {
-            if (mOpcode == LIR_ret)
-                mReturnTypeBits |= RT_INT32;
-            if (mOpcode == LIR_fret)
-                mReturnTypeBits |= RT_FLOAT;
-
-            return mLir->ins1(mOpcode,
-                             ref(mTokens[0]));
-        } else if (mOpcount == 2) {
-            return mLir->ins2(mOpcode,
-                             ref(mTokens[0]),
-                             ref(mTokens[1]));
-        } else {
-            bad("too many operands");
-        }
-    }
-    // Never get here.
-    return NULL;
+   t tmp = vec[0];
+   vec.erase(vec.begin());
+   return tmp;
 }
 
 void
@@ -575,26 +397,8 @@ dump_srecords(ostream &out, Fragment *frag)
     }
 }
 
-void
-LirasmAssembler::extract_any_label(string &op,
-                                   string &lab,
-                                   char lab_delim)
-{
-    if (op.size() > 1 &&
-        op[op.size()-1] == lab_delim &&
-        !mTokens.empty()) {
-
-        lab = op;
-        op = pop_front(mTokens);
-        lab.erase(lab.size()-1);
-
-        if (mLabels.find(lab) != mLabels.end())
-            bad("duplicate label");
-    }
-}
-
-void
-LirasmAssembler::beginFragment()
+FragmentAssembler::FragmentAssembler(Lirasm &parent, const string &fragmentName)
+    : mParent(&parent), mFragName(fragmentName)
 {
     mFragment = new (&gc) Fragment(NULL);
     mFragment->lirbuf = mParent->mLirbuf;
@@ -615,24 +419,249 @@ LirasmAssembler::beginFragment()
     }
 #endif
 
-    mInFrag = true;
     mReturnTypeBits = 0;
     mLir->ins0(LIR_start);
+
+    mLineno = 0;
+}
+
+FragmentAssembler::~FragmentAssembler()
+{
+    delete mVerboseWriter;
+    delete mExprFilter;
+    delete mCseFilter;
+    delete mBufWriter;
+    // Free every CallInfo recorded in mCallInfos.
+    for (vector<CallInfo*>::iterator it = mCallInfos.begin(); it != mCallInfos.end(); ++it)
+        delete *it;
+}
 
 void
-LirasmAssembler::endFragment()
+FragmentAssembler::bad(const string &msg)
 {
-    mInFrag = false;
+    cerr << "instruction " << mLineno << ": " <<  msg << endl;
+    exit(1);
+}
 
-    if (mReturnTypeBits == 0)
+void
+FragmentAssembler::need(size_t n)
+{
+    // Report an error via bad() unless exactly |n| tokens remain in mTokens.
+    const size_t have = mTokens.size();
+    if (have != n)
+        bad("need " + lexical_cast<string>(n) + " tokens, have " + lexical_cast<string>(have));
+}
+
+LIns *
+FragmentAssembler::ref(const string &lab)
+{
+    if (mLabels.find(lab) == mLabels.end())
+        bad("unknown label '" + lab + "'");
+    return mLabels.find(lab)->second;
+}
+
+LIns *
+FragmentAssembler::do_skip(size_t i)
+{
+    LIns *s = mLir->insSkip(i);
+    memset(s->payload(), 0xba, i);
+    return s;
+}
+
+LIns *
+FragmentAssembler::assemble_jump()
+{
+    LIns *target = NULL;
+    LIns *condition = NULL;
+
+    if (mOpcode == LIR_j) {
+        need(1);
+    } else {
+        need(2);
+        string cond = pop_front(mTokens);
+        condition = ref(cond);
+    }
+    string name = pop_front(mTokens);
+    if (mLabels.find(name) != mLabels.end()) {
+        target = ref(name);
+        return mLir->insBranch(mOpcode, condition, target);
+    } else {
+        LIns *ins = mLir->insBranch(mOpcode, condition, target);
+        mFwdJumps.insert(make_pair(name, ins));
+        return ins;
+    }
+}
+
+LIns *
+FragmentAssembler::assemble_load()
+{
+    // Loads take a base operand plus an immediate displacement: unlike
+    // sti/stqi, no immediate-displacement load opcodes were defined in LIR,
+    // so the second token must be a number (hex "0x"/"0X" or decimal).
+    need(2);
+    if (mTokens[1].find("0x") == 0 ||
+        mTokens[1].find("0X") == 0 ||
+        mTokens[1].find_first_of("0123456789") == 0) {
+        return mLir->insLoad(mOpcode,
+                             ref(mTokens[0]),
+                             imm(mTokens[1]));
+    }
+    bad("immediate offset required for load");
+    return NULL;  // not reached
+}
+
+LIns *
+FragmentAssembler::assemble_call(const string &op)
+{
+    CallInfo *ci = new CallInfo();
+    mCallInfos.push_back(ci);
+    LIns *args[MAXARGS];
+
+    // Assembler syntax for a call:
+    //
+    //   call 0x1234 fastcall a b c
+    //
+    // requires at least 2 args: the callee (a hex address immediate, "0x" or
+    // "0X", or a name resolved by Lirasm::lookupFunction) and the ABI token.
+
+    if (mTokens.size() < 2)
+        bad("need at least address and ABI code for " + op);
+
+    string func = pop_front(mTokens);
+    string abi = pop_front(mTokens);
+
+    AbiKind _abi;
+    if (abi == "fastcall")
+        _abi = ABI_FASTCALL;
+    else if (abi == "stdcall")
+        _abi = ABI_STDCALL;
+    else if (abi == "thiscall")
+        _abi = ABI_THISCALL;
+    else if (abi == "cdecl")
+        _abi = ABI_CDECL;
+    else
+        bad("call abi name '" + abi + "'");
+    ci->_abi = _abi;
+
+    if (mTokens.size() > MAXARGS)
+        bad("too many args to " + op);
+
+    if (func.find("0x") == 0 || func.find("0X") == 0) {
+        ci->_address = imm(func);
+
+        ci->_cse = 0;
+        ci->_fold = 0;
+
+#ifdef DEBUG
+        ci->_name = "fn";
+#endif
+    } else {
+        mParent->lookupFunction(func, ci);
+        if (ci == NULL)
+            bad("invalid function reference " + func);
+        if (_abi != ci->_abi)
+            bad("invalid calling convention for " + func);
+    }
+
+    ci->_argtypes = 0;
+    // args[] is filled in reverse of the textual operand order.
+    for (size_t i = 0; i < mTokens.size(); ++i) {
+        args[i] = ref(mTokens[mTokens.size() - (i+1)]);
+        ci->_argtypes |= args[i]->isQuad() ? ARGSIZE_F : ARGSIZE_LO;
+        ci->_argtypes <<= 2;
+    }
+
+    // Select return type from opcode.
+    // FIXME: callh needs special treatment currently
+    // missing from here.
+    if (mOpcode == LIR_call)
+        ci->_argtypes |= ARGSIZE_LO;
+    else
+        ci->_argtypes |= ARGSIZE_F;
+
+    return mLir->insCall(ci, args);
+}
+
+LIns *
+FragmentAssembler::assemble_guard()
+{
+    LIns *exitIns = do_skip(sizeof(LasmSideExit));
+    LasmSideExit* exit = (LasmSideExit*) exitIns->payload();
+    memset(exit, 0, sizeof(LasmSideExit));
+    exit->from = mFragment;
+    exit->target = NULL;
+    exit->line = mLineno;
+
+    LIns *guardRec = do_skip(sizeof(GuardRecord));
+    GuardRecord *rec = (GuardRecord*) guardRec->payload();
+    memset(rec, 0, sizeof(GuardRecord));
+    rec->exit = exit;
+    exit->addGuard(rec);
+
+    need(mOpcount);
+
+    if (mOpcode != LIR_loop)
+        mReturnTypeBits |= RT_GUARD;
+
+    LIns *ins_cond;
+    if (mOpcode == LIR_xt || mOpcode == LIR_xf)
+        ins_cond = ref(pop_front(mTokens));
+    else
+        ins_cond = NULL;
+
+    if (!mTokens.empty())
+        bad("too many arguments");
+
+    return mLir->insGuard(mOpcode, ins_cond, guardRec);
+}
+
+LIns *
+FragmentAssembler::assemble_general()
+{
+    if (mOpcount == 0) {
+        // 0-ary ops may, or may not, have an immediate
+        // thing wedged in them; depends on the op. We
+        // are lax and set it if it's provided.
+        LIns *ins = mLir->ins0(mOpcode);
+        if (mTokens.size() > 0) {
+            assert(mTokens.size() == 1);
+            ins->initLInsI(mOpcode, imm(mTokens[0]));
+        }
+        return ins;
+    } else {
+        need(mOpcount);
+        if (mOpcount == 1) {
+            if (mOpcode == LIR_ret)
+                mReturnTypeBits |= RT_INT32;
+            if (mOpcode == LIR_fret)
+                mReturnTypeBits |= RT_FLOAT;
+
+            return mLir->ins1(mOpcode,
+                              ref(mTokens[0]));
+        } else if (mOpcount == 2) {
+            return mLir->ins2(mOpcode,
+                              ref(mTokens[0]),
+                              ref(mTokens[1]));
+        } else {
+            bad("too many operands");
+        }
+    }
+    // Never get here.
+    return NULL;
+}
+
+void
+FragmentAssembler::endFragment()
+{
+    if (mReturnTypeBits == 0) {
         cerr << "warning: no return type in fragment '"
              << mFragName << "'" << endl;
-    if (mReturnTypeBits != RT_INT32 && mReturnTypeBits != RT_FLOAT
-        && mReturnTypeBits != RT_GUARD)
+    }
+    if (mReturnTypeBits != RT_INT32 && mReturnTypeBits != RT_FLOAT &&
+        mReturnTypeBits != RT_GUARD) {
         cerr << "warning: multiple return types in fragment '"
              << mFragName << "'" << endl;
-
+    }
     LIns *exitIns = do_skip(sizeof(SideExit));
     SideExit* exit = (SideExit*) exitIns->payload();
     memset(exit, 0, sizeof(SideExit));
@@ -645,15 +674,15 @@ LirasmAssembler::endFragment()
     if (mParent->mFragmento->assm()->error() != nanojit::None) {
         cerr << "error during assembly: ";
         switch (mParent->mFragmento->assm()->error()) {
-        case nanojit::OutOMem: cerr << "OutOMem"; break;
-        case nanojit::StackFull: cerr << "StackFull"; break;
-        case nanojit::RegionFull: cerr << "RegionFull"; break;
-        case nanojit::MaxLength: cerr << "MaxLength"; break;
-        case nanojit::MaxExit: cerr << "MaxExit"; break;
-        case nanojit::MaxXJump: cerr << "MaxXJump"; break;
-        case nanojit::UnknownPrim: cerr << "UnknownPrim"; break;
-        case nanojit::UnknownBranch: cerr << "UnknownBranch"; break;
-        case nanojit::None: cerr << "None"; break;
+          case nanojit::OutOMem: cerr << "OutOMem"; break;
+          case nanojit::StackFull: cerr << "StackFull"; break;
+          case nanojit::RegionFull: cerr << "RegionFull"; break;
+          case nanojit::MaxLength: cerr << "MaxLength"; break;
+          case nanojit::MaxExit: cerr << "MaxExit"; break;
+          case nanojit::MaxXJump: cerr << "MaxXJump"; break;
+          case nanojit::UnknownPrim: cerr << "UnknownPrim"; break;
+          case nanojit::UnknownBranch: cerr << "UnknownBranch"; break;
+          case nanojit::None: cerr << "None"; break;
         }
         cerr << endl;
         std::exit(1);
@@ -663,170 +692,156 @@ LirasmAssembler::endFragment()
     f = &mParent->mFragments[mFragName];
 
     switch (mReturnTypeBits) {
-      case RT_FLOAT:
-      default:
-        f->rfloat = reinterpret_cast<RetFloat>(mFragment->code());
-        f->mReturnType = RT_FLOAT;
-        break;
-      case RT_INT32:
-        f->rint = reinterpret_cast<RetInt>(mFragment->code());
-        f->mReturnType = RT_INT32;
-        break;
       case RT_GUARD:
         f->rguard = reinterpret_cast<RetGuard>(mFragment->code());
         f->mReturnType = RT_GUARD;
         break;
+      case RT_FLOAT:
+        f->rfloat = reinterpret_cast<RetFloat>(mFragment->code());
+        f->mReturnType = RT_FLOAT;
+        break;
+      default:
+        f->rint = reinterpret_cast<RetInt>(mFragment->code());
+        f->mReturnType = RT_INT32;
+        break;
     }
 
-    delete mVerboseWriter;
-    delete mExprFilter;
-    delete mCseFilter;
-    delete mBufWriter;
-    for (size_t i = 0; i < mParent->mCallInfos.size(); ++i)
-        delete mParent->mCallInfos[i];
-    mParent->mCallInfos.clear();
-
     mParent->mFragments[mFragName].mLabels = mLabels;
-    mLabels.clear();
 }
 
 void
-LirasmAssembler::assemble(istream &in)
+FragmentAssembler::tokenizeLine(LirTokenStream &in, LirToken &token)
 {
-#define OPDEF(op, number, args, repkind) \
-    op_map[#op] = make_pair(LIR_##op, args);
-#define OPDEF64(op, number, args, repkind) \
-    op_map[#op] = make_pair(LIR_##op, args);
-#include "nanojit/LIRopcode.tbl"
-#undef OPDEF
-#undef OPDEF64
+    mTokens.clear();
+    mTokens.push_back(token.data);
 
-    op_map["alloc"] = op_map["ialloc"];
-    op_map["param"] = op_map["iparam"];
+    while (in.get(token)) {
+        if (token.type == NEWLINE)
+            break;
+        mTokens.push_back(token.data);
+    }
+}
 
-        bool singleFrag = false;
-        bool first = true;
-        while(read_and_tokenize_line(in)) {
+void
+FragmentAssembler::extract_any_label(string &lab, char lab_delim)
+{
+    if (mTokens.size() > 2 && mTokens[1].size() == 1 && mTokens[1][0] == lab_delim) {
+        lab = pop_front(mTokens);
+        pop_front(mTokens);  // remove punctuation
 
-        if (mParent->mLirbuf->outOMem()) {
-            cerr << "lirbuf out of memory" << endl;
-            exit(1);
+        if (mLabels.find(lab) != mLabels.end())
+            bad("duplicate label");
+    }            
+}
+
+void
+FragmentAssembler::assembleFragment(LirTokenStream &in, bool implied, const LirToken *firstToken)
+{
+    LirToken token;
+    while (true) {
+        if (firstToken) {
+            token = *firstToken;
+            firstToken = NULL;
+        } else if (!in.get(token)) {
+            if (!implied)
+                bad("unexpected end of file in fragment '" + mFragName + "'");
+            break;
         }
-
-        if (mTokens.empty())
+        if (token.type == NEWLINE)
             continue;
+        if (token.type != NAME)
+            bad("unexpected token '" + token.data + "'");
 
-        string op = pop_front(mTokens);
-
-        if (op == ".patch") {
-            tokenize(".");
-            patch();
-            continue;
+        string op = token.data;
+        if (op == ".begin")
+            bad("nested fragments are not supported");
+        if (op == ".end") {
+            if (implied)
+                bad(".end without .begin");
+            if (!in.eat(NEWLINE))
+                bad("extra junk after .end");
+            break;
         }
 
-        if (!singleFrag) {
-            if (op == ".begin") {
-                if (mTokens.size() != 1)
-                    bad("missing fragment name");
-                if (mInFrag)
-                    bad("nested fragments are not supported");
-
-                mFragName = pop_front(mTokens);
-
-                beginFragment();
-                first = false;
-                continue;
-            } else if (op == ".end") {
-                if (!mInFrag)
-                    bad("expecting .begin before .end");
-                if (!mTokens.empty())
-                    bad("too many tokens");
-                endFragment();
-                continue;
-            }
-        }
-        if (first) {
-            first = false;
-            singleFrag = true;
-            mFragName = "main";
-
-            beginFragment();
-        }
+        mLineno = token.lineno;
+        tokenizeLine(in, token);
 
         string lab;
         LIns *ins = NULL;
-        extract_any_label(op, lab, ':');
+        extract_any_label(lab, ':');
 
         /* Save label and do any back-patching of deferred forward-jumps. */
         if (!lab.empty()) {
             ins = mLir->ins0(LIR_label);
-            typedef multimap<string,LIns*> mulmap;
+            typedef multimap<string, LIns *> mulmap;
             typedef mulmap::const_iterator ci;
-            pair<ci,ci> range = mFwdJumps.equal_range(lab);
+            pair<ci, ci> range = mFwdJumps.equal_range(lab);
             for (ci i = range.first; i != range.second; ++i) {
                 i->second->setTarget(ins);
             }
             mFwdJumps.erase(lab);
             lab.clear();
         }
-        extract_any_label(op, lab, '=');
+        extract_any_label(lab, '=');
 
-        if (op_map.find(op) == op_map.end())
+        assert(!mTokens.empty());
+        op = pop_front(mTokens);
+        if (mParent->mOpMap.find(op) == mParent->mOpMap.end())
             bad("unknown instruction '" + op + "'");
 
-        pair<LOpcode,size_t> entry = op_map[op];
+        pair<LOpcode, size_t> entry = mParent->mOpMap[op];
         mOpcode = entry.first;
         mOpcount = entry.second;
 
         switch (mOpcode) {
         // A few special opcode cases.
 
-        case LIR_j:
-        case LIR_jt:
-        case LIR_jf:
-        case LIR_ji:
+          case LIR_j:
+          case LIR_jt:
+          case LIR_jf:
+          case LIR_ji:
             ins = assemble_jump();
             break;
 
-        case LIR_int:
+          case LIR_int:
             need(1);
             ins = mLir->insImm(imm(mTokens[0]));
             break;
 
-        case LIR_quad:
+          case LIR_quad:
             need(1);
             ins = mLir->insImmq(quad(mTokens[0]));
             break;
 
-        case LIR_sti:
-        case LIR_stqi:
+          case LIR_sti:
+          case LIR_stqi:
             need(3);
             ins = mLir->insStorei(ref(mTokens[0]),
                                   ref(mTokens[1]),
                                   imm(mTokens[2]));
             break;
 
-        case LIR_ld:
-        case LIR_ldc:
-        case LIR_ldq:
-        case LIR_ldqc:
-        case LIR_ldcb:
-        case LIR_ldcs:
+          case LIR_ld:
+          case LIR_ldc:
+          case LIR_ldq:
+          case LIR_ldqc:
+          case LIR_ldcb:
+          case LIR_ldcs:
             ins = assemble_load();
             break;
 
-        case LIR_iparam:
+          case LIR_iparam:
             need(2);
             ins = mLir->insParam(imm(mTokens[0]),
                                  imm(mTokens[1]));
             break;
 
-        case LIR_ialloc:
+          case LIR_ialloc:
             need(1);
             ins = mLir->insAlloc(imm(mTokens[0]));
             break;
 
-        case LIR_skip:
+          case LIR_skip:
             need(1);
             {
                 int32_t count = imm(mTokens[0]);
@@ -836,20 +851,21 @@ LirasmAssembler::assemble(istream &in)
             }
             break;
 
-        case LIR_xt:
-        case LIR_xf:
-        case LIR_x:
-        case LIR_xbarrier:
-        case LIR_loop:
+          case LIR_xt:
+          case LIR_xf:
+          case LIR_x:
+          case LIR_xbarrier:
+          case LIR_loop:
             ins = assemble_guard();
             break;
 
-        case LIR_call:
-        case LIR_callh:
-        case LIR_fcall:
+          case LIR_call:
+          case LIR_callh:
+          case LIR_fcall:
             ins = assemble_call(op);
             break;
-        default:
+
+          default:
             ins = assemble_general();
             break;
         }
@@ -857,47 +873,43 @@ LirasmAssembler::assemble(istream &in)
         assert(ins);
         if (!lab.empty())
             mLabels.insert(make_pair(lab, ins));
-    }
-    if (mInFrag && singleFrag)
-        endFragment();
 
-    if (mInFrag)
-        bad("unexpected EOF");
-    if (mParent->mLirbuf->outOMem()) {
-        cerr << "lirbuf out of memory" << endl;
-        exit(1);
-    }
-}
-
-bool
-has_flag(vector<string> &args, string const &flag)
-{
-    for (vector<string>::iterator i = args.begin();
-         i != args.end(); ++i) {
-        if (*i == flag) {
-            args.erase(i);
-            return true;
+        if (mParent->mLirbuf->outOMem()) {
+            cerr << "lirbuf out of memory" << endl;
+            exit(1);
         }
     }
-    return false;
+    endFragment();
 }
 
-
 Lirasm::Lirasm(bool verbose)
 {
     mVerbose = verbose;
     nanojit::AvmCore::config.tree_opt = true;
     mLogc.lcbits = 0;
-    mFragmento = new (&gc) Fragmento(&s_core, &mLogc, 32);
+    mFragmento = new (&gc) Fragmento(&mCore, &mLogc, 32);
     mFragmento->labels = NULL;
     mLirbuf = new (&gc) LirBuffer(mFragmento);
 #ifdef DEBUG
     if (mVerbose) {
         mLogc.lcbits = LC_Assembly;
-        mFragmento->labels = new (&gc) LabelMap(&s_core);
+        mFragmento->labels = new (&gc) LabelMap(&mCore);
         mLirbuf->names = new (&gc) LirNameMap(&gc, mFragmento->labels);
     }
 #endif
+
+    // Populate the mOpMap table.
+#define OPDEF(op, number, args, repkind) \
+    mOpMap[#op] = make_pair(LIR_##op, args);
+#define OPDEF64(op, number, args, repkind) \
+    mOpMap[#op] = make_pair(LIR_##op, args);
+#include "nanojit/LIRopcode.tbl"
+#undef OPDEF
+#undef OPDEF64
+
+    // TODO - These should alias to the appropriate platform-specific LIR opcode.
+    mOpMap["alloc"] = mOpMap["ialloc"];
+    mOpMap["param"] = mOpMap["iparam"];
 }
 
 Lirasm::~Lirasm()
@@ -912,49 +924,120 @@ Lirasm::~Lirasm()
     delete mFragmento;
 }
 
-LirasmAssembler::LirasmAssembler(Lirasm &lasm)
-{
-    mParent = &lasm;
-    mInFrag = false;
-    mLineno = 0;
-}
-
 void
-LirasmAssembler::tokenize(string const &tok_sep)
+Lirasm::lookupFunction(const string &name, CallInfo *&ci)
 {
-    vector<string>::iterator i;
-    for (i = mTokens.begin(); i < mTokens.end(); i++)
-    {
-        string line = *i;
-        size_t start = 0;
-        size_t end = 0;
-        while((start = line.find_first_not_of(tok_sep, end)) != string::npos &&
-              (end = line.find_first_of(tok_sep, start)) != string::npos) {
-            const string ss = line.substr(start, (end-start));
-            i->erase(start, end-start+1);
-            mTokens.insert(i++, ss);
-            mTokens.insert(i++, tok_sep);
+    const size_t nfuns = sizeof(functions) / sizeof(functions[0]);
+    for (size_t i = 0; i < nfuns; i++) {
+        if (name == functions[i].name) {
+            *ci = functions[i].callInfo;
+            return;
         }
     }
+
+    Fragments::const_iterator func = mFragments.find(name);
+    if (func != mFragments.end()) {
+        if (func->second.mReturnType == RT_FLOAT) {
+            CallInfo target = {(uintptr_t) func->second.rfloat, ARGSIZE_F, 0,
+                               0, nanojit::ABI_FASTCALL, func->first.c_str()};
+            *ci = target;
+
+        } else {
+            CallInfo target = {(uintptr_t) func->second.rint, ARGSIZE_LO, 0,
+                               0, nanojit::ABI_FASTCALL, func->first.c_str()};
+            *ci = target;
+        }
+    } else {
+        ci = NULL;
+    }
 }
 
+// Read LIR source from |in| and assemble it: handles .patch and .begin
+// directives, or treats a leading bare opcode as the start of fragment "main".
 void
-LirasmAssembler::patch()
+Lirasm::assemble(istream &in)
 {
-    if (mTokens[1] != "." || mTokens[3] != "->")
+    LirTokenStream ts(in);
+    bool first = true;  // stays true until a .begin fragment has been assembled
+    LirToken token;
+    while (ts.get(token)) {
+        if (mLirbuf->outOMem()) {
+            cerr << "lirbuf out of memory" << endl;
+            exit(1);
+        }
+        if (token.type == NEWLINE)
+            continue;  // nothing to dispatch on this token
+        if (token.type != NAME)
+            bad("unexpected token '" + token.data + "'");
+
+        const string &op = token.data;
+        if (op == ".patch") {
+            handlePatch(ts);
+        } else if (op == ".begin") {
+            string name;
+            if (!ts.getName(name))
+                bad("expected fragment name after .begin");
+            if (!ts.eat(NEWLINE))
+                bad("extra junk after .begin " + name);
+
+            FragmentAssembler assembler(*this, name);
+            assembler.assembleFragment(ts, false, NULL);
+            first = false;
+        } else if (op == ".end") {
+            bad(".end without .begin");
+        } else if (first) {  // bare opcode before any .begin fragment
+            FragmentAssembler assembler(*this, "main");
+            assembler.assembleFragment(ts, true, &token);  // |token| already read
+            break;  // no further statements are read after this fragment
+        } else {
+            bad("unexpected stray opcode '" + op + "'");
+        }
+    }
+    if (mLirbuf->outOMem()) {
+        cerr << "lirbuf out of memory" << endl;
+        exit(1);
+    }
+}
+
+// Handle ".patch frag.guard -> dest": retarget that guard's exit to |dest|.
+void
+Lirasm::handlePatch(LirTokenStream &in)
+{
+    string src, fragName, guardName, destName;
+    if (!in.getName(src) || !in.eat(PUNCT, "->") || !in.getName(destName))
         bad("incorrect syntax");
+
+    // Break the src at '.'. This is awkward but the syntax looks nice.
+    size_t j = src.find('.');
+    if (j == string::npos || j == 0 || j == src.size() - 1)
+        bad("incorrect syntax");
+    fragName = src.substr(0, j);
+    guardName = src.substr(j + 1);
+
     Fragments::iterator i;
-    if ((i=mParent->mFragments.find(mTokens[0])) == mParent->mFragments.end())
+    if ((i=mFragments.find(fragName)) == mFragments.end())
         bad("invalid fragment reference");
     LirasmFragment *frag = &i->second;
-    if (frag->mLabels.find(mTokens[2]) == frag->mLabels.end())
+    if (frag->mLabels.find(guardName) == frag->mLabels.end())
         bad("invalid guard reference");
-    LIns *ins = frag->mLabels.find(mTokens[2])->second;
-    if ((i=mParent->mFragments.find(mTokens[4])) == mParent->mFragments.end())
-        bad("invalid guard reference");
+    LIns *ins = frag->mLabels.find(guardName)->second;
+    if ((i=mFragments.find(destName)) == mFragments.end())
+        bad("invalid fragment reference");
     ins->record()->exit->target = i->second.fragptr;
 
-    mParent->mFragmento->assm()->patch(ins->record()->exit);
+    mFragmento->assm()->patch(ins->record()->exit);
+}
+
+bool
+has_flag(vector<string> &args, const string &flag)
+{
+    vector<string>::iterator pos = args.begin();
+    while (pos != args.end() && *pos != flag)  // advance to the first match
+        ++pos;
+    if (pos == args.end())
+        return false;  // flag absent; args left untouched
+    args.erase(pos);   // drop only that first occurrence
+    return true;
 }
 
 int
@@ -996,7 +1079,7 @@ main(int argc, char **argv)
     }
 
     Lirasm lasm(verbose);
-    LirasmAssembler(lasm).assemble(in);
+    lasm.assemble(in);
 
     Fragments::const_iterator i;
     if (execute) {