From 2421d2c3d11c9c934c0efe5f47dd19cb985ad300 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Fri, 17 Jul 2009 00:48:39 -0500 Subject: [PATCH] Bug 505249 - lirasm - Style cleanup and new tokenizer. r=graydon. --- js/src/lirasm/lirasm.cpp | 1067 ++++++++++++++++++++------------------ 1 file changed, 575 insertions(+), 492 deletions(-) diff --git a/js/src/lirasm/lirasm.cpp b/js/src/lirasm/lirasm.cpp index 060460898f6..f3c22fc77ed 100644 --- a/js/src/lirasm/lirasm.cpp +++ b/js/src/lirasm/lirasm.cpp @@ -44,7 +44,6 @@ #include #include #include -#include #ifdef AVMPLUS_UNIX #include @@ -54,6 +53,8 @@ #include #include +#include +#include #include "nanojit/nanojit.h" #include "jstracer.h" @@ -106,6 +107,88 @@ const int PTRRET = #endif ; +enum LirTokenType { + NAME, NUMBER, PUNCT, NEWLINE +}; + +struct LirToken { + LirTokenType type; + string data; + int lineno; +}; + +inline bool +startsWith(const string &s, const string &prefix) +{ + return s.size() >= prefix.size() && s.compare(0, prefix.length(), prefix) == 0; +} + +// LIR files must be ASCII, for simplicity. +class LirTokenStream { +public: + LirTokenStream(istream &in) : mIn(in), mLineno(0) {} + + bool get(LirToken &token) { + if (mLine.empty()) { + if (!getline(mIn, mLine)) + return false; + mLine += '\n'; + mLineno++; + } + mLine.erase(0, mLine.find_first_not_of(" \t\v\r")); + char c = mLine[0]; + size_t e = mLine.find_first_not_of("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$.+-"); + if (startsWith(mLine, "->")) { + mLine.erase(0, 2); + token.type = PUNCT; + token.data = "->"; + } else if (e > 0) { + string s = mLine.substr(0, e); + mLine.erase(0, e); + if (e > 1 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) + token.type = NUMBER; + else if (isdigit(s[0]) || (e > 1 && s[0] == '.' && isdigit(s[1]))) + token.type = NUMBER; + else + token.type = NAME; + token.data = s; + } else if (strchr(":,=[]()", c)) { + token.type = PUNCT; + token.data = c; + mLine.erase(0, 1); + } else if (c == ';' || c == '\n') { + token.type = NEWLINE; + token.data.clear(); + mLine.clear(); + } else { + cerr << mLineno << ": error: Unrecognized character in file." << endl; + return false; + } + + token.lineno = mLineno; + return true; + } + + bool eat(LirTokenType type, const char *exact = NULL) { + LirToken token; + return (get(token) && token.type == type && (exact == NULL || token.data == exact)); + } + + bool getName(string &name) { + LirToken t; + if (get(t) && t.type == NAME) { + name = t.data; + return true; + } + return false; + } + +private: + istream &mIn; + string mLine; + int mLineno; +}; + class LirasmFragment { public: union { @@ -122,23 +205,46 @@ typedef map Fragments; class Lirasm { public: + Lirasm(bool verbose); + ~Lirasm(); + + void assemble(istream &in); + void lookupFunction(const string &name, CallInfo *&ci); + Fragmento *mFragmento; LirBuffer *mLirbuf; LogControl mLogc; bool mVerbose; - avmplus::AvmCore s_core; Fragments mFragments; - vector mCallInfos; + map > mOpMap; - Lirasm(bool verbose); - ~Lirasm(); + void bad(const string &msg) { + cerr << "error: " << msg << endl; + exit(1); + } + +private: + void handlePatch(LirTokenStream &in); + + avmplus::AvmCore mCore; }; -class LirasmAssembler { +class FragmentAssembler { +public: + FragmentAssembler(Lirasm &parent, const string &fragmentName); + ~FragmentAssembler(); + + void assembleFragment(LirTokenStream &in, bool implied, const LirToken *firstToken); + private: + // Prohibit copying. + FragmentAssembler(const FragmentAssembler &); + FragmentAssembler & operator=(const FragmentAssembler &); + Lirasm *mParent; + const string mFragName; Fragment *mFragment; - string mFragName; + vector mCallInfos; map mLabels; LirWriter *mLir; LirBufWriter *mBufWriter; @@ -146,37 +252,26 @@ private: LirWriter *mExprFilter; LirWriter *mVerboseWriter; multimap mFwdJumps; - map > op_map; size_t mLineno; LOpcode mOpcode; size_t mOpcount; - bool mInFrag; - char mReturnTypeBits; vector mTokens; - void lookupFunction(const char*, CallInfo *&); + void tokenizeLine(LirTokenStream &in, LirToken &token); void need(size_t); - istream& read_and_tokenize_line(istream&); - void tokenize(string const &tok_sep); - LIns *ref(string const &); + LIns *ref(const string &); LIns *do_skip(size_t); - LIns *assemble_call(string &); + LIns *assemble_call(const string &); LIns *assemble_general(); LIns *assemble_guard(); LIns *assemble_jump(); LIns *assemble_load(); - void bad(string const &msg); - void beginFragment(); + void bad(const string &msg); + void extract_any_label(string &lab, char lab_delim); void endFragment(); - void extract_any_label(string &op, string &lab, char lab_delim); - void patch(); - -public: - LirasmAssembler(Lirasm &); - void assemble(istream &); }; Function functions[] = { @@ -186,66 +281,6 @@ Function functions[] = { FN(free, I32 | (PTRARG<<2)) }; -void -LirasmAssembler::lookupFunction(const char *name, CallInfo *&ci) -{ - const size_t nfuns = sizeof(functions) / sizeof(functions[0]); - for (size_t i = 0; i < nfuns; i++) - if (strcmp(name, functions[i].name) == 0) { - *ci = functions[i].callInfo; - return; - } - - Fragments::const_iterator func = mParent->mFragments.find(name); - if (func != mParent->mFragments.end()) { - if (func->second.mReturnType == RT_FLOAT) { - CallInfo target = {(uintptr_t) func->second.rfloat, ARGSIZE_F, 0, - 0, nanojit::ABI_FASTCALL, func->first.c_str()}; - *ci = target; - - } else { - CallInfo target = {(uintptr_t) func->second.rint, ARGSIZE_LO, 0, - 0, nanojit::ABI_FASTCALL, func->first.c_str()}; - *ci = target; - } - } else { - ci = NULL; - } -} - -istream & -LirasmAssembler::read_and_tokenize_line(istream &in) -{ - char buf[1024]; - string tok_sep(" \n\t"); - - mTokens.clear(); - - if (in.getline(buf,sizeof(buf))) { - ++mLineno; - string line(buf); - - size_t comment = line.find("//"); - if (comment != string::npos) - line.resize(comment); - - line += '\n'; - - size_t start = 0; - size_t end = 0; - while((start = line.find_first_not_of(tok_sep, end)) != string::npos && - (end = line.find_first_of(tok_sep, start)) != string::npos) { - string ss = line.substr(start, (end-start)); - if (ss == "=") { - mTokens[mTokens.size()-1] += ss; - continue; - } - mTokens.push_back(ss); - } - } - return in; -} - template out lexical_cast(in arg) { @@ -258,18 +293,19 @@ lexical_cast(in arg) } int32_t -imm(string const &s) +imm(const string &s) { stringstream tmp(s); int32_t ret; if ((s.find("0x") == 0 || s.find("0X") == 0) && - (tmp >> hex >> ret && tmp.eof())) + (tmp >> hex >> ret && tmp.eof())) { return ret; + } return lexical_cast(s); } uint64_t -quad(string const &s) +quad(const string &s) { stringstream tmp1(s), tmp2(s); union { @@ -292,223 +328,9 @@ pop_front(vector &vec) cerr << "pop_front of empty vector" << endl; exit(1); } - t tmp = vec[0]; - vec.erase(vec.begin()); - return tmp; -} - -void -LirasmAssembler::bad(string const &msg) -{ - cerr << "instruction " << mLineno << ": " << msg << endl; - exit(1); -} - -void -LirasmAssembler::need(size_t n) -{ - if (mTokens.size() != n) - bad("need " + lexical_cast(n) - + " tokens, have " + lexical_cast(mTokens.size())); -} - -LIns* -LirasmAssembler::ref(string const &lab) -{ - if (mLabels.find(lab) == mLabels.end()) - bad("unknown label '" + lab + "'"); - return mLabels.find(lab)->second; -} - -LIns* -LirasmAssembler::do_skip(size_t i) -{ - LIns *s = mLir->insSkip(i); - memset(s->payload(), 0xba, i); - return s; -} - -LIns* -LirasmAssembler::assemble_jump() -{ - LIns *target = NULL; - LIns *condition = NULL; - - if (mOpcode == LIR_j) { - need(1); - } else { - need(2); - string cond = pop_front(mTokens); - condition = ref(cond); - } - string name = pop_front(mTokens); - if (mLabels.find(name) != mLabels.end()) { - target = ref(name); - return mLir->insBranch(mOpcode, condition, target); - } else { - LIns *ins = mLir->insBranch(mOpcode, condition, target); - mFwdJumps.insert(make_pair(name, ins)); - return ins; - } -} - -LIns* -LirasmAssembler::assemble_load() -{ - // Support implicit immediate-as-second-operand modes - // since, unlike sti/stqi, no immediate-displacement - // load opcodes were defined in LIR. - need(2); - if (mTokens[1].find("0x") == 0 || - mTokens[1].find("0x") == 0 || - mTokens[1].find_first_of("0123456789") == 0) { - return mLir->insLoad(mOpcode, - ref(mTokens[0]), - imm(mTokens[1])); - } - bad("immediate offset required for load"); - return NULL; // not reached -} - -LIns* -LirasmAssembler::assemble_call(string &op) -{ - CallInfo *ci = new CallInfo(); - mParent->mCallInfos.push_back(ci); - LIns* args[MAXARGS]; - - // Assembler syntax for a call: - // - // call 0x1234 fastcall a b c - // - // requires at least 2 args, - // fn address immediate and ABI token. - - if (mTokens.size() < 2) - bad("need at least address and ABI code for " + op); - - string func = pop_front(mTokens); - string abi = pop_front(mTokens); - - AbiKind _abi; - if (abi == "fastcall") - _abi = ABI_FASTCALL; - else if (abi == "stdcall") - _abi = ABI_STDCALL; - else if (abi == "thiscall") - _abi = ABI_THISCALL; - else if (abi == "cdecl") - _abi = ABI_CDECL; - else - bad("call abi name '" + abi + "'"); - ci->_abi = _abi; - - if (mTokens.size() > MAXARGS) - bad("too many args to " + op); - - if (func.find("0x") == 0) { - ci->_address = imm(func); - - ci->_cse = 0; - ci->_fold = 0; - -#ifdef DEBUG - ci->_name = "fn"; -#endif - - } else { - lookupFunction(func.c_str(), ci); - if (ci == NULL) - bad("invalid function reference " + func); - if (_abi != ci->_abi) - bad("invalid calling convention for " + func); - } - - ci->_argtypes = 0; - - for (size_t i = 0; i < mTokens.size(); ++i) { - args[i] = ref(mTokens[mTokens.size() - (i+1)]); - ci->_argtypes |= args[i]->isQuad() ? ARGSIZE_F : ARGSIZE_LO; - ci->_argtypes <<= 2; - } - - // Select return type from opcode. - // FIXME: callh needs special treatment currently - // missing from here. - if (mOpcode == LIR_call) - ci->_argtypes |= ARGSIZE_LO; - else - ci->_argtypes |= ARGSIZE_F; - - return mLir->insCall(ci, args); -} - -LIns* -LirasmAssembler::assemble_guard() -{ - LIns *exitIns = do_skip(sizeof(LasmSideExit)); - LasmSideExit* exit = (LasmSideExit*) exitIns->payload(); - memset(exit, 0, sizeof(LasmSideExit)); - exit->from = mFragment; - exit->target = NULL; - exit->line = mLineno; - - LIns *guardRec = do_skip(sizeof(GuardRecord)); - GuardRecord *rec = (GuardRecord*) guardRec->payload(); - memset(rec, 0, sizeof(GuardRecord)); - rec->exit = exit; - exit->addGuard(rec); - - need(mOpcount); - - if (mOpcode != LIR_loop) - mReturnTypeBits |= RT_GUARD; - - LIns *ins_cond; - if (mOpcode == LIR_xt || mOpcode == LIR_xf) - ins_cond = ref(pop_front(mTokens)); - else - ins_cond = NULL; - - if (!mTokens.empty()) - bad("too many arguments"); - - return mLir->insGuard(mOpcode, ins_cond, guardRec); -} - -LIns* -LirasmAssembler::assemble_general() -{ - if (mOpcount == 0) { - // 0-ary ops may, or may not, have an immediate - // thing wedged in them; depends on the op. We - // are lax and set it if it's provided. - LIns *ins = mLir->ins0(mOpcode); - if (mTokens.size() > 0) { - assert(mTokens.size() == 1); - ins->initLInsI(mOpcode, imm(mTokens[0])); - } - return ins; - } else { - need(mOpcount); - if (mOpcount == 1) { - if (mOpcode == LIR_ret) - mReturnTypeBits |= RT_INT32; - if (mOpcode == LIR_fret) - mReturnTypeBits |= RT_FLOAT; - - return mLir->ins1(mOpcode, - ref(mTokens[0])); - } else if (mOpcount == 2) { - return mLir->ins2(mOpcode, - ref(mTokens[0]), - ref(mTokens[1])); - } else { - bad("too many operands"); - } - } - // Never get here. - return NULL; + t tmp = vec[0]; + vec.erase(vec.begin()); + return tmp; } void @@ -575,26 +397,8 @@ dump_srecords(ostream &out, Fragment *frag) } } -void -LirasmAssembler::extract_any_label(string &op, - string &lab, - char lab_delim) -{ - if (op.size() > 1 && - op[op.size()-1] == lab_delim && - !mTokens.empty()) { - - lab = op; - op = pop_front(mTokens); - lab.erase(lab.size()-1); - - if (mLabels.find(lab) != mLabels.end()) - bad("duplicate label"); - } -} - -void -LirasmAssembler::beginFragment() +FragmentAssembler::FragmentAssembler(Lirasm &parent, const string &fragmentName) + : mParent(&parent), mFragName(fragmentName) { mFragment = new (&gc) Fragment(NULL); mFragment->lirbuf = mParent->mLirbuf; @@ -615,24 +419,249 @@ LirasmAssembler::beginFragment() } #endif - mInFrag = true; mReturnTypeBits = 0; mLir->ins0(LIR_start); + + mLineno = 0; +} + +FragmentAssembler::~FragmentAssembler() +{ + delete mVerboseWriter; + delete mExprFilter; + delete mCseFilter; + delete mBufWriter; + + for (size_t i = 0; i < mCallInfos.size(); ++i) + delete mCallInfos[i]; } void -LirasmAssembler::endFragment() +FragmentAssembler::bad(const string &msg) { - mInFrag = false; + cerr << "instruction " << mLineno << ": " << msg << endl; + exit(1); +} - if (mReturnTypeBits == 0) +void +FragmentAssembler::need(size_t n) +{ + if (mTokens.size() != n) { + bad("need " + lexical_cast(n) + + " tokens, have " + lexical_cast(mTokens.size())); + } +} + +LIns * +FragmentAssembler::ref(const string &lab) +{ + if (mLabels.find(lab) == mLabels.end()) + bad("unknown label '" + lab + "'"); + return mLabels.find(lab)->second; +} + +LIns * +FragmentAssembler::do_skip(size_t i) +{ + LIns *s = mLir->insSkip(i); + memset(s->payload(), 0xba, i); + return s; +} + +LIns * +FragmentAssembler::assemble_jump() +{ + LIns *target = NULL; + LIns *condition = NULL; + + if (mOpcode == LIR_j) { + need(1); + } else { + need(2); + string cond = pop_front(mTokens); + condition = ref(cond); + } + string name = pop_front(mTokens); + if (mLabels.find(name) != mLabels.end()) { + target = ref(name); + return mLir->insBranch(mOpcode, condition, target); + } else { + LIns *ins = mLir->insBranch(mOpcode, condition, target); + mFwdJumps.insert(make_pair(name, ins)); + return ins; + } +} + +LIns * +FragmentAssembler::assemble_load() +{ + // Support implicit immediate-as-second-operand modes + // since, unlike sti/stqi, no immediate-displacement + // load opcodes were defined in LIR. + need(2); + if (mTokens[1].find("0x") == 0 || + mTokens[1].find("0x") == 0 || + mTokens[1].find_first_of("0123456789") == 0) { + return mLir->insLoad(mOpcode, + ref(mTokens[0]), + imm(mTokens[1])); + } + bad("immediate offset required for load"); + return NULL; // not reached +} + +LIns * +FragmentAssembler::assemble_call(const string &op) +{ + CallInfo *ci = new CallInfo(); + mCallInfos.push_back(ci); + LIns *args[MAXARGS]; + + // Assembler syntax for a call: + // + // call 0x1234 fastcall a b c + // + // requires at least 2 args, + // fn address immediate and ABI token. + + if (mTokens.size() < 2) + bad("need at least address and ABI code for " + op); + + string func = pop_front(mTokens); + string abi = pop_front(mTokens); + + AbiKind _abi; + if (abi == "fastcall") + _abi = ABI_FASTCALL; + else if (abi == "stdcall") + _abi = ABI_STDCALL; + else if (abi == "thiscall") + _abi = ABI_THISCALL; + else if (abi == "cdecl") + _abi = ABI_CDECL; + else + bad("call abi name '" + abi + "'"); + ci->_abi = _abi; + + if (mTokens.size() > MAXARGS) + bad("too many args to " + op); + + if (func.find("0x") == 0) { + ci->_address = imm(func); + + ci->_cse = 0; + ci->_fold = 0; + +#ifdef DEBUG + ci->_name = "fn"; +#endif + } else { + mParent->lookupFunction(func, ci); + if (ci == NULL) + bad("invalid function reference " + func); + if (_abi != ci->_abi) + bad("invalid calling convention for " + func); + } + + ci->_argtypes = 0; + + for (size_t i = 0; i < mTokens.size(); ++i) { + args[i] = ref(mTokens[mTokens.size() - (i+1)]); + ci->_argtypes |= args[i]->isQuad() ? ARGSIZE_F : ARGSIZE_LO; + ci->_argtypes <<= 2; + } + + // Select return type from opcode. + // FIXME: callh needs special treatment currently + // missing from here. + if (mOpcode == LIR_call) + ci->_argtypes |= ARGSIZE_LO; + else + ci->_argtypes |= ARGSIZE_F; + + return mLir->insCall(ci, args); +} + +LIns * +FragmentAssembler::assemble_guard() +{ + LIns *exitIns = do_skip(sizeof(LasmSideExit)); + LasmSideExit* exit = (LasmSideExit*) exitIns->payload(); + memset(exit, 0, sizeof(LasmSideExit)); + exit->from = mFragment; + exit->target = NULL; + exit->line = mLineno; + + LIns *guardRec = do_skip(sizeof(GuardRecord)); + GuardRecord *rec = (GuardRecord*) guardRec->payload(); + memset(rec, 0, sizeof(GuardRecord)); + rec->exit = exit; + exit->addGuard(rec); + + need(mOpcount); + + if (mOpcode != LIR_loop) + mReturnTypeBits |= RT_GUARD; + + LIns *ins_cond; + if (mOpcode == LIR_xt || mOpcode == LIR_xf) + ins_cond = ref(pop_front(mTokens)); + else + ins_cond = NULL; + + if (!mTokens.empty()) + bad("too many arguments"); + + return mLir->insGuard(mOpcode, ins_cond, guardRec); +} + +LIns * +FragmentAssembler::assemble_general() +{ + if (mOpcount == 0) { + // 0-ary ops may, or may not, have an immediate + // thing wedged in them; depends on the op. We + // are lax and set it if it's provided. + LIns *ins = mLir->ins0(mOpcode); + if (mTokens.size() > 0) { + assert(mTokens.size() == 1); + ins->initLInsI(mOpcode, imm(mTokens[0])); + } + return ins; + } else { + need(mOpcount); + if (mOpcount == 1) { + if (mOpcode == LIR_ret) + mReturnTypeBits |= RT_INT32; + if (mOpcode == LIR_fret) + mReturnTypeBits |= RT_FLOAT; + + return mLir->ins1(mOpcode, + ref(mTokens[0])); + } else if (mOpcount == 2) { + return mLir->ins2(mOpcode, + ref(mTokens[0]), + ref(mTokens[1])); + } else { + bad("too many operands"); + } + } + // Never get here. + return NULL; +} + +void +FragmentAssembler::endFragment() +{ + if (mReturnTypeBits == 0) { cerr << "warning: no return type in fragment '" << mFragName << "'" << endl; - if (mReturnTypeBits != RT_INT32 && mReturnTypeBits != RT_FLOAT - && mReturnTypeBits != RT_GUARD) + } + if (mReturnTypeBits != RT_INT32 && mReturnTypeBits != RT_FLOAT && + mReturnTypeBits != RT_GUARD) { cerr << "warning: multiple return types in fragment '" << mFragName << "'" << endl; - + } LIns *exitIns = do_skip(sizeof(SideExit)); SideExit* exit = (SideExit*) exitIns->payload(); memset(exit, 0, sizeof(SideExit)); @@ -645,15 +674,15 @@ LirasmAssembler::endFragment() if (mParent->mFragmento->assm()->error() != nanojit::None) { cerr << "error during assembly: "; switch (mParent->mFragmento->assm()->error()) { - case nanojit::OutOMem: cerr << "OutOMem"; break; - case nanojit::StackFull: cerr << "StackFull"; break; - case nanojit::RegionFull: cerr << "RegionFull"; break; - case nanojit::MaxLength: cerr << "MaxLength"; break; - case nanojit::MaxExit: cerr << "MaxExit"; break; - case nanojit::MaxXJump: cerr << "MaxXJump"; break; - case nanojit::UnknownPrim: cerr << "UnknownPrim"; break; - case nanojit::UnknownBranch: cerr << "UnknownBranch"; break; - case nanojit::None: cerr << "None"; break; + case nanojit::OutOMem: cerr << "OutOMem"; break; + case nanojit::StackFull: cerr << "StackFull"; break; + case nanojit::RegionFull: cerr << "RegionFull"; break; + case nanojit::MaxLength: cerr << "MaxLength"; break; + case nanojit::MaxExit: cerr << "MaxExit"; break; + case nanojit::MaxXJump: cerr << "MaxXJump"; break; + case nanojit::UnknownPrim: cerr << "UnknownPrim"; break; + case nanojit::UnknownBranch: cerr << "UnknownBranch"; break; + case nanojit::None: cerr << "None"; break; } cerr << endl; std::exit(1); @@ -663,170 +692,156 @@ LirasmAssembler::endFragment() f = &mParent->mFragments[mFragName]; switch (mReturnTypeBits) { - case RT_FLOAT: - default: - f->rfloat = reinterpret_cast(mFragment->code()); - f->mReturnType = RT_FLOAT; - break; - case RT_INT32: - f->rint = reinterpret_cast(mFragment->code()); - f->mReturnType = RT_INT32; - break; case RT_GUARD: f->rguard = reinterpret_cast(mFragment->code()); f->mReturnType = RT_GUARD; break; + case RT_FLOAT: + f->rfloat = reinterpret_cast(mFragment->code()); + f->mReturnType = RT_FLOAT; + break; + default: + f->rint = reinterpret_cast(mFragment->code()); + f->mReturnType = RT_INT32; + break; } - delete mVerboseWriter; - delete mExprFilter; - delete mCseFilter; - delete mBufWriter; - for (size_t i = 0; i < mParent->mCallInfos.size(); ++i) - delete mParent->mCallInfos[i]; - mParent->mCallInfos.clear(); - mParent->mFragments[mFragName].mLabels = mLabels; - mLabels.clear(); } void -LirasmAssembler::assemble(istream &in) +FragmentAssembler::tokenizeLine(LirTokenStream &in, LirToken &token) { -#define OPDEF(op, number, args, repkind) \ - op_map[#op] = make_pair(LIR_##op, args); -#define OPDEF64(op, number, args, repkind) \ - op_map[#op] = make_pair(LIR_##op, args); -#include "nanojit/LIRopcode.tbl" -#undef OPDEF -#undef OPDEF64 + mTokens.clear(); + mTokens.push_back(token.data); - op_map["alloc"] = op_map["ialloc"]; - op_map["param"] = op_map["iparam"]; + while (in.get(token)) { + if (token.type == NEWLINE) + break; + mTokens.push_back(token.data); + } +} - bool singleFrag = false; - bool first = true; - while(read_and_tokenize_line(in)) { +void +FragmentAssembler::extract_any_label(string &lab, char lab_delim) +{ + if (mTokens.size() > 2 && mTokens[1].size() == 1 && mTokens[1][0] == lab_delim) { + lab = pop_front(mTokens); + pop_front(mTokens); // remove punctuation - if (mParent->mLirbuf->outOMem()) { - cerr << "lirbuf out of memory" << endl; - exit(1); + if (mLabels.find(lab) != mLabels.end()) + bad("duplicate label"); + } +} + +void +FragmentAssembler::assembleFragment(LirTokenStream &in, bool implied, const LirToken *firstToken) +{ + LirToken token; + while (true) { + if (firstToken) { + token = *firstToken; + firstToken = NULL; + } else if (!in.get(token)) { + if (!implied) + bad("unexpected end of file in fragment '" + mFragName + "'"); + break; } - - if (mTokens.empty()) + if (token.type == NEWLINE) continue; + if (token.type != NAME) + bad("unexpected token '" + token.data + "'"); - string op = pop_front(mTokens); - - if (op == ".patch") { - tokenize("."); - patch(); - continue; + string op = token.data; + if (op == ".begin") + bad("nested fragments are not supported"); + if (op == ".end") { + if (implied) + bad(".end without .begin"); + if (!in.eat(NEWLINE)) + bad("extra junk after .end"); + break; } - if (!singleFrag) { - if (op == ".begin") { - if (mTokens.size() != 1) - bad("missing fragment name"); - if (mInFrag) - bad("nested fragments are not supported"); - - mFragName = pop_front(mTokens); - - beginFragment(); - first = false; - continue; - } else if (op == ".end") { - if (!mInFrag) - bad("expecting .begin before .end"); - if (!mTokens.empty()) - bad("too many tokens"); - endFragment(); - continue; - } - } - if (first) { - first = false; - singleFrag = true; - mFragName = "main"; - - beginFragment(); - } + mLineno = token.lineno; + tokenizeLine(in, token); string lab; LIns *ins = NULL; - extract_any_label(op, lab, ':'); + extract_any_label(lab, ':'); /* Save label and do any back-patching of deferred forward-jumps. */ if (!lab.empty()) { ins = mLir->ins0(LIR_label); - typedef multimap mulmap; + typedef multimap mulmap; typedef mulmap::const_iterator ci; - pair range = mFwdJumps.equal_range(lab); + pair range = mFwdJumps.equal_range(lab); for (ci i = range.first; i != range.second; ++i) { i->second->setTarget(ins); } mFwdJumps.erase(lab); lab.clear(); } - extract_any_label(op, lab, '='); + extract_any_label(lab, '='); - if (op_map.find(op) == op_map.end()) + assert(!mTokens.empty()); + op = pop_front(mTokens); + if (mParent->mOpMap.find(op) == mParent->mOpMap.end()) bad("unknown instruction '" + op + "'"); - pair entry = op_map[op]; + pair entry = mParent->mOpMap[op]; mOpcode = entry.first; mOpcount = entry.second; switch (mOpcode) { // A few special opcode cases. - case LIR_j: - case LIR_jt: - case LIR_jf: - case LIR_ji: + case LIR_j: + case LIR_jt: + case LIR_jf: + case LIR_ji: ins = assemble_jump(); break; - case LIR_int: + case LIR_int: need(1); ins = mLir->insImm(imm(mTokens[0])); break; - case LIR_quad: + case LIR_quad: need(1); ins = mLir->insImmq(quad(mTokens[0])); break; - case LIR_sti: - case LIR_stqi: + case LIR_sti: + case LIR_stqi: need(3); ins = mLir->insStorei(ref(mTokens[0]), ref(mTokens[1]), imm(mTokens[2])); break; - case LIR_ld: - case LIR_ldc: - case LIR_ldq: - case LIR_ldqc: - case LIR_ldcb: - case LIR_ldcs: + case LIR_ld: + case LIR_ldc: + case LIR_ldq: + case LIR_ldqc: + case LIR_ldcb: + case LIR_ldcs: ins = assemble_load(); break; - case LIR_iparam: + case LIR_iparam: need(2); ins = mLir->insParam(imm(mTokens[0]), imm(mTokens[1])); break; - case LIR_ialloc: + case LIR_ialloc: need(1); ins = mLir->insAlloc(imm(mTokens[0])); break; - case LIR_skip: + case LIR_skip: need(1); { int32_t count = imm(mTokens[0]); @@ -836,20 +851,21 @@ LirasmAssembler::assemble(istream &in) } break; - case LIR_xt: - case LIR_xf: - case LIR_x: - case LIR_xbarrier: - case LIR_loop: + case LIR_xt: + case LIR_xf: + case LIR_x: + case LIR_xbarrier: + case LIR_loop: ins = assemble_guard(); break; - case LIR_call: - case LIR_callh: - case LIR_fcall: + case LIR_call: + case LIR_callh: + case LIR_fcall: ins = assemble_call(op); break; - default: + + default: ins = assemble_general(); break; } @@ -857,47 +873,43 @@ LirasmAssembler::assemble(istream &in) assert(ins); if (!lab.empty()) mLabels.insert(make_pair(lab, ins)); - } - if (mInFrag && singleFrag) - endFragment(); - if (mInFrag) - bad("unexpected EOF"); - if (mParent->mLirbuf->outOMem()) { - cerr << "lirbuf out of memory" << endl; - exit(1); - } -} - -bool -has_flag(vector &args, string const &flag) -{ - for (vector::iterator i = args.begin(); - i != args.end(); ++i) { - if (*i == flag) { - args.erase(i); - return true; + if (mParent->mLirbuf->outOMem()) { + cerr << "lirbuf out of memory" << endl; + exit(1); } } - return false; + endFragment(); } - Lirasm::Lirasm(bool verbose) { mVerbose = verbose; nanojit::AvmCore::config.tree_opt = true; mLogc.lcbits = 0; - mFragmento = new (&gc) Fragmento(&s_core, &mLogc, 32); + mFragmento = new (&gc) Fragmento(&mCore, &mLogc, 32); mFragmento->labels = NULL; mLirbuf = new (&gc) LirBuffer(mFragmento); #ifdef DEBUG if (mVerbose) { mLogc.lcbits = LC_Assembly; - mFragmento->labels = new (&gc) LabelMap(&s_core); + mFragmento->labels = new (&gc) LabelMap(&mCore); mLirbuf->names = new (&gc) LirNameMap(&gc, mFragmento->labels); } #endif + + // Populate the mOpMap table. +#define OPDEF(op, number, args, repkind) \ + mOpMap[#op] = make_pair(LIR_##op, args); +#define OPDEF64(op, number, args, repkind) \ + mOpMap[#op] = make_pair(LIR_##op, args); +#include "nanojit/LIRopcode.tbl" +#undef OPDEF +#undef OPDEF64 + + // TODO - These should alias to the appropriate platform-specific LIR opcode. + mOpMap["alloc"] = mOpMap["ialloc"]; + mOpMap["param"] = mOpMap["iparam"]; } Lirasm::~Lirasm() @@ -912,49 +924,120 @@ Lirasm::~Lirasm() delete mFragmento; } -LirasmAssembler::LirasmAssembler(Lirasm &lasm) -{ - mParent = &lasm; - mInFrag = false; - mLineno = 0; -} - void -LirasmAssembler::tokenize(string const &tok_sep) +Lirasm::lookupFunction(const string &name, CallInfo *&ci) { - vector::iterator i; - for (i = mTokens.begin(); i < mTokens.end(); i++) - { - string line = *i; - size_t start = 0; - size_t end = 0; - while((start = line.find_first_not_of(tok_sep, end)) != string::npos && - (end = line.find_first_of(tok_sep, start)) != string::npos) { - const string ss = line.substr(start, (end-start)); - i->erase(start, end-start+1); - mTokens.insert(i++, ss); - mTokens.insert(i++, tok_sep); + const size_t nfuns = sizeof(functions) / sizeof(functions[0]); + for (size_t i = 0; i < nfuns; i++) { + if (name == functions[i].name) { + *ci = functions[i].callInfo; + return; } } + + Fragments::const_iterator func = mFragments.find(name); + if (func != mFragments.end()) { + if (func->second.mReturnType == RT_FLOAT) { + CallInfo target = {(uintptr_t) func->second.rfloat, ARGSIZE_F, 0, + 0, nanojit::ABI_FASTCALL, func->first.c_str()}; + *ci = target; + + } else { + CallInfo target = {(uintptr_t) func->second.rint, ARGSIZE_LO, 0, + 0, nanojit::ABI_FASTCALL, func->first.c_str()}; + *ci = target; + } + } else { + ci = NULL; + } } void -LirasmAssembler::patch() +Lirasm::assemble(istream &in) { - if (mTokens[1] != "." || mTokens[3] != "->") + LirTokenStream ts(in); + bool first = true; + + LirToken token; + while (ts.get(token)) { + if (mLirbuf->outOMem()) { + cerr << "lirbuf out of memory" << endl; + exit(1); + } + if (token.type == NEWLINE) + continue; + if (token.type != NAME) + bad("unexpected token '" + token.data + "'"); + + const string &op = token.data; + if (op == ".patch") { + handlePatch(ts); + } else if (op == ".begin") { + string name; + if (!ts.getName(name)) + bad("expected fragment name after .begin"); + if (!ts.eat(NEWLINE)) + bad("extra junk after .begin " + name); + + FragmentAssembler assembler(*this, name); + assembler.assembleFragment(ts, false, NULL); + first = false; + } else if (op == ".end") { + bad(".end without .begin"); + } else if (first) { + FragmentAssembler assembler(*this, "main"); + assembler.assembleFragment(ts, true, &token); + break; + } else { + bad("unexpected stray opcode '" + op + "'"); + } + } + + if (mLirbuf->outOMem()) { + cerr << "lirbuf out of memory" << endl; + exit(1); + } +} + +void +Lirasm::handlePatch(LirTokenStream &in) +{ + string src, fragName, guardName, destName; + + if (!in.getName(src) || !in.eat(PUNCT, "->") || !in.getName(destName)) bad("incorrect syntax"); + + // Break the src at '.'. This is awkward but the syntax looks nice. + size_t j = src.find('.'); + if (j == string::npos || j == 0 || j == src.size() - 1) + bad("incorrect syntax"); + fragName = src.substr(0, j); + guardName = src.substr(j + 1); + Fragments::iterator i; - if ((i=mParent->mFragments.find(mTokens[0])) == mParent->mFragments.end()) + if ((i=mFragments.find(fragName)) == mFragments.end()) bad("invalid fragment reference"); LirasmFragment *frag = &i->second; - if (frag->mLabels.find(mTokens[2]) == frag->mLabels.end()) + if (frag->mLabels.find(guardName) == frag->mLabels.end()) bad("invalid guard reference"); - LIns *ins = frag->mLabels.find(mTokens[2])->second; - if ((i=mParent->mFragments.find(mTokens[4])) == mParent->mFragments.end()) + LIns *ins = frag->mLabels.find(guardName)->second; + if ((i=mFragments.find(destName)) == mFragments.end()) bad("invalid guard reference"); ins->record()->exit->target = i->second.fragptr; - mParent->mFragmento->assm()->patch(ins->record()->exit); + mFragmento->assm()->patch(ins->record()->exit); +} + +bool +has_flag(vector &args, const string &flag) +{ + for (vector::iterator i = args.begin(); i != args.end(); ++i) { + if (*i == flag) { + args.erase(i); + return true; + } + } + return false; } int @@ -996,7 +1079,7 @@ main(int argc, char **argv) } Lirasm lasm(verbose); - LirasmAssembler(lasm).assemble(in); + lasm.assemble(in); Fragments::const_iterator i; if (execute) {