mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
127df225b6
--HG-- extra : convert_revision : f5f333d75636ed9e4c7869bdcf6aaa69beb35a2e
2218 lines
78 KiB
C++
Executable File
2218 lines
78 KiB
C++
Executable File
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
|
|
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is [Open Source Virtual Machine].
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Adobe System Incorporated.
|
|
* Portions created by the Initial Developer are Copyright (C) 2004-2007
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Adobe AS3 Team
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
#include "nanojit.h"
|
|
|
|
#ifdef FEATURE_NANOJIT
|
|
|
|
#ifdef VTUNE
|
|
#include "../core/CodegenLIR.h"
|
|
#endif
|
|
|
|
#ifdef _MSC_VER
|
|
// disable some specific warnings which are normally useful, but pervasive in the code-gen macros
|
|
#pragma warning(disable:4310) // cast truncates constant value
|
|
#endif
|
|
|
|
namespace nanojit
|
|
{
|
|
/**
|
|
* Need the following:
|
|
*
|
|
* - merging paths ( build a graph? ), possibly use external rep to drive codegen
|
|
*/
|
|
Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc)
|
|
: codeList(NULL)
|
|
, alloc(alloc)
|
|
, _codeAlloc(codeAlloc)
|
|
, _dataAlloc(dataAlloc)
|
|
, _thisfrag(NULL)
|
|
, _branchStateMap(alloc)
|
|
, _patches(alloc)
|
|
, _labels(alloc)
|
|
#if NJ_USES_QUAD_CONSTANTS
|
|
, _quadConstants(alloc)
|
|
#endif
|
|
, _epilogue(NULL)
|
|
, _err(None)
|
|
#if PEDANTIC
|
|
, pedanticTop(NULL)
|
|
#endif
|
|
#ifdef VTUNE
|
|
, cgen(NULL)
|
|
#endif
|
|
, config(core->config)
|
|
{
|
|
VMPI_memset(&_stats, 0, sizeof(_stats));
|
|
nInit(core);
|
|
(void)logc;
|
|
verbose_only( _logc = logc; )
|
|
verbose_only( _outputCache = 0; )
|
|
verbose_only( outline[0] = '\0'; )
|
|
verbose_only( outlineEOL[0] = '\0'; )
|
|
|
|
reset();
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
|
|
/*static*/ LIns* const AR::BAD_ENTRY = (LIns*)0xdeadbeef;
|
|
|
|
void AR::validateQuick()
|
|
{
|
|
NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
|
|
NanoAssert(_entries[0] == NULL);
|
|
// Only check a few entries around _highWaterMark.
|
|
uint32_t const RADIUS = 4;
|
|
uint32_t const lo = (_highWaterMark > 1 + RADIUS ? _highWaterMark - RADIUS : 1);
|
|
uint32_t const hi = (_highWaterMark + 1 + RADIUS < NJ_MAX_STACK_ENTRY ? _highWaterMark + 1 + RADIUS : NJ_MAX_STACK_ENTRY);
|
|
for (uint32_t i = lo; i <= _highWaterMark; ++i)
|
|
NanoAssert(_entries[i] != BAD_ENTRY);
|
|
for (uint32_t i = _highWaterMark+1; i < hi; ++i)
|
|
NanoAssert(_entries[i] == BAD_ENTRY);
|
|
}
|
|
|
|
void AR::validateFull()
|
|
{
|
|
NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
|
|
NanoAssert(_entries[0] == NULL);
|
|
for (uint32_t i = 1; i <= _highWaterMark; ++i)
|
|
NanoAssert(_entries[i] != BAD_ENTRY);
|
|
for (uint32_t i = _highWaterMark+1; i < NJ_MAX_STACK_ENTRY; ++i)
|
|
NanoAssert(_entries[i] == BAD_ENTRY);
|
|
}
|
|
|
|
void AR::validate()
|
|
{
|
|
static uint32_t validateCounter = 0;
|
|
if (++validateCounter >= 100)
|
|
{
|
|
validateFull();
|
|
validateCounter = 0;
|
|
}
|
|
else
|
|
{
|
|
validateQuick();
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
inline void AR::clear()
|
|
{
|
|
_highWaterMark = 0;
|
|
NanoAssert(_entries[0] == NULL);
|
|
#ifdef _DEBUG
|
|
for (uint32_t i = 1; i < NJ_MAX_STACK_ENTRY; ++i)
|
|
_entries[i] = BAD_ENTRY;
|
|
#endif
|
|
}
|
|
|
|
bool AR::Iter::next(LIns*& ins, uint32_t& nStackSlots, int32_t& arIndex)
|
|
{
|
|
while (++_i <= _ar._highWaterMark)
|
|
{
|
|
if ((ins = _ar._entries[_i]) != NULL)
|
|
{
|
|
nStackSlots = nStackSlotsFor(ins);
|
|
_i += nStackSlots - 1;
|
|
arIndex = _i;
|
|
return true;
|
|
}
|
|
}
|
|
ins = NULL;
|
|
nStackSlots = 0;
|
|
arIndex = 0;
|
|
return false;
|
|
}
|
|
|
|
void Assembler::arReset()
|
|
{
|
|
_activation.clear();
|
|
_branchStateMap.clear();
|
|
_patches.clear();
|
|
_labels.clear();
|
|
#if NJ_USES_QUAD_CONSTANTS
|
|
_quadConstants.clear();
|
|
#endif
|
|
}
|
|
|
|
void Assembler::registerResetAll()
|
|
{
|
|
nRegisterResetAll(_allocator);
|
|
|
|
// At start, should have some registers free and none active.
|
|
NanoAssert(0 != _allocator.free);
|
|
NanoAssert(0 == _allocator.countActive());
|
|
#ifdef NANOJIT_IA32
|
|
debug_only(_fpuStkDepth = 0; )
|
|
#endif
|
|
}
|
|
|
|
// Finds a register in 'allow' to store the result of 'ins', evicting one
|
|
// if necessary. Doesn't consider the prior state of 'ins' (except that
|
|
// ins->isUsed() must be true).
|
|
Register Assembler::registerAlloc(LIns* ins, RegisterMask allow)
|
|
{
|
|
RegisterMask allowedAndFree = allow & _allocator.free;
|
|
Register r;
|
|
NanoAssert(ins->isUsed());
|
|
|
|
if (allowedAndFree) {
|
|
// At least one usable register is free -- no need to steal.
|
|
// Pick a preferred one if possible.
|
|
RegisterMask preferredAndFree = allowedAndFree & SavedRegs;
|
|
RegisterMask set = ( preferredAndFree ? preferredAndFree : allowedAndFree );
|
|
r = nRegisterAllocFromSet(set);
|
|
_allocator.addActive(r, ins);
|
|
ins->setReg(r);
|
|
} else {
|
|
counter_increment(steals);
|
|
|
|
// Nothing free, steal one.
|
|
// LSRA says pick the one with the furthest use.
|
|
LIns* vic = findVictim(allow);
|
|
NanoAssert(vic->isUsed());
|
|
r = vic->getReg();
|
|
|
|
evict(vic);
|
|
|
|
// r ends up staying active, but the LIns defining it changes.
|
|
_allocator.removeFree(r);
|
|
_allocator.addActive(r, ins);
|
|
ins->setReg(r);
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
// Finds a register in 'allow' to store a temporary value (one not
|
|
// associated with a particular LIns), evicting one if necessary. The
|
|
// returned register is marked as being free and so can only be safely
|
|
// used for code generation purposes until the regstate is next inspected
|
|
// or updated.
|
|
Register Assembler::registerAllocTmp(RegisterMask allow)
|
|
{
|
|
LIns dummyIns;
|
|
dummyIns.markAsUsed();
|
|
Register r = registerAlloc(&dummyIns, allow);
|
|
|
|
// Mark r as free, ready for use as a temporary value.
|
|
_allocator.removeActive(r);
|
|
_allocator.addFree(r);
|
|
return r;
|
|
}
|
|
|
|
/**
|
|
* these instructions don't have to be saved & reloaded to spill,
|
|
* they can just be recalculated w/out any inputs.
|
|
*/
|
|
bool Assembler::canRemat(LIns *i) {
|
|
return i->isconst() || i->isconstq() || i->isop(LIR_alloc);
|
|
}
|
|
|
|
void Assembler::codeAlloc(NIns *&start, NIns *&end, NIns *&eip
|
|
verbose_only(, size_t &nBytes))
|
|
{
|
|
// save the block we just filled
|
|
if (start)
|
|
CodeAlloc::add(codeList, start, end);
|
|
|
|
// CodeAlloc contract: allocations never fail
|
|
_codeAlloc.alloc(start, end);
|
|
verbose_only( nBytes += (end - start) * sizeof(NIns); )
|
|
NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
|
|
eip = end;
|
|
|
|
#ifdef VTUNE
|
|
if (_nIns && _nExitIns) {
|
|
//cgen->jitAddRecord((uintptr_t)list->code, 0, 0, true); // add placeholder record for top of page
|
|
cgen->jitCodePosUpdate((uintptr_t)list->code);
|
|
cgen->jitPushInfo(); // new page requires new entry
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void Assembler::reset()
|
|
{
|
|
_nIns = 0;
|
|
_nExitIns = 0;
|
|
codeStart = codeEnd = 0;
|
|
exitStart = exitEnd = 0;
|
|
_stats.pages = 0;
|
|
codeList = 0;
|
|
|
|
nativePageReset();
|
|
registerResetAll();
|
|
arReset();
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
void Assembler::pageValidate()
|
|
{
|
|
if (error()) return;
|
|
// This may be a normal code chunk or an exit code chunk.
|
|
NanoAssertMsg(containsPtr(codeStart, codeEnd, _nIns),
|
|
"Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
|
|
}
|
|
#endif
|
|
|
|
#ifdef _DEBUG
|
|
|
|
bool AR::isValidEntry(uint32_t idx, LIns* ins) const
|
|
{
|
|
return idx > 0 && idx <= _highWaterMark && _entries[idx] == ins;
|
|
}
|
|
|
|
void AR::checkForResourceConsistency(const RegAlloc& regs)
|
|
{
|
|
validate();
|
|
for (uint32_t i = 1; i <= _highWaterMark; ++i)
|
|
{
|
|
LIns* ins = _entries[i];
|
|
if (!ins)
|
|
continue;
|
|
Register r = ins->getReg();
|
|
uint32_t arIndex = ins->getArIndex();
|
|
NanoAssert(arIndex != 0);
|
|
if (ins->isop(LIR_alloc)) {
|
|
int const n = i + (ins->size()>>2);
|
|
for (int j=i+1; j < n; j++) {
|
|
NanoAssert(_entries[j]==ins);
|
|
}
|
|
NanoAssert(arIndex == (uint32_t)n-1);
|
|
i = n-1;
|
|
}
|
|
else if (ins->isQuad()) {
|
|
NanoAssert(_entries[i + 1]==ins);
|
|
i += 1; // skip high word
|
|
}
|
|
else {
|
|
NanoAssertMsg(arIndex == i, "Stack record index mismatch");
|
|
}
|
|
NanoAssertMsg(r == UnknownReg || regs.isConsistent(r, ins), "Register record mismatch");
|
|
}
|
|
}
|
|
|
|
void Assembler::resourceConsistencyCheck()
|
|
{
|
|
NanoAssert(!error());
|
|
|
|
#ifdef NANOJIT_IA32
|
|
NanoAssert((_allocator.active[FST0] && _fpuStkDepth == -1) ||
|
|
(!_allocator.active[FST0] && _fpuStkDepth == 0));
|
|
#endif
|
|
|
|
_activation.checkForResourceConsistency(_allocator);
|
|
|
|
registerConsistencyCheck();
|
|
}
|
|
|
|
void Assembler::registerConsistencyCheck()
|
|
{
|
|
RegisterMask managed = _allocator.managed;
|
|
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
|
if (rmask(r) & managed) {
|
|
// A register managed by register allocation must be either
|
|
// free or active, but not both.
|
|
if (_allocator.isFree(r)) {
|
|
NanoAssertMsgf(_allocator.getActive(r)==0,
|
|
"register %s is free but assigned to ins", gpn(r));
|
|
} else {
|
|
// An LIns defining a register must have that register in
|
|
// its reservation.
|
|
LIns* ins = _allocator.getActive(r);
|
|
NanoAssert(ins);
|
|
NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
|
|
}
|
|
} else {
|
|
// A register not managed by register allocation must be
|
|
// neither free nor active.
|
|
NanoAssert(!_allocator.isFree(r));
|
|
NanoAssert(!_allocator.getActive(r));
|
|
}
|
|
}
|
|
}
|
|
#endif /* _DEBUG */
|
|
|
|
void Assembler::findRegFor2(RegisterMask allow, LIns* ia, Register& ra, LIns* ib, Register& rb)
|
|
{
|
|
if (ia == ib) {
|
|
ra = rb = findRegFor(ia, allow);
|
|
} else {
|
|
// You might think we could just do this:
|
|
//
|
|
// ra = findRegFor(ia, allow);
|
|
// rb = findRegFor(ib, allow & ~rmask(ra));
|
|
//
|
|
// But if 'ib' was already in an allowed register, the first
|
|
// findRegFor() call could evict it, whereupon the second
|
|
// findRegFor() call would immediately restore it, which is
|
|
// sub-optimal. What we effectively do instead is this:
|
|
//
|
|
// ra = findRegFor(ia, allow & ~rmask(rb));
|
|
// rb = findRegFor(ib, allow & ~rmask(ra));
|
|
//
|
|
// but we have to determine what 'rb' initially is to avoid the
|
|
// mutual dependency between the assignments.
|
|
bool rbDone = !ib->isUnusedOrHasUnknownReg() && (rb = ib->getReg(), allow & rmask(rb));
|
|
if (rbDone) {
|
|
allow &= ~rmask(rb); // ib already in an allowable reg, keep that one
|
|
}
|
|
ra = findRegFor(ia, allow);
|
|
if (!rbDone) {
|
|
allow &= ~rmask(ra);
|
|
rb = findRegFor(ib, allow);
|
|
}
|
|
}
|
|
}
|
|
|
|
Register Assembler::findSpecificRegFor(LIns* i, Register w)
|
|
{
|
|
return findRegFor(i, rmask(w));
|
|
}
|
|
|
|
// Like findRegFor(), but called when the LIns is used as a pointer. It
|
|
// doesn't have to be called, findRegFor() can still be used, but it can
|
|
// optimize the LIR_alloc case by indexing off FP, thus saving the use of
|
|
// a GpReg.
|
|
//
|
|
Register Assembler::getBaseReg(LIns *i, int &d, RegisterMask allow)
|
|
{
|
|
#if !PEDANTIC
|
|
if (i->isop(LIR_alloc)) {
|
|
// The value of a LIR_alloc is a pointer to its stack memory,
|
|
// which is always relative to FP. So we can just return FP if we
|
|
// also adjust 'd' (and can do so in a valid manner). Or, in the
|
|
// PEDANTIC case, we can just assign a register as normal;
|
|
// findRegFor() will allocate the stack memory for LIR_alloc if
|
|
// necessary.
|
|
d += findMemFor(i);
|
|
return FP;
|
|
}
|
|
#else
|
|
(void) d;
|
|
#endif
|
|
return findRegFor(i, allow);
|
|
}
|
|
|
|
// Finds a register in 'allow' to hold the result of 'ins'. Used when we
|
|
// encounter a use of 'ins'. The actions depend on the prior regstate of
|
|
// 'ins':
|
|
// - If the result of 'ins' is not in any register, we find an allowed
|
|
// one, evicting one if necessary.
|
|
// - If the result of 'ins' is already in an allowed register, we use that.
|
|
// - If the result of 'ins' is already in a not-allowed register, we find an
|
|
// allowed one and move it.
|
|
//
|
|
Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
|
|
{
|
|
if (ins->isop(LIR_alloc)) {
|
|
// Never allocate a reg for this without stack space too.
|
|
findMemFor(ins);
|
|
}
|
|
|
|
Register r;
|
|
|
|
if (!ins->isUsed()) {
|
|
// 'ins' is unused, ie. dead after this point. Mark it as used
|
|
// and allocate it a register.
|
|
ins->markAsUsed();
|
|
RegisterMask prefer = hint(ins, allow);
|
|
r = registerAlloc(ins, prefer);
|
|
|
|
} else if (!ins->hasKnownReg()) {
|
|
// 'ins' is in a spill slot. Allocate it a register.
|
|
RegisterMask prefer = hint(ins, allow);
|
|
r = registerAlloc(ins, prefer);
|
|
|
|
} else if (rmask(r = ins->getReg()) & allow) {
|
|
// 'ins' is in an allowed register.
|
|
_allocator.useActive(r);
|
|
|
|
} else {
|
|
// 'ins' is in a register (r) that's not in 'allow'.
|
|
RegisterMask prefer = hint(ins, allow);
|
|
#ifdef NANOJIT_IA32
|
|
if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
|
|
((rmask(r)&x87Regs) && !(allow&x87Regs)))
|
|
{
|
|
// x87 <-> xmm copy required
|
|
//_nvprof("fpu-evict",1);
|
|
evict(ins);
|
|
r = registerAlloc(ins, prefer);
|
|
} else
|
|
#elif defined(NANOJIT_PPC)
|
|
if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
|
|
((rmask(r)&FpRegs) && !(allow&FpRegs)))
|
|
{
|
|
evict(ins);
|
|
r = registerAlloc(ins, prefer);
|
|
} else
|
|
#endif
|
|
{
|
|
// The post-state register holding 'ins' is 's', the pre-state
|
|
// register holding 'ins' is 'r'. For example, if s=eax and
|
|
// r=ecx:
|
|
//
|
|
// pre-state: ecx(ins)
|
|
// instruction: mov eax, ecx
|
|
// post-state: eax(ins)
|
|
//
|
|
Register s = r;
|
|
_allocator.retire(r);
|
|
r = registerAlloc(ins, prefer);
|
|
|
|
// 'ins' is in 'allow', in register r (different to the old r);
|
|
// s is the old r.
|
|
if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
|
|
MR(s, r); // move 'ins' from its pre-state reg (r) to its post-state reg (s)
|
|
} else {
|
|
asm_nongp_copy(s, r);
|
|
}
|
|
}
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
// Like findSpecificRegFor(), but only for when 'r' is known to be free
|
|
// and 'ins' is known to not already have a register allocated. Updates
|
|
// the regstate (maintaining the invariants) but does not generate any
|
|
// code. The return value is redundant, always being 'r', but it's
|
|
// sometimes useful to have it there for assignments.
|
|
Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
|
|
{
|
|
if (ins->isop(LIR_alloc)) {
|
|
// never allocate a reg for this w/out stack space too
|
|
findMemFor(ins);
|
|
}
|
|
|
|
NanoAssert(ins->isUnusedOrHasUnknownReg());
|
|
NanoAssert(_allocator.free & rmask(r));
|
|
|
|
if (!ins->isUsed())
|
|
ins->markAsUsed();
|
|
ins->setReg(r);
|
|
_allocator.removeFree(r);
|
|
_allocator.addActive(r, ins);
|
|
|
|
return r;
|
|
}
|
|
|
|
#if NJ_USES_QUAD_CONSTANTS
|
|
const uint64_t* Assembler::findQuadConstant(uint64_t q)
|
|
{
|
|
uint64_t* p = _quadConstants.get(q);
|
|
if (!p)
|
|
{
|
|
p = new (_dataAlloc) uint64_t;
|
|
*p = q;
|
|
_quadConstants.put(q, p);
|
|
}
|
|
return p;
|
|
}
|
|
#endif
|
|
|
|
int Assembler::findMemFor(LIns *ins)
|
|
{
|
|
#if NJ_USES_QUAD_CONSTANTS
|
|
NanoAssert(!ins->isconstq());
|
|
#endif
|
|
if (!ins->isUsed())
|
|
ins->markAsUsed();
|
|
if (!ins->getArIndex()) {
|
|
uint32_t const arIndex = arReserve(ins);
|
|
ins->setArIndex(arIndex);
|
|
NanoAssert(_activation.isValidEntry(ins->getArIndex(), ins) == (arIndex != 0));
|
|
}
|
|
return disp(ins);
|
|
}
|
|
|
|
// XXX: this function is dangerous and should be phased out;
|
|
// See bug 513615. Calls to it should replaced it with a
|
|
// prepareResultReg() / generate code / freeResourcesOf() sequence.
|
|
Register Assembler::prepResultReg(LIns *ins, RegisterMask allow)
|
|
{
|
|
#ifdef NANOJIT_IA32
|
|
const bool pop = (allow & rmask(FST0)) &&
|
|
(ins->isUnusedOrHasUnknownReg() || ins->getReg() != FST0);
|
|
#else
|
|
const bool pop = false;
|
|
#endif
|
|
Register r = findRegFor(ins, allow);
|
|
freeRsrcOf(ins, pop);
|
|
return r;
|
|
}
|
|
|
|
// Finds a register in 'allow' to hold the result of 'ins'. Also
|
|
// generates code to spill the result if necessary. Called just prior to
|
|
// generating the code for 'ins' (because we generate code backwards).
|
|
//
|
|
// An example where no spill is necessary. Lines marked '*' are those
|
|
// done by this function.
|
|
//
|
|
// regstate: R
|
|
// asm: define res into r
|
|
// * regstate: R + r(res)
|
|
// ...
|
|
// asm: use res in r
|
|
//
|
|
// An example where a spill is necessary.
|
|
//
|
|
// regstate: R
|
|
// asm: define res into r
|
|
// * regstate: R + r(res)
|
|
// * asm: spill res from r
|
|
// regstate: R
|
|
// ...
|
|
// asm: restore res into r2
|
|
// regstate: R + r2(res) + other changes from "..."
|
|
// asm: use res in r2
|
|
//
|
|
Register Assembler::prepareResultReg(LIns *ins, RegisterMask allow)
|
|
{
|
|
// At this point, we know the result of 'ins' result has a use later
|
|
// in the code. (Exception: if 'ins' is a call to an impure function
|
|
// the return value may not be used, but 'ins' will still be present
|
|
// because it has side-effects.) It may have had to be evicted, in
|
|
// which case the restore will have already been generated, so we now
|
|
// generate the spill (unless the restore was actually a
|
|
// rematerialize, in which case it's not necessary).
|
|
//
|
|
// As for 'pop': it's only relevant on i386 and if 'allow' includes
|
|
// FST0, in which case we have to pop if 'ins' isn't in FST0 in the
|
|
// post-regstate. This could be because 'ins' is unused, 'ins' is in
|
|
// a spill slot, or 'ins' is in an XMM register.
|
|
#ifdef NANOJIT_IA32
|
|
const bool pop = (allow & rmask(FST0)) &&
|
|
(ins->isUnusedOrHasUnknownReg() || ins->getReg() != FST0);
|
|
#else
|
|
const bool pop = false;
|
|
#endif
|
|
Register r = findRegFor(ins, allow);
|
|
asm_spilli(ins, pop);
|
|
return r;
|
|
}
|
|
|
|
void Assembler::asm_spilli(LInsp ins, bool pop)
|
|
{
|
|
int d = disp(ins);
|
|
Register r = ins->getReg();
|
|
verbose_only( if (d && (_logc->lcbits & LC_Assembly)) {
|
|
setOutputForEOL(" <= spill %s",
|
|
_thisfrag->lirbuf->names->formatRef(ins)); } )
|
|
asm_spill(r, d, pop, ins->isQuad());
|
|
}
|
|
|
|
// XXX: This function is error-prone and should be phased out; see bug 513615.
|
|
void Assembler::freeRsrcOf(LIns *ins, bool pop)
|
|
{
|
|
Register r = ins->getReg();
|
|
if (isKnownReg(r)) {
|
|
asm_spilli(ins, pop);
|
|
_allocator.retire(r); // free any register associated with entry
|
|
}
|
|
arFreeIfInUse(ins); // free any stack stack space associated with entry
|
|
ins->markAsClear();
|
|
}
|
|
|
|
// Frees all record of registers and spill slots used by 'ins'.
|
|
void Assembler::freeResourcesOf(LIns *ins)
|
|
{
|
|
Register r = ins->getReg();
|
|
if (isKnownReg(r)) {
|
|
_allocator.retire(r); // free any register associated with entry
|
|
}
|
|
arFreeIfInUse(ins); // free any stack stack space associated with entry
|
|
ins->markAsClear();
|
|
}
|
|
|
|
// Frees 'r' in the RegAlloc regstate, if it's not already free.
|
|
void Assembler::evictIfActive(Register r)
|
|
{
|
|
if (LIns* vic = _allocator.getActive(r)) {
|
|
NanoAssert(vic->getReg() == r);
|
|
evict(vic);
|
|
}
|
|
}
|
|
|
|
// Frees 'r' (which currently holds the result of 'vic') in the regstate.
|
|
// An example:
|
|
//
|
|
// pre-regstate: eax(ld1)
|
|
// instruction: mov ebx,-4(ebp) <= restore add1 # %ebx is dest
|
|
// post-regstate: eax(ld1) ebx(add1)
|
|
//
|
|
// At run-time we are *restoring* 'add1' into %ebx, hence the call to
|
|
// asm_restore(). But at regalloc-time we are moving backwards through
|
|
// the code, so in that sense we are *evicting* 'add1' from %ebx.
|
|
//
|
|
void Assembler::evict(LIns* vic)
|
|
{
|
|
// Not free, need to steal.
|
|
counter_increment(steals);
|
|
|
|
Register r = vic->getReg();
|
|
|
|
NanoAssert(!_allocator.isFree(r));
|
|
NanoAssert(vic == _allocator.getActive(r));
|
|
|
|
verbose_only( if (_logc->lcbits & LC_Assembly) {
|
|
setOutputForEOL(" <= restore %s",
|
|
_thisfrag->lirbuf->names->formatRef(vic)); } )
|
|
asm_restore(vic, r);
|
|
|
|
_allocator.retire(r);
|
|
if (vic->isUsed())
|
|
vic->setReg(UnknownReg);
|
|
|
|
// At this point 'vic' is unused (if rematerializable), or in a spill
|
|
// slot (if not).
|
|
}
|
|
|
|
void Assembler::patch(GuardRecord *lr)
|
|
{
|
|
if (!lr->jmp) // the guard might have been eliminated as redundant
|
|
return;
|
|
Fragment *frag = lr->exit->target;
|
|
NanoAssert(frag->fragEntry != 0);
|
|
nPatchBranch((NIns*)lr->jmp, frag->fragEntry);
|
|
CodeAlloc::flushICache(lr->jmp, LARGEST_BRANCH_PATCH);
|
|
verbose_only(verbose_outputf("patching jump at %p to target %p\n",
|
|
lr->jmp, frag->fragEntry);)
|
|
}
|
|
|
|
void Assembler::patch(SideExit *exit)
|
|
{
|
|
GuardRecord *rec = exit->guards;
|
|
NanoAssert(rec);
|
|
while (rec) {
|
|
patch(rec);
|
|
rec = rec->next;
|
|
}
|
|
}
|
|
|
|
#ifdef NANOJIT_IA32
|
|
void Assembler::patch(SideExit* exit, SwitchInfo* si)
|
|
{
|
|
for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
|
|
Fragment *frag = lr->exit->target;
|
|
NanoAssert(frag->fragEntry != 0);
|
|
si->table[si->index] = frag->fragEntry;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
NIns* Assembler::asm_exit(LInsp guard)
|
|
{
|
|
SideExit *exit = guard->record()->exit;
|
|
NIns* at = 0;
|
|
if (!_branchStateMap.get(exit))
|
|
{
|
|
at = asm_leave_trace(guard);
|
|
}
|
|
else
|
|
{
|
|
RegAlloc* captured = _branchStateMap.get(exit);
|
|
intersectRegisterState(*captured);
|
|
at = exit->target->fragEntry;
|
|
NanoAssert(at != 0);
|
|
_branchStateMap.remove(exit);
|
|
}
|
|
return at;
|
|
}
|
|
|
|
NIns* Assembler::asm_leave_trace(LInsp guard)
|
|
{
|
|
verbose_only( int32_t nativeSave = _stats.native );
|
|
verbose_only( verbose_outputf("----------------------------------- ## END exit block %p", guard);)
|
|
|
|
// This point is unreachable. So free all the registers. If an
|
|
// instruction has a stack entry we will leave it alone, otherwise we
|
|
// free it entirely. intersectRegisterState() will restore.
|
|
RegAlloc capture = _allocator;
|
|
releaseRegisters();
|
|
|
|
swapCodeChunks();
|
|
_inExit = true;
|
|
|
|
#ifdef NANOJIT_IA32
|
|
debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
|
|
#endif
|
|
|
|
nFragExit(guard);
|
|
|
|
// Restore the callee-saved register and parameters.
|
|
assignSavedRegs();
|
|
assignParamRegs();
|
|
|
|
intersectRegisterState(capture);
|
|
|
|
// this can be useful for breaking whenever an exit is taken
|
|
//INT3();
|
|
//NOP();
|
|
|
|
// we are done producing the exit logic for the guard so demark where our exit block code begins
|
|
NIns* jmpTarget = _nIns; // target in exit path for our mainline conditional jump
|
|
|
|
// swap back pointers, effectively storing the last location used in the exit path
|
|
swapCodeChunks();
|
|
_inExit = false;
|
|
|
|
//verbose_only( verbose_outputf(" LIR_xt/xf swapCodeChunks, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
|
|
verbose_only( verbose_outputf("%010lx:", (unsigned long)jmpTarget);)
|
|
verbose_only( verbose_outputf("----------------------------------- ## BEGIN exit block (LIR_xt|LIR_xf)") );
|
|
|
|
#ifdef NANOJIT_IA32
|
|
NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d",_fpuStkDepth, _sv_fpuStkDepth);
|
|
debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
|
|
#endif
|
|
|
|
verbose_only(_stats.exitnative += (_stats.native-nativeSave));
|
|
|
|
return jmpTarget;
|
|
}
|
|
|
|
void Assembler::beginAssembly(Fragment *frag)
|
|
{
|
|
verbose_only( codeBytes = 0; )
|
|
verbose_only( exitBytes = 0; )
|
|
|
|
reset();
|
|
|
|
NanoAssert(codeList == 0);
|
|
NanoAssert(codeStart == 0);
|
|
NanoAssert(codeEnd == 0);
|
|
NanoAssert(exitStart == 0);
|
|
NanoAssert(exitEnd == 0);
|
|
NanoAssert(_nIns == 0);
|
|
NanoAssert(_nExitIns == 0);
|
|
|
|
_thisfrag = frag;
|
|
_inExit = false;
|
|
|
|
counter_reset(native);
|
|
counter_reset(exitnative);
|
|
counter_reset(steals);
|
|
counter_reset(spills);
|
|
counter_reset(remats);
|
|
|
|
setError(None);
|
|
|
|
// native code gen buffer setup
|
|
nativePageSetup();
|
|
|
|
// make sure we got memory at least one page
|
|
if (error()) return;
|
|
|
|
#ifdef PERFM
|
|
_stats.pages = 0;
|
|
_stats.codeStart = _nIns-1;
|
|
_stats.codeExitStart = _nExitIns-1;
|
|
#endif /* PERFM */
|
|
|
|
_epilogue = NULL;
|
|
|
|
nBeginAssembly();
|
|
}
|
|
|
|
void Assembler::assemble(Fragment* frag, LirFilter* reader)
|
|
{
|
|
if (error()) return;
|
|
_thisfrag = frag;
|
|
|
|
// check the fragment is starting out with a sane profiling state
|
|
verbose_only( NanoAssert(frag->nStaticExits == 0); )
|
|
verbose_only( NanoAssert(frag->nCodeBytes == 0); )
|
|
verbose_only( NanoAssert(frag->nExitBytes == 0); )
|
|
verbose_only( NanoAssert(frag->profCount == 0); )
|
|
verbose_only( if (_logc->lcbits & LC_FragProfile)
|
|
NanoAssert(frag->profFragID > 0);
|
|
else
|
|
NanoAssert(frag->profFragID == 0); )
|
|
|
|
_inExit = false;
|
|
|
|
gen(reader);
|
|
|
|
if (!error()) {
|
|
// patch all branches
|
|
NInsMap::Iter iter(_patches);
|
|
while (iter.next()) {
|
|
NIns* where = iter.key();
|
|
LIns* target = iter.value();
|
|
if (target->isop(LIR_jtbl)) {
|
|
// Need to patch up a whole jump table, 'where' is the table.
|
|
LIns *jtbl = target;
|
|
NIns** native_table = (NIns**) where;
|
|
for (uint32_t i = 0, n = jtbl->getTableSize(); i < n; i++) {
|
|
LabelState* lstate = _labels.get(jtbl->getTarget(i));
|
|
NIns* ntarget = lstate->addr;
|
|
if (ntarget) {
|
|
native_table[i] = ntarget;
|
|
} else {
|
|
setError(UnknownBranch);
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
// target is a label for a single-target branch
|
|
LabelState *lstate = _labels.get(target);
|
|
NIns* ntarget = lstate->addr;
|
|
if (ntarget) {
|
|
nPatchBranch(where, ntarget);
|
|
} else {
|
|
setError(UnknownBranch);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void Assembler::endAssembly(Fragment* frag)
|
|
{
|
|
// don't try to patch code if we are in an error state since we might have partially
|
|
// overwritten the code cache already
|
|
if (error()) {
|
|
// something went wrong, release all allocated code memory
|
|
_codeAlloc.freeAll(codeList);
|
|
_codeAlloc.free(exitStart, exitEnd);
|
|
_codeAlloc.free(codeStart, codeEnd);
|
|
return;
|
|
}
|
|
|
|
NIns* fragEntry = genPrologue();
|
|
verbose_only( asm_output("[prologue]"); )
|
|
|
|
debug_only(_activation.checkForResourceLeaks());
|
|
|
|
NanoAssert(!_inExit);
|
|
// save used parts of current block on fragment's code list, free the rest
|
|
#ifdef NANOJIT_ARM
|
|
// [codeStart, _nSlot) ... gap ... [_nIns, codeEnd)
|
|
_codeAlloc.addRemainder(codeList, exitStart, exitEnd, _nExitSlot, _nExitIns);
|
|
_codeAlloc.addRemainder(codeList, codeStart, codeEnd, _nSlot, _nIns);
|
|
verbose_only( exitBytes -= (_nExitIns - _nExitSlot) * sizeof(NIns); )
|
|
verbose_only( codeBytes -= (_nIns - _nSlot) * sizeof(NIns); )
|
|
#else
|
|
// [codeStart ... gap ... [_nIns, codeEnd))
|
|
_codeAlloc.addRemainder(codeList, exitStart, exitEnd, exitStart, _nExitIns);
|
|
_codeAlloc.addRemainder(codeList, codeStart, codeEnd, codeStart, _nIns);
|
|
verbose_only( exitBytes -= (_nExitIns - exitStart) * sizeof(NIns); )
|
|
verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
|
|
#endif
|
|
|
|
// at this point all our new code is in the d-cache and not the i-cache,
|
|
// so flush the i-cache on cpu's that need it.
|
|
CodeAlloc::flushICache(codeList);
|
|
|
|
// save entry point pointers
|
|
frag->fragEntry = fragEntry;
|
|
frag->setCode(_nIns);
|
|
PERFM_NVPROF("code", CodeAlloc::size(codeList));
|
|
|
|
#ifdef NANOJIT_IA32
|
|
NanoAssertMsgf(_fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);
|
|
#endif
|
|
|
|
debug_only( pageValidate(); )
|
|
NanoAssert(_branchStateMap.isEmpty());
|
|
}
|
|
|
|
void Assembler::releaseRegisters()
|
|
{
|
|
for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
|
|
{
|
|
LIns *ins = _allocator.getActive(r);
|
|
if (ins) {
|
|
// Clear reg allocation, preserve stack allocation.
|
|
_allocator.retire(r);
|
|
NanoAssert(r == ins->getReg());
|
|
ins->setReg(UnknownReg);
|
|
|
|
if (!ins->getArIndex()) {
|
|
ins->markAsClear();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef PERFM
|
|
#define countlir_live() _nvprof("lir-live",1)
|
|
#define countlir_ret() _nvprof("lir-ret",1)
|
|
#define countlir_alloc() _nvprof("lir-alloc",1)
|
|
#define countlir_var() _nvprof("lir-var",1)
|
|
#define countlir_use() _nvprof("lir-use",1)
|
|
#define countlir_def() _nvprof("lir-def",1)
|
|
#define countlir_imm() _nvprof("lir-imm",1)
|
|
#define countlir_param() _nvprof("lir-param",1)
|
|
#define countlir_cmov() _nvprof("lir-cmov",1)
|
|
#define countlir_ld() _nvprof("lir-ld",1)
|
|
#define countlir_ldq() _nvprof("lir-ldq",1)
|
|
#define countlir_alu() _nvprof("lir-alu",1)
|
|
#define countlir_qjoin() _nvprof("lir-qjoin",1)
|
|
#define countlir_qlo() _nvprof("lir-qlo",1)
|
|
#define countlir_qhi() _nvprof("lir-qhi",1)
|
|
#define countlir_fpu() _nvprof("lir-fpu",1)
|
|
#define countlir_st() _nvprof("lir-st",1)
|
|
#define countlir_stq() _nvprof("lir-stq",1)
|
|
#define countlir_jmp() _nvprof("lir-jmp",1)
|
|
#define countlir_jcc() _nvprof("lir-jcc",1)
|
|
#define countlir_label() _nvprof("lir-label",1)
|
|
#define countlir_xcc() _nvprof("lir-xcc",1)
|
|
#define countlir_x() _nvprof("lir-x",1)
|
|
#define countlir_call() _nvprof("lir-call",1)
|
|
#define countlir_jtbl() _nvprof("lir-jtbl",1)
|
|
#else
|
|
#define countlir_live()
|
|
#define countlir_ret()
|
|
#define countlir_alloc()
|
|
#define countlir_var()
|
|
#define countlir_use()
|
|
#define countlir_def()
|
|
#define countlir_imm()
|
|
#define countlir_param()
|
|
#define countlir_cmov()
|
|
#define countlir_ld()
|
|
#define countlir_ldq()
|
|
#define countlir_alu()
|
|
#define countlir_qjoin()
|
|
#define countlir_qlo()
|
|
#define countlir_qhi()
|
|
#define countlir_fpu()
|
|
#define countlir_st()
|
|
#define countlir_stq()
|
|
#define countlir_jmp()
|
|
#define countlir_jcc()
|
|
#define countlir_label()
|
|
#define countlir_xcc()
|
|
#define countlir_x()
|
|
#define countlir_call()
|
|
#define countlir_jtbl()
|
|
#endif
|
|
|
|
void Assembler::gen(LirFilter* reader)
|
|
{
|
|
NanoAssert(_thisfrag->nStaticExits == 0);
|
|
|
|
// trace must end with LIR_x, LIR_[f]ret, LIR_xtbl, or LIR_[f]live
|
|
NanoAssert(reader->pos()->isop(LIR_x) ||
|
|
reader->pos()->isop(LIR_ret) ||
|
|
reader->pos()->isop(LIR_fret) ||
|
|
reader->pos()->isop(LIR_xtbl) ||
|
|
reader->pos()->isop(LIR_flive) ||
|
|
reader->pos()->isop(LIR_live));
|
|
|
|
InsList pending_lives(alloc);
|
|
|
|
for (LInsp ins = reader->read(); !ins->isop(LIR_start) && !error();
|
|
ins = reader->read())
|
|
{
|
|
/* What's going on here: we're visiting all the LIR instructions
|
|
in the buffer, working strictly backwards in buffer-order, and
|
|
generating machine instructions for them as we go.
|
|
|
|
For each LIns, we first determine whether it's actually
|
|
necessary, and if not skip it. Otherwise we generate code for
|
|
it. There are two kinds of "necessary" instructions:
|
|
|
|
- "Statement" instructions, which have side effects. Anything
|
|
that could change control flow or the state of memory.
|
|
|
|
- "Value" or "expression" instructions, which compute a value
|
|
based only on the operands to the instruction (and, in the
|
|
case of loads, the state of memory). Because we visit
|
|
instructions in reverse order, if some previously visited
|
|
instruction uses the value computed by this instruction, then
|
|
this instruction will already have a register assigned to
|
|
hold that value. Hence we can consult the instruction to
|
|
detect whether its value is in fact used (i.e. not dead).
|
|
|
|
Note that the backwards code traversal can make register
|
|
allocation confusing. (For example, we restore a value before
|
|
we spill it!) In particular, words like "before" and "after"
|
|
must be used very carefully -- their meaning at regalloc-time is
|
|
opposite to their meaning at run-time. We use the term
|
|
"pre-regstate" to refer to the register allocation state that
|
|
occurs prior to an instruction's execution, and "post-regstate"
|
|
to refer to the state that occurs after an instruction's
|
|
execution, e.g.:
|
|
|
|
pre-regstate: ebx(ins)
|
|
instruction: mov eax, ebx // mov dst, src
|
|
post-regstate: eax(ins)
|
|
|
|
At run-time, the instruction updates the pre-regstate into the
|
|
post-regstate (and these states are the real machine's
|
|
regstates). But when allocating registers, because we go
|
|
backwards, the pre-regstate is constructed from the
|
|
post-regstate (and these regstates are those stored in
|
|
RegAlloc).
|
|
|
|
One consequence of generating code backwards is that we tend to
|
|
both spill and restore registers as early (at run-time) as
|
|
possible; this is good for tolerating memory latency. If we
|
|
generated code forwards, we would expect to both spill and
|
|
restore registers as late (at run-time) as possible; this might
|
|
be better for reducing register pressure.
|
|
*/
|
|
bool required = ins->isStmt() || ins->isUsed();
|
|
if (!required)
|
|
continue;
|
|
|
|
#ifdef NJ_VERBOSE
|
|
// Output the post-regstate (registers and/or activation).
|
|
// Because asm output comes in reverse order, doing it now means
|
|
// it is printed after the LIR and asm, exactly when the
|
|
// post-regstate should be shown.
|
|
if ((_logc->lcbits & LC_Assembly) && (_logc->lcbits & LC_Activation))
|
|
printActivationState();
|
|
if ((_logc->lcbits & LC_Assembly) && (_logc->lcbits & LC_RegAlloc))
|
|
printRegState();
|
|
#endif
|
|
|
|
LOpcode op = ins->opcode();
|
|
switch(op)
|
|
{
|
|
default:
|
|
NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
|
|
break;
|
|
|
|
case LIR_regfence:
|
|
evictAllActiveRegs();
|
|
break;
|
|
|
|
case LIR_flive:
|
|
case LIR_live: {
|
|
countlir_live();
|
|
LInsp op1 = ins->oprnd1();
|
|
// alloca's are meant to live until the point of the LIR_live instruction, marking
|
|
// other expressions as live ensures that they remain so at loop bottoms.
|
|
// alloca areas require special treatment because they are accessed indirectly and
|
|
// the indirect accesses are invisible to the assembler, other than via LIR_live.
|
|
// other expression results are only accessed directly in ways that are visible to
|
|
// the assembler, so extending those expression's lifetimes past the last loop edge
|
|
// isn't necessary.
|
|
if (op1->isop(LIR_alloc)) {
|
|
findMemFor(op1);
|
|
} else {
|
|
pending_lives.add(ins);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case LIR_fret:
|
|
case LIR_ret: {
|
|
countlir_ret();
|
|
asm_ret(ins);
|
|
break;
|
|
}
|
|
|
|
// Allocate some stack space. The value of this instruction
|
|
// is the address of the stack space.
|
|
case LIR_alloc: {
|
|
countlir_alloc();
|
|
NanoAssert(ins->getArIndex() != 0);
|
|
Register r = ins->getReg();
|
|
if (isKnownReg(r)) {
|
|
asm_restore(ins, r);
|
|
_allocator.retire(r);
|
|
ins->setReg(UnknownReg);
|
|
}
|
|
freeResourcesOf(ins);
|
|
break;
|
|
}
|
|
case LIR_int:
|
|
{
|
|
countlir_imm();
|
|
asm_int(ins);
|
|
break;
|
|
}
|
|
case LIR_float:
|
|
case LIR_quad:
|
|
{
|
|
countlir_imm();
|
|
asm_quad(ins);
|
|
break;
|
|
}
|
|
#if !defined NANOJIT_64BIT
|
|
case LIR_callh:
|
|
{
|
|
// return result of quad-call in register
|
|
prepResultReg(ins, rmask(retRegs[1]));
|
|
// if hi half was used, we must use the call to ensure it happens
|
|
findSpecificRegFor(ins->oprnd1(), retRegs[0]);
|
|
break;
|
|
}
|
|
#endif
|
|
case LIR_param:
|
|
{
|
|
countlir_param();
|
|
asm_param(ins);
|
|
break;
|
|
}
|
|
case LIR_qlo:
|
|
{
|
|
countlir_qlo();
|
|
asm_qlo(ins);
|
|
break;
|
|
}
|
|
case LIR_qhi:
|
|
{
|
|
countlir_qhi();
|
|
asm_qhi(ins);
|
|
break;
|
|
}
|
|
case LIR_qcmov:
|
|
case LIR_cmov:
|
|
{
|
|
countlir_cmov();
|
|
asm_cmov(ins);
|
|
break;
|
|
}
|
|
case LIR_ldzb:
|
|
case LIR_ldzs:
|
|
case LIR_ldsb:
|
|
case LIR_ldss:
|
|
case LIR_ldcsb:
|
|
case LIR_ldcss:
|
|
case LIR_ld:
|
|
case LIR_ldc:
|
|
case LIR_ldcb:
|
|
case LIR_ldcs:
|
|
{
|
|
countlir_ld();
|
|
asm_load32(ins);
|
|
break;
|
|
}
|
|
|
|
case LIR_ld32f:
|
|
case LIR_ldc32f:
|
|
case LIR_ldq:
|
|
case LIR_ldqc:
|
|
case LIR_ldf:
|
|
case LIR_ldfc:
|
|
{
|
|
countlir_ldq();
|
|
asm_load64(ins);
|
|
break;
|
|
}
|
|
case LIR_neg:
|
|
case LIR_not:
|
|
{
|
|
countlir_alu();
|
|
asm_neg_not(ins);
|
|
break;
|
|
}
|
|
case LIR_qjoin:
|
|
{
|
|
countlir_qjoin();
|
|
asm_qjoin(ins);
|
|
break;
|
|
}
|
|
|
|
#if defined NANOJIT_64BIT
|
|
case LIR_qiadd:
|
|
case LIR_qiand:
|
|
case LIR_qilsh:
|
|
case LIR_qursh:
|
|
case LIR_qirsh:
|
|
case LIR_qior:
|
|
case LIR_qaddp:
|
|
case LIR_qxor:
|
|
{
|
|
asm_qbinop(ins);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case LIR_add:
|
|
case LIR_iaddp:
|
|
case LIR_sub:
|
|
case LIR_mul:
|
|
case LIR_and:
|
|
case LIR_or:
|
|
case LIR_xor:
|
|
case LIR_lsh:
|
|
case LIR_rsh:
|
|
case LIR_ush:
|
|
case LIR_div:
|
|
case LIR_mod:
|
|
{
|
|
countlir_alu();
|
|
asm_arith(ins);
|
|
break;
|
|
}
|
|
case LIR_fneg:
|
|
{
|
|
countlir_fpu();
|
|
asm_fneg(ins);
|
|
break;
|
|
}
|
|
case LIR_fadd:
|
|
case LIR_fsub:
|
|
case LIR_fmul:
|
|
case LIR_fdiv:
|
|
{
|
|
countlir_fpu();
|
|
asm_fop(ins);
|
|
break;
|
|
}
|
|
case LIR_i2f:
|
|
{
|
|
countlir_fpu();
|
|
asm_i2f(ins);
|
|
break;
|
|
}
|
|
case LIR_u2f:
|
|
{
|
|
countlir_fpu();
|
|
asm_u2f(ins);
|
|
break;
|
|
}
|
|
case LIR_i2q:
|
|
case LIR_u2q:
|
|
{
|
|
countlir_alu();
|
|
asm_promote(ins);
|
|
break;
|
|
}
|
|
case LIR_stb:
|
|
case LIR_sts:
|
|
case LIR_sti:
|
|
{
|
|
countlir_st();
|
|
asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
|
|
break;
|
|
}
|
|
case LIR_st32f:
|
|
case LIR_stqi:
|
|
case LIR_stfi:
|
|
{
|
|
countlir_stq();
|
|
LIns* value = ins->oprnd1();
|
|
LIns* base = ins->oprnd2();
|
|
int dr = ins->disp();
|
|
if (value->isop(LIR_qjoin) && op == LIR_stfi)
|
|
{
|
|
// This is correct for little-endian only.
|
|
asm_store32(LIR_sti, value->oprnd1(), dr, base);
|
|
asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
|
|
}
|
|
else
|
|
{
|
|
asm_store64(op, value, dr, base);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case LIR_j:
|
|
{
|
|
countlir_jmp();
|
|
LInsp to = ins->getTarget();
|
|
LabelState *label = _labels.get(to);
|
|
// the jump is always taken so whatever register state we
|
|
// have from downstream code, is irrelevant to code before
|
|
// this jump. so clear it out. we will pick up register
|
|
// state from the jump target, if we have seen that label.
|
|
releaseRegisters();
|
|
if (label && label->addr) {
|
|
// forward jump - pick up register state from target.
|
|
unionRegisterState(label->regs);
|
|
JMP(label->addr);
|
|
}
|
|
else {
|
|
// backwards jump
|
|
handleLoopCarriedExprs(pending_lives);
|
|
if (!label) {
|
|
// save empty register state at loop header
|
|
_labels.add(to, 0, _allocator);
|
|
}
|
|
else {
|
|
intersectRegisterState(label->regs);
|
|
}
|
|
JMP(0);
|
|
_patches.put(_nIns, to);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case LIR_jt:
|
|
case LIR_jf:
|
|
{
|
|
countlir_jcc();
|
|
LInsp to = ins->getTarget();
|
|
LIns* cond = ins->oprnd1();
|
|
LabelState *label = _labels.get(to);
|
|
if (label && label->addr) {
|
|
// forward jump to known label. need to merge with label's register state.
|
|
unionRegisterState(label->regs);
|
|
asm_branch(op == LIR_jf, cond, label->addr);
|
|
}
|
|
else {
|
|
// back edge.
|
|
handleLoopCarriedExprs(pending_lives);
|
|
if (!label) {
|
|
// evict all registers, most conservative approach.
|
|
evictAllActiveRegs();
|
|
_labels.add(to, 0, _allocator);
|
|
}
|
|
else {
|
|
// evict all registers, most conservative approach.
|
|
intersectRegisterState(label->regs);
|
|
}
|
|
NIns *branch = asm_branch(op == LIR_jf, cond, 0);
|
|
_patches.put(branch,to);
|
|
}
|
|
break;
|
|
}
|
|
|
|
#if NJ_JTBL_SUPPORTED
|
|
case LIR_jtbl:
|
|
{
|
|
countlir_jtbl();
|
|
// Multiway jump can contain both forward and backward jumps.
|
|
// Out of range indices aren't allowed or checked.
|
|
// Code after this jtbl instruction is unreachable.
|
|
releaseRegisters();
|
|
AvmAssert(_allocator.countActive() == 0);
|
|
|
|
uint32_t count = ins->getTableSize();
|
|
bool has_back_edges = false;
|
|
|
|
// Merge the regstates of labels we have already seen.
|
|
for (uint32_t i = count; i-- > 0;) {
|
|
LIns* to = ins->getTarget(i);
|
|
LabelState *lstate = _labels.get(to);
|
|
if (lstate) {
|
|
unionRegisterState(lstate->regs);
|
|
asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->names->formatRef(to));
|
|
} else {
|
|
has_back_edges = true;
|
|
}
|
|
}
|
|
asm_output("forward edges");
|
|
|
|
// In a multi-way jump, the register allocator has no ability to deal
|
|
// with two existing edges that have conflicting register assignments, unlike
|
|
// a conditional branch where code can be inserted on the fall-through path
|
|
// to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
|
|
// forward jtbl jumps. Check here to make sure no registers were picked up from
|
|
// any forward edges.
|
|
AvmAssert(_allocator.countActive() == 0);
|
|
|
|
if (has_back_edges) {
|
|
handleLoopCarriedExprs(pending_lives);
|
|
// save merged (empty) register state at target labels we haven't seen yet
|
|
for (uint32_t i = count; i-- > 0;) {
|
|
LIns* to = ins->getTarget(i);
|
|
LabelState *lstate = _labels.get(to);
|
|
if (!lstate) {
|
|
_labels.add(to, 0, _allocator);
|
|
asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->names->formatRef(to));
|
|
}
|
|
}
|
|
asm_output("backward edges");
|
|
}
|
|
|
|
// Emit the jump instruction, which allocates 1 register for the jump index.
|
|
NIns** native_table = new (_dataAlloc) NIns*[count];
|
|
asm_output("[%p]:", (void*)native_table);
|
|
_patches.put((NIns*)native_table, ins);
|
|
asm_jtbl(ins, native_table);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case LIR_label:
|
|
{
|
|
countlir_label();
|
|
LabelState *label = _labels.get(ins);
|
|
// add profiling inc, if necessary.
|
|
verbose_only( if (_logc->lcbits & LC_FragProfile) {
|
|
if (ins == _thisfrag->loopLabel)
|
|
asm_inc_m32(& _thisfrag->profCount);
|
|
})
|
|
if (!label) {
|
|
// label seen first, normal target of forward jump, save addr & allocator
|
|
_labels.add(ins, _nIns, _allocator);
|
|
}
|
|
else {
|
|
// we're at the top of a loop
|
|
NanoAssert(label->addr == 0);
|
|
//evictAllActiveRegs();
|
|
intersectRegisterState(label->regs);
|
|
label->addr = _nIns;
|
|
}
|
|
verbose_only( if (_logc->lcbits & LC_Assembly) {
|
|
asm_output("[%s]", _thisfrag->lirbuf->names->formatRef(ins));
|
|
})
|
|
break;
|
|
}
|
|
case LIR_xbarrier: {
|
|
break;
|
|
}
|
|
#ifdef NANOJIT_IA32
|
|
case LIR_xtbl: {
|
|
NIns* exit = asm_exit(ins); // does intersectRegisterState()
|
|
asm_switch(ins, exit);
|
|
break;
|
|
}
|
|
#else
|
|
case LIR_xtbl:
|
|
NanoAssertMsg(0, "Not supported for this architecture");
|
|
break;
|
|
#endif
|
|
case LIR_xt:
|
|
case LIR_xf:
|
|
{
|
|
verbose_only( _thisfrag->nStaticExits++; )
|
|
countlir_xcc();
|
|
// we only support cmp with guard right now, also assume it is 'close' and only emit the branch
|
|
NIns* exit = asm_exit(ins); // does intersectRegisterState()
|
|
LIns* cond = ins->oprnd1();
|
|
asm_branch(op == LIR_xf, cond, exit);
|
|
break;
|
|
}
|
|
case LIR_x:
|
|
{
|
|
verbose_only( _thisfrag->nStaticExits++; )
|
|
countlir_x();
|
|
// generate the side exit branch on the main trace.
|
|
NIns *exit = asm_exit(ins);
|
|
JMP( exit );
|
|
break;
|
|
}
|
|
|
|
case LIR_feq:
|
|
case LIR_fle:
|
|
case LIR_flt:
|
|
case LIR_fgt:
|
|
case LIR_fge:
|
|
{
|
|
countlir_fpu();
|
|
asm_fcond(ins);
|
|
break;
|
|
}
|
|
case LIR_eq:
|
|
case LIR_ov:
|
|
case LIR_le:
|
|
case LIR_lt:
|
|
case LIR_gt:
|
|
case LIR_ge:
|
|
case LIR_ult:
|
|
case LIR_ule:
|
|
case LIR_ugt:
|
|
case LIR_uge:
|
|
#ifdef NANOJIT_64BIT
|
|
case LIR_qeq:
|
|
case LIR_qle:
|
|
case LIR_qlt:
|
|
case LIR_qgt:
|
|
case LIR_qge:
|
|
case LIR_qult:
|
|
case LIR_qule:
|
|
case LIR_qugt:
|
|
case LIR_quge:
|
|
#endif
|
|
{
|
|
countlir_alu();
|
|
asm_cond(ins);
|
|
break;
|
|
}
|
|
|
|
case LIR_fcall:
|
|
#ifdef NANOJIT_64BIT
|
|
case LIR_qcall:
|
|
#endif
|
|
case LIR_icall:
|
|
{
|
|
countlir_call();
|
|
asm_call(ins);
|
|
break;
|
|
}
|
|
|
|
#ifdef VTUNE
|
|
case LIR_file:
|
|
{
|
|
// we traverse backwards so we are now hitting the file
|
|
// that is associated with a bunch of LIR_lines we already have seen
|
|
uintptr_t currentFile = ins->oprnd1()->imm32();
|
|
cgen->jitFilenameUpdate(currentFile);
|
|
break;
|
|
}
|
|
case LIR_line:
|
|
{
|
|
// add a new table entry, we don't yet knwo which file it belongs
|
|
// to so we need to add it to the update table too
|
|
// note the alloc, actual act is delayed; see above
|
|
uint32_t currentLine = (uint32_t) ins->oprnd1()->imm32();
|
|
cgen->jitLineNumUpdate(currentLine);
|
|
cgen->jitAddRecord((uintptr_t)_nIns, 0, currentLine, true);
|
|
break;
|
|
}
|
|
#endif // VTUNE
|
|
}
|
|
|
|
#ifdef NJ_VERBOSE
|
|
// We have to do final LIR printing inside this loop. If we do it
|
|
// before this loop, we we end up printing a lot of dead LIR
|
|
// instructions.
|
|
//
|
|
// We print the LIns after generating the code. This ensures that
|
|
// the LIns will appear in debug output *before* the generated
|
|
// code, because Assembler::outputf() prints everything in reverse.
|
|
//
|
|
// Note that some live LIR instructions won't be printed. Eg. an
|
|
// immediate won't be printed unless it is explicitly loaded into
|
|
// a register (as opposed to being incorporated into an immediate
|
|
// field in another machine instruction).
|
|
//
|
|
if (_logc->lcbits & LC_Assembly) {
|
|
LirNameMap* names = _thisfrag->lirbuf->names;
|
|
outputf(" %s", names->formatIns(ins));
|
|
if (ins->isGuard() && ins->oprnd1()) {
|
|
// Special case: code is generated for guard conditions at
|
|
// the same time that code is generated for the guard
|
|
// itself. If the condition is only used by the guard, we
|
|
// must print it now otherwise it won't get printed. So
|
|
// we do print it now, with an explanatory comment. If
|
|
// the condition *is* used again we'll end up printing it
|
|
// twice, but that's ok.
|
|
outputf(" %s # codegen'd with the %s",
|
|
names->formatIns(ins->oprnd1()), lirNames[op]);
|
|
|
|
} else if (ins->isop(LIR_cmov) || ins->isop(LIR_qcmov)) {
|
|
// Likewise for cmov conditions.
|
|
outputf(" %s # codegen'd with the %s",
|
|
names->formatIns(ins->oprnd1()), lirNames[op]);
|
|
|
|
} else if (ins->isop(LIR_mod)) {
|
|
// There's a similar case when a div feeds into a mod.
|
|
outputf(" %s # codegen'd with the mod",
|
|
names->formatIns(ins->oprnd1()));
|
|
}
|
|
}
|
|
#endif
|
|
|
|
if (error())
|
|
return;
|
|
|
|
#ifdef VTUNE
|
|
cgen->jitCodePosUpdate((uintptr_t)_nIns);
|
|
#endif
|
|
|
|
// check that all is well (don't check in exit paths since its more complicated)
|
|
debug_only( pageValidate(); )
|
|
debug_only( resourceConsistencyCheck(); )
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Write a jump table for the given SwitchInfo and store the table
|
|
* address in the SwitchInfo. Every entry will initially point to
|
|
* target.
|
|
*/
|
|
void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
|
|
{
|
|
si->table = (NIns **) alloc.alloc(si->count * sizeof(NIns*));
|
|
for (uint32_t i = 0; i < si->count; ++i)
|
|
si->table[i] = target;
|
|
}
|
|
|
|
void Assembler::assignSavedRegs()
|
|
{
|
|
// Restore saved regsters.
|
|
LirBuffer *b = _thisfrag->lirbuf;
|
|
for (int i=0, n = NumSavedRegs; i < n; i++) {
|
|
LIns *p = b->savedRegs[i];
|
|
if (p)
|
|
findSpecificRegForUnallocated(p, savedRegs[p->paramArg()]);
|
|
}
|
|
}
|
|
|
|
void Assembler::reserveSavedRegs()
|
|
{
|
|
LirBuffer *b = _thisfrag->lirbuf;
|
|
for (int i=0, n = NumSavedRegs; i < n; i++) {
|
|
LIns *p = b->savedRegs[i];
|
|
if (p)
|
|
findMemFor(p);
|
|
}
|
|
}
|
|
|
|
void Assembler::assignParamRegs()
|
|
{
|
|
LInsp state = _thisfrag->lirbuf->state;
|
|
if (state)
|
|
findSpecificRegForUnallocated(state, argRegs[state->paramArg()]);
|
|
LInsp param1 = _thisfrag->lirbuf->param1;
|
|
if (param1)
|
|
findSpecificRegForUnallocated(param1, argRegs[param1->paramArg()]);
|
|
}
|
|
|
|
void Assembler::handleLoopCarriedExprs(InsList& pending_lives)
|
|
{
|
|
// ensure that exprs spanning the loop are marked live at the end of the loop
|
|
reserveSavedRegs();
|
|
for (Seq<LIns*> *p = pending_lives.get(); p != NULL; p = p->tail) {
|
|
LIns *i = p->head;
|
|
NanoAssert(i->isop(LIR_live) || i->isop(LIR_flive));
|
|
LIns *op1 = i->oprnd1();
|
|
// must findMemFor even if we're going to findRegFor; loop-carried
|
|
// operands may spill on another edge, and we need them to always
|
|
// spill to the same place.
|
|
#if NJ_USES_QUAD_CONSTANTS
|
|
// exception: if quad constants are true constants, we should
|
|
// never call findMemFor on those ops
|
|
if (!op1->isconstq())
|
|
#endif
|
|
{
|
|
findMemFor(op1);
|
|
}
|
|
if (! (op1->isconst() || op1->isconstf() || op1->isconstq()))
|
|
findRegFor(op1, i->isop(LIR_flive) ? FpRegs : GpRegs);
|
|
}
|
|
|
|
// clear this list since we have now dealt with those lifetimes. extending
|
|
// their lifetimes again later (earlier in the code) serves no purpose.
|
|
pending_lives.clear();
|
|
}
|
|
|
|
void AR::freeEntryAt(uint32_t idx)
|
|
{
|
|
NanoAssert(idx > 0 && idx <= _highWaterMark);
|
|
|
|
// NB: this loop relies on using entry[0] being NULL,
|
|
// so that we are guaranteed to terminate
|
|
// without access negative entries.
|
|
LIns* i = _entries[idx];
|
|
NanoAssert(i != NULL);
|
|
do {
|
|
_entries[idx] = NULL;
|
|
idx--;
|
|
} while (_entries[idx] == i);
|
|
}
|
|
|
|
#ifdef NJ_VERBOSE
|
|
void Assembler::printRegState()
|
|
{
|
|
char* s = &outline[0];
|
|
VMPI_memset(s, ' ', 26); s[26] = '\0';
|
|
s += VMPI_strlen(s);
|
|
VMPI_sprintf(s, "RR");
|
|
s += VMPI_strlen(s);
|
|
|
|
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
|
LIns *ins = _allocator.getActive(r);
|
|
if (ins) {
|
|
NanoAssertMsg(!_allocator.isFree(r),
|
|
"Coding error; register is both free and active! " );
|
|
const char* n = _thisfrag->lirbuf->names->formatRef(ins);
|
|
|
|
if (ins->isop(LIR_param) && ins->paramKind()==1 &&
|
|
r == Assembler::savedRegs[ins->paramArg()])
|
|
{
|
|
// dont print callee-saved regs that arent used
|
|
continue;
|
|
}
|
|
|
|
const char* rname = ins->isQuad() ? fpn(r) : gpn(r);
|
|
VMPI_sprintf(s, " %s(%s)", rname, n);
|
|
s += VMPI_strlen(s);
|
|
}
|
|
}
|
|
output();
|
|
}
|
|
|
|
void Assembler::printActivationState()
|
|
{
|
|
char* s = &outline[0];
|
|
VMPI_memset(s, ' ', 26); s[26] = '\0';
|
|
s += VMPI_strlen(s);
|
|
VMPI_sprintf(s, "AR");
|
|
s += VMPI_strlen(s);
|
|
|
|
LIns* ins = 0;
|
|
uint32_t nStackSlots = 0;
|
|
int32_t arIndex = 0;
|
|
for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); )
|
|
{
|
|
const char* n = _thisfrag->lirbuf->names->formatRef(ins);
|
|
if (nStackSlots > 1) {
|
|
VMPI_sprintf(s," %d-%d(%s)", 4*arIndex, 4*(arIndex+nStackSlots-1), n);
|
|
}
|
|
else {
|
|
VMPI_sprintf(s," %d(%s)", 4*arIndex, n);
|
|
}
|
|
s += VMPI_strlen(s);
|
|
}
|
|
output();
|
|
}
|
|
#endif
|
|
|
|
inline bool AR::isEmptyRange(uint32_t start, uint32_t nStackSlots) const
|
|
{
|
|
for (uint32_t i=0; i < nStackSlots; i++)
|
|
{
|
|
if (_entries[start-i] != NULL)
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
uint32_t AR::reserveEntry(LIns* ins)
|
|
{
|
|
uint32_t const nStackSlots = nStackSlotsFor(ins);
|
|
|
|
if (nStackSlots == 1)
|
|
{
|
|
for (uint32_t i = 1; i <= _highWaterMark; i++)
|
|
{
|
|
if (_entries[i] == NULL)
|
|
{
|
|
_entries[i] = ins;
|
|
return i;
|
|
}
|
|
}
|
|
uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
|
|
if (spaceLeft >= 1)
|
|
{
|
|
NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
|
|
_entries[++_highWaterMark] = ins;
|
|
return _highWaterMark;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// alloc larger block on 8byte boundary.
|
|
uint32_t const start = nStackSlots + (nStackSlots & 1);
|
|
for (uint32_t i = start; i <= _highWaterMark; i += 2)
|
|
{
|
|
if (isEmptyRange(i, nStackSlots))
|
|
{
|
|
// place the entry in the table and mark the instruction with it
|
|
for (uint32_t j=0; j < nStackSlots; j++)
|
|
{
|
|
NanoAssert(i-j <= _highWaterMark);
|
|
NanoAssert(_entries[i-j] == NULL);
|
|
_entries[i-j] = ins;
|
|
}
|
|
return i;
|
|
}
|
|
}
|
|
|
|
// Be sure to account for any 8-byte-round-up when calculating spaceNeeded.
|
|
uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
|
|
uint32_t const spaceNeeded = nStackSlots + (_highWaterMark & 1);
|
|
if (spaceLeft >= spaceNeeded)
|
|
{
|
|
if (_highWaterMark & 1)
|
|
{
|
|
NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
|
|
_entries[_highWaterMark+1] = NULL;
|
|
}
|
|
_highWaterMark += spaceNeeded;
|
|
for (uint32_t j = 0; j < nStackSlots; j++)
|
|
{
|
|
NanoAssert(_highWaterMark-j < NJ_MAX_STACK_ENTRY);
|
|
NanoAssert(_entries[_highWaterMark-j] == BAD_ENTRY);
|
|
_entries[_highWaterMark-j] = ins;
|
|
}
|
|
return _highWaterMark;
|
|
}
|
|
}
|
|
// no space. oh well.
|
|
return 0;
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
void AR::checkForResourceLeaks() const
|
|
{
|
|
for (uint32_t i = 1; i <= _highWaterMark; i++) {
|
|
NanoAssertMsgf(_entries[i] == NULL, "frame entry %d wasn't freed\n",-4*i);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
uint32_t Assembler::arReserve(LIns* ins)
|
|
{
|
|
uint32_t i = _activation.reserveEntry(ins);
|
|
if (!i)
|
|
setError(StackFull);
|
|
return i;
|
|
}
|
|
|
|
void Assembler::arFreeIfInUse(LIns* ins)
|
|
{
|
|
uint32_t arIndex = ins->getArIndex();
|
|
if (arIndex) {
|
|
NanoAssert(_activation.isValidEntry(arIndex, ins));
|
|
_activation.freeEntryAt(arIndex); // free any stack stack space associated with entry
|
|
}
|
|
}
|
|
|
|
/**
|
|
* move regs around so the SavedRegs contains the highest priority regs.
|
|
*/
|
|
void Assembler::evictScratchRegs()
|
|
{
|
|
// find the top GpRegs that are candidates to put in SavedRegs
|
|
|
|
// tosave is a binary heap stored in an array. the root is tosave[0],
|
|
// left child is at i+1, right child is at i+2.
|
|
|
|
Register tosave[LastReg-FirstReg+1];
|
|
int len=0;
|
|
RegAlloc *regs = &_allocator;
|
|
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
|
if (rmask(r) & GpRegs) {
|
|
LIns *ins = regs->getActive(r);
|
|
if (ins) {
|
|
if (canRemat(ins)) {
|
|
NanoAssert(ins->getReg() == r);
|
|
evict(ins);
|
|
}
|
|
else {
|
|
int32_t pri = regs->getPriority(r);
|
|
// add to heap by adding to end and bubbling up
|
|
int j = len++;
|
|
while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
|
|
tosave[j] = tosave[j/2];
|
|
j /= 2;
|
|
}
|
|
NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
|
|
tosave[j] = r;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// now primap has the live exprs in priority order.
|
|
// allocate each of the top priority exprs to a SavedReg
|
|
|
|
RegisterMask allow = SavedRegs;
|
|
while (allow && len > 0) {
|
|
// get the highest priority var
|
|
Register hi = tosave[0];
|
|
if (!(rmask(hi) & SavedRegs)) {
|
|
LIns *ins = regs->getActive(hi);
|
|
Register r = findRegFor(ins, allow);
|
|
allow &= ~rmask(r);
|
|
}
|
|
else {
|
|
// hi is already in a saved reg, leave it alone.
|
|
allow &= ~rmask(hi);
|
|
}
|
|
|
|
// remove from heap by replacing root with end element and bubbling down.
|
|
if (allow && --len > 0) {
|
|
Register last = tosave[len];
|
|
int j = 0;
|
|
while (j+1 < len) {
|
|
int child = j+1;
|
|
if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
|
|
child++;
|
|
if (regs->getPriority(last) > regs->getPriority(tosave[child]))
|
|
break;
|
|
tosave[j] = tosave[child];
|
|
j = child;
|
|
}
|
|
tosave[j] = last;
|
|
}
|
|
}
|
|
|
|
// now evict everything else.
|
|
evictSomeActiveRegs(~SavedRegs);
|
|
}
|
|
|
|
void Assembler::evictAllActiveRegs()
|
|
{
|
|
// generate code to restore callee saved registers
|
|
// @todo speed this up
|
|
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
|
evictIfActive(r);
|
|
}
|
|
}
|
|
|
|
void Assembler::evictSomeActiveRegs(RegisterMask regs)
|
|
{
|
|
// generate code to restore callee saved registers
|
|
// @todo speed this up
|
|
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
|
if ((rmask(r) & regs)) {
|
|
evictIfActive(r);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Merge the current regstate with a previously stored version.
|
|
*
|
|
* Situation Change to _allocator
|
|
* --------- --------------------
|
|
* !current & !saved
|
|
* !current & saved add saved
|
|
* current & !saved evict current (unionRegisterState does nothing)
|
|
* current & saved & current==saved
|
|
* current & saved & current!=saved evict current, add saved
|
|
*/
|
|
void Assembler::intersectRegisterState(RegAlloc& saved)
|
|
{
|
|
Register regsTodo[LastReg + 1];
|
|
LIns* insTodo[LastReg + 1];
|
|
int nTodo = 0;
|
|
|
|
// Do evictions and pops first.
|
|
verbose_only(bool shouldMention=false; )
|
|
// The obvious thing to do here is to iterate from FirstReg to LastReg.
|
|
// viz: for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) ...
|
|
// However, on ARM that causes lower-numbered integer registers
|
|
// to be be saved at higher addresses, which inhibits the formation
|
|
// of load/store multiple instructions. Hence iterate the loop the
|
|
// other way. The "r <= LastReg" guards against wraparound in
|
|
// the case where Register is treated as unsigned and FirstReg is zero.
|
|
//
|
|
// Note, the loop var is deliberately typed as int (*not* Register)
|
|
// to outsmart compilers that will otherwise report
|
|
// "error: comparison is always true due to limited range of data type".
|
|
for (int ri = LastReg; ri >= FirstReg && ri <= LastReg; ri = int(prevreg(Register(ri))))
|
|
{
|
|
Register const r = Register(ri);
|
|
LIns* curins = _allocator.getActive(r);
|
|
LIns* savedins = saved.getActive(r);
|
|
if (curins != savedins)
|
|
{
|
|
if (savedins) {
|
|
regsTodo[nTodo] = r;
|
|
insTodo[nTodo] = savedins;
|
|
nTodo++;
|
|
}
|
|
if (curins) {
|
|
//_nvprof("intersect-evict",1);
|
|
verbose_only( shouldMention=true; )
|
|
NanoAssert(curins->getReg() == r);
|
|
evict(curins);
|
|
}
|
|
|
|
#ifdef NANOJIT_IA32
|
|
if (savedins && (rmask(r) & x87Regs)) {
|
|
verbose_only( shouldMention=true; )
|
|
FSTP(r);
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
// Now reassign mainline registers.
|
|
for (int i = 0; i < nTodo; i++) {
|
|
findSpecificRegFor(insTodo[i], regsTodo[i]);
|
|
}
|
|
verbose_only(
|
|
if (shouldMention)
|
|
verbose_outputf("## merging registers (intersect) with existing edge");
|
|
)
|
|
}
|
|
|
|
/**
|
|
* Merge the current state of the registers with a previously stored version.
|
|
*
|
|
* Situation Change to _allocator
|
|
* --------- --------------------
|
|
* !current & !saved none
|
|
* !current & saved add saved
|
|
* current & !saved none (intersectRegisterState evicts current)
|
|
* current & saved & current==saved none
|
|
* current & saved & current!=saved evict current, add saved
|
|
*/
|
|
void Assembler::unionRegisterState(RegAlloc& saved)
|
|
{
|
|
Register regsTodo[LastReg + 1];
|
|
LIns* insTodo[LastReg + 1];
|
|
int nTodo = 0;
|
|
|
|
// Do evictions and pops first.
|
|
verbose_only(bool shouldMention=false; )
|
|
for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
|
|
{
|
|
LIns* curins = _allocator.getActive(r);
|
|
LIns* savedins = saved.getActive(r);
|
|
if (curins != savedins)
|
|
{
|
|
if (savedins) {
|
|
regsTodo[nTodo] = r;
|
|
insTodo[nTodo] = savedins;
|
|
nTodo++;
|
|
}
|
|
if (curins && savedins) {
|
|
//_nvprof("union-evict",1);
|
|
verbose_only( shouldMention=true; )
|
|
NanoAssert(curins->getReg() == r);
|
|
evict(curins);
|
|
}
|
|
|
|
#ifdef NANOJIT_IA32
|
|
if (rmask(r) & x87Regs) {
|
|
if (savedins) {
|
|
FSTP(r);
|
|
}
|
|
else if (curins) {
|
|
// saved state did not have fpu reg allocated,
|
|
// so we must evict here to keep x87 stack balanced.
|
|
evict(curins);
|
|
}
|
|
verbose_only( shouldMention=true; )
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
// Now reassign mainline registers.
|
|
for (int i = 0; i < nTodo; i++) {
|
|
findSpecificRegFor(insTodo[i], regsTodo[i]);
|
|
}
|
|
verbose_only(
|
|
if (shouldMention)
|
|
verbose_outputf("## merging registers (union) with existing edge");
|
|
)
|
|
}
|
|
|
|
// Scan table for instruction with the lowest priority, meaning it is used
|
|
// furthest in the future.
|
|
LIns* Assembler::findVictim(RegisterMask allow)
|
|
{
|
|
NanoAssert(allow);
|
|
LIns *ins, *vic = 0;
|
|
int allow_pri = 0x7fffffff;
|
|
for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
|
|
{
|
|
if ((allow & rmask(r)) && (ins = _allocator.getActive(r)) != 0)
|
|
{
|
|
int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
|
|
if (!vic || pri < allow_pri) {
|
|
vic = ins;
|
|
allow_pri = pri;
|
|
}
|
|
}
|
|
}
|
|
NanoAssert(vic != 0);
|
|
return vic;
|
|
}
|
|
|
|
#ifdef NJ_VERBOSE
|
|
char Assembler::outline[8192];
|
|
char Assembler::outlineEOL[512];
|
|
|
|
void Assembler::output()
|
|
{
|
|
// The +1 is for the terminating NUL char.
|
|
VMPI_strncat(outline, outlineEOL, sizeof(outline)-(strlen(outline)+1));
|
|
|
|
if (_outputCache) {
|
|
char* str = new (alloc) char[VMPI_strlen(outline)+1];
|
|
VMPI_strcpy(str, outline);
|
|
_outputCache->insert(str);
|
|
} else {
|
|
_logc->printf("%s\n", outline);
|
|
}
|
|
|
|
outline[0] = '\0';
|
|
outlineEOL[0] = '\0';
|
|
}
|
|
|
|
void Assembler::outputf(const char* format, ...)
|
|
{
|
|
va_list args;
|
|
va_start(args, format);
|
|
|
|
outline[0] = '\0';
|
|
vsprintf(outline, format, args);
|
|
output();
|
|
}
|
|
|
|
void Assembler::setOutputForEOL(const char* format, ...)
|
|
{
|
|
va_list args;
|
|
va_start(args, format);
|
|
|
|
outlineEOL[0] = '\0';
|
|
vsprintf(outlineEOL, format, args);
|
|
}
|
|
#endif // NJ_VERBOSE
|
|
|
|
uint32_t CallInfo::_count_args(uint32_t mask) const
|
|
{
|
|
uint32_t argc = 0;
|
|
uint32_t argt = _argtypes;
|
|
for (uint32_t i = 0; i < MAXARGS; ++i) {
|
|
argt >>= ARGSIZE_SHIFT;
|
|
if (!argt)
|
|
break;
|
|
argc += (argt & mask) != 0;
|
|
}
|
|
return argc;
|
|
}
|
|
|
|
uint32_t CallInfo::get_sizes(ArgSize* sizes) const
|
|
{
|
|
uint32_t argt = _argtypes;
|
|
uint32_t argc = 0;
|
|
for (uint32_t i = 0; i < MAXARGS; i++) {
|
|
argt >>= ARGSIZE_SHIFT;
|
|
ArgSize a = ArgSize(argt & ARGSIZE_MASK_ANY);
|
|
if (a != ARGSIZE_NONE) {
|
|
sizes[argc++] = a;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
return argc;
|
|
}
|
|
|
|
void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc ®s) {
|
|
LabelState *st = new (alloc) LabelState(addr, regs);
|
|
labels.put(label, st);
|
|
}
|
|
|
|
LabelState* LabelStateMap::get(LIns *label) {
|
|
return labels.get(label);
|
|
}
|
|
}
|
|
#endif /* FEATURE_NANOJIT */
|