mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 517910 - NJ: add more alias-set annotations to LIR so as to improve CSEing of loads. r=edwsmith.
--HG-- extra : convert_revision : 26cbea5a2acdcc6156b4a72b0c40c0d675f69571
This commit is contained in:
parent
02b8d2a422
commit
1b27c99df0
@ -518,7 +518,8 @@ FragmentAssembler::FragmentAssembler(Lirasm &parent, const string &fragmentName,
|
||||
mLir = mBufWriter = new LirBufWriter(mParent.mLirbuf, nanojit::AvmCore::config);
|
||||
#ifdef DEBUG
|
||||
if (optimize) { // don't re-validate if no optimization has taken place
|
||||
mLir = mValidateWriter2 = new ValidateWriter(mLir, "end of writer pipeline");
|
||||
mLir = mValidateWriter2 =
|
||||
new ValidateWriter(mLir, mFragment->lirbuf->printer, "end of writer pipeline");
|
||||
}
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
@ -540,7 +541,8 @@ FragmentAssembler::FragmentAssembler(Lirasm &parent, const string &fragmentName,
|
||||
mLir = mExprFilter = new ExprFilter(mLir);
|
||||
}
|
||||
#ifdef DEBUG
|
||||
mLir = mValidateWriter1 = new ValidateWriter(mLir, "start of writer pipeline");
|
||||
mLir = mValidateWriter1 =
|
||||
new ValidateWriter(mLir, mFragment->lirbuf->printer, "start of writer pipeline");
|
||||
#endif
|
||||
|
||||
mReturnTypeBits = 0;
|
||||
@ -634,7 +636,7 @@ FragmentAssembler::assemble_load()
|
||||
mTokens[1].find_first_of("0123456789") == 0) {
|
||||
return mLir->insLoad(mOpcode,
|
||||
ref(mTokens[0]),
|
||||
imm(mTokens[1]));
|
||||
imm(mTokens[1]), ACC_LOAD_ANY);
|
||||
}
|
||||
bad("immediate offset required for load");
|
||||
return NULL; // not reached
|
||||
@ -1059,7 +1061,7 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
|
||||
need(3);
|
||||
ins = mLir->insStore(mOpcode, ref(mTokens[0]),
|
||||
ref(mTokens[1]),
|
||||
imm(mTokens[2]));
|
||||
imm(mTokens[2]), ACC_STORE_ANY);
|
||||
break;
|
||||
|
||||
#if NJ_EXPANDED_LOADSTORE_SUPPORTED
|
||||
@ -1808,7 +1810,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
||||
vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
|
||||
if (!Ms.empty()) {
|
||||
LIns* base = rndPick(Ms);
|
||||
ins = mLir->insLoad(rndPick(I_loads), base, rndOffset32(base->size()));
|
||||
ins = mLir->insLoad(rndPick(I_loads), base, rndOffset32(base->size()), ACC_LOAD_ANY);
|
||||
addOrReplace(Is, ins);
|
||||
n++;
|
||||
}
|
||||
@ -1819,7 +1821,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
||||
case LLD_Q:
|
||||
if (!M8ps.empty()) {
|
||||
LIns* base = rndPick(M8ps);
|
||||
ins = mLir->insLoad(rndPick(Q_loads), base, rndOffset64(base->size()));
|
||||
ins = mLir->insLoad(rndPick(Q_loads), base, rndOffset64(base->size()), ACC_LOAD_ANY);
|
||||
addOrReplace(Qs, ins);
|
||||
n++;
|
||||
}
|
||||
@ -1829,7 +1831,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
||||
case LLD_F:
|
||||
if (!M8ps.empty()) {
|
||||
LIns* base = rndPick(M8ps);
|
||||
ins = mLir->insLoad(rndPick(F_loads), base, rndOffset64(base->size()));
|
||||
ins = mLir->insLoad(rndPick(F_loads), base, rndOffset64(base->size()), ACC_LOAD_ANY);
|
||||
addOrReplace(Fs, ins);
|
||||
n++;
|
||||
}
|
||||
@ -1839,7 +1841,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
||||
vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
|
||||
if (!Ms.empty() && !Is.empty()) {
|
||||
LIns* base = rndPick(Ms);
|
||||
mLir->insStorei(rndPick(Is), base, rndOffset32(base->size()));
|
||||
mLir->insStorei(rndPick(Is), base, rndOffset32(base->size()), ACC_STORE_ANY);
|
||||
n++;
|
||||
}
|
||||
break;
|
||||
@ -1849,7 +1851,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
||||
case LST_Q:
|
||||
if (!M8ps.empty() && !Qs.empty()) {
|
||||
LIns* base = rndPick(M8ps);
|
||||
mLir->insStorei(rndPick(Qs), base, rndOffset64(base->size()));
|
||||
mLir->insStorei(rndPick(Qs), base, rndOffset64(base->size()), ACC_STORE_ANY);
|
||||
n++;
|
||||
}
|
||||
break;
|
||||
@ -1858,7 +1860,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
||||
case LST_F:
|
||||
if (!M8ps.empty() && !Fs.empty()) {
|
||||
LIns* base = rndPick(M8ps);
|
||||
mLir->insStorei(rndPick(Fs), base, rndOffset64(base->size()));
|
||||
mLir->insStorei(rndPick(Fs), base, rndOffset64(base->size()), ACC_STORE_ANY);
|
||||
n++;
|
||||
}
|
||||
break;
|
||||
|
@ -1149,24 +1149,31 @@ namespace nanojit
|
||||
m_list[kind] = new (alloc) LInsp[m_cap[kind]];
|
||||
}
|
||||
clear();
|
||||
m_find[LInsImm] = &LInsHashSet::findImm;
|
||||
m_find[LInsImmq] = PTR_SIZE(NULL, &LInsHashSet::findImmq);
|
||||
m_find[LInsImmf] = &LInsHashSet::findImmf;
|
||||
m_find[LIns1] = &LInsHashSet::find1;
|
||||
m_find[LIns2] = &LInsHashSet::find2;
|
||||
m_find[LIns3] = &LInsHashSet::find3;
|
||||
m_find[LInsLoad] = &LInsHashSet::findLoad;
|
||||
m_find[LInsCall] = &LInsHashSet::findCall;
|
||||
m_find[LInsImm] = &LInsHashSet::findImm;
|
||||
m_find[LInsImmq] = PTR_SIZE(NULL, &LInsHashSet::findImmq);
|
||||
m_find[LInsImmf] = &LInsHashSet::findImmf;
|
||||
m_find[LIns1] = &LInsHashSet::find1;
|
||||
m_find[LIns2] = &LInsHashSet::find2;
|
||||
m_find[LIns3] = &LInsHashSet::find3;
|
||||
m_find[LInsCall] = &LInsHashSet::findCall;
|
||||
m_find[LInsLoadReadOnly] = &LInsHashSet::findLoadReadOnly;
|
||||
m_find[LInsLoadStack] = &LInsHashSet::findLoadStack;
|
||||
m_find[LInsLoadRStack] = &LInsHashSet::findLoadRStack;
|
||||
m_find[LInsLoadOther] = &LInsHashSet::findLoadOther;
|
||||
m_find[LInsLoadMultiple] = &LInsHashSet::findLoadMultiple;
|
||||
}
|
||||
|
||||
void LInsHashSet::clear(LInsHashKind kind) {
|
||||
VMPI_memset(m_list[kind], 0, sizeof(LInsp)*m_cap[kind]);
|
||||
m_used[kind] = 0;
|
||||
}
|
||||
|
||||
void LInsHashSet::clear() {
|
||||
for (LInsHashKind kind = LInsFirst; kind <= LInsLast; kind = nextKind(kind)) {
|
||||
VMPI_memset(m_list[kind], 0, sizeof(LInsp)*m_cap[kind]);
|
||||
m_used[kind] = 0;
|
||||
clear(kind);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline uint32_t LInsHashSet::hashImm(int32_t a) {
|
||||
return _hashfinish(_hash32(0,a));
|
||||
}
|
||||
@ -1194,10 +1201,15 @@ namespace nanojit
|
||||
return _hashfinish(_hashptr(hash, c));
|
||||
}
|
||||
|
||||
inline uint32_t LInsHashSet::hashLoad(LOpcode op, LInsp a, int32_t d) {
|
||||
NanoStaticAssert(sizeof(AccSet) == 1); // required for hashLoad to work properly
|
||||
|
||||
// Nb: no need to hash the load's AccSet because each region's loads go in
|
||||
// a different hash table.
|
||||
inline uint32_t LInsHashSet::hashLoad(LOpcode op, LInsp a, int32_t d, AccSet accSet) {
|
||||
uint32_t hash = _hash8(0,uint8_t(op));
|
||||
hash = _hashptr(hash, a);
|
||||
return _hashfinish(_hash32(hash, d));
|
||||
hash = _hash32(hash, d);
|
||||
return _hashfinish(_hash8(hash, accSet));
|
||||
}
|
||||
|
||||
inline uint32_t LInsHashSet::hashCall(const CallInfo *ci, uint32_t argc, LInsp args[]) {
|
||||
@ -1219,11 +1231,12 @@ namespace nanojit
|
||||
LInsp ins = oldlist[i];
|
||||
if (!ins) continue;
|
||||
uint32_t j = (this->*find)(ins);
|
||||
NanoAssert(!m_list[kind][j]);
|
||||
m_list[kind][j] = ins;
|
||||
}
|
||||
}
|
||||
|
||||
LInsp LInsHashSet::add(LInsHashKind kind, LInsp ins, uint32_t k)
|
||||
void LInsHashSet::add(LInsHashKind kind, LInsp ins, uint32_t k)
|
||||
{
|
||||
NanoAssert(!m_list[kind][k]);
|
||||
m_used[kind]++;
|
||||
@ -1231,20 +1244,21 @@ namespace nanojit
|
||||
if ((m_used[kind] * 4) >= (m_cap[kind] * 3)) { // load factor of 0.75
|
||||
grow(kind);
|
||||
}
|
||||
return ins;
|
||||
}
|
||||
|
||||
LInsp LInsHashSet::findImm(int32_t a, uint32_t &k)
|
||||
{
|
||||
LInsHashKind kind = LInsImm;
|
||||
const uint32_t bitmask = m_cap[kind] - 1;
|
||||
uint32_t hash = hashImm(a) & bitmask;
|
||||
k = hashImm(a) & bitmask;
|
||||
uint32_t n = 1;
|
||||
LInsp ins;
|
||||
while ((ins = m_list[kind][hash]) != NULL &&
|
||||
(ins->imm32() != a))
|
||||
{
|
||||
while (true) {
|
||||
LInsp ins = m_list[kind][k];
|
||||
if (!ins)
|
||||
return NULL;
|
||||
NanoAssert(ins->isconst());
|
||||
if (ins->imm32() == a)
|
||||
return ins;
|
||||
// Quadratic probe: h(k,i) = h(k) + 0.5i + 0.5i^2, which gives the
|
||||
// sequence h(k), h(k)+1, h(k)+3, h(k)+6, h+10, ... This is a
|
||||
// good sequence for 2^n-sized tables as the values h(k,i) for i
|
||||
@ -1252,11 +1266,9 @@ namespace nanojit
|
||||
// See http://portal.acm.org/citation.cfm?id=360737 and
|
||||
// http://en.wikipedia.org/wiki/Quadratic_probing (fetched
|
||||
// 06-Nov-2009) for more details.
|
||||
hash = (hash + n) & bitmask;
|
||||
k = (k + n) & bitmask;
|
||||
n += 1;
|
||||
}
|
||||
k = hash;
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findImm(LInsp ins)
|
||||
@ -1271,18 +1283,18 @@ namespace nanojit
|
||||
{
|
||||
LInsHashKind kind = LInsImmq;
|
||||
const uint32_t bitmask = m_cap[kind] - 1;
|
||||
uint32_t hash = hashImmq(a) & bitmask;
|
||||
k = hashImmq(a) & bitmask;
|
||||
uint32_t n = 1;
|
||||
LInsp ins;
|
||||
while ((ins = m_list[kind][hash]) != NULL &&
|
||||
(ins->imm64() != a))
|
||||
{
|
||||
while (true) {
|
||||
LInsp ins = m_list[kind][k];
|
||||
if (!ins)
|
||||
return NULL;
|
||||
NanoAssert(ins->isconstq());
|
||||
hash = (hash + n) & bitmask;
|
||||
if (ins->imm64() == a)
|
||||
return ins;
|
||||
k = (k + n) & bitmask;
|
||||
n += 1;
|
||||
}
|
||||
k = hash;
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findImmq(LInsp ins)
|
||||
@ -1297,18 +1309,18 @@ namespace nanojit
|
||||
{
|
||||
LInsHashKind kind = LInsImmf;
|
||||
const uint32_t bitmask = m_cap[kind] - 1;
|
||||
uint32_t hash = hashImmq(a) & bitmask;
|
||||
k = hashImmq(a) & bitmask;
|
||||
uint32_t n = 1;
|
||||
LInsp ins;
|
||||
while ((ins = m_list[kind][hash]) != NULL &&
|
||||
(ins->imm64() != a))
|
||||
{
|
||||
while (true) {
|
||||
LInsp ins = m_list[kind][k];
|
||||
if (!ins)
|
||||
return NULL;
|
||||
NanoAssert(ins->isconstf());
|
||||
hash = (hash + n) & bitmask;
|
||||
if (ins->imm64() == a)
|
||||
return ins;
|
||||
k = (k + n) & bitmask;
|
||||
n += 1;
|
||||
}
|
||||
k = hash;
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findImmf(LInsp ins)
|
||||
@ -1322,17 +1334,17 @@ namespace nanojit
|
||||
{
|
||||
LInsHashKind kind = LIns1;
|
||||
const uint32_t bitmask = m_cap[kind] - 1;
|
||||
uint32_t hash = hash1(op,a) & bitmask;
|
||||
k = hash1(op, a) & bitmask;
|
||||
uint32_t n = 1;
|
||||
LInsp ins;
|
||||
while ((ins = m_list[kind][hash]) != NULL &&
|
||||
(ins->opcode() != op || ins->oprnd1() != a))
|
||||
{
|
||||
hash = (hash + n) & bitmask;
|
||||
while (true) {
|
||||
LInsp ins = m_list[kind][k];
|
||||
if (!ins)
|
||||
return NULL;
|
||||
if (ins->isop(op) && ins->oprnd1() == a)
|
||||
return ins;
|
||||
k = (k + n) & bitmask;
|
||||
n += 1;
|
||||
}
|
||||
k = hash;
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::find1(LInsp ins)
|
||||
@ -1346,17 +1358,17 @@ namespace nanojit
|
||||
{
|
||||
LInsHashKind kind = LIns2;
|
||||
const uint32_t bitmask = m_cap[kind] - 1;
|
||||
uint32_t hash = hash2(op,a,b) & bitmask;
|
||||
k = hash2(op, a, b) & bitmask;
|
||||
uint32_t n = 1;
|
||||
LInsp ins;
|
||||
while ((ins = m_list[kind][hash]) != NULL &&
|
||||
(ins->opcode() != op || ins->oprnd1() != a || ins->oprnd2() != b))
|
||||
{
|
||||
hash = (hash + n) & bitmask;
|
||||
while (true) {
|
||||
LInsp ins = m_list[kind][k];
|
||||
if (!ins)
|
||||
return NULL;
|
||||
if (ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b)
|
||||
return ins;
|
||||
k = (k + n) & bitmask;
|
||||
n += 1;
|
||||
}
|
||||
k = hash;
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::find2(LInsp ins)
|
||||
@ -1370,17 +1382,17 @@ namespace nanojit
|
||||
{
|
||||
LInsHashKind kind = LIns3;
|
||||
const uint32_t bitmask = m_cap[kind] - 1;
|
||||
uint32_t hash = hash3(op,a,b,c) & bitmask;
|
||||
k = hash3(op, a, b, c) & bitmask;
|
||||
uint32_t n = 1;
|
||||
LInsp ins;
|
||||
while ((ins = m_list[kind][hash]) != NULL &&
|
||||
(ins->opcode() != op || ins->oprnd1() != a || ins->oprnd2() != b || ins->oprnd3() != c))
|
||||
{
|
||||
hash = (hash + n) & bitmask;
|
||||
while (true) {
|
||||
LInsp ins = m_list[kind][k];
|
||||
if (!ins)
|
||||
return NULL;
|
||||
if (ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c)
|
||||
return ins;
|
||||
k = (k + n) & bitmask;
|
||||
n += 1;
|
||||
}
|
||||
k = hash;
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::find3(LInsp ins)
|
||||
@ -1390,27 +1402,57 @@ namespace nanojit
|
||||
return k;
|
||||
}
|
||||
|
||||
LInsp LInsHashSet::findLoad(LOpcode op, LInsp a, int32_t d, uint32_t &k)
|
||||
LInsp LInsHashSet::findLoad(LOpcode op, LInsp a, int32_t d, AccSet accSet, LInsHashKind kind,
|
||||
uint32_t &k)
|
||||
{
|
||||
LInsHashKind kind = LInsLoad;
|
||||
(void)accSet;
|
||||
const uint32_t bitmask = m_cap[kind] - 1;
|
||||
uint32_t hash = hashLoad(op,a,d) & bitmask;
|
||||
k = hashLoad(op, a, d, accSet) & bitmask;
|
||||
uint32_t n = 1;
|
||||
LInsp ins;
|
||||
while ((ins = m_list[kind][hash]) != NULL &&
|
||||
(ins->opcode() != op || ins->oprnd1() != a || ins->disp() != d))
|
||||
{
|
||||
hash = (hash + n) & bitmask;
|
||||
while (true) {
|
||||
LInsp ins = m_list[kind][k];
|
||||
if (!ins)
|
||||
return NULL;
|
||||
NanoAssert(ins->accSet() == accSet);
|
||||
if (ins->isop(op) && ins->oprnd1() == a && ins->disp() == d)
|
||||
return ins;
|
||||
k = (k + n) & bitmask;
|
||||
n += 1;
|
||||
}
|
||||
k = hash;
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findLoad(LInsp ins)
|
||||
uint32_t LInsHashSet::findLoadReadOnly(LInsp ins)
|
||||
{
|
||||
uint32_t k;
|
||||
findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), k);
|
||||
findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadReadOnly, k);
|
||||
return k;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findLoadStack(LInsp ins)
|
||||
{
|
||||
uint32_t k;
|
||||
findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadStack, k);
|
||||
return k;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findLoadRStack(LInsp ins)
|
||||
{
|
||||
uint32_t k;
|
||||
findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadRStack, k);
|
||||
return k;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findLoadOther(LInsp ins)
|
||||
{
|
||||
uint32_t k;
|
||||
findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadOther, k);
|
||||
return k;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findLoadMultiple(LInsp ins)
|
||||
{
|
||||
uint32_t k;
|
||||
findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadMultiple, k);
|
||||
return k;
|
||||
}
|
||||
|
||||
@ -1426,17 +1468,17 @@ namespace nanojit
|
||||
{
|
||||
LInsHashKind kind = LInsCall;
|
||||
const uint32_t bitmask = m_cap[kind] - 1;
|
||||
uint32_t hash = hashCall(ci, argc, args) & bitmask;
|
||||
k = hashCall(ci, argc, args) & bitmask;
|
||||
uint32_t n = 1;
|
||||
LInsp ins;
|
||||
while ((ins = m_list[kind][hash]) != NULL &&
|
||||
(!ins->isCall() || ins->callInfo() != ci || !argsmatch(ins, argc, args)))
|
||||
{
|
||||
hash = (hash + n) & bitmask;
|
||||
while (true) {
|
||||
LInsp ins = m_list[kind][k];
|
||||
if (!ins)
|
||||
return NULL;
|
||||
if (ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args))
|
||||
return ins;
|
||||
k = (k + n) & bitmask;
|
||||
n += 1;
|
||||
}
|
||||
k = hash;
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t LInsHashSet::findCall(LInsp ins)
|
||||
@ -1728,7 +1770,6 @@ namespace nanojit
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void LirNameMap::addNameWithSuffix(LInsp ins, const char *name, int suffix,
|
||||
bool ignoreOneSuffix) {
|
||||
// The lookup may succeed, ie. we may already have a name for this
|
||||
@ -1792,23 +1833,18 @@ namespace nanojit
|
||||
}
|
||||
|
||||
|
||||
char* LInsPrinter::formatAccSet(RefBuf* buf, LInsp ins, bool isLoad) {
|
||||
AccSet accSet = ins->accSet();
|
||||
char* LInsPrinter::formatAccSet(RefBuf* buf, AccSet accSet) {
|
||||
int i = 0;
|
||||
if ((isLoad && accSet == ACC_LOAD_ANY) ||
|
||||
(!isLoad && accSet == ACC_STORE_ANY))
|
||||
{
|
||||
// boring, don't bother with a suffix
|
||||
} else {
|
||||
buf->buf[i++] = '.';
|
||||
if (accSet & ACC_READONLY) { buf->buf[i++] = 'r'; accSet &= ~ACC_READONLY; }
|
||||
if (accSet & ACC_STACK) { buf->buf[i++] = 's'; accSet &= ~ACC_STACK; }
|
||||
if (accSet & ACC_OTHER) { buf->buf[i++] = 'o'; accSet &= ~ACC_OTHER; }
|
||||
// This assertion will fail if we add a new accSet value but
|
||||
// forget to handle it here.
|
||||
NanoAssert(accSet == 0);
|
||||
}
|
||||
// 'c' is short for "const", because 'r' is used for RSTACK.
|
||||
if (accSet & ACC_READONLY) { buf->buf[i++] = 'c'; accSet &= ~ACC_READONLY; }
|
||||
if (accSet & ACC_STACK) { buf->buf[i++] = 's'; accSet &= ~ACC_STACK; }
|
||||
if (accSet & ACC_RSTACK) { buf->buf[i++] = 'r'; accSet &= ~ACC_RSTACK; }
|
||||
if (accSet & ACC_OTHER) { buf->buf[i++] = 'o'; accSet &= ~ACC_OTHER; }
|
||||
// This assertion will fail if we add a new accSet value but
|
||||
// forget to handle it here.
|
||||
NanoAssert(accSet == 0);
|
||||
buf->buf[i] = 0;
|
||||
NanoAssert(size_t(i) < buf->len);
|
||||
return buf->buf;
|
||||
}
|
||||
|
||||
@ -1919,11 +1955,12 @@ namespace nanojit
|
||||
int32_t argc = i->argc();
|
||||
int32_t m = int32_t(n); // Windows doesn't have 'ssize_t'
|
||||
if (call->isIndirect())
|
||||
m -= VMPI_snprintf(s, m, "%s = %s [%s] ( ", formatRef(&b1, i), lirNames[op],
|
||||
formatRef(&b2, i->arg(--argc)));
|
||||
m -= VMPI_snprintf(s, m, "%s = %s.%s [%s] ( ", formatRef(&b1, i), lirNames[op],
|
||||
formatAccSet(&b2, call->_storeAccSet),
|
||||
formatRef(&b3, i->arg(--argc)));
|
||||
else
|
||||
m -= VMPI_snprintf(s, m, "%s = %s #%s ( ", formatRef(&b1, i), lirNames[op],
|
||||
call->_name);
|
||||
m -= VMPI_snprintf(s, m, "%s = %s.%s #%s ( ", formatRef(&b1, i), lirNames[op],
|
||||
formatAccSet(&b2, call->_storeAccSet), call->_name);
|
||||
if (m < 0) break;
|
||||
for (int32_t j = argc - 1; j >= 0; j--) {
|
||||
s += VMPI_strlen(s);
|
||||
@ -2074,8 +2111,8 @@ namespace nanojit
|
||||
case LIR_ldsb:
|
||||
case LIR_ldss:
|
||||
case LIR_ld32f:
|
||||
VMPI_snprintf(s, n, "%s = %s%s %s[%d]", formatRef(&b1, i), lirNames[op],
|
||||
formatAccSet(&b2, i, /*isLoad*/true),
|
||||
VMPI_snprintf(s, n, "%s = %s.%s %s[%d]", formatRef(&b1, i), lirNames[op],
|
||||
formatAccSet(&b2, i->accSet()),
|
||||
formatRef(&b3, i->oprnd1()),
|
||||
i->disp());
|
||||
break;
|
||||
@ -2086,8 +2123,8 @@ namespace nanojit
|
||||
case LIR_stb:
|
||||
case LIR_sts:
|
||||
case LIR_st32f:
|
||||
VMPI_snprintf(s, n, "%s%s %s[%d] = %s", lirNames[op],
|
||||
formatAccSet(&b1, i, /*isLoad*/false),
|
||||
VMPI_snprintf(s, n, "%s.%s %s[%d] = %s", lirNames[op],
|
||||
formatAccSet(&b1, i->accSet()),
|
||||
formatRef(&b2, i->oprnd2()),
|
||||
i->disp(),
|
||||
formatRef(&b3, i->oprnd1()));
|
||||
@ -2103,17 +2140,21 @@ namespace nanojit
|
||||
|
||||
|
||||
CseFilter::CseFilter(LirWriter *out, Allocator& alloc)
|
||||
: LirWriter(out)
|
||||
: LirWriter(out), storesSinceLastLoad(ACC_NONE)
|
||||
{
|
||||
uint32_t kInitialCaps[LInsLast + 1];
|
||||
kInitialCaps[LInsImm] = 128;
|
||||
kInitialCaps[LInsImmq] = PTR_SIZE(0, 16);
|
||||
kInitialCaps[LInsImmf] = 16;
|
||||
kInitialCaps[LIns1] = 256;
|
||||
kInitialCaps[LIns2] = 512;
|
||||
kInitialCaps[LIns3] = 16;
|
||||
kInitialCaps[LInsLoad] = 16;
|
||||
kInitialCaps[LInsCall] = 64;
|
||||
kInitialCaps[LInsImm] = 128;
|
||||
kInitialCaps[LInsImmq] = PTR_SIZE(0, 16);
|
||||
kInitialCaps[LInsImmf] = 16;
|
||||
kInitialCaps[LIns1] = 256;
|
||||
kInitialCaps[LIns2] = 512;
|
||||
kInitialCaps[LIns3] = 16;
|
||||
kInitialCaps[LInsCall] = 64;
|
||||
kInitialCaps[LInsLoadReadOnly] = 16;
|
||||
kInitialCaps[LInsLoadStack] = 16;
|
||||
kInitialCaps[LInsLoadRStack] = 16;
|
||||
kInitialCaps[LInsLoadOther] = 16;
|
||||
kInitialCaps[LInsLoadMultiple] = 16;
|
||||
exprs = new (alloc) LInsHashSet(alloc, kInitialCaps);
|
||||
}
|
||||
|
||||
@ -2121,13 +2162,14 @@ namespace nanojit
|
||||
{
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->findImm(imm, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->insImm(imm);
|
||||
if (!ins) {
|
||||
ins = out->insImm(imm);
|
||||
exprs->add(LInsImm, ins, k);
|
||||
}
|
||||
// We assume that downstream stages do not modify the instruction, so
|
||||
// that we can insert 'ins' into slot 'k'. Check this.
|
||||
NanoAssert(ins->opcode() == LIR_int && ins->imm32() == imm);
|
||||
return exprs->add(LInsImm, ins, k);
|
||||
NanoAssert(ins->isop(LIR_int) && ins->imm32() == imm);
|
||||
return ins;
|
||||
}
|
||||
|
||||
#ifdef NANOJIT_64BIT
|
||||
@ -2135,11 +2177,12 @@ namespace nanojit
|
||||
{
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->findImmq(q, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->insImmq(q);
|
||||
NanoAssert(ins->opcode() == LIR_quad && ins->imm64() == q);
|
||||
return exprs->add(LInsImmq, ins, k);
|
||||
if (!ins) {
|
||||
ins = out->insImmq(q);
|
||||
exprs->add(LInsImmq, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(LIR_quad) && ins->imm64() == q);
|
||||
return ins;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2154,85 +2197,121 @@ namespace nanojit
|
||||
} u;
|
||||
u.d = d;
|
||||
LInsp ins = exprs->findImmf(u.u64, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->insImmf(d);
|
||||
NanoAssert(ins->opcode() == LIR_float && ins->imm64() == u.u64);
|
||||
return exprs->add(LInsImmf, ins, k);
|
||||
if (!ins) {
|
||||
ins = out->insImmf(d);
|
||||
exprs->add(LInsImmf, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(LIR_float) && ins->imm64() == u.u64);
|
||||
return ins;
|
||||
}
|
||||
|
||||
LIns* CseFilter::ins0(LOpcode v)
|
||||
LIns* CseFilter::ins0(LOpcode op)
|
||||
{
|
||||
if (v == LIR_label)
|
||||
if (op == LIR_label)
|
||||
exprs->clear();
|
||||
return out->ins0(v);
|
||||
return out->ins0(op);
|
||||
}
|
||||
|
||||
LIns* CseFilter::ins1(LOpcode v, LInsp a)
|
||||
LIns* CseFilter::ins1(LOpcode op, LInsp a)
|
||||
{
|
||||
if (isCseOpcode(v)) {
|
||||
LInsp ins;
|
||||
if (isCseOpcode(op)) {
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->find1(v, a, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->ins1(v, a);
|
||||
NanoAssert(ins->opcode() == v && ins->oprnd1() == a);
|
||||
return exprs->add(LIns1, ins, k);
|
||||
}
|
||||
return out->ins1(v,a);
|
||||
}
|
||||
|
||||
LIns* CseFilter::ins2(LOpcode v, LInsp a, LInsp b)
|
||||
{
|
||||
if (isCseOpcode(v)) {
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->find2(v, a, b, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->ins2(v, a, b);
|
||||
NanoAssert(ins->opcode() == v && ins->oprnd1() == a && ins->oprnd2() == b);
|
||||
return exprs->add(LIns2, ins, k);
|
||||
}
|
||||
return out->ins2(v,a,b);
|
||||
}
|
||||
|
||||
LIns* CseFilter::ins3(LOpcode v, LInsp a, LInsp b, LInsp c)
|
||||
{
|
||||
NanoAssert(isCseOpcode(v));
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->find3(v, a, b, c, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->ins3(v, a, b, c);
|
||||
NanoAssert(ins->opcode() == v && ins->oprnd1() == a && ins->oprnd2() == b &&
|
||||
ins->oprnd3() == c);
|
||||
return exprs->add(LIns3, ins, k);
|
||||
}
|
||||
|
||||
LIns* CseFilter::insLoad(LOpcode v, LInsp base, int32_t disp, AccSet accSet)
|
||||
{
|
||||
if (isS16(disp)) {
|
||||
// XXX: This condition is overly strict. Bug 517910 will make it better.
|
||||
if (accSet == ACC_READONLY) {
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->findLoad(v, base, disp, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->insLoad(v, base, disp, accSet);
|
||||
NanoAssert(ins->opcode() == v && ins->oprnd1() == base && ins->disp() == disp);
|
||||
return exprs->add(LInsLoad, ins, k);
|
||||
ins = exprs->find1(op, a, k);
|
||||
if (!ins) {
|
||||
ins = out->ins1(op, a);
|
||||
exprs->add(LIns1, ins, k);
|
||||
}
|
||||
return out->insLoad(v, base, disp, accSet);
|
||||
} else {
|
||||
ins = out->ins1(op, a);
|
||||
}
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == a);
|
||||
return ins;
|
||||
}
|
||||
|
||||
LIns* CseFilter::ins2(LOpcode op, LInsp a, LInsp b)
|
||||
{
|
||||
LInsp ins;
|
||||
if (isCseOpcode(op)) {
|
||||
uint32_t k;
|
||||
ins = exprs->find2(op, a, b, k);
|
||||
if (!ins) {
|
||||
ins = out->ins2(op, a, b);
|
||||
exprs->add(LIns2, ins, k);
|
||||
}
|
||||
} else {
|
||||
ins = out->ins2(op, a, b);
|
||||
}
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
|
||||
return ins;
|
||||
}
|
||||
|
||||
LIns* CseFilter::ins3(LOpcode op, LInsp a, LInsp b, LInsp c)
|
||||
{
|
||||
NanoAssert(isCseOpcode(op));
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->find3(op, a, b, c, k);
|
||||
if (!ins) {
|
||||
ins = out->ins3(op, a, b, c);
|
||||
exprs->add(LIns3, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c);
|
||||
return ins;
|
||||
}
|
||||
|
||||
LIns* CseFilter::insLoad(LOpcode op, LInsp base, int32_t disp, AccSet loadAccSet)
|
||||
{
|
||||
LInsp ins;
|
||||
if (isS16(disp)) {
|
||||
// Clear all loads aliased by stores and calls since the last time
|
||||
// we were in this function.
|
||||
if (storesSinceLastLoad != ACC_NONE) {
|
||||
NanoAssert(!(storesSinceLastLoad & ACC_READONLY)); // can't store to READONLY
|
||||
if (storesSinceLastLoad & ACC_STACK) { exprs->clear(LInsLoadStack); }
|
||||
if (storesSinceLastLoad & ACC_RSTACK) { exprs->clear(LInsLoadRStack); }
|
||||
if (storesSinceLastLoad & ACC_OTHER) { exprs->clear(LInsLoadOther); }
|
||||
// Loads marked with multiple access regions must be treated
|
||||
// conservatively -- we always clear all of them.
|
||||
exprs->clear(LInsLoadMultiple);
|
||||
storesSinceLastLoad = ACC_NONE;
|
||||
}
|
||||
|
||||
LInsHashKind kind;
|
||||
switch (loadAccSet) {
|
||||
case ACC_READONLY: kind = LInsLoadReadOnly; break;
|
||||
case ACC_STACK: kind = LInsLoadStack; break;
|
||||
case ACC_RSTACK: kind = LInsLoadRStack; break;
|
||||
case ACC_OTHER: kind = LInsLoadOther; break;
|
||||
default: kind = LInsLoadMultiple; break;
|
||||
}
|
||||
|
||||
uint32_t k;
|
||||
ins = exprs->findLoad(op, base, disp, loadAccSet, kind, k);
|
||||
if (!ins) {
|
||||
ins = out->insLoad(op, base, disp, loadAccSet);
|
||||
exprs->add(kind, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == base && ins->disp() == disp);
|
||||
|
||||
} else {
|
||||
// If the displacement is more than 16 bits, put it in a separate
|
||||
// instruction. LirBufWriter also does this, we do it here as
|
||||
// well because CseFilter relies on LirBufWriter not changing
|
||||
// code.
|
||||
return insLoad(v, ins2(LIR_addp, base, insImmWord(disp)), 0, accSet);
|
||||
// instruction. Nb: LirBufWriter also does this, we do it here
|
||||
// too because CseFilter relies on LirBufWriter not changing code.
|
||||
ins = insLoad(op, ins2(LIR_addp, base, insImmWord(disp)), 0, loadAccSet);
|
||||
}
|
||||
return ins;
|
||||
}
|
||||
|
||||
LInsp CseFilter::insGuard(LOpcode v, LInsp c, GuardRecord *gr)
|
||||
LIns* CseFilter::insStore(LOpcode op, LInsp value, LInsp base, int32_t disp, AccSet accSet)
|
||||
{
|
||||
storesSinceLastLoad |= accSet;
|
||||
LIns* ins = out->insStore(op, value, base, disp, accSet);
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == value && ins->oprnd2() == base &&
|
||||
ins->disp() == disp && ins->accSet() == accSet);
|
||||
return ins;
|
||||
}
|
||||
|
||||
LInsp CseFilter::insGuard(LOpcode op, LInsp c, GuardRecord *gr)
|
||||
{
|
||||
// LIR_xt and LIR_xf guards are CSEable. Note that we compare the
|
||||
// opcode and condition when determining if two guards are equivalent
|
||||
@ -2251,104 +2330,58 @@ namespace nanojit
|
||||
// - The CSE algorithm will always keep guard 1 and remove guard 2
|
||||
// (not vice versa). The current algorithm does this.
|
||||
//
|
||||
if (isCseOpcode(v)) {
|
||||
LInsp ins;
|
||||
if (isCseOpcode(op)) {
|
||||
// conditional guard
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->find1(v, c, k);
|
||||
if (ins)
|
||||
return 0;
|
||||
ins = out->insGuard(v, c, gr);
|
||||
NanoAssert(ins->opcode() == v && ins->oprnd1() == c);
|
||||
return exprs->add(LIns1, ins, k);
|
||||
ins = exprs->find1(op, c, k);
|
||||
if (!ins) {
|
||||
ins = out->insGuard(op, c, gr);
|
||||
exprs->add(LIns1, ins, k);
|
||||
}
|
||||
} else {
|
||||
ins = out->insGuard(op, c, gr);
|
||||
}
|
||||
return out->insGuard(v, c, gr);
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == c);
|
||||
return ins;
|
||||
}
|
||||
|
||||
LInsp CseFilter::insGuardXov(LOpcode v, LInsp a, LInsp b, GuardRecord *gr)
|
||||
LInsp CseFilter::insGuardXov(LOpcode op, LInsp a, LInsp b, GuardRecord *gr)
|
||||
{
|
||||
// LIR_*xov are CSEable. See CseFilter::insGuard() for details.
|
||||
NanoAssert(isCseOpcode(v));
|
||||
NanoAssert(isCseOpcode(op));
|
||||
// conditional guard
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->find2(v, a, b, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->insGuardXov(v, a, b, gr);
|
||||
NanoAssert(ins->opcode() == v && ins->oprnd1() == a && ins->oprnd2() == b);
|
||||
return exprs->add(LIns2, ins, k);
|
||||
LInsp ins = exprs->find2(op, a, b, k);
|
||||
if (!ins) {
|
||||
ins = out->insGuardXov(op, a, b, gr);
|
||||
exprs->add(LIns2, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
|
||||
return ins;
|
||||
}
|
||||
|
||||
LInsp CseFilter::insCall(const CallInfo *ci, LInsp args[])
|
||||
{
|
||||
LInsp ins;
|
||||
uint32_t argc = ci->count_args();
|
||||
if (ci->_isPure) {
|
||||
NanoAssert(ci->_storeAccSet == ACC_NONE);
|
||||
uint32_t k;
|
||||
uint32_t argc = ci->count_args();
|
||||
LInsp ins = exprs->findCall(ci, argc, args, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->insCall(ci, args);
|
||||
NanoAssert(ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args));
|
||||
return exprs->add(LInsCall, ins, k);
|
||||
}
|
||||
return out->insCall(ci, args);
|
||||
}
|
||||
|
||||
LInsp LoadFilter::insLoad(LOpcode v, LInsp base, int32_t disp, AccSet accSet)
|
||||
{
|
||||
if (base != sp && base != rp)
|
||||
{
|
||||
switch (v)
|
||||
{
|
||||
case LIR_ld:
|
||||
CASE64(LIR_ldq:)
|
||||
case LIR_ldf:
|
||||
case LIR_ld32f:
|
||||
case LIR_ldsb:
|
||||
case LIR_ldss:
|
||||
case LIR_ldzb:
|
||||
case LIR_ldzs:
|
||||
{
|
||||
uint32_t k;
|
||||
LInsp ins = exprs->findLoad(v, base, disp, k);
|
||||
if (ins)
|
||||
return ins;
|
||||
ins = out->insLoad(v, base, disp, accSet);
|
||||
return exprs->add(LInsLoad, ins, k);
|
||||
}
|
||||
default:
|
||||
// fall thru
|
||||
break;
|
||||
ins = exprs->findCall(ci, argc, args, k);
|
||||
if (!ins) {
|
||||
ins = out->insCall(ci, args);
|
||||
exprs->add(LInsCall, ins, k);
|
||||
}
|
||||
} else {
|
||||
// We only need to worry about aliasing if !ci->_isPure.
|
||||
storesSinceLastLoad |= ci->_storeAccSet;
|
||||
ins = out->insCall(ci, args);
|
||||
}
|
||||
return out->insLoad(v, base, disp, accSet);
|
||||
NanoAssert(ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args));
|
||||
return ins;
|
||||
}
|
||||
|
||||
void LoadFilter::clear(LInsp p)
|
||||
{
|
||||
if (p != sp && p != rp)
|
||||
exprs->clear();
|
||||
}
|
||||
|
||||
LInsp LoadFilter::insStore(LOpcode op, LInsp v, LInsp b, int32_t d, AccSet accSet)
|
||||
{
|
||||
clear(b);
|
||||
return out->insStore(op, v, b, d, accSet);
|
||||
}
|
||||
|
||||
LInsp LoadFilter::insCall(const CallInfo *ci, LInsp args[])
|
||||
{
|
||||
if (!ci->_isPure)
|
||||
exprs->clear();
|
||||
return out->insCall(ci, args);
|
||||
}
|
||||
|
||||
LInsp LoadFilter::ins0(LOpcode op)
|
||||
{
|
||||
if (op == LIR_label)
|
||||
exprs->clear();
|
||||
return out->ins0(op);
|
||||
}
|
||||
|
||||
#if NJ_SOFTFLOAT_SUPPORTED
|
||||
static double FASTCALL i2f(int32_t i) { return i; }
|
||||
@ -2564,7 +2597,7 @@ namespace nanojit
|
||||
NanoAssertMsgf(0,
|
||||
"LIR type error (%s): arg %d of '%s' is '%s' "
|
||||
"which has type %s (expected %s)",
|
||||
_whereInPipeline, i+1, lirNames[op],
|
||||
whereInPipeline, i+1, lirNames[op],
|
||||
lirNames[args[i]->opcode()],
|
||||
type2string(actual), type2string(formal));
|
||||
}
|
||||
@ -2576,15 +2609,16 @@ namespace nanojit
|
||||
{
|
||||
NanoAssertMsgf(0,
|
||||
"LIR structure error (%s): %s %d of '%s' is '%s' (expected %s)",
|
||||
_whereInPipeline, argDesc, argN,
|
||||
whereInPipeline, argDesc, argN,
|
||||
lirNames[op], lirNames[arg->opcode()], shouldBeDesc);
|
||||
}
|
||||
|
||||
void ValidateWriter::errorAccSetShould(const char* what, AccSet accSet, const char* shouldDesc)
|
||||
void ValidateWriter::errorAccSet(const char* what, AccSet accSet, const char* shouldDesc)
|
||||
{
|
||||
RefBuf b;
|
||||
NanoAssertMsgf(0,
|
||||
"LIR AccSet error (%s): '%s' AccSet is %d; it should %s",
|
||||
_whereInPipeline, what, accSet, shouldDesc);
|
||||
"LIR AccSet error (%s): '%s' AccSet is '%s'; %s",
|
||||
whereInPipeline, what, printer->formatAccSet(&b, accSet), shouldDesc);
|
||||
}
|
||||
|
||||
void ValidateWriter::checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins)
|
||||
@ -2609,17 +2643,60 @@ namespace nanojit
|
||||
errorStructureShouldBe(op, "argument", argN, ins, lirNames[op2]);
|
||||
}
|
||||
|
||||
ValidateWriter::ValidateWriter(LirWriter *out, const char* stageName)
|
||||
: LirWriter(out), _whereInPipeline(stageName)
|
||||
void ValidateWriter::checkAccSet(LOpcode op, LInsp base, AccSet accSet, AccSet maxAccSet)
|
||||
{
|
||||
if (accSet == ACC_NONE)
|
||||
errorAccSet(lirNames[op], accSet, "it should not equal ACC_NONE");
|
||||
|
||||
if (accSet & ~maxAccSet)
|
||||
errorAccSet(lirNames[op], accSet,
|
||||
"it should not contain bits that aren't in ACC_LOAD_ANY/ACC_STORE_ANY");
|
||||
|
||||
// Some sanity checking, which is based on the following assumptions:
|
||||
// - STACK ones should use 'sp' or 'sp+k' as the base. (We could look
|
||||
// for more complex patterns, but that feels dangerous. Better to
|
||||
// keep it really simple.)
|
||||
// - RSTACK ones should use 'rp' as the base.
|
||||
// - READONLY/OTHER ones should not use 'sp'/'sp+k' or 'rp' as the base.
|
||||
//
|
||||
// Things that aren't checked:
|
||||
// - There's no easy way to check if READONLY ones really are read-only.
|
||||
|
||||
bool isStack = base == sp ||
|
||||
(base->isop(LIR_piadd) && base->oprnd1() == sp && base->oprnd2()->isconstp());
|
||||
bool isRStack = base == rp;
|
||||
|
||||
switch (accSet) {
|
||||
case ACC_STACK:
|
||||
if (!isStack)
|
||||
errorAccSet(lirNames[op], accSet, "but it's not a stack access");
|
||||
break;
|
||||
|
||||
case ACC_RSTACK:
|
||||
if (!isRStack)
|
||||
errorAccSet(lirNames[op], accSet, "but it's not an rstack access");
|
||||
break;
|
||||
|
||||
case ACC_READONLY:
|
||||
case ACC_OTHER:
|
||||
if (isStack)
|
||||
errorAccSet(lirNames[op], accSet, "but it's a stack access");
|
||||
if (isRStack)
|
||||
errorAccSet(lirNames[op], accSet, "but it's an rstack access");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ValidateWriter::ValidateWriter(LirWriter *out, LInsPrinter* printer, const char* where)
|
||||
: LirWriter(out), printer(printer), whereInPipeline(where), sp(0), rp(0)
|
||||
{}
|
||||
|
||||
LIns* ValidateWriter::insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet)
|
||||
{
|
||||
if (accSet == ACC_NONE)
|
||||
errorAccSetShould(lirNames[op], accSet, "not equal ACC_NONE");
|
||||
|
||||
if (accSet & ~ACC_LOAD_ANY)
|
||||
errorAccSetShould(lirNames[op], accSet, "not contain bits that aren't in ACC_LOAD_ANY");
|
||||
checkAccSet(op, base, accSet, ACC_LOAD_ANY);
|
||||
|
||||
int nArgs = 1;
|
||||
LTy formals[1] = { LTy_Ptr };
|
||||
@ -2646,11 +2723,7 @@ namespace nanojit
|
||||
|
||||
LIns* ValidateWriter::insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet)
|
||||
{
|
||||
if (accSet == ACC_NONE)
|
||||
errorAccSetShould(lirNames[op], accSet, "not equal ACC_NONE");
|
||||
|
||||
if (accSet & ~ACC_STORE_ANY)
|
||||
errorAccSetShould(lirNames[op], accSet, "not contain bits that aren't in ACC_STORE_ANY");
|
||||
checkAccSet(op, base, accSet, ACC_STORE_ANY);
|
||||
|
||||
int nArgs = 2;
|
||||
LTy formals[2] = { LTy_Void, LTy_Ptr }; // LTy_Void is overwritten shortly
|
||||
@ -2680,7 +2753,7 @@ namespace nanojit
|
||||
|
||||
typeCheckArgs(op, nArgs, formals, args);
|
||||
|
||||
return out->insStore(op, value, base, d);
|
||||
return out->insStore(op, value, base, d, accSet);
|
||||
}
|
||||
|
||||
LIns* ValidateWriter::ins0(LOpcode op)
|
||||
@ -2923,11 +2996,11 @@ namespace nanojit
|
||||
LOpcode op = getCallOpcode(ci);
|
||||
|
||||
if (ci->_isPure && ci->_storeAccSet != ACC_NONE)
|
||||
errorAccSetShould(ci->_name, ci->_storeAccSet, "equal ACC_NONE for pure functions");
|
||||
errorAccSet(ci->_name, ci->_storeAccSet, "it should be ACC_NONE for pure functions");
|
||||
|
||||
if (ci->_storeAccSet & ~ACC_STORE_ANY)
|
||||
errorAccSetShould(lirNames[op], ci->_storeAccSet,
|
||||
"not contain bits that aren't in ACC_STORE_ANY");
|
||||
errorAccSet(lirNames[op], ci->_storeAccSet,
|
||||
"it should not contain bits that aren't in ACC_STORE_ANY");
|
||||
|
||||
// This loop iterates over the args from right-to-left (because arg()
|
||||
// and getArgTypes() use right-to-left order), but puts the results
|
||||
|
@ -210,7 +210,10 @@ namespace nanojit
|
||||
// A load from a READONLY region will never alias with any stores.
|
||||
//
|
||||
// - STACK: the stack. Stack loads/stores can usually be easily
|
||||
// identified because they use SP as the stack pointer.
|
||||
// identified because they use SP as the base pointer.
|
||||
//
|
||||
// - RSTACK: the return stack. Return stack loads/stores can usually be
|
||||
// easily identified because they use RP as the base pointer.
|
||||
//
|
||||
// - OTHER: all other regions of memory.
|
||||
//
|
||||
@ -259,6 +262,14 @@ namespace nanojit
|
||||
// true for the store set of a function.)
|
||||
//
|
||||
// Such imprecision is safe but may reduce optimisation opportunities.
|
||||
//
|
||||
// Optimisations that use access region info
|
||||
// -----------------------------------------
|
||||
// Currently only CseFilter uses this, and only for determining whether
|
||||
// loads can be CSE'd. Note that CseFilter treats loads that are marked
|
||||
// with a single access region precisely, but all loads marked with
|
||||
// multiple access regions get lumped together. So if you can't mark a
|
||||
// load with a single access region, you might as well use ACC_LOAD_ANY.
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
// An access region set is represented as a bitset. Nb: this restricts us
|
||||
@ -267,11 +278,13 @@ namespace nanojit
|
||||
|
||||
// The access regions. Note that because of the bitset representation
|
||||
// these constants are also valid (singleton) AccSet values. If you add
|
||||
// new ones please update ACC_ALL_WRITABLE and LirNameMap::formatAccSet().
|
||||
// new ones please update ACC_ALL_STORABLE and formatAccSet() and
|
||||
// CseFilter.
|
||||
//
|
||||
static const AccSet ACC_READONLY = 1 << 0; // 0000_0001b
|
||||
static const AccSet ACC_STACK = 1 << 1; // 0000_0010b
|
||||
static const AccSet ACC_OTHER = 1 << 2; // 0000_0100b
|
||||
static const AccSet ACC_RSTACK = 1 << 2; // 0000_0100b
|
||||
static const AccSet ACC_OTHER = 1 << 3; // 0000_1000b
|
||||
|
||||
// Some common (non-singleton) access region sets. ACC_NONE does not make
|
||||
// sense for loads or stores (which must access at least one region), it
|
||||
@ -279,15 +292,14 @@ namespace nanojit
|
||||
//
|
||||
// A convention that's worth using: use ACC_LOAD_ANY/ACC_STORE_ANY for
|
||||
// cases that you're unsure about or haven't considered carefully. Use
|
||||
// ACC_ALL/ACC_ALL_WRITABLE for cases that you have considered carefully.
|
||||
// ACC_ALL/ACC_ALL_STORABLE for cases that you have considered carefully.
|
||||
// That way it's easy to tell which ones have been considered and which
|
||||
// haven't.
|
||||
static const AccSet ACC_NONE = 0x0;
|
||||
static const AccSet ACC_ALL_WRITABLE = ACC_STACK | ACC_OTHER;
|
||||
static const AccSet ACC_ALL = ACC_READONLY | ACC_ALL_WRITABLE;
|
||||
static const AccSet ACC_ALL_STORABLE = ACC_STACK | ACC_RSTACK | ACC_OTHER;
|
||||
static const AccSet ACC_ALL = ACC_READONLY | ACC_ALL_STORABLE;
|
||||
static const AccSet ACC_LOAD_ANY = ACC_ALL; // synonym
|
||||
static const AccSet ACC_STORE_ANY = ACC_ALL_WRITABLE; // synonym
|
||||
|
||||
static const AccSet ACC_STORE_ANY = ACC_ALL_STORABLE; // synonym
|
||||
|
||||
struct CallInfo
|
||||
{
|
||||
@ -1488,17 +1500,6 @@ namespace nanojit
|
||||
|
||||
// Chooses LIR_sti or LIR_stqi based on size of value.
|
||||
LIns* insStorei(LIns* value, LIns* base, int32_t d, AccSet accSet);
|
||||
|
||||
// Insert a load/store with the most pessimistic region access info, which is always safe.
|
||||
LIns* insLoad(LOpcode op, LIns* base, int32_t d) {
|
||||
return insLoad(op, base, d, ACC_LOAD_ANY);
|
||||
}
|
||||
LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d) {
|
||||
return insStore(op, value, base, d, ACC_STORE_ANY);
|
||||
}
|
||||
LIns* insStorei(LIns* value, LIns* base, int32_t d) {
|
||||
return insStorei(value, base, d, ACC_STORE_ANY);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -1598,7 +1599,6 @@ namespace nanojit
|
||||
void formatImmq(RefBuf* buf, uint64_t c);
|
||||
void formatGuard(InsBuf* buf, LInsp ins);
|
||||
void formatGuardXov(InsBuf* buf, LInsp ins);
|
||||
char* formatAccSet(RefBuf* buf, LInsp ins, bool isLoad);
|
||||
|
||||
public:
|
||||
LInsPrinter(Allocator& alloc)
|
||||
@ -1611,6 +1611,7 @@ namespace nanojit
|
||||
char *formatAddr(RefBuf* buf, void* p);
|
||||
char *formatRef(RefBuf* buf, LInsp ref);
|
||||
char *formatIns(InsBuf* buf, LInsp ins);
|
||||
char *formatAccSet(RefBuf* buf, AccSet accSet);
|
||||
|
||||
AddrNameMap* addrNameMap;
|
||||
LirNameMap* lirNameMap;
|
||||
@ -1739,23 +1740,35 @@ namespace nanojit
|
||||
// We divide instruction kinds into groups for the use of LInsHashSet.
|
||||
// LIns0 isn't present because we don't need to record any 0-ary
|
||||
// instructions.
|
||||
LInsImm = 0,
|
||||
LInsImmq = 1, // only occurs on 64-bit platforms
|
||||
LInsImmf = 2,
|
||||
LIns1 = 3,
|
||||
LIns2 = 4,
|
||||
LIns3 = 5,
|
||||
LInsLoad = 6,
|
||||
LInsCall = 7,
|
||||
LInsImm = 0,
|
||||
LInsImmq = 1, // only occurs on 64-bit platforms
|
||||
LInsImmf = 2,
|
||||
LIns1 = 3,
|
||||
LIns2 = 4,
|
||||
LIns3 = 5,
|
||||
LInsCall = 6,
|
||||
|
||||
// Loads are special. We group them by access region: one table for
|
||||
// each region, and then a catch-all table for any loads marked with
|
||||
// multiple regions. This arrangement makes the removal of
|
||||
// invalidated loads fast -- eg. we can invalidate all STACK loads by
|
||||
// just clearing the LInsLoadStack table. The disadvantage is that
|
||||
// loads marked with multiple regions must be invalidated
|
||||
// conservatively, eg. if any intervening stores occur. But loads
|
||||
// marked with multiple regions should be rare.
|
||||
LInsLoadReadOnly = 7,
|
||||
LInsLoadStack = 8,
|
||||
LInsLoadRStack = 9,
|
||||
LInsLoadOther = 10,
|
||||
LInsLoadMultiple = 11,
|
||||
|
||||
LInsFirst = 0,
|
||||
LInsLast = 7,
|
||||
LInsLast = 11,
|
||||
// need a value after "last" to outsmart compilers that will insist last+1 is impossible
|
||||
LInsInvalid = 8
|
||||
LInsInvalid = 12
|
||||
};
|
||||
#define nextKind(kind) LInsHashKind(kind+1)
|
||||
|
||||
// @todo, this could be replaced by a generic HashMap or HashSet, if we had one
|
||||
class LInsHashSet
|
||||
{
|
||||
// Must be a power of 2.
|
||||
@ -1772,14 +1785,15 @@ namespace nanojit
|
||||
uint32_t m_used[LInsLast + 1];
|
||||
typedef uint32_t (LInsHashSet::*find_t)(LInsp);
|
||||
find_t m_find[LInsLast + 1];
|
||||
|
||||
Allocator& alloc;
|
||||
|
||||
static uint32_t hashImm(int32_t);
|
||||
static uint32_t hashImmq(uint64_t); // not NANOJIT_64BIT only used by findImmf()
|
||||
static uint32_t hash1(LOpcode v, LInsp);
|
||||
static uint32_t hash2(LOpcode v, LInsp, LInsp);
|
||||
static uint32_t hash3(LOpcode v, LInsp, LInsp, LInsp);
|
||||
static uint32_t hashLoad(LOpcode v, LInsp, int32_t);
|
||||
static uint32_t hashImmq(uint64_t); // not NANOJIT_64BIT-only -- used by findImmf()
|
||||
static uint32_t hash1(LOpcode op, LInsp);
|
||||
static uint32_t hash2(LOpcode op, LInsp, LInsp);
|
||||
static uint32_t hash3(LOpcode op, LInsp, LInsp, LInsp);
|
||||
static uint32_t hashLoad(LOpcode op, LInsp, int32_t, AccSet);
|
||||
static uint32_t hashCall(const CallInfo *call, uint32_t argc, LInsp args[]);
|
||||
|
||||
// These private versions are used after an LIns has been created;
|
||||
@ -1792,8 +1806,12 @@ namespace nanojit
|
||||
uint32_t find1(LInsp ins);
|
||||
uint32_t find2(LInsp ins);
|
||||
uint32_t find3(LInsp ins);
|
||||
uint32_t findLoad(LInsp ins);
|
||||
uint32_t findCall(LInsp ins);
|
||||
uint32_t findLoadReadOnly(LInsp ins);
|
||||
uint32_t findLoadStack(LInsp ins);
|
||||
uint32_t findLoadRStack(LInsp ins);
|
||||
uint32_t findLoadOther(LInsp ins);
|
||||
uint32_t findLoadMultiple(LInsp ins);
|
||||
|
||||
void grow(LInsHashKind kind);
|
||||
|
||||
@ -1810,19 +1828,22 @@ namespace nanojit
|
||||
LInsp find1(LOpcode v, LInsp a, uint32_t &k);
|
||||
LInsp find2(LOpcode v, LInsp a, LInsp b, uint32_t &k);
|
||||
LInsp find3(LOpcode v, LInsp a, LInsp b, LInsp c, uint32_t &k);
|
||||
LInsp findLoad(LOpcode v, LInsp a, int32_t b, uint32_t &k);
|
||||
LInsp findLoad(LOpcode v, LInsp a, int32_t b, AccSet accSet, LInsHashKind kind,
|
||||
uint32_t &k);
|
||||
LInsp findCall(const CallInfo *call, uint32_t argc, LInsp args[], uint32_t &k);
|
||||
|
||||
// 'k' is the index found by findXYZ().
|
||||
LInsp add(LInsHashKind kind, LInsp ins, uint32_t k);
|
||||
void add(LInsHashKind kind, LInsp ins, uint32_t k);
|
||||
|
||||
void clear();
|
||||
void clear(); // clears all tables
|
||||
void clear(LInsHashKind); // clears one table
|
||||
};
|
||||
|
||||
class CseFilter: public LirWriter
|
||||
{
|
||||
private:
|
||||
LInsHashSet* exprs;
|
||||
AccSet storesSinceLastLoad; // regions stored to since the last load
|
||||
|
||||
public:
|
||||
CseFilter(LirWriter *out, Allocator&);
|
||||
@ -1836,7 +1857,8 @@ namespace nanojit
|
||||
LIns* ins1(LOpcode v, LInsp);
|
||||
LIns* ins2(LOpcode v, LInsp, LInsp);
|
||||
LIns* ins3(LOpcode v, LInsp, LInsp, LInsp);
|
||||
LIns* insLoad(LOpcode op, LInsp cond, int32_t d, AccSet accSet);
|
||||
LIns* insLoad(LOpcode op, LInsp base, int32_t d, AccSet accSet);
|
||||
LIns* insStore(LOpcode op, LInsp value, LInsp base, int32_t d, AccSet accSet);
|
||||
LIns* insCall(const CallInfo *call, LInsp args[]);
|
||||
LIns* insGuard(LOpcode op, LInsp cond, GuardRecord *gr);
|
||||
LIns* insGuardXov(LOpcode op, LInsp a, LInsp b, GuardRecord *gr);
|
||||
@ -1975,37 +1997,6 @@ namespace nanojit
|
||||
LInsp read();
|
||||
};
|
||||
|
||||
// eliminate redundant loads by watching for stores & mutator calls
|
||||
class LoadFilter: public LirWriter
|
||||
{
|
||||
public:
|
||||
LInsp sp, rp;
|
||||
LInsHashSet* exprs;
|
||||
|
||||
void clear(LInsp p);
|
||||
|
||||
public:
|
||||
LoadFilter(LirWriter *out, Allocator& alloc)
|
||||
: LirWriter(out), sp(NULL), rp(NULL)
|
||||
{
|
||||
uint32_t kInitialCaps[LInsLast + 1];
|
||||
kInitialCaps[LInsImm] = 1;
|
||||
kInitialCaps[LInsImmq] = 1;
|
||||
kInitialCaps[LInsImmf] = 1;
|
||||
kInitialCaps[LIns1] = 1;
|
||||
kInitialCaps[LIns2] = 1;
|
||||
kInitialCaps[LIns3] = 1;
|
||||
kInitialCaps[LInsLoad] = 64;
|
||||
kInitialCaps[LInsCall] = 1;
|
||||
exprs = new (alloc) LInsHashSet(alloc, kInitialCaps);
|
||||
}
|
||||
|
||||
LInsp ins0(LOpcode);
|
||||
LInsp insLoad(LOpcode op, LInsp base, int32_t disp, AccSet accSet);
|
||||
LInsp insStore(LOpcode op, LInsp value, LInsp base, int32_t disp, AccSet accSet);
|
||||
LInsp insCall(const CallInfo *call, LInsp args[]);
|
||||
};
|
||||
|
||||
struct SoftFloatOps
|
||||
{
|
||||
const CallInfo* opmap[LIR_sentinel];
|
||||
@ -2049,19 +2040,26 @@ namespace nanojit
|
||||
class ValidateWriter : public LirWriter
|
||||
{
|
||||
private:
|
||||
const char* _whereInPipeline;
|
||||
LInsPrinter* printer;
|
||||
const char* whereInPipeline;
|
||||
|
||||
const char* type2string(LTy type);
|
||||
void typeCheckArgs(LOpcode op, int nArgs, LTy formals[], LIns* args[]);
|
||||
void errorStructureShouldBe(LOpcode op, const char* argDesc, int argN, LIns* arg,
|
||||
const char* shouldBeDesc);
|
||||
void errorAccSetShould(const char* what, AccSet accSet, const char* shouldDesc);
|
||||
void errorAccSet(const char* what, AccSet accSet, const char* shouldDesc);
|
||||
void checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2);
|
||||
void checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins);
|
||||
void checkLInsIsNull(LOpcode op, int argN, LIns* ins);
|
||||
void checkAccSet(LOpcode op, LInsp base, AccSet accSet, AccSet maxAccSet);
|
||||
|
||||
LInsp sp, rp;
|
||||
|
||||
public:
|
||||
ValidateWriter(LirWriter* out, const char* stageName);
|
||||
ValidateWriter(LirWriter* out, LInsPrinter* printer, const char* where);
|
||||
void setSp(LInsp ins) { sp = ins; }
|
||||
void setRp(LInsp ins) { rp = ins; }
|
||||
|
||||
LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet);
|
||||
LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
|
||||
LIns* ins0(LOpcode v);
|
||||
|
Loading…
Reference in New Issue
Block a user