mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
1298 lines
45 KiB
C++
1298 lines
45 KiB
C++
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
|
|
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is [Open Source Virtual Machine].
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Adobe System Incorporated.
|
|
* Portions created by the Initial Developer are Copyright (C) 2009
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Adobe AS3 Team
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
#include "nanojit.h"
|
|
|
|
// uncomment this to enable _vprof/_nvprof macros
|
|
//#define DOPROF
|
|
#include "../vprof/vprof.h"
|
|
|
|
#if defined FEATURE_NANOJIT && defined NANOJIT_X64
|
|
|
|
/*
|
|
completion
|
|
- 64bit branch offsets
|
|
- finish cmov/qcmov with other conditions
|
|
- validate asm_cond with other conditions
|
|
|
|
better code
|
|
- put R12 back in play as a base register
|
|
- no-disp addr modes (except RBP/R13)
|
|
- disp64 branch/call
|
|
- spill gp values to xmm registers?
|
|
- prefer xmm registers for copies since gprs are in higher demand?
|
|
- stack arg doubles
|
|
- stack based LIR_param
|
|
|
|
tracing
|
|
- asm_qjoin
|
|
- asm_qhi
|
|
- nFragExit
|
|
|
|
*/
|
|
|
|
namespace nanojit
|
|
{
|
|
// Register tables: the integer return register, the integer argument
// registers (in argument order) and the saved registers. The _MSC_VER
// branch lists four argument registers, the other branch lists six.
const Register Assembler::retRegs[] = { RAX };
#if defined(_MSC_VER)
const Register Assembler::argRegs[]   = { RCX, RDX, R8, R9 };
const Register Assembler::savedRegs[] = { RBX, RSI, RDI, R12, R13, R14, R15 };
#else
const Register Assembler::argRegs[]   = { RDI, RSI, RDX, RCX, R8, R9 };
const Register Assembler::savedRegs[] = { RBX, R12, R13, R14, R15 };
#endif
|
|
|
|
// Printable register names: 16 general purpose names followed by 16 xmm names.
const char *regNames[] = {
    // general purpose
    "rax",   "rcx",   "rdx",   "rbx",   "rsp",   "rbp",   "rsi",   "rdi",
    "r8",    "r9",    "r10",   "r11",   "r12",   "r13",   "r14",   "r15",
    // sse
    "xmm0",  "xmm1",  "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8",  "xmm9",  "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};
|
|
|
|
#ifdef _DEBUG
// Debug builds: log the name of the unimplemented feature (verbose builds
// only), then fail an assertion carrying the same text.
static void todo(const char *what) {
    verbose_only( avmplus::AvmLog("%s",what); )
    NanoAssertMsgf(false, "%s", what);
}
#define TODO(x) todo(#x)
#else
// Non-debug builds: TODO(x) expands to nothing.
#define TODO(x)
#endif
|
|
|
|
// MODRM and restrictions:
|
|
// memory access modes != 11 require SIB if base&7 == 4 (RSP or R12)
|
|
// mode 00 with base&7 == 5 means RIP+disp32 (RBP or R13), use mode 01 disp8=0 instead
|
|
// rex prefix required to use RSP-R15 as 8bit registers in mod/rm8 modes.
|
|
|
|
// take R12 out of play as a base register because it requires the SIB byte like ESP
|
|
const RegisterMask BaseRegs = GpRegs & ~rmask(R12);
|
|
|
|
// Instruction length in bytes, stored in the low 8 bits of a packed opcode.
static inline int oplen(uint64_t op) {
    const uint64_t lengthBits = op & 0xFF;
    return lengthBits;
}
|
|
|
|
// encode 2-register rex prefix. dropped if none of its bits are set.
|
|
static inline uint64_t rexrb(uint64_t op, Register r, Register b) {
|
|
int shift = 64 - 8*oplen(op);
|
|
uint64_t rex = ((op >> shift) & 255) | ((r&8)>>1) | ((b&8)>>3);
|
|
return rex != 0x40 ? op | rex << shift : op - 1;
|
|
}
|
|
|
|
// encode 2-register rex prefix. dropped if none of its bits are set, but
|
|
// keep REX if b >= rsp, to allow uniform use of all 16 8bit registers
|
|
static inline uint64_t rexrb8(uint64_t op, Register r, Register b) {
|
|
int shift = 64 - 8*oplen(op);
|
|
uint64_t rex = ((op >> shift) & 255) | ((r&8)>>1) | ((b&8)>>3);
|
|
return ((rex | (b & ~3)) != 0x40) ? (op | (rex << shift)) : op - 1;
|
|
}
|
|
|
|
// encode 2-register rex prefix that follows a manditory prefix (66,F2,F3)
|
|
// [prefix][rex][opcode]
|
|
static inline uint64_t rexprb(uint64_t op, Register r, Register b) {
|
|
int shift = 64 - 8*oplen(op) + 8;
|
|
uint64_t rex = ((op >> shift) & 255) | ((r&8)>>1) | ((b&8)>>3);
|
|
// to drop rex, we replace rex with manditory prefix, and decrement length
|
|
return rex != 0x40 ? op | rex << shift :
|
|
((op & ~(255LL<<shift)) | (op>>(shift-8)&255) << shift) - 1;
|
|
}
|
|
|
|
// [rex][opcode][mod-rr]
|
|
static inline uint64_t mod_rr(uint64_t op, Register r, Register b) {
|
|
return op | uint64_t((r&7)<<3 | (b&7))<<56;
|
|
}
|
|
|
|
// Fill in the mod/rm byte and displacement of a packed [base+disp] opcode.
// When d fits in a signed byte the disp32 form is rewritten into the
// shorter disp8 form (3 bytes saved); otherwise d is stored as disp32.
// r and b must be GP registers and b must not need a SIB byte.
static inline uint64_t mod_disp32(uint64_t op, Register r, Register b, int32_t d) {
    NanoAssert(IsGpReg(r) && IsGpReg(b));
    NanoAssert((b & 7) != 4); // using RSP or R12 as base requires SIB
    if (!isS8(d)) {
        // op is: 0x[disp32][mod][op][rex][len]
        return op | int64_t(d)<<32 | uint64_t((r&7)<<3 | (b&7))<<24;
    }
    // op is: 0x[disp32=0][mod=2:r:b][op][rex][len]
    NanoAssert((((op>>24)&255)>>6) == 2); // disp32 mode
    const int fullLen = oplen(op);
    // replace the mod/rm byte with mod=1 (disp8) plus the reg and rm fields
    op = (op & ~0xff000000LL) | (0x40 | (r&7)<<3 | (b&7))<<24;
    // slide the instruction up 3 bytes, put disp8 in the top byte and
    // store the shortened length in the low byte
    return op<<24 | int64_t(d)<<56 | (fullLen-3);
}
|
|
|
|
#ifdef NJ_VERBOSE
// Format `bytes` instruction bytes starting at p as space separated
// two-digit hex and hand the text to asm_output.
void Assembler::dis(NIns *p, int bytes) {
    char text[32]; // room for 8 hex bytes plus null
    int pos = 0;
    text[pos++] = ' ';
    for (int i = 0; i < bytes; i++, pos += 3) {
        VMPI_sprintf(text + pos, "%02x ", p[i]);
    }
    text[pos] = 0;
    asm_output("%s", text);
}
#endif
|
|
|
|
// Write a packed instruction immediately below _nIns and move _nIns down
// by the instruction length stored in the low byte of op.
void Assembler::emit(uint64_t op) {
    const int nbytes = oplen(op);
    // A full 8 bytes are stored even though _nIns only moves by nbytes,
    // so 8 bytes must be protected to avoid stomping the page header or
    // the end of the preceding page (might segf).
    underrunProtect(8);
    *((int64_t*)_nIns - 1) = op;
    _nIns -= nbytes;
    _nvprof("x64-bytes", nbytes);
    verbose_only( if (_logc->lcbits & LC_Assembly) dis(_nIns, nbytes); )
}
|
|
|
|
// Emit op with the signed 8-bit value v placed in the top byte.
void Assembler::emit8(uint64_t op, int64_t v) {
    NanoAssert(isS8(v));
    const uint64_t topByte = uint64_t(v)<<56;
    emit(op | topByte);
}
|
|
|
|
// Emit op with the signed 32-bit value v placed in the upper four bytes.
void Assembler::emit32(uint64_t op, int64_t v) {
    NanoAssert(isS32(v));
    const uint64_t upperHalf = uint64_t(uint32_t(v))<<32;
    emit(op | upperHalf);
}
|
|
|
|
// 2-register modrm32 form
|
|
void Assembler::emitrr(uint64_t op, Register r, Register b) {
|
|
emit(rexrb(mod_rr(op, r, b), r, b));
|
|
}
|
|
|
|
// 2-register modrm8 form (8 bit operand size)
|
|
void Assembler::emitrr8(uint64_t op, Register r, Register b) {
|
|
emit(rexrb8(mod_rr(op, r, b), r, b));
|
|
}
|
|
|
|
// same as emitrr, but with a prefix byte
|
|
void Assembler::emitprr(uint64_t op, Register r, Register b) {
|
|
emit(rexprb(mod_rr(op, r, b), r, b));
|
|
}
|
|
|
|
// disp32 modrm form, when the disp fits in the instruction (opcode is 1-3 bytes)
|
|
void Assembler::emitrm(uint64_t op, Register r, int32_t d, Register b) {
|
|
emit(rexrb(mod_disp32(op, r, b, d), r, b));
|
|
}
|
|
|
|
// disp32 modrm form when the disp must be written separately (opcode is 4+ bytes)
|
|
void Assembler::emitprm(uint64_t op, Register r, int32_t d, Register b) {
|
|
if (isS8(d)) {
|
|
NanoAssert(((op>>56)&0xC0) == 0x80); // make sure mod bits == 2 == disp32 mode
|
|
underrunProtect(1+8);
|
|
*(--_nIns) = (NIns) d;
|
|
_nvprof("x64-bytes", 1);
|
|
op ^= 0xC000000000000000LL; // change mod bits to 1 == disp8 mode
|
|
} else {
|
|
underrunProtect(4+8); // room for displ plus fullsize op
|
|
*((int32_t*)(_nIns -= 4)) = d;
|
|
_nvprof("x64-bytes", 4);
|
|
}
|
|
emitprr(op, r, b);
|
|
}
|
|
|
|
// Emit a 2-register instruction followed by a 32-bit immediate. The
// immediate is stored first, below _nIns, then the opcode is emitted
// below it. Both registers must be GP registers.
void Assembler::emitrr_imm(uint64_t op, Register r, Register b, int32_t imm) {
    NanoAssert(IsGpReg(r) && IsGpReg(b));
    underrunProtect(4+8); // room for imm plus fullsize op
    *((int32_t*)(_nIns -= 4)) = imm;
    // counted under the same profile key as every other emitter in this file
    _nvprof("x64-bytes", 4);
    emitrr(op, r, b);
}
|
|
|
|
// op = [rex][opcode][modrm][imm8]
|
|
void Assembler::emitr_imm8(uint64_t op, Register b, int32_t imm8) {
|
|
NanoAssert(IsGpReg(b) && isS8(imm8));
|
|
op |= uint64_t(imm8)<<56 | uint64_t(b&7)<<48; // modrm is 2nd to last byte
|
|
emit(rexrb(op, (Register)0, b));
|
|
}
|
|
|
|
// Emit a 64-bit register-to-register move, d <- s. Both must be GP registers.
void Assembler::MR(Register d, Register s) {
    NanoAssert(IsGpReg(d));
    NanoAssert(IsGpReg(s));
    emitrr(X64_movqr, d, s);
}
|
|
|
|
// Emit an unconditional jump to target. A null target emits the 32-bit
// form with a zero offset. Offsets that do not fit in 32 bits are not
// implemented (TODO).
void Assembler::JMP(NIns *target) {
    if (target && !isS32(target - _nIns)) {
        TODO(jmp64);
        return;
    }
    underrunProtect(8); // must do this before calculating offset
    if (target && isS8(target - _nIns))
        emit8(X64_jmp8, target - _nIns);
    else
        emit32(X64_jmp, target ? target - _nIns : 0);
}
|
|
|
|
// register allocation for 2-address style ops of the form R = R (op) B
|
|
void Assembler::regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb) {
|
|
rb = UnknownReg;
|
|
LIns *a = ins->oprnd1();
|
|
LIns *b = ins->oprnd2();
|
|
if (a != b) {
|
|
rb = findRegFor(b, allow);
|
|
allow &= ~rmask(rb);
|
|
}
|
|
rr = prepResultReg(ins, allow);
|
|
Reservation* rA = getresv(a);
|
|
// if this is last use of a in reg, we can re-use result reg
|
|
if (rA == 0 || (ra = rA->reg) == UnknownReg) {
|
|
ra = findSpecificRegFor(a, rr);
|
|
} else {
|
|
// rA already has a register assigned
|
|
}
|
|
if (a == b) {
|
|
rb = ra;
|
|
}
|
|
}
|
|
|
|
// 64-bit binary ops are handled entirely by asm_arith.
void Assembler::asm_qbinop(LIns *ins)
{
    asm_arith(ins);
}
|
|
|
|
// Shift by a variable count. The count operand is placed in RCX; a
// constant count is delegated to asm_shift_imm.
void Assembler::asm_shift(LIns *ins) {
    LIns *count = ins->oprnd2();
    if (count->isconst()) {
        asm_shift_imm(ins);
        return;
    }
    Register rr, ra;
    RegisterMask allow;
    if (count == ins->oprnd1()) {
        // value == count means both must be in RCX
        allow = rmask(RCX);
    } else {
        findSpecificRegFor(count, RCX);
        allow = GpRegs & ~rmask(RCX);
    }
    regalloc_unary(ins, allow, rr, ra);
    X64Opcode xop;
    switch (ins->opcode()) {
    default:        TODO(asm_shift);    // then falls through to LIR_qursh
    case LIR_qursh: xop = X64_shrq; break;
    case LIR_qirsh: xop = X64_sarq; break;
    case LIR_qilsh: xop = X64_shlq; break;
    case LIR_ush:   xop = X64_shr;  break;
    case LIR_rsh:   xop = X64_sar;  break;
    case LIR_lsh:   xop = X64_shl;  break;
    }
    emitr(xop, rr);
    if (rr != ra) {
        MR(rr, ra);
    }
}
|
|
|
|
// Shift by a constant count, encoded as an 8-bit immediate.
void Assembler::asm_shift_imm(LIns *ins) {
    Register rr, ra;
    regalloc_unary(ins, GpRegs, rr, ra);
    X64Opcode xop;
    switch (ins->opcode()) {
    default:        TODO(shiftimm);     // then falls through to LIR_qursh
    case LIR_qursh: xop = X64_shrqi; break;
    case LIR_qirsh: xop = X64_sarqi; break;
    case LIR_qilsh: xop = X64_shlqi; break;
    case LIR_ush:   xop = X64_shri;  break;
    case LIR_rsh:   xop = X64_sari;  break;
    case LIR_lsh:   xop = X64_shli;  break;
    }
    // Keep only the low 6 bits of the count. x86 shifts ignore the higher
    // bits of the count (5 bits are used for 32-bit operands, 6 for 64-bit),
    // so the executed shift is unchanged, and the value always passes the
    // isS8 assertion in emit8 (a mask of 255 could produce 128..255).
    int shift = ins->oprnd2()->imm32() & 63;
    emit8(rexrb(xop | uint64_t(rr&7)<<48, (Register)0, rr), shift);
    if (rr != ra)
        MR(rr, ra);
}
|
|
|
|
// True when ins is a 32-bit constant, or a 64-bit constant whose value
// fits in a signed 32-bit immediate.
static bool isImm32(LIns *ins) {
    if (ins->isconst())
        return true;
    return ins->isconstq() && isS32(ins->imm64());
}
|
|
// Value of a constant as int32: imm32() for a 32-bit constant, otherwise
// the 64-bit constant truncated to 32 bits.
static int32_t getImm32(LIns *ins) {
    if (ins->isconst())
        return ins->imm32();
    return int32_t(ins->imm64());
}
|
|
|
|
// binary op, integer regs, rhs is int32 const
|
|
void Assembler::asm_arith_imm(LIns *ins) {
|
|
LIns *b = ins->oprnd2();
|
|
int32_t imm = getImm32(b);
|
|
LOpcode op = ins->opcode();
|
|
Register rr, ra;
|
|
if (op == LIR_mul) {
|
|
// imul has true 3-addr form, it doesn't clobber ra
|
|
rr = prepResultReg(ins, GpRegs);
|
|
LIns *a = ins->oprnd1();
|
|
ra = findRegFor(a, GpRegs);
|
|
emitrr_imm(X64_imuli, rr, ra, imm);
|
|
return;
|
|
}
|
|
regalloc_unary(ins, GpRegs, rr, ra);
|
|
X64Opcode xop;
|
|
if (isS8(imm)) {
|
|
switch (ins->opcode()) {
|
|
default: TODO(arith_imm8);
|
|
case LIR_iaddp:
|
|
case LIR_add: xop = X64_addlr8; break;
|
|
case LIR_and: xop = X64_andlr8; break;
|
|
case LIR_or: xop = X64_orlr8; break;
|
|
case LIR_sub: xop = X64_sublr8; break;
|
|
case LIR_xor: xop = X64_xorlr8; break;
|
|
case LIR_qiadd:
|
|
case LIR_qaddp: xop = X64_addqr8; break;
|
|
case LIR_qiand: xop = X64_andqr8; break;
|
|
case LIR_qior: xop = X64_orqr8; break;
|
|
case LIR_qxor: xop = X64_xorqr8; break;
|
|
}
|
|
emitr_imm8(xop, rr, imm);
|
|
} else {
|
|
switch (ins->opcode()) {
|
|
default: TODO(arith_imm);
|
|
case LIR_iaddp:
|
|
case LIR_add: xop = X64_addlri; break;
|
|
case LIR_and: xop = X64_andlri; break;
|
|
case LIR_or: xop = X64_orlri; break;
|
|
case LIR_sub: xop = X64_sublri; break;
|
|
case LIR_xor: xop = X64_xorlri; break;
|
|
case LIR_qiadd:
|
|
case LIR_qaddp: xop = X64_addqri; break;
|
|
case LIR_qiand: xop = X64_andqri; break;
|
|
case LIR_qior: xop = X64_orqri; break;
|
|
case LIR_qxor: xop = X64_xorqri; break;
|
|
}
|
|
emitr_imm(xop, rr, imm);
|
|
}
|
|
if (rr != ra)
|
|
MR(rr, ra);
|
|
}
|
|
|
|
// binary op with integer registers
|
|
void Assembler::asm_arith(LIns *ins) {
|
|
Register rr, ra, rb;
|
|
LOpcode op = ins->opcode();
|
|
if ((op & ~LIR64) >= LIR_lsh && (op & ~LIR64) <= LIR_ush) {
|
|
asm_shift(ins);
|
|
return;
|
|
}
|
|
LIns *b = ins->oprnd2();
|
|
if (isImm32(b)) {
|
|
asm_arith_imm(ins);
|
|
return;
|
|
}
|
|
regalloc_binary(ins, GpRegs, rr, ra, rb);
|
|
X64Opcode xop;
|
|
switch (ins->opcode()) {
|
|
default:
|
|
TODO(asm_arith);
|
|
case LIR_or:
|
|
xop = X64_orlrr;
|
|
break;
|
|
case LIR_sub:
|
|
xop = X64_subrr;
|
|
break;
|
|
case LIR_iaddp:
|
|
case LIR_add:
|
|
xop = X64_addrr;
|
|
break;
|
|
case LIR_and:
|
|
xop = X64_andrr;
|
|
break;
|
|
case LIR_xor:
|
|
xop = X64_xorrr;
|
|
break;
|
|
case LIR_mul:
|
|
xop = X64_imul;
|
|
break;
|
|
case LIR_qxor:
|
|
xop = X64_xorqrr;
|
|
break;
|
|
case LIR_qior:
|
|
xop = X64_orqrr;
|
|
break;
|
|
case LIR_qiand:
|
|
xop = X64_andqrr;
|
|
break;
|
|
case LIR_qiadd:
|
|
case LIR_qaddp:
|
|
xop = X64_addqrr;
|
|
break;
|
|
}
|
|
emitrr(xop, rr, rb);
|
|
if (rr != ra)
|
|
MR(rr,ra);
|
|
}
|
|
|
|
// binary op with fp registers
|
|
void Assembler::asm_fop(LIns *ins) {
|
|
Register rr, ra, rb;
|
|
regalloc_binary(ins, FpRegs, rr, ra, rb);
|
|
X64Opcode xop;
|
|
switch (ins->opcode()) {
|
|
default:
|
|
TODO(asm_fop);
|
|
case LIR_fdiv:
|
|
xop = X64_divsd;
|
|
break;
|
|
case LIR_fmul:
|
|
xop = X64_mulsd;
|
|
break;
|
|
case LIR_fadd:
|
|
xop = X64_addsd;
|
|
break;
|
|
case LIR_fsub:
|
|
xop = X64_subsd;
|
|
break;
|
|
}
|
|
emitprr(xop, rr, rb);
|
|
if (rr != ra) {
|
|
asm_nongp_copy(rr, ra);
|
|
}
|
|
}
|
|
|
|
// Unary integer op: LIR_not emits X64_not, any other opcode emits X64_neg.
void Assembler::asm_neg_not(LIns *ins) {
    Register rr, ra;
    regalloc_unary(ins, GpRegs, rr, ra);
    NanoAssert(IsGpReg(ra));
    const X64Opcode xop = ins->isop(LIR_not) ? X64_not : X64_neg;
    emitr(xop, rr);
    if (rr != ra) {
        MR(rr, ra);
    }
}
|
|
|
|
// Emit a call: the call instruction itself first, then the argument setup.
// Arguments are walked from the highest index down; each goes to the next
// integer argument register, to an XMM register (doubles), or to the stack.
void Assembler::asm_call(LIns *ins) {
    const CallInfo *ci = ins->callInfo();
    ArgSize sizes[MAXARGS];
    int argc = ci->get_sizes(sizes);

    if (ci->isIndirect()) {
        // Indirect call: we assign the address arg to RAX since it's not
        // used for regular arguments, and is otherwise scratch since it's
        // clobbered by the call.
        asm_regarg(ARGSIZE_P, ins->arg(--argc), RAX);
        emit(X64_callrax);
    } else {
        verbose_only(if (_logc->lcbits & LC_Assembly)
            outputf(" %p:", _nIns);
        )
        NIns *target = (NIns*)ci->_address;
        // must do underrunProtect before calculating offset
        underrunProtect(8);
        if (isS32(target - _nIns)) {
            emit32(X64_call, target - _nIns);
        } else {
            // can't reach target from here, load imm64 into RAX and call through it
            emit(X64_callrax);
            emit_quad(RAX, (uint64_t)target);
        }
    }

#ifdef _MSC_VER
    int stk_used = 32; // always reserve 32byte shadow area
#else
    int stk_used = 0;
    Register fr = XMM0;
#endif
    int arg_index = 0;
    for (int j = argc - 1; j >= 0; j--) {
        const ArgSize sz = sizes[j];
        LIns* arg = ins->arg(j);
        if ((sz & ARGSIZE_MASK_INT) && arg_index < NumArgRegs) {
            // gp arg
            asm_regarg(sz, arg, argRegs[arg_index]);
            arg_index++;
        }
#ifdef _MSC_VER
        else if (sz == ARGSIZE_F && arg_index < NumArgRegs) {
            // double goes in XMM reg # based on overall arg_index
            asm_regarg(sz, arg, Register(XMM0+arg_index));
            arg_index++;
        }
#else
        else if (sz == ARGSIZE_F && fr < XMM8) {
            // double goes in next available XMM register
            asm_regarg(sz, arg, fr);
            fr = nextreg(fr);
        }
#endif
        else {
            asm_stkarg(sz, arg, stk_used);
            stk_used += sizeof(void*);
        }
    }

    // remember the largest outgoing stack area seen so far
    if (stk_used > max_stk_used)
        max_stk_used = stk_used;
}
|
|
|
|
// Place call argument p in register r. 32-bit arguments are widened to 64
// bits: ARGSIZE_I by sign extension, ARGSIZE_U by zero extension. A 32-bit
// constant is loaded directly as the already-widened 64-bit value.
void Assembler::asm_regarg(ArgSize sz, LIns *p, Register r) {
    if (sz == ARGSIZE_I || sz == ARGSIZE_U) {
        const bool isSigned = (sz == ARGSIZE_I);
        NanoAssert(!p->isQuad());
        if (p->isconst()) {
            if (isSigned)
                emit_quad(r, int64_t(p->imm32()));
            else
                emit_quad(r, uint64_t(uint32_t(p->imm32())));
            return;
        }
        if (isSigned) {
            // sign extend int32 to int64
            emitrr(X64_movsxdr, r, r);
        } else {
            // zero extend with 32bit mov, auto-zeros upper 32bits
            emitrr(X64_movlr, r, r);
        }
    }
    /* There is no point in folding an immediate here, because
     * the argument register must be a scratch register and we're
     * just before a call. Just reserving the register will cause
     * the constant to be rematerialized nearby in asm_restore(),
     * which is the same instruction we would otherwise emit right
     * here, and moving it earlier in the stream provides more scheduling
     * freedom to the cpu. */
    findSpecificRegFor(p, r);
}
|
|
|
|
// Store call argument p to the outgoing stack area at [rsp + stk_off].
// Only integer-class arguments are implemented; stk_off must fit in a
// signed byte.
void Assembler::asm_stkarg(ArgSize sz, LIns *p, int stk_off) {
    NanoAssert(isS8(stk_off));
    if (!(sz & ARGSIZE_MASK_INT)) {
        TODO(asm_stkarg_non_int);
        return;
    }
    Register r = findRegFor(p, GpRegs);
    uint64_t xop = X64_movqspr | uint64_t(stk_off) << 56;   // movq [rsp+d8], r
    // insert r into mod/rm and rex bytes
    const uint64_t modrmBits = uint64_t((r&7)<<3) << 40;
    const uint64_t rexBits   = uint64_t((r&8)>>1) << 24;
    xop |= modrmBits | rexBits;
    emit(xop);
    if (sz == ARGSIZE_I) {
        // extend int32 to int64
        NanoAssert(!p->isQuad());
        emitrr(X64_movsxdr, r, r);
    } else if (sz == ARGSIZE_U) {
        // extend uint32 to uint64
        NanoAssert(!p->isQuad());
        emitrr(X64_movlr, r, r);
    }
}
|
|
|
|
// Widen a 32-bit value to 64 bits: LIR_u2q zero-extends, LIR_i2q sign-extends.
void Assembler::asm_promote(LIns *ins) {
    Register rr, ra;
    regalloc_unary(ins, GpRegs, rr, ra);
    NanoAssert(IsGpReg(ra));
    if (!ins->isop(LIR_u2q)) {
        NanoAssert(ins->isop(LIR_i2q));
        emitrr(X64_movsxdr, rr, ra);    // sign extend 32->64
    } else {
        emitrr(X64_movlr, rr, ra);      // 32bit mov zeros the upper 32bits of the target
    }
}
|
|
|
|
// the CVTSI2SD instruction only writes to the low 64bits of the target
|
|
// XMM register, which hinders register renaming and makes dependence
|
|
// chains longer. So we precede with XORPS to clear the target register.
|
|
|
|
void Assembler::asm_i2f(LIns *ins) {
|
|
Register r = prepResultReg(ins, FpRegs);
|
|
Register b = findRegFor(ins->oprnd1(), GpRegs);
|
|
emitprr(X64_cvtsi2sd, r, b); // cvtsi2sd xmmr, b only writes xmm:0:64
|
|
emitprr(X64_xorps, r, r); // xorpd xmmr,xmmr to break dependency chains
|
|
}
|
|
|
|
// Unsigned 32-bit integer to double: zero-extend the source register to
// 64 bits and convert it as a signed 64-bit integer.
void Assembler::asm_u2f(LIns *ins) {
    Register dst = prepResultReg(ins, FpRegs);
    Register src = findRegFor(ins->oprnd1(), GpRegs);
    NanoAssert(!ins->oprnd1()->isQuad());
    // since oprnd1 value is 32bit, it is okay to zero-extend the value
    // in place without worrying about clobbering.
    emitprr(X64_cvtsq2sd, dst, src);    // convert int64 to double
    emitprr(X64_xorps, dst, dst);       // xorps xmm,xmm to break dependency chains
    emitrr(X64_movlr, src, src);        // zero extend u32 to int64
}
|
|
|
|
// Conditional move. Only the LIR_qeq condition has an opcode here; any
// other condition hits the TODO and then falls through to the same opcode.
void Assembler::asm_cmov(LIns *ins) {
    LIns* cond = ins->oprnd1();
    LIns* whenTrue = ins->oprnd2();
    LIns* whenFalse = ins->oprnd3();
    NanoAssert(cond->isCmp());
    NanoAssert((ins->isop(LIR_qcmov) && whenTrue->isQuad() && whenFalse->isQuad()) ||
               (ins->isop(LIR_cmov) && !whenTrue->isQuad() && !whenFalse->isQuad()));

    // this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
    // (This is true on Intel, is it true on all architectures?)
    const Register rr = prepResultReg(ins, GpRegs);
    const Register rf = findRegFor(whenFalse, GpRegs & ~rmask(rr));
    X64Opcode xop;
    switch (cond->opcode()) {
    default:      TODO(asm_cmov);   // then falls through to LIR_qeq
    case LIR_qeq: xop = X64_cmovqne; break;
    }
    emitrr(xop, rr, rf);
    // the "true" value goes in the result register
    findSpecificRegFor(whenTrue, rr);
    asm_cmp(cond);
}
|
|
|
|
// Emit a conditional branch on an integer compare, followed by the compare
// itself. Floating point conditions are forwarded to asm_fbranch. Returns
// the address of the branch instruction so it can be patched.
NIns* Assembler::asm_branch(bool onFalse, LIns *cond, NIns *target) {
    const LOpcode condop = cond->opcode();
    if (condop >= LIR_feq && condop <= LIR_fge)
        return asm_fbranch(onFalse, cond, target);

    // we must ensure there's room for the instr before calculating
    // the offset. and the offset determines the opcode (8bit or 32bit)
    underrunProtect(8);
    // table index: opcode with the 64-bit flag cleared, relative to LIR_eq
    const int ccIndex = (condop & ~LIR64) - LIR_eq;
    if (target && isS8(target - _nIns)) {
        static const X64Opcode shortJcc[] = {
            X64_je8,                                // eq
            X64_jl8, X64_jg8, X64_jle8, X64_jge8,   // lt, gt, le, ge
            X64_jb8, X64_ja8, X64_jbe8, X64_jae8    // ult, ugt, ule, uge
        };
        uint64_t xop = shortJcc[ccIndex];
        if (onFalse)
            xop ^= (uint64_t)X64_jneg8;
        emit8(xop, target - _nIns);
    } else {
        static const X64Opcode nearJcc[] = {
            X64_je,                                 // eq
            X64_jl, X64_jg, X64_jle, X64_jge,       // lt, gt, le, ge
            X64_jb, X64_ja, X64_jbe, X64_jae        // ult, ugt, ule, uge
        };
        uint64_t xop = nearJcc[ccIndex];
        if (onFalse)
            xop ^= (uint64_t)X64_jneg;
        emit32(xop, target ? target - _nIns : 0);
    }
    NIns *branchAddr = _nIns;   // addr of instr to patch
    asm_cmp(cond);
    return branchAddr;
}
|
|
|
|
// Emit the integer compare for cond. An immediate right-hand side is
// handled by asm_cmp_imm; otherwise both operands are put in GP registers.
void Assembler::asm_cmp(LIns *cond) {
    LIns *rhs = cond->oprnd2();
    if (isImm32(rhs)) {
        asm_cmp_imm(cond);
        return;
    }
    LIns *lhs = cond->oprnd1();
    Register ra, rb;
    if (lhs == rhs) {
        // optimize-me: this will produce a const result!
        ra = rb = findRegFor(lhs, GpRegs);
    } else {
        Reservation *resvLhs, *resvRhs;
        findRegFor2(GpRegs, lhs, resvLhs, rhs, resvRhs);
        ra = resvLhs->reg;
        rb = resvRhs->reg;
    }

    const bool is64 = (cond->opcode() & LIR64) != 0;
    emitrr(is64 ? X64_cmpqr : X64_cmplr, ra, rb);
}
|
|
|
|
// Compare a register against a 32-bit immediate, using the imm8 encoding
// when the constant fits in a signed byte.
void Assembler::asm_cmp_imm(LIns *cond) {
    LIns *lhs = cond->oprnd1();
    LIns *rhs = cond->oprnd2();
    Register ra = findRegFor(lhs, GpRegs);
    const int32_t imm = getImm32(rhs);
    if (!isS8(imm)) {
        X64Opcode xop = (cond->opcode() & LIR64) ? X64_cmpqri : X64_cmplri;
        emitr_imm(xop, ra, imm);
        return;
    }
    X64Opcode xop = (cond->opcode() & LIR64) ? X64_cmpqr8 : X64_cmplr8;
    emitr_imm8(xop, ra, imm);
}
|
|
|
|
// compiling floating point branches
// discussion in https://bugzilla.mozilla.org/show_bug.cgi?id=443886
//
//  fucom/p/pp: c3 c2 c0   jae  ja     jbe  jb  je  jne
//  ucomisd:     Z  P  C   !C   !C&!Z  C|Z  C   Z   !Z
//              -- -- --   --   -----  ---  --  --  --
//  unordered    1  1  1               T    T   T
//  greater >    0  0  0   T    T                   T
//  less    <    0  0  1               T    T       T
//  equal   =    1  0  0   T           T        T
//
//  here's the cases, using conditionals:
//
//  branch  >=   >    <=        <         =
//  ------  ---  ---  ---       ---       ---
//  LIR_jt  jae  ja   swap+jae  swap+ja   jp over je
//  LIR_jf  jb   jbe  swap+jb   swap+jbe  jne+jp
|
|
|
|
// Emit a conditional branch on a floating point compare, followed by the
// compare itself (fcmp). "less" conditions are turned into "greater"
// conditions by swapping the operands. Returns the address to patch.
NIns* Assembler::asm_fbranch(bool onFalse, LIns *cond, NIns *target) {
    LOpcode condop = cond->opcode();
    NIns *patch;
    LIns *lhs = cond->oprnd1();
    LIns *rhs = cond->oprnd2();
    if (condop != LIR_feq) {
        if (condop == LIR_flt || condop == LIR_fle) {
            // a < b  ==> b > a,   a <= b ==> b >= a
            condop = (condop == LIR_flt) ? LIR_fgt : LIR_fge;
            LIns *tmp = lhs; lhs = rhs; rhs = tmp;
        }
        X64Opcode xop;
        if (condop == LIR_fgt) {
            xop = onFalse ? X64_jbe : X64_ja;
        } else { // LIR_fge
            xop = onFalse ? X64_jb : X64_jae;
        }
        underrunProtect(8);
        emit32(xop, target ? target - _nIns : 0);
        patch = _nIns;
    } else if (onFalse) {
        // branch if unordered or !=
        underrunProtect(16); // 12 needed, round up for overhang
        emit32(X64_jp, target ? target - _nIns : 0);
        emit32(X64_jne, target ? target - _nIns : 0);
        patch = _nIns;
    } else {
        // jp skip (2byte)
        // jeq target
        // skip: ...
        underrunProtect(16); // 7 needed but we write 2 instr
        NIns *skip = _nIns;
        emit32(X64_je, target ? target - _nIns : 0);
        patch = _nIns;
        emit8(X64_jp8, skip - _nIns);
    }
    fcmp(lhs, rhs);
    return patch;
}
|
|
|
|
// Materialize the result of a floating point compare in a GP register.
// "less" conditions are turned into "greater" conditions by swapping the
// operands; equality needs both ZF and !PF.
void Assembler::asm_fcond(LIns *ins) {
    LOpcode op = ins->opcode();
    LIns *lhs = ins->oprnd1();
    LIns *rhs = ins->oprnd2();
    if (op != LIR_feq) {
        if (op == LIR_flt || op == LIR_fle) {
            op = (op == LIR_flt) ? LIR_fgt : LIR_fge;
            LIns *tmp = lhs; lhs = rhs; rhs = tmp;
        }
        Register r = prepResultReg(ins, GpRegs); // x64 can use any GPR as setcc target
        emitrr8(X64_movzx8, r, r);
        emitr8(op == LIR_fgt ? X64_seta : X64_setae, r);
    } else {
        // result = ZF & !PF, must do logic on flags
        // r = al|bl|cl|dl, can only use rh without rex prefix
        Register r = prepResultReg(ins, 1<<RAX|1<<RCX|1<<RDX|1<<RBX);
        emitrr8(X64_movzx8, r, r);                      // movzx8 r,rl  r[8:63] = 0
        emit(X86_and8r | uint64_t(r<<3|(r|4))<<56);     // and rl,rh    rl &= rh
        emit(X86_setnp | uint64_t(r|4)<<56);            // setnp rh     rh = !PF
        emit(X86_sete  | uint64_t(r)<<56);              // sete rl      rl = ZF
    }
    fcmp(lhs, rhs);
}
|
|
|
|
// Put both operands in fp registers and emit ucomisd on them.
void Assembler::fcmp(LIns *a, LIns *b) {
    Reservation *resvA;
    Reservation *resvB;
    findRegFor2(FpRegs, a, resvA, b, resvB);
    const Register ra = resvA->reg;
    const Register rb = resvB->reg;
    emitprr(X64_ucomisd, ra, rb);
}
|
|
|
|
// Reload the value of ins into register r: LIR_alloc gets its address
// recomputed from FP, constants are rematerialized, everything else is
// loaded from the memory slot returned by findMemFor.
void Assembler::asm_restore(LIns *ins, Reservation *resv, Register r) {
    if (ins->isop(LIR_alloc)) {
        const int d = disp(resv);
        emitrm(X64_leaqrm, r, d, FP);
    } else if (ins->isconst()) {
        if (!resv->arIndex)
            ins->resv()->clear();
        // unsafe to use xor r,r for zero because it changes cc's
        emit_int(r, ins->imm32());
    } else if (ins->isconstq() && IsGpReg(r)) {
        if (!resv->arIndex)
            ins->resv()->clear();
        // unsafe to use xor r,r for zero because it changes cc's
        emit_quad(r, ins->imm64());
    } else {
        const int d = findMemFor(ins);
        if (IsFpReg(r)) {
            NanoAssert(ins->isQuad());
            // load 64bits into XMM. don't know if double or int64, assume double.
            emitprm(X64_movsdrm, r, d, FP);
        } else if (ins->isQuad()) {
            emitrm(X64_movqrm, r, d, FP);
        } else {
            emitrm(X64_movlrm, r, d, FP);
        }
    }
    verbose_only( if (_logc->lcbits & LC_RegAlloc) {
        outputForEOL(" <= restore %s",
        _thisfrag->lirbuf->names->formatRef(ins)); } )
}
|
|
|
|
// Materialize the result of an integer compare (or overflow test) in a
// GP register using SETcc.
void Assembler::asm_cond(LIns *ins) {
    const LOpcode op = ins->opcode();
    // unlike x86-32, with a rex prefix we can use any GP register as an 8bit target
    Register r = prepResultReg(ins, GpRegs);
    // SETcc only sets low 8 bits, so extend
    emitrr8(X64_movzx8, r, r);
    X64Opcode xop;
    switch (op) {
    default:        TODO(cond);     // then falls through to the eq case
    case LIR_qeq:
    case LIR_eq:    xop = X64_sete;  break;
    case LIR_qlt:
    case LIR_lt:    xop = X64_setl;  break;
    case LIR_qle:
    case LIR_le:    xop = X64_setle; break;
    case LIR_qgt:
    case LIR_gt:    xop = X64_setg;  break;
    case LIR_qge:
    case LIR_ge:    xop = X64_setge; break;
    case LIR_qult:
    case LIR_ult:   xop = X64_setb;  break;
    case LIR_qule:
    case LIR_ule:   xop = X64_setbe; break;
    case LIR_qugt:
    case LIR_ugt:   xop = X64_seta;  break;
    case LIR_quge:
    case LIR_uge:   xop = X64_setae; break;
    case LIR_ov:    xop = X64_seto;  break;
    }
    emitr8(xop, r);
    asm_cmp(ins);
}
|
|
|
|
// Return: jump to the epilogue with the value in RAX (LIR_ret) or XMM0
// (any other return opcode).
void Assembler::asm_ret(LIns *ins) {
    JMP(_epilogue);
    assignSavedRegs();
    LIns *retval = ins->oprnd1();
    Register retreg;
    if (ins->isop(LIR_ret))
        retreg = RAX;
    else
        retreg = XMM0;
    findSpecificRegFor(retval, retreg);
}
|
|
|
|
// Register copy d <- s where at least one side is expected to be an XMM
// register.
void Assembler::asm_nongp_copy(Register d, Register s) {
    if (!IsFpReg(s)) {
        // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
        emitprr(X64_movqxr, d, s);
        return;
    }
    if (IsFpReg(d)) {
        // xmm <- xmm: use movaps. movsd r,r causes partial register stall
        emitrr(X64_movapsr, d, s);
    } else {
        // gpr <- xmm: use movq r/m64, xmm (66 REX.W 0F 7E /r)
        emitprr(X64_movqrx, s, d);
    }
}
|
|
|
|
// Register allocation for a load. Outputs: rr = destination register,
// dr = displacement, rb = base register (chosen from BaseRegs).
void Assembler::regalloc_load(LIns *ins, Register &rr, int32_t &dr, Register &rb) {
    dr = ins->disp();
    LIns *base = ins->oprnd1();
    rb = getBaseReg(base, dr, BaseRegs);
    Reservation *resv = getresv(ins);
    const bool hasReg = resv && (rr = resv->reg) != UnknownReg;
    if (!hasReg) {
        // use a gpr in case we're copying a non-double
        rr = prepResultReg(ins, GpRegs & ~rmask(rb));
        return;
    }
    // keep already assigned register
    freeRsrcOf(ins, false);
}
|
|
|
|
// 64-bit load into either a GP register or an XMM register.
void Assembler::asm_load64(LIns *ins) {
    Register dst, base;
    int32_t off;
    regalloc_load(ins, dst, off, base);
    if (!IsGpReg(dst)) {
        // load 64bits into XMM. don't know if double or int64, assume double.
        emitprm(X64_movsdrm, dst, off, base);
        return;
    }
    // general 64bit load, 32bit const displacement
    emitrm(X64_movqrm, dst, off, base);
}
|
|
|
|
// 32-bit load; ins must not be a quad.
void Assembler::asm_ld(LIns *ins) {
    NanoAssert(!ins->isQuad());
    Register dst, base;
    int32_t off;
    regalloc_load(ins, dst, off, base);
    emitrm(X64_movlrm, dst, off, base);
}
|
|
|
|
// 64-bit store of value to [base + d], from whichever register class the
// value currently lives in.
void Assembler::asm_store64(LIns *value, int d, LIns *base) {
    NanoAssert(value->isQuad());
    Register b = getBaseReg(base, d, BaseRegs);

    // if we have to choose a register, use a GPR, but not the base reg
    Reservation *resv = getresv(value);
    Register r;
    const bool hasReg = resv && (r = resv->reg) != UnknownReg;
    if (!hasReg)
        r = findRegFor(value, GpRegs & ~rmask(b));

    if (!IsGpReg(r)) {
        // xmm store
        emitprm(X64_movsdmr, r, d, b);
    } else {
        // gpr store
        emitrm(X64_movqmr, r, d, b);
    }
}
|
|
|
|
// 32-bit store of value to [base + d]; the value register is chosen so
// that it differs from the base register.
void Assembler::asm_store32(LIns *value, int d, LIns *base) {
    NanoAssert(!value->isQuad());
    Register baseReg = getBaseReg(base, d, BaseRegs);
    const RegisterMask notBase = GpRegs & ~rmask(baseReg);
    Register valueReg = findRegFor(value, notBase);

    // store 32bits to 64bit addr. use rex so we can use all 16 regs
    emitrm(X64_movlmr, valueReg, d, baseReg);
}
|
|
|
|
// generate a 32bit constant, must not affect condition codes!
|
|
void Assembler::emit_int(Register r, int32_t v) {
|
|
NanoAssert(IsGpReg(r));
|
|
emitr_imm(X64_movi, r, v);
|
|
}
|
|
|
|
// generate a 64bit constant, must not affect condition codes!
|
|
void Assembler::emit_quad(Register r, uint64_t v) {
|
|
NanoAssert(IsGpReg(r));
|
|
if (isU32(v)) {
|
|
emit_int(r, int32_t(v));
|
|
return;
|
|
}
|
|
if (isS32(v)) {
|
|
// safe for sign-extension 32->64
|
|
emitr_imm(X64_movqi32, r, int32_t(v));
|
|
return;
|
|
}
|
|
underrunProtect(8+8); // imm64 + worst case instr len
|
|
((uint64_t*)_nIns)[-1] = v;
|
|
_nIns -= 8;
|
|
_nvprof("x64-bytes", 8);
|
|
emitr(X64_movqi, r);
|
|
}
|
|
|
|
// Materialize a 32-bit constant; zero is produced with xor r,r.
void Assembler::asm_int(LIns *ins) {
    Register r = prepResultReg(ins, GpRegs);
    const int32_t value = ins->imm32();
    if (value != 0) {
        emit_int(r, value);
    } else {
        // special case for zero
        emitrr(X64_xorrr, r, r);
    }
}
|
|
|
|
// Emit code that produces the 64-bit immediate carried by ins.
// A non-zero constant is always built in a GPR; zero may be produced in
// either a GPR or an XMM register, whichever the allocator hands back.
void Assembler::asm_quad(LIns *ins) {
    uint64_t imm = ins->imm64();

    if (imm != 0) {
        Register dest = prepResultReg(ins, GpRegs);
        emit_quad(dest, imm);
        return;
    }

    Register dest = prepResultReg(ins, GpRegs|FpRegs);
    if (IsGpReg(dest)) {
        // zero in a GPR: xor the register with itself
        emitrr(X64_xorrr, dest, dest);
    } else {
        // zero in an xmm register: xorps with itself
        emitprr(X64_xorps, dest, dest);
    }
}
|
|
|
|
// Not implemented for this backend yet; reports the gap through TODO().
void Assembler::asm_qjoin(LIns*) {
    TODO(asm_qjoin);
}
|
|
|
|
// Reserve the result register for a call instruction; the only register
// allowed is XMM0. The Reservation argument is unused.
Register Assembler::asm_prep_fcall(Reservation*, LIns *ins) {
    RegisterMask onlyXmm0 = rmask(XMM0);
    return prepResultReg(ins, onlyXmm0);
}
|
|
|
|
// Bind a param instruction to the register that carries it.
// Kind 0 is an ordinary incoming argument (looked up in argRegs); any other
// kind is a saved param (looked up in savedRegs).
void Assembler::asm_param(LIns *ins) {
    uint32_t index = ins->paramArg();
    uint32_t kind = ins->paramKind();

    if (kind != 0) {
        // saved param
        prepResultReg(ins, rmask(savedRegs[index]));
        return;
    }

    // ordinary param
    // first six args always in registers for mac x64
    if (index >= 6) {
        // todo: support stack based args, arg 0 is at [FP+off] where off
        // is the # of regs to be pushed in genProlog()
        TODO(asm_param_stk);
        return;
    }

    // incoming arg in register
    prepResultReg(ins, rmask(argRegs[index]));
}
|
|
|
|
// register allocation for 2-address style unary ops of the form R = (op) R
|
|
void Assembler::regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra) {
|
|
LIns *a = ins->oprnd1();
|
|
rr = prepResultReg(ins, allow);
|
|
Reservation* rA = getresv(a);
|
|
// if this is last use of a in reg, we can re-use result reg
|
|
if (rA == 0 || (ra = rA->reg) == UnknownReg) {
|
|
ra = findSpecificRegFor(a, rr);
|
|
} else {
|
|
// rA already has a register assigned. caller must emit a copy
|
|
// to rr once instr code is generated. (ie mov rr,ra ; op rr)
|
|
}
|
|
}
|
|
|
|
static const AVMPLUS_ALIGN16(int64_t) negateMask[] = {0x8000000000000000LL,0};
|
|
|
|
void Assembler::asm_fneg(LIns *ins) {
|
|
Register rr, ra;
|
|
if (isS32((uintptr_t)negateMask) || isS32((NIns*)negateMask - _nIns)) {
|
|
regalloc_unary(ins, FpRegs, rr, ra);
|
|
if (isS32((uintptr_t)negateMask)) {
|
|
// builtin code is in bottom or top 2GB addr space, use absolute addressing
|
|
underrunProtect(4+8);
|
|
*((int32_t*)(_nIns -= 4)) = (int32_t)(uintptr_t)negateMask;
|
|
_nvprof("x64-bytes", 4);
|
|
uint64_t xop = X64_xorpsa | uint64_t((rr&7)<<3)<<48; // put rr[0:2] into mod/rm byte
|
|
xop = rexrb(xop, rr, (Register)0); // put rr[3] into rex byte
|
|
emit(xop);
|
|
} else {
|
|
// jit code is within +/-2GB of builtin code, use rip-relative
|
|
underrunProtect(4+8);
|
|
int32_t d = (int32_t) ((NIns*)negateMask - _nIns);
|
|
*((int32_t*)(_nIns -= 4)) = d;
|
|
_nvprof("x64-bytes", 4);
|
|
emitrr(X64_xorpsm, rr, (Register)0);
|
|
}
|
|
if (ra != rr)
|
|
asm_nongp_copy(rr,ra);
|
|
} else {
|
|
// this is just hideous - can't use RIP-relative load, can't use
|
|
// absolute-address load, and cant move imm64 const to XMM.
|
|
// so do it all in a GPR. hrmph.
|
|
rr = prepResultReg(ins, GpRegs);
|
|
ra = findRegFor(ins->oprnd1(), GpRegs & ~rmask(rr));
|
|
emitrr(X64_xorqrr, rr, ra); // xor rr, ra
|
|
emit_quad(rr, negateMask[0]); // mov rr, 0x8000000000000000
|
|
}
|
|
}
|
|
|
|
// Not implemented for this backend yet; reports the gap through TODO().
void Assembler::asm_qhi(LIns*) {
    TODO(asm_qhi);
}
|
|
|
|
// Emit code that extracts the low 32 bits of a quad operand into a GPR.
void Assembler::asm_qlo(LIns *ins) {
    Register dest, src;
    regalloc_unary(ins, GpRegs, dest, src);
    NanoAssert(IsGpReg(src));

    // a 32bit mov zeros the upper 32bits of the target
    emitrr(X64_movlr, dest, src);
}
|
|
|
|
// Emit a store of register rr to the slot at [FP + d].
//   d    - frame offset of the slot; 0 means nothing is emitted
//   quad - true for a 64-bit store, false for a 32-bit store
// The third (pop) argument is ignored by this backend.
void Assembler::asm_spill(Register rr, int d, bool /*pop*/, bool quad) {
    if (!d)
        return;

    if (IsFpReg(rr)) {
        // store 64bits from XMM to memory
        NanoAssert(quad);
        emitprm(X64_movsdmr, rr, d, FP);
        return;
    }

    // general purpose register: store width follows `quad`
    X64Opcode storeOp = quad ? X64_movqmr : X64_movlmr;
    emitrm(storeOp, rr, d, FP);
}
|
|
|
|
// Emit the function prologue and return the patch entry address.
// Code is emitted in reverse of execution order: the stack adjustment is
// emitted first, then the frame-pointer setup, then the push of the caller's
// frame pointer. The returned address is the emit pointer captured after the
// stack adjustment and before the frame-pointer setup.
NIns* Assembler::genPrologue() {
    // activation frame is 4 bytes per entry even on 64bit machines
    uint32_t frameBytes = max_stk_used + _activation.highwatermark * 4;

    // already on the stack at this point: return address + saved frame pointer
    uint32_t pushedBytes = sizeof(void*) + sizeof(void*);

    // Pad so that everything together stays NJ_ALIGN_STACK-byte aligned,
    // then subtract what was already pushed to get the amount to reserve.
    uint32_t alignedTotal = alignUp(frameBytes + pushedBytes, NJ_ALIGN_STACK);
    uint32_t reserve = alignedTotal - pushedBytes;

    if (reserve != 0) {
        // use the short imm8 encoding when the amount fits
        if (isS8(reserve)) {
            emitr_imm8(X64_subqr8, RSP, reserve);
        } else {
            emitr_imm(X64_subqri, RSP, reserve);
        }
    }

    verbose_only( outputAddr=true; asm_output("[patch entry]"); )
    NIns *entry = _nIns;

    MR(FP, RSP);            // Establish our own FP.
    emitr(X64_pushr, FP);   // Save caller's FP.

    return entry;
}
|
|
|
|
// Emit the function epilogue and return the address it starts at.
// Also resets max_stk_used to 0.
// Executed sequence (emitted below in reverse order):
//     mov rsp, rbp
//     pop rbp
//     ret
NIns* Assembler::genEpilogue() {
    max_stk_used = 0;

    emit(X64_ret);          // executes last
    emitr(X64_popr, RBP);
    MR(RSP, RBP);           // executes first

    return _nIns;
}
|
|
|
|
// Reset the register allocator state and seed its free list with the
// registers this backend lets the allocator hand out.
void Assembler::nRegisterResetAll(RegAlloc &a) {
    a.clear();
    a.used = 0;

#ifdef _MSC_VER
    // Original annotation: rax-rbx, rsi, rdi, r8-r15, xmm0-xmm5.
    // NOTE(review): 0x001fffcf sets bits 0-3 and 6-20; if XMM0 is register
    // number 16 the mask stops at xmm4, not xmm5 - confirm which is intended.
    a.free = 0x001fffcf;
#else
    // every register except the stack pointer and the frame pointer
    a.free = 0xffffffff & ~((1<<RSP) | (1<<RBP));
#endif

    debug_only( a.managed = a.free; )
}
|
|
|
|
// Patch the 32-bit displacement of the branch at `patch` so it jumps to
// `target`. Recognized encodings are jmp disp32 (0xE9) and jcc disp32
// (0x0F 0x8x). If the patched branch is immediately followed by a jp
// (0x0F 0x8A), that branch is patched to the same target as well.
void Assembler::nPatchBranch(NIns *patch, NIns *target) {
    // `next` is the address of the instruction following the branch; the
    // displacement occupies the 4 bytes just before it.
    NIns *next = 0;
    if (patch[0] == 0xE9) {
        // jmp disp32: 1 opcode byte + 4 displacement bytes
        next = patch + 5;
    } else if (patch[0] == 0x0F && (patch[1] & 0xF0) == 0x80) {
        // jcc disp32: 2 opcode bytes + 4 displacement bytes
        next = patch + 6;
    } else {
        TODO(unknown_patch);
    }

    // the slot must still be unpatched, and the new offset must fit
    int32_t *slot = ((int32_t*)next) - 1;
    NanoAssert(*slot == 0);
    NanoAssert(isS32(target - next));
    *slot = int32_t(target - next);

    if (next[0] == 0x0F && next[1] == 0x8A) {
        // code is jne<target>,jp<target>, for LIR_jf(feq)
        // we just patched the jne, now patch the jp.
        next += 6;
        int32_t *jpSlot = ((int32_t*)next) - 1;
        NanoAssert(*jpSlot == 0);
        NanoAssert(isS32(target - next));
        *jpSlot = int32_t(target - next);
    }
}
|
|
|
|
// Pick the lowest-numbered register present in `set`, remove it from the
// allocator's free mask and return it.
// `set` is expected to be non-zero: the bit-scan result is undefined for a
// zero input on both code paths.
Register Assembler::nRegisterAllocFromSet(RegisterMask set) {
#if defined _WIN64
    DWORD bit;
    _BitScanForward(&bit, set);
    Register picked = (Register) bit;
    _allocator.free &= ~rmask(picked);
    return picked;
#else
    // gcc asm syntax:
    //   bsf  - index of the lowest set bit of `set` into eax
    //   btr  - clear that bit in _allocator.free
    //   movl - store the index as the result
    Register picked;
    asm("bsf    %1, %%eax\n\t"
        "btr    %%eax, %2\n\t"
        "movl   %%eax, %0\n\t"
        : "=m"(picked) : "m"(set), "m"(_allocator.free) : "%eax", "memory");
    (void)set;
    return picked;
#endif
}
|
|
|
|
// Not implemented for this backend yet; reports the gap through TODO().
void Assembler::nFragExit(LIns*) {
    TODO(nFragExit);
}
|
|
|
|
// Backend initialization hook; this backend has nothing to do here.
void Assembler::nInit(AvmCore*) {
}
|
|
|
|
// Make sure `bytes` bytes can be emitted below the current emit pointer
// without running past the start of the current code region (exitStart
// while emitting exit code, codeStart otherwise). When there is not enough
// room, a new region is obtained with codeAlloc() and a jump back to the
// previous emit position is emitted from it.
void Assembler::underrunProtect(ptrdiff_t bytes) {
    NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
    NIns *here = _nIns;
    NIns *limit = _inExit ? this->exitStart : this->codeStart;

#if PEDANTIC
    // pedanticTop is based on the last call to underrunProtect; any time we
    // call underrunProtect and would use more than what's already protected,
    // insert a page break jump. Sometimes this will be to a new page,
    // usually it's just the next instruction.
    NanoAssert(pedanticTop >= limit);
    if (here - bytes < pedanticTop) {
        // no page break required, but insert a far branch anyway just to be difficult
        const int br_size = 8; // opcode + 32bit addr
        if (here - bytes - br_size < limit) {
            // really do need a page break
            verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", here);)
            codeAlloc();
        }
        // now emit the jump, but make sure we won't need another page break.
        // we're pedantic, but not *that* pedantic.
        pedanticTop = _nIns - br_size;
        JMP(here);
        pedanticTop = _nIns - bytes;
    }
#else
    if (here - bytes < limit) {
        verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", here);)
        codeAlloc();
        // this jump will call underrunProtect again, but since we're on a
        // new page, nothing will happen.
        JMP(here);
    }
#endif
}
|
|
|
|
// Register allocation hint hook: this backend applies no preference and
// hands the allowed mask back unchanged. The instruction is ignored.
RegisterMask Assembler::hint(LIns *, RegisterMask allow) {
    return allow;
}
|
|
|
|
void Assembler::nativePageSetup() {
|
|
if (!_nIns) {
|
|
codeAlloc();
|
|
IF_PEDANTIC( pedanticTop = _nIns; )
|
|
}
|
|
if (!_nExitIns) {
|
|
codeAlloc(true);
|
|
}
|
|
}
|
|
|
|
// Page reset hook; this backend has nothing to do here.
void Assembler::nativePageReset() {
}
|
|
|
|
} // namespace nanojit
|
|
|
|
#endif // FEATURE_NANOJIT && NANOJIT_X64
|