mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
894487e422
--HG-- extra : convert_revision : 353c06e32329377cc9ed5f66eeb6bcaf72ff3aea
2772 lines
93 KiB
C++
2772 lines
93 KiB
C++
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
|
|
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is [Open Source Virtual Machine].
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Adobe System Incorporated.
|
|
* Portions created by the Initial Developer are Copyright (C) 2004-2007
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Adobe AS3 Team
|
|
* Vladimir Vukicevic <vladimir@pobox.com>
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
#include "nanojit.h"
|
|
|
|
#ifdef UNDER_CE
|
|
#include <cmnintrin.h>
|
|
extern "C" bool blx_lr_broken();
|
|
#endif
|
|
|
|
#if defined(FEATURE_NANOJIT) && defined(NANOJIT_ARM)
|
|
|
|
namespace nanojit
|
|
{
|
|
|
|
#ifdef NJ_VERBOSE
|
|
const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","fp","ip","sp","lr","pc",
|
|
"d0","d1","d2","d3","d4","d5","d6","d7","s14"};
|
|
const char* condNames[] = {"eq","ne","cs","cc","mi","pl","vs","vc","hi","ls","ge","lt","gt","le",""/*al*/,"nv"};
|
|
const char* shiftNames[] = { "lsl", "lsl", "lsr", "lsr", "asr", "asr", "ror", "ror" };
|
|
#endif
|
|
|
|
const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
|
|
const Register Assembler::retRegs[] = { R0, R1 };
|
|
const Register Assembler::savedRegs[] = { R4, R5, R6, R7, R8, R9, R10 };
|
|
|
|
// --------------------------------
|
|
// ARM-specific utility functions.
|
|
// --------------------------------
|
|
|
|
#ifdef DEBUG
|
|
// Return true if enc is a valid Operand 2 encoding and thus can be used as-is
|
|
// in an ARM arithmetic operation that accepts such encoding.
|
|
//
|
|
// This utility does not know (or determine) the actual value that the encoded
|
|
// value represents, and thus cannot be used to ensure the correct operation of
|
|
// encOp2Imm, but it does ensure that the encoded value can be used to encode a
|
|
// valid ARM instruction. decOp2Imm can be used if you also need to check that
|
|
// a literal is correctly encoded (and thus that encOp2Imm is working
|
|
// correctly).
|
|
inline bool
|
|
Assembler::isOp2Imm(uint32_t enc)
|
|
{
|
|
return ((enc & 0xfff) == enc);
|
|
}
|
|
|
|
// Decodes operand 2 immediate values (for debug output and assertions).
|
|
inline uint32_t
|
|
Assembler::decOp2Imm(uint32_t enc)
|
|
{
|
|
NanoAssert(isOp2Imm(enc));
|
|
|
|
uint32_t imm8 = enc & 0xff;
|
|
uint32_t rot = 32 - ((enc >> 7) & 0x1e);
|
|
|
|
return imm8 << (rot & 0x1f);
|
|
}
|
|
#endif
|
|
|
|
// Calculate the number of leading zeroes in data, using the hardware CLZ
// instruction where the toolchain exposes it and a C bisection search
// otherwise. Returns a value in [0,32].
inline uint32_t
Assembler::CountLeadingZeroes(uint32_t data)
{
    uint32_t leading_zeroes;

    // We can't do CLZ on anything earlier than ARMv5. Architectures as early
    // as that aren't supported, but assert that we aren't running on one
    // anyway.
    // If ARMv4 support is required in the future for some reason, we can do a
    // run-time check on _config.arm_arch and fall back to the C routine, but for
    // now we can avoid the cost of the check as we don't intend to support
    // ARMv4 anyway.
    NanoAssert(_config.arm_arch >= 5);

#if defined(__ARMCC__)
    // ARMCC can do this with an intrinsic.
    leading_zeroes = __clz(data);

    // current Android GCC compiler incorrectly refuses to compile 'clz' for armv5
    // (even though this is a legal instruction there). Since we currently only compile for ARMv5
    // for emulation, we don't care too much (but we DO care for ARMv6+ since those are "real"
    // devices).
#elif defined(__GNUC__) && !(defined(ANDROID) && __ARM_ARCH__ <= 5)
    // GCC can use inline assembler to insert a CLZ instruction.
    __asm (
        " clz %0, %1 \n"
        : "=r" (leading_zeroes)
        : "r" (data)
    );
#elif defined(UNDER_CE)
    // WinCE can do this with an intrinsic.
    leading_zeroes = _CountLeadingZeros(data);
#else
    // Other platforms must fall back to a C routine. This won't be as
    // efficient as the CLZ instruction, but it is functional.
    uint32_t try_shift;

    leading_zeroes = 0;

    // This loop does a bisection search rather than the obvious rotation loop.
    // This should be faster, though it will still be no match for CLZ.
    // Each iteration keeps the largest shift for which no set bits are lost
    // when shifting data left by that amount (i.e. the top 'shift' bits are
    // all zero).
    for (try_shift = 16; try_shift != 0; try_shift /= 2) {
        uint32_t shift = leading_zeroes + try_shift;
        if (((data << shift) >> shift) == data) {
            leading_zeroes = shift;
        }
    }
#endif

    // Assert that the operation worked: all bits of data must lie below the
    // computed leading-zero count.
    NanoAssert(((0xffffffff >> leading_zeroes) & data) == data);

    return leading_zeroes;
}
|
|
|
|
// The ARM instruction set allows some flexibility to the second operand of
|
|
// most arithmetic operations. When operand 2 is an immediate value, it takes
|
|
// the form of an 8-bit value rotated by an even value in the range 0-30.
|
|
//
|
|
// Some values that can be encoded using this scheme — such as 0xf000000f — are
|
|
// probably fairly rare in practice and require extra code to detect, so this
|
|
// function implements a fast CLZ-based heuristic to detect any value that can
|
|
// be encoded using just a shift, and not a full rotation. For example,
|
|
// 0xff000000 and 0x000000ff are both detected, but 0xf000000f is not.
|
|
//
|
|
// This function will return true to indicate that the encoding was successful,
|
|
// or false to indicate that the literal could not be encoded as an operand 2
|
|
// immediate. If successful, the encoded value will be written to *enc.
|
|
inline bool
|
|
Assembler::encOp2Imm(uint32_t literal, uint32_t * enc)
|
|
{
|
|
// The number of leading zeroes in the literal. This is used to calculate
|
|
// the rotation component of the encoding.
|
|
uint32_t leading_zeroes;
|
|
|
|
// Components of the operand 2 encoding.
|
|
int32_t rot;
|
|
uint32_t imm8;
|
|
|
|
// Check the literal to see if it is a simple 8-bit value. I suspect that
|
|
// most literals are in fact small values, so doing this check early should
|
|
// give a decent speed-up.
|
|
if (literal < 256)
|
|
{
|
|
*enc = literal;
|
|
return true;
|
|
}
|
|
|
|
// Determine the number of leading zeroes in the literal. This is used to
|
|
// calculate the required rotation.
|
|
leading_zeroes = CountLeadingZeroes(literal);
|
|
|
|
// We've already done a check to see if the literal is an 8-bit value, so
|
|
// leading_zeroes must be less than (and not equal to) (32-8)=24. However,
|
|
// if it is greater than 24, this algorithm will break, so debug code
|
|
// should use an assertion here to check that we have a value that we
|
|
// expect.
|
|
NanoAssert(leading_zeroes < 24);
|
|
|
|
// Assuming that we have a field of no more than 8 bits for a valid
|
|
// literal, we can calculate the required rotation by subtracting
|
|
// leading_zeroes from (32-8):
|
|
//
|
|
// Example:
|
|
// 0: Known to be zero.
|
|
// 1: Known to be one.
|
|
// X: Either zero or one.
|
|
// .: Zero in a valid operand 2 literal.
|
|
//
|
|
// Literal: [ 1XXXXXXX ........ ........ ........ ]
|
|
// leading_zeroes = 0
|
|
// Therefore rot (left) = 24.
|
|
// Encoded 8-bit literal: [ 1XXXXXXX ]
|
|
//
|
|
// Literal: [ ........ ..1XXXXX XX...... ........ ]
|
|
// leading_zeroes = 10
|
|
// Therefore rot (left) = 14.
|
|
// Encoded 8-bit literal: [ 1XXXXXXX ]
|
|
//
|
|
// Note, however, that we can only encode even shifts, and so
|
|
// "rot=24-leading_zeroes" is not sufficient by itself. By ignoring
|
|
// zero-bits in odd bit positions, we can ensure that we get a valid
|
|
// encoding.
|
|
//
|
|
// Example:
|
|
// Literal: [ 01XXXXXX ........ ........ ........ ]
|
|
// leading_zeroes = 1
|
|
// Therefore rot (left) = round_up(23) = 24.
|
|
// Encoded 8-bit literal: [ 01XXXXXX ]
|
|
rot = 24 - (leading_zeroes & ~1);
|
|
|
|
// The imm8 component of the operand 2 encoding can be calculated from the
|
|
// rot value.
|
|
imm8 = literal >> rot;
|
|
|
|
// The validity of the literal can be checked by reversing the
|
|
// calculation. It is much easier to decode the immediate than it is to
|
|
// encode it!
|
|
if (literal != (imm8 << rot)) {
|
|
// The encoding is not valid, so report the failure. Calling code
|
|
// should use some other method of loading the value (such as LDR).
|
|
return false;
|
|
}
|
|
|
|
// The operand is valid, so encode it.
|
|
// Note that the ARM encoding is actually described by a rotate to the
|
|
// _right_, so rot must be negated here. Calculating a left shift (rather
|
|
// than calculating a right rotation) simplifies the above code.
|
|
*enc = ((-rot << 7) & 0xf00) | imm8;
|
|
|
|
// Assert that the operand was properly encoded.
|
|
NanoAssert(decOp2Imm(*enc) == literal);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Encode "rd = rn + imm" using an appropriate instruction sequence.
|
|
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
|
|
// (The declaration in NativeARM.h defines the default value of stat as 0.)
|
|
//
|
|
// It is not valid to call this function if:
|
|
// (rd == IP) AND (rn == IP) AND !encOp2Imm(imm) AND !encOp2Imm(-imm)
|
|
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
|
|
// encOp2Imm method.
|
|
void
|
|
Assembler::asm_add_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
|
|
{
|
|
// Operand 2 encoding of the immediate.
|
|
uint32_t op2imm;
|
|
|
|
NanoAssert(IsGpReg(rd));
|
|
NanoAssert(IsGpReg(rn));
|
|
NanoAssert((stat & 1) == stat);
|
|
|
|
// Try to encode the value directly as an operand 2 immediate value, then
|
|
// fall back to loading the value into a register.
|
|
if (encOp2Imm(imm, &op2imm)) {
|
|
ADDis(rd, rn, op2imm, stat);
|
|
} else if (encOp2Imm(-imm, &op2imm)) {
|
|
// We could not encode the value for ADD, so try to encode it for SUB.
|
|
// Note that this is valid even if stat is set, _unless_ imm is 0, but
|
|
// that case is caught above.
|
|
NanoAssert(imm != 0);
|
|
SUBis(rd, rn, op2imm, stat);
|
|
} else {
|
|
// We couldn't encode the value directly, so use an intermediate
|
|
// register to encode the value. We will use IP to do this unless rn is
|
|
// IP; in that case we can reuse rd. This allows every case other than
|
|
// "ADD IP, IP, =#imm".
|
|
Register rm = (rn == IP) ? (rd) : (IP);
|
|
NanoAssert(rn != rm);
|
|
|
|
ADDs(rd, rn, rm, stat);
|
|
asm_ld_imm(rm, imm);
|
|
}
|
|
}
|
|
|
|
// Encode "rd = rn - imm" using an appropriate instruction sequence.
|
|
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
|
|
// (The declaration in NativeARM.h defines the default value of stat as 0.)
|
|
//
|
|
// It is not valid to call this function if:
|
|
// (rd == IP) AND (rn == IP) AND !encOp2Imm(imm) AND !encOp2Imm(-imm)
|
|
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
|
|
// encOp2Imm method.
|
|
void
|
|
Assembler::asm_sub_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
|
|
{
|
|
// Operand 2 encoding of the immediate.
|
|
uint32_t op2imm;
|
|
|
|
NanoAssert(IsGpReg(rd));
|
|
NanoAssert(IsGpReg(rn));
|
|
NanoAssert((stat & 1) == stat);
|
|
|
|
// Try to encode the value directly as an operand 2 immediate value, then
|
|
// fall back to loading the value into a register.
|
|
if (encOp2Imm(imm, &op2imm)) {
|
|
SUBis(rd, rn, op2imm, stat);
|
|
} else if (encOp2Imm(-imm, &op2imm)) {
|
|
// We could not encode the value for SUB, so try to encode it for ADD.
|
|
// Note that this is valid even if stat is set, _unless_ imm is 0, but
|
|
// that case is caught above.
|
|
NanoAssert(imm != 0);
|
|
ADDis(rd, rn, op2imm, stat);
|
|
} else {
|
|
// We couldn't encode the value directly, so use an intermediate
|
|
// register to encode the value. We will use IP to do this unless rn is
|
|
// IP; in that case we can reuse rd. This allows every case other than
|
|
// "SUB IP, IP, =#imm".
|
|
Register rm = (rn == IP) ? (rd) : (IP);
|
|
NanoAssert(rn != rm);
|
|
|
|
SUBs(rd, rn, rm, stat);
|
|
asm_ld_imm(rm, imm);
|
|
}
|
|
}
|
|
|
|
// Encode "rd = rn & imm" using an appropriate instruction sequence.
|
|
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
|
|
// (The declaration in NativeARM.h defines the default value of stat as 0.)
|
|
//
|
|
// It is not valid to call this function if:
|
|
// (rd == IP) AND (rn == IP) AND !encOp2Imm(imm) AND !encOp2Imm(~imm)
|
|
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
|
|
// encOp2Imm method.
|
|
void
Assembler::asm_and_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
{
    // Operand 2 encoding of the immediate.
    uint32_t op2imm;

    NanoAssert(IsGpReg(rd));
    NanoAssert(IsGpReg(rn));
    NanoAssert((stat & 1) == stat);

    // Try to encode the value directly as an operand 2 immediate value, then
    // try the bitwise inverse (for BIC), and finally fall back to loading the
    // value into a register.
    if (encOp2Imm(imm, &op2imm)) {
        ANDis(rd, rn, op2imm, stat);
    } else if (encOp2Imm(~imm, &op2imm)) {
        // "rn AND imm" is equivalent to "rn BIC ~imm", so use BIC with the
        // inverted immediate.
        BICis(rd, rn, op2imm, stat);
    } else {
        // We couldn't encode the value directly, so use an intermediate
        // register to encode the value. We will use IP to do this unless rn is
        // IP; in that case we can reuse rd. This allows every case other than
        // "AND IP, IP, =#imm".
        Register rm = (rn == IP) ? (rd) : (IP);
        NanoAssert(rn != rm);

        // Code is emitted in reverse order, so the load emitted by
        // asm_ld_imm below executes before the AND.
        ANDs(rd, rn, rm, stat);
        asm_ld_imm(rm, imm);
    }
}
|
|
|
|
// Encode "rd = rn | imm" using an appropriate instruction sequence.
|
|
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
|
|
// (The declaration in NativeARM.h defines the default value of stat as 0.)
|
|
//
|
|
// It is not valid to call this function if:
|
|
// (rd == IP) AND (rn == IP) AND !encOp2Imm(imm)
|
|
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
|
|
// encOp2Imm method.
|
|
void
|
|
Assembler::asm_orr_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
|
|
{
|
|
// Operand 2 encoding of the immediate.
|
|
uint32_t op2imm;
|
|
|
|
NanoAssert(IsGpReg(rd));
|
|
NanoAssert(IsGpReg(rn));
|
|
NanoAssert((stat & 1) == stat);
|
|
|
|
// Try to encode the value directly as an operand 2 immediate value, then
|
|
// fall back to loading the value into a register.
|
|
if (encOp2Imm(imm, &op2imm)) {
|
|
ORRis(rd, rn, op2imm, stat);
|
|
} else {
|
|
// We couldn't encode the value directly, so use an intermediate
|
|
// register to encode the value. We will use IP to do this unless rn is
|
|
// IP; in that case we can reuse rd. This allows every case other than
|
|
// "ORR IP, IP, =#imm".
|
|
Register rm = (rn == IP) ? (rd) : (IP);
|
|
NanoAssert(rn != rm);
|
|
|
|
ORRs(rd, rn, rm, stat);
|
|
asm_ld_imm(rm, imm);
|
|
}
|
|
}
|
|
|
|
// Encode "rd = rn ^ imm" using an appropriate instruction sequence.
|
|
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
|
|
// (The declaration in NativeARM.h defines the default value of stat as 0.)
|
|
//
|
|
// It is not valid to call this function if:
|
|
// (rd == IP) AND (rn == IP) AND !encOp2Imm(imm)
|
|
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
|
|
// encOp2Imm method.
|
|
void
Assembler::asm_eor_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
{
    // Operand 2 encoding of the immediate.
    uint32_t op2imm;

    NanoAssert(IsGpReg(rd));
    NanoAssert(IsGpReg(rn));
    NanoAssert((stat & 1) == stat);

    // Try to encode the value directly as an operand 2 immediate value, then
    // fall back to loading the value into a register. (EOR has no
    // complementary instruction, so there is no second-chance encoding as
    // there is for ADD/SUB or AND/BIC.)
    if (encOp2Imm(imm, &op2imm)) {
        EORis(rd, rn, op2imm, stat);
    } else {
        // We couldn't encode the value directly, so use an intermediate
        // register to encode the value. We will use IP to do this unless rn is
        // IP; in that case we can reuse rd. This allows every case other than
        // "EOR IP, IP, =#imm".
        Register rm = (rn == IP) ? (rd) : (IP);
        NanoAssert(rn != rm);

        // Code is emitted in reverse order, so the load emitted by
        // asm_ld_imm below executes before the EOR.
        EORs(rd, rn, rm, stat);
        asm_ld_imm(rm, imm);
    }
}
|
|
|
|
// --------------------------------
|
|
// Assembler functions.
|
|
// --------------------------------
|
|
|
|
// Per-assembler initialisation hook.
void
Assembler::nInit(AvmCore*)
{
#ifdef UNDER_CE
    // Detect the Windows CE device-emulator defect where "BLX LR" is not
    // executed correctly; asm_call consults this flag and uses a
    // BLX-through-IP workaround when it is set.
    blx_lr_bug = blx_lr_broken();
#else
    blx_lr_bug = 0;
#endif
}
|
|
|
|
void Assembler::nBeginAssembly()
{
    // Reset the high-water mark of outgoing call-argument stack usage;
    // asm_call raises it and genPrologue uses it to size the stack frame.
    max_out_args = 0;
}
|
|
|
|
NIns*
Assembler::genPrologue()
{
    /**
     * Prologue
     */

    // NJ_RESV_OFFSET is space at the top of the stack for us
    // to use for parameter passing (8 bytes at the moment)
    uint32_t stackNeeded = max_out_args + STACK_GRANULARITY * _activation.stackSlotsNeeded();
    uint32_t savingCount = 2;

    // We save FP and LR on entry.
    uint32_t savingMask = rmask(FP) | rmask(LR);

    // so for alignment purposes we've pushed return addr and fp
    uint32_t stackPushed = STACK_GRANULARITY * savingCount;
    uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
    int32_t amt = aligned - stackPushed;

    // Make room on stack for what we are doing
    if (amt)
        asm_sub_imm(SP, SP, amt);

    verbose_only( asm_output("## %p:",(void*)_nIns); )
    verbose_only( asm_output("## patch entry"); )
    NIns *patchEntry = _nIns;

    // Code is emitted in reverse order, so the MOV and PUSH below land
    // before patchEntry in memory: at run time the fragment first pushes
    // FP/LR, then establishes the frame pointer, and only then (from
    // patchEntry onwards) adjusts SP for locals and outgoing arguments.
    MOV(FP, SP);
    PUSH_mask(savingMask);
    return patchEntry;
}
|
|
|
|
// Emit the code for a fragment side exit described by 'guard'.
// Note: code is emitted in reverse order, so the statements below appear in
// the opposite order to their run-time execution.
void
Assembler::nFragExit(LInsp guard)
{
    SideExit * exit = guard->record()->exit;
    Fragment * frag = exit->target;

    bool target_is_known = frag && frag->fragEntry;

    if (target_is_known) {
        // The target exists so we can simply emit a branch to its location.
        JMP_far(frag->fragEntry);
    } else {
        // The target doesn't exist yet, so emit a jump to the epilogue. If the
        // target is created later on, the jump will be patched.

        GuardRecord *gr = guard->record();

        // Lazily create the shared epilogue on first use.
        if (!_epilogue)
            _epilogue = genEpilogue();

        // Jump to the epilogue. This may get patched later, but JMP_far always
        // emits two instructions even when only one is required, so patching
        // will work correctly.
        JMP_far(_epilogue);

        // In the future you may want to move this further down so that we can
        // overwrite the r0 guard record load during a patch to a different
        // fragment with some assumed input-register state. Not today though.
        gr->jmp = _nIns;

        // NB: this is a workaround for the fact that, by patching a
        // fragment-exit jump, we could be changing the *meaning* of the R0
        // register we're passing to the jump target. If we jump to the
        // epilogue, ideally R0 means "return value when exiting fragment".
        // If we patch this to jump to another fragment however, R0 means
        // "incoming 0th parameter". This is just a quirk of ARM ABI. So
        // we compromise by passing "return value" to the epilogue in IP,
        // not R0, and have the epilogue MOV(R0, IP) first thing.

        asm_ld_imm(IP, int(gr));
    }

#ifdef NJ_VERBOSE
    if (_config.arm_show_stats) {
        // load R1 with Fragment *fromFrag, target fragment
        // will make use of this when calling fragenter().
        int fromfrag = int((Fragment*)_thisfrag);
        asm_ld_imm(argRegs[1], fromfrag);
    }
#endif

    // profiling for the exit
    verbose_only(
       if (_logc->lcbits & LC_FragProfile) {
           asm_inc_m32( &guard->record()->profCount );
       }
    )

    // Pop the stack frame. (Emitted last, so this executes first at run
    // time, before the branch above.)
    MOV(SP, FP);
}
|
|
|
|
NIns*
Assembler::genEpilogue()
{
    // On ARMv5+, loading directly to PC correctly handles interworking.
    // Note that we don't support anything older than ARMv5.
    NanoAssert(_config.arm_arch >= 5);

    // Restore FP and return by popping the saved LR straight into PC.
    RegisterMask savingMask = rmask(FP) | rmask(PC);

    POP_mask(savingMask); // regs

    // NB: this is the later half of the dual-nature patchable exit branch
    // workaround noted above in nFragExit. IP has the "return value"
    // incoming, we need to move it to R0. (Code is emitted in reverse, so
    // this MOV executes before the POP above.)
    MOV(R0, IP);

    return _nIns;
}
|
|
|
|
/*
|
|
* asm_arg will encode the specified argument according to the current ABI, and
|
|
* will update r and stkd as appropriate so that the next argument can be
|
|
* encoded.
|
|
*
|
|
* Linux has used ARM's EABI for some time. Windows CE uses the legacy ABI.
|
|
*
|
|
* Under EABI:
|
|
* - doubles are 64-bit aligned both in registers and on the stack.
|
|
* If the next available argument register is R1, it is skipped
|
|
* and the double is placed in R2:R3. If R0:R1 or R2:R3 are not
|
|
* available, the double is placed on the stack, 64-bit aligned.
|
|
* - 32-bit arguments are placed in registers and 32-bit aligned
|
|
* on the stack.
|
|
*
|
|
* Under legacy ABI:
|
|
* - doubles are placed in subsequent arg registers; if the next
|
|
* available register is r3, the low order word goes into r3
|
|
* and the high order goes on the stack.
|
|
* - 32-bit arguments are placed in the next available arg register,
|
|
* - both doubles and 32-bit arguments are placed on stack with 32-bit
|
|
* alignment.
|
|
*/
|
|
void
|
|
Assembler::asm_arg(ArgSize sz, LInsp arg, Register& r, int& stkd)
|
|
{
|
|
// The stack pointer must always be at least aligned to 4 bytes.
|
|
NanoAssert((stkd & 3) == 0);
|
|
|
|
if (sz == ARGSIZE_F) {
|
|
// This task is fairly complex and so is delegated to asm_arg_64.
|
|
asm_arg_64(arg, r, stkd);
|
|
} else {
|
|
NanoAssert(sz == ARGSIZE_I || sz == ARGSIZE_U);
|
|
// pre-assign registers R0-R3 for arguments (if they fit)
|
|
if (r < R4) {
|
|
asm_regarg(sz, arg, r);
|
|
r = nextreg(r);
|
|
} else {
|
|
asm_stkarg(arg, stkd);
|
|
stkd += 4;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Encode a 64-bit floating-point argument using the appropriate ABI.
// This function operates in the same way as asm_arg, except that it will only
// handle arguments where (ArgSize)sz == ARGSIZE_F.
void
Assembler::asm_arg_64(LInsp arg, Register& r, int& stkd)
{
    // The stack pointer must always be at least aligned to 4 bytes.
    NanoAssert((stkd & 3) == 0);
    // The only use for this function when we are using soft floating-point
    // is for LIR_qjoin.
    NanoAssert(_config.arm_vfp || arg->isop(LIR_qjoin));

    Register fp_reg = deprecated_UnknownReg;

    // With VFP, the argument lives in a floating-point register; find it so
    // it can be transferred to core registers (or the stack) below.
    if (_config.arm_vfp) {
        fp_reg = findRegFor(arg, FpRegs);
        NanoAssert(isKnownReg(fp_reg));
    }

#ifdef NJ_ARM_EABI
    // EABI requires that 64-bit arguments are aligned on even-numbered
    // registers, as R0:R1 or R2:R3. If the register base is at an
    // odd-numbered register, advance it. Note that this will push r past
    // R3 if r is R3 to start with, and will force the argument to go on
    // the stack.
    if ((r == R1) || (r == R3)) {
        r = nextreg(r);
    }
#endif

    if (r < R3) {
        // Two argument registers are still free: use the pair ra:rb.
        Register ra = r;
        Register rb = nextreg(r);
        r = nextreg(rb);

#ifdef NJ_ARM_EABI
        // EABI requires that 64-bit arguments are aligned on even-numbered
        // registers, as R0:R1 or R2:R3.
        NanoAssert( ((ra == R0) && (rb == R1)) || ((ra == R2) && (rb == R3)) );
#endif

        // Put the argument in ra and rb. If the argument is in a VFP register,
        // use FMRRD to move it to ra and rb. Otherwise, let asm_regarg deal
        // with the argument as if it were two 32-bit arguments.
        if (_config.arm_vfp) {
            FMRRD(ra, rb, fp_reg);
        } else {
            asm_regarg(ARGSIZE_LO, arg->oprnd1(), ra);
            asm_regarg(ARGSIZE_LO, arg->oprnd2(), rb);
        }

#ifndef NJ_ARM_EABI
    } else if (r == R3) {
        // We only have one register left, but the legacy ABI requires that we
        // put 32 bits of the argument in the register (R3) and the remaining
        // 32 bits on the stack.
        Register ra = r;
        r = nextreg(r);

        // This really just checks that nextreg() works properly, as we know
        // that r was previously R3.
        NanoAssert(r == R4);

        // We're splitting the argument between registers and the stack. This
        // must be the first time that the stack is used, so stkd must be at 0.
        NanoAssert(stkd == 0);

        if (_config.arm_vfp) {
            // TODO: We could optimize the this to store directly from
            // the VFP register to memory using "FMRRD ra, fp_reg[31:0]" and
            // "STR fp_reg[63:32], [SP, #stkd]".

            // Load from the floating-point register as usual, but use IP
            // as a swap register. (Code is emitted in reverse, so the FMRRD
            // below executes before the STR above it.)
            STR(IP, SP, 0);
            stkd += 4;
            FMRRD(ra, IP, fp_reg);
        } else {
            // Without VFP, we can simply use asm_regarg and asm_stkarg to
            // encode the two 32-bit words as we don't need to load from a VFP
            // register.
            asm_regarg(ARGSIZE_LO, arg->oprnd1(), ra);
            asm_stkarg(arg->oprnd2(), 0);
            stkd += 4;
        }
#endif
    } else {
        // The argument won't fit in registers, so pass on to asm_stkarg.
#ifdef NJ_ARM_EABI
        // EABI requires that 64-bit arguments are 64-bit aligned.
        if ((stkd & 7) != 0) {
            // stkd will always be aligned to at least 4 bytes; this was
            // asserted on entry to this function.
            stkd += 4;
        }
#endif
        asm_stkarg(arg, stkd);
        stkd += 8;
    }
}
|
|
|
|
// Place the value of LIR instruction p into the specific argument register r,
// loading it from an immediate, from memory, from another register, or by
// asking the register allocator to materialise it in r directly.
void
Assembler::asm_regarg(ArgSize sz, LInsp p, Register r)
{
    NanoAssert(isKnownReg(r));
    if (sz & ARGSIZE_MASK_INT)
    {
        // arg goes in specific register
        if (p->isconst()) {
            // Constant: synthesise the immediate directly into r.
            asm_ld_imm(r, p->imm32());
        } else {
            if (p->isUsed()) {
                if (!p->deprecated_hasKnownReg()) {
                    // load it into the arg reg
                    int d = findMemFor(p);
                    if (p->isop(LIR_alloc)) {
                        // Stack allocations pass their address, not a value.
                        asm_add_imm(r, FP, d, 0);
                    } else {
                        LDR(r, FP, d);
                    }
                } else {
                    // it must be in a saved reg
                    MOV(r, p->deprecated_getReg());
                }
            }
            else {
                // this is the last use, so fine to assign it
                // to the scratch reg, it's dead after this point.
                findSpecificRegFor(p, r);
            }
        }
    }
    else
    {
        NanoAssert(sz == ARGSIZE_F);
        // fpu argument in register - should never happen since FPU
        // args are converted to two 32-bit ints on ARM
        NanoAssert(false);
    }
}
|
|
|
|
// Store the value of LIR instruction 'arg' into the outgoing-argument area
// at [SP, #stkd]. Handles both 32-bit and 64-bit (F64) arguments, whether
// the value currently lives in a register or only in memory.
// Note: code is emitted in reverse order, so each STR below executes after
// the LDR/asm_add_imm that follows it in the source.
void
Assembler::asm_stkarg(LInsp arg, int stkd)
{
    bool isF64 = arg->isF64();

    Register rr;
    if (arg->isUsed() && (rr = arg->deprecated_getReg(), isKnownReg(rr))) {
        // The argument resides somewhere in registers, so we simply need to
        // push it onto the stack.
        if (!_config.arm_vfp || !isF64) {
            NanoAssert(IsGpReg(rr));

            STR(rr, SP, stkd);
        } else {
            // According to the comments in asm_arg_64, LIR_qjoin
            // can have a 64-bit argument even if VFP is disabled. However,
            // asm_arg_64 will split the argument and issue two 32-bit
            // arguments to asm_stkarg so we can ignore that case here and
            // assert that we will never get 64-bit arguments unless VFP is
            // available.
            NanoAssert(_config.arm_vfp);
            NanoAssert(IsFpReg(rr));

#ifdef NJ_ARM_EABI
            // EABI requires that 64-bit arguments are 64-bit aligned.
            NanoAssert((stkd & 7) == 0);
#endif

            FSTD(rr, SP, stkd);
        }
    } else {
        // The argument does not reside in registers, so we need to get some
        // memory for it and then copy it onto the stack.
        int d = findMemFor(arg);
        if (!isF64) {
            // Run-time order: load into IP (address for LIR_alloc, value
            // otherwise), then store IP to the outgoing slot.
            STR(IP, SP, stkd);
            if (arg->isop(LIR_alloc)) {
                asm_add_imm(IP, FP, d);
            } else {
                LDR(IP, FP, d);
            }
        } else {
#ifdef NJ_ARM_EABI
            // EABI requires that 64-bit arguments are 64-bit aligned.
            NanoAssert((stkd & 7) == 0);
#endif

            // Copy the two 32-bit halves via IP. Run-time order: load low
            // word, store low word, load high word, store high word.
            STR(IP, SP, stkd+4);
            LDR(IP, FP, d+4);
            STR(IP, SP, stkd);
            LDR(IP, FP, d);
        }
    }
}
|
|
|
|
// Emit a function call for LIR instruction 'ins', handling result placement,
// the direct/indirect branch, and argument marshalling.
// Note: code is emitted in reverse order, so at run time the argument setup
// emitted at the bottom of this function executes before the branch.
void
Assembler::asm_call(LInsp ins)
{
    if (_config.arm_vfp && ins->isop(LIR_fcall)) {
        /* Because ARM actually returns the result in (R0,R1), and not in a
         * floating point register, the code to move the result into a correct
         * register is below. We do nothing here.
         *
         * The reason being that if we did something here, the final code
         * sequence we'd get would be something like:
         *     MOV {R0-R3},params        [from below]
         *     BL function               [from below]
         *     MOV {R0-R3},spilled data  [from evictScratchRegs()]
         *     MOV Dx,{R0,R1}            [from here]
         * which is clearly broken.
         *
         * This is not a problem for non-floating point calls, because the
         * restoring of spilled data into R0 is done via a call to
         * deprecated_prepResultReg(R0) in the other branch of this if-then-else,
         * meaning that evictScratchRegs() will not modify R0. However,
         * deprecated_prepResultReg is not aware of the concept of using a register pair
         * (R0,R1) for the result of a single operation, so it can only be
         * used here with the ultimate VFP register, and not R0/R1, which
         * potentially allows for R0/R1 to get corrupted as described.
         */
    } else {
        deprecated_prepResultReg(ins, rmask(retRegs[0]));
    }

    // Do this after we've handled the call result, so we don't
    // force the call result to be spilled unnecessarily.

    evictScratchRegs();

    const CallInfo* call = ins->callInfo();
    ArgSize sizes[MAXARGS];
    uint32_t argc = call->get_sizes(sizes);
    bool indirect = call->isIndirect();

    // If we aren't using VFP, assert that the LIR operation is an integer
    // function call.
    NanoAssert(_config.arm_vfp || ins->isop(LIR_icall));

    // If we're using VFP, and the return type is a double, it'll come back in
    // R0/R1. We need to either place it in the result fp reg, or store it.
    // See comments above for more details as to why this is necessary here
    // for floating point calls, but not for integer calls.
    if (_config.arm_vfp && ins->isUsed()) {
        // Determine the size (and type) of the instruction result.
        ArgSize rsize = (ArgSize)(call->_argtypes & ARGSIZE_MASK_ANY);

        // If the result size is a floating-point value, treat the result
        // specially, as described previously.
        if (rsize == ARGSIZE_F) {
            Register rr = ins->deprecated_getReg();

            NanoAssert(ins->opcode() == LIR_fcall);

            if (!isKnownReg(rr)) {
                int d = deprecated_disp(ins);
                NanoAssert(d != 0);
                deprecated_freeRsrcOf(ins, false);

                // The result doesn't have a register allocated, so store the
                // result (in R0,R1) directly to its stack slot.
                STR(R0, FP, d+0);
                STR(R1, FP, d+4);
            } else {
                NanoAssert(IsFpReg(rr));

                // Copy the result to the (VFP) result register.
                deprecated_prepResultReg(ins, rmask(rr));
                FMDRR(rr, R0, R1);
            }
        }
    }

    // Emit the branch.
    if (!indirect) {
        verbose_only(if (_logc->lcbits & LC_Assembly)
            outputf(" %p:", _nIns);
        )

        // Direct call: on v5 and above (where the calling sequence doesn't
        // corrupt LR until the actual branch instruction), we can avoid an
        // interlock in the "long" branch sequence by manually loading the
        // target address into LR ourselves before setting up the parameters
        // in other registers.
        BranchWithLink((NIns*)call->_address);
    } else {
        // Indirect call: we assign the address arg to LR since it's not
        // used for regular arguments, and is otherwise scratch since it's
        // clobberred by the call. On v4/v4T, where we have to manually do
        // the equivalent of a BLX, move LR into IP before corrupting LR
        // with the return address.
        if (blx_lr_bug) {
            // workaround for msft device emulator bug (blx lr emulated as no-op)
            underrunProtect(8);
            BLX(IP);
            MOV(IP,LR);
        } else {
            BLX(LR);
        }
        asm_regarg(ARGSIZE_LO, ins->arg(--argc), LR);
    }

    // Encode the arguments, starting at R0 and with an empty argument stack.
    Register r = R0;
    int stkd = 0;

    // Iterate through the argument list and encode each argument according to
    // the ABI.
    // Note that we loop through the arguments backwards as LIR specifies them
    // in reverse order.
    uint32_t i = argc;
    while(i--) {
        asm_arg(sizes[i], ins->arg(i), r, stkd);
    }

    // Track the largest outgoing argument area; genPrologue reserves this
    // much extra stack for the whole fragment.
    if (stkd > max_out_args) {
        max_out_args = stkd;
    }
}
|
|
|
|
Register
Assembler::nRegisterAllocFromSet(RegisterMask set)
{
    NanoAssert(set != 0);

    // Pick the highest-numbered register present in the set. The
    // CountLeadingZeroes function uses the CLZ instruction where the core
    // provides it, and falls back to a (slower) C implementation otherwise.
    Register picked = (Register)(31 - CountLeadingZeroes(set));

    // Remove the chosen register from the allocator's free list.
    _allocator.free &= ~rmask(picked);

    // Sanity checks: the result must be a real GP or FP register, and it
    // must have been a member of the requested set.
    NanoAssert(IsGpReg(picked) || IsFpReg(picked));
    NanoAssert((rmask(picked) & set) == rmask(picked));

    return picked;
}
|
|
|
|
void
|
|
Assembler::nRegisterResetAll(RegAlloc& a)
|
|
{
|
|
// add scratch registers to our free list for the allocator
|
|
a.clear();
|
|
a.free =
|
|
rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) |
|
|
rmask(R5) | rmask(R6) | rmask(R7) | rmask(R8) | rmask(R9) |
|
|
rmask(R10) | rmask(LR);
|
|
if (_config.arm_vfp)
|
|
a.free |= FpRegs;
|
|
|
|
debug_only(a.managed = a.free);
|
|
}
|
|
|
|
// Extract the condition-code field (bits 28-31) of an ARM instruction.
static inline ConditionCode
get_cc(NIns *ins)
{
    uint32_t const insn = *ins;
    return ConditionCode((insn >> 28) & 0xF);
}
|
|
|
|
static inline bool
|
|
branch_is_B(NIns* branch)
|
|
{
|
|
return (*branch & 0x0E000000) == 0x0A000000;
|
|
}
|
|
|
|
static inline bool
|
|
branch_is_LDR_PC(NIns* branch)
|
|
{
|
|
return (*branch & 0x0F7FF000) == 0x051FF000;
|
|
}
|
|
|
|
// Is this an instruction of the form ldr/str reg, [fp, #-imm] ?
// On success, the load/store flag, the transfer register and the
// (positive) 12-bit offset are returned through the OUT parameters.
static inline bool
is_ldstr_reg_fp_minus_imm(/*OUT*/uint32_t* isLoad, /*OUT*/uint32_t* rX,
                          /*OUT*/uint32_t* immX, NIns i1)
{
    // Match cond=AL, single-word LDR/STR immediate with "subtract offset"
    // addressing off FP; only the L bit (bit 20) may vary between the
    // load and store forms.
    if ((i1 & 0xFFEF0000) != 0xE50B0000)
        return false;
    *isLoad = (i1 >> 20) & 1;   // L bit: 1 = LDR, 0 = STR
    *rX = (i1 >> 12) & 0xF;     // Rd: the register being transferred
    *immX = i1 & 0xFFF;         // 12-bit offset (subtracted from FP)
    return true;
}
|
|
|
|
// Is this an instruction of the form ldmdb/stmdb fp, regset ?
// On success, the load/store flag and the 16-bit register set are
// returned through the OUT parameters.
static inline bool
is_ldstmdb_fp(/*OUT*/uint32_t* isLoad, /*OUT*/uint32_t* regSet, NIns i1)
{
    // Match cond=AL LDMDB/STMDB with FP as the base register; only the
    // L bit (bit 20) may vary between the load and store forms.
    if ((i1 & 0xFFEF0000) != 0xE90B0000)
        return false;
    *isLoad = (i1 >> 20) & 1;   // L bit: 1 = LDMDB, 0 = STMDB
    *regSet = i1 & 0xFFFF;      // one bit per register, r0..r15
    return true;
}
|
|
|
|
// Make an instruction of the form ldmdb/stmdb fp, regset
|
|
static inline NIns
|
|
mk_ldstmdb_fp(uint32_t isLoad, uint32_t regSet)
|
|
{
|
|
return 0xE90B0000 | (regSet & 0xFFFF) | ((isLoad & 1) << 20);
|
|
}
|
|
|
|
// Compute the number of 1 bits in the lowest 16 bits of regSet
static inline uint32_t
size_of_regSet(uint32_t regSet)
{
    // Count set bits with Kernighan's method: each iteration clears the
    // lowest set bit. Only the low 16 bits (r0..r15) are considered; any
    // higher bits are ignored, exactly as in the halving-adder version
    // this replaces.
    uint32_t bits = regSet & 0xFFFF;
    uint32_t count = 0;
    while (bits != 0) {
        bits &= bits - 1;
        ++count;
    }
    return count;
}
|
|
|
|
// See if two ARM instructions, i1 and i2, can be combined into one
// equivalent instruction. Callers pass i1 = _nIns[0] (the most recently
// emitted instruction; code is emitted backwards) and i2 = _nIns[1],
// which follows it in program order. On success the replacement is
// written to *merged and true is returned.
static bool
do_peep_2_1(/*OUT*/NIns* merged, NIns i1, NIns i2)
{
    uint32_t rX, rY, immX, immY, isLoadX, isLoadY, regSet;
    /* ld/str rX, [fp, #-8]
       ld/str rY, [fp, #-4]
       ==>
       ld/stmdb fp, {rX, rY}
       when
       X < Y and X != fp and Y != fp and X != 15 and Y != 15
    */
    if (is_ldstr_reg_fp_minus_imm(&isLoadX, &rX, &immX, i1) &&
        is_ldstr_reg_fp_minus_imm(&isLoadY, &rY, &immY, i2) &&
        immX == 8 && immY == 4 && rX < rY &&
        isLoadX == isLoadY &&       // both loads or both stores
        rX != FP && rY != FP &&
        rX != 15 && rY != 15) {
        *merged = mk_ldstmdb_fp(isLoadX, (1 << rX) | (1<<rY));
        return true;
    }
    /* ld/str rX, [fp, #-N]
       ld/stmdb fp, regset
       ==>
       ld/stmdb fp, union(regset,{rX})
       when
       regset is nonempty
       X < all elements of regset
       N == 4 * (1 + card(regset))
       X != fp and X != 15
    */
    if (is_ldstr_reg_fp_minus_imm(&isLoadX, &rX, &immX, i1) &&
        is_ldstmdb_fp(&isLoadY, &regSet, i2) &&
        regSet != 0 &&
        // rX must be lower-numbered than every register already in the
        // set (LDM/STM transfer registers in ascending order).
        (regSet & ((1 << (rX + 1)) - 1)) == 0 &&
        immX == 4 * (1 + size_of_regSet(regSet)) &&
        isLoadX == isLoadY &&
        rX != FP && rX != 15) {
        *merged = mk_ldstmdb_fp(isLoadX, regSet | (1 << rX));
        return true;
    }
    return false;
}
|
|
|
|
// Determine whether or not it's safe to look at _nIns[1].
|
|
// Necessary condition for safe peepholing with do_peep_2_1.
|
|
static inline bool
|
|
does_next_instruction_exist(NIns* _nIns, NIns* codeStart, NIns* codeEnd,
|
|
NIns* exitStart, NIns* exitEnd)
|
|
{
|
|
return (exitStart <= _nIns && _nIns+1 < exitEnd) ||
|
|
(codeStart <= _nIns && _nIns+1 < codeEnd);
|
|
}
|
|
|
|
void
Assembler::nPatchBranch(NIns* branch, NIns* target)
{
    // Patch the jump in a loop

    //
    // There are two feasible cases here, the first of which has 2 sub-cases:
    //
    //   (1)  We are patching a patchable unconditional jump emitted by
    //        JMP_far.  All possible encodings we may be looking at with
    //        involve 2 words, though we *may* have to change from 1 word to
    //        2 or vice verse.
    //
    //          1a:  B ±32MB ; BKPT
    //          1b:  LDR PC [PC, #-4] ; $imm
    //
    //   (2)  We are patching a patchable conditional jump emitted by
    //        B_cond_chk.  Short conditional jumps are non-patchable, so we
    //        won't have one here; will only ever have an instruction of the
    //        following form:
    //
    //          LDRcc PC [PC, #lit] ...
    //
    //        We don't actually know whether the lit-address is in the
    //        constant pool or in-line of the instruction stream, following
    //        the insn (with a jump over it) and we don't need to.  For our
    //        purposes here, cases 1b and 2 both look the same.
    //
    // For purposes of handling our patching task, we group cases 1b and 2
    // together, and handle case 1a on its own as it might require expanding
    // from a short-jump to a long-jump.
    //
    // We do not handle contracting from a long-jump to a short-jump, though
    // this is a possible future optimisation for case 1b. For now it seems
    // not worth the trouble.
    //

    if (branch_is_B(branch)) {
        // Case 1a
        // A short B branch, must be unconditional.
        NanoAssert(get_cc(branch) == AL);

        int32_t offset = PC_OFFSET_FROM(target, branch);
        if (isS24(offset>>2)) {
            // We can preserve the existing form, just rewrite its offset.
            NIns cond = *branch & 0xF0000000;
            *branch = (NIns)( cond | (0xA<<24) | ((offset>>2) & 0xFFFFFF) );
        } else {
            // We need to expand the existing branch to a long jump.
            // make sure the next instruction is a dummy BKPT
            NanoAssert(*(branch+1) == BKPT_insn);

            // Set the branch instruction to   LDRcc pc, [pc, #-4]
            // followed by the 32-bit absolute target address.
            NIns cond = *branch & 0xF0000000;
            *branch++ = (NIns)( cond | (0x51<<20) | (PC<<16) | (PC<<12) | (4));
            *branch++ = (NIns)target;
        }
    } else {
        // Case 1b & 2
        // Not a B branch, must be LDR, might be any kind of condition.
        NanoAssert(branch_is_LDR_PC(branch));

        // Decode the PC-relative literal address: the PC reads as the
        // branch address + 8 (i.e. branch+2 instruction words), with the
        // 12-bit offset added or subtracted depending on the U bit (23).
        NIns *addr = branch+2;
        int offset = (*branch & 0xFFF) / sizeof(NIns);
        if (*branch & (1<<23)) {
            addr += offset;
        } else {
            addr -= offset;
        }

        // Just redirect the jump target, leave the insn alone.
        *addr = (NIns) target;
    }
}
|
|
|
|
RegisterMask
Assembler::hint(LIns* ins)
{
    // Suggest a preferred register (mask) for the result of ins, based on
    // ARM calling conventions. Zero means "no preference".
    uint32_t op = ins->opcode();
    RegisterMask prefer = 0;

    if (op == LIR_icall) {
        // Integer call results arrive in R0.
        prefer = rmask(R0);
    } else if (op == LIR_callh) {
        // The high half of a 64-bit call result arrives in R1.
        prefer = rmask(R1);
    } else if (op == LIR_param) {
        // The first four normal (kind 0) parameters are passed in the
        // argument registers.
        if (ins->paramKind() == 0 && ins->paramArg() < 4)
            prefer = rmask(argRegs[ins->paramArg()]);
    }

    return prefer;
}
|
|
|
|
void
Assembler::asm_qjoin(LIns *ins)
{
    // Store the two 32-bit halves of a 64-bit value into the
    // instruction's stack slot: lo at FP+d, hi at FP+d+4.
    int d = findMemFor(ins);
    NanoAssert(d);
    LIns* lo = ins->oprnd1();
    LIns* hi = ins->oprnd2();

    Register r = findRegFor(hi, GpRegs);
    STR(r, FP, d+4);

    // okay if r gets recycled.
    r = findRegFor(lo, GpRegs);
    STR(r, FP, d);
    deprecated_freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
}
|
|
|
|
void
Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
{
    // Emit a 32-, 16- or 8-bit store of `value` to [base + dr].
    //
    // Note that the assembler emits code backwards, so in the
    // out-of-range cases below the asm_add_imm (which computes the
    // effective address into IP) executes *before* the store.
    Register ra, rb;
    getBaseReg2(GpRegs, value, ra, GpRegs, base, rb, dr);

    switch (op) {
        case LIR_sti:
            if (isU12(-dr) || isU12(dr)) {
                // The offset fits in a 12-bit store immediate.
                STR(ra, rb, dr);
            } else {
                // Offset out of range: address computed into IP first.
                STR(ra, IP, 0);
                asm_add_imm(IP, rb, dr);
            }
            return;
        case LIR_stb:
            if (isU12(-dr) || isU12(dr)) {
                STRB(ra, rb, dr);
            } else {
                STRB(ra, IP, 0);
                asm_add_imm(IP, rb, dr);
            }
            return;
        case LIR_sts:
            // Similar to the sti/stb case, but the max offset is smaller:
            // halfword transfers only have an 8-bit immediate.
            if (isU8(-dr) || isU8(dr)) {
                STRH(ra, rb, dr);
            } else {
                STRH(ra, IP, 0);
                asm_add_imm(IP, rb, dr);
            }
            return;
        default:
            NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
            return;
    }
}
|
|
|
|
void
Assembler::asm_restore(LInsp i, Register r)
{
    // Rematerialize the value of LIR instruction i into register r,
    // either by recomputing it (alloca address, constant) or by
    // reloading it from its stack slot.
    if (i->isop(LIR_alloc)) {
        // An alloca result is just an offset from FP; recompute it.
        asm_add_imm(r, FP, deprecated_disp(i));
    } else if (i->isconst()) {
        // Constants are cheaper to reload as immediates than from memory.
        if (!i->deprecated_getArIndex()) {
            i->deprecated_markAsClear();
        }
        asm_ld_imm(r, i->imm32());
    }
    else {
        // We can't easily load immediate values directly into FP registers, so
        // ensure that memory is allocated for the constant and load it from
        // memory.
        int d = findMemFor(i);
        if (_config.arm_vfp && IsFpReg(r)) {
            // FLDD's immediate is an 8-bit word offset.
            if (isS8(d >> 2)) {
                FLDD(r, FP, d);
            } else {
                FLDD(r, IP, 0);
                asm_add_imm(IP, FP, d);
            }
        } else {
            NIns merged;
            LDR(r, FP, d);
            // See if we can merge this load into an immediately following
            // one, by creating or extending an LDM instruction.
            if (/* is it safe to poke _nIns[1] ? */
                does_next_instruction_exist(_nIns, codeStart, codeEnd,
                                            exitStart, exitEnd)
                && /* can we merge _nIns[0] into _nIns[1] ? */
                do_peep_2_1(&merged, _nIns[0], _nIns[1])) {
                _nIns[1] = merged;
                _nIns++;
                verbose_only( asm_output("merge next into LDMDB"); )
            }
        }
    }
}
|
|
|
|
void
Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
{
    // Spill register rr to its stack slot at FP+d. A d of 0 means the
    // value has no stack slot, so nothing is emitted. `pop` and `quad`
    // are unused on ARM.
    (void) pop;
    (void) quad;
    if (d) {
        if (_config.arm_vfp && IsFpReg(rr)) {
            // FSTD's immediate is an 8-bit word offset.
            if (isS8(d >> 2)) {
                FSTD(rr, FP, d);
            } else {
                FSTD(rr, IP, 0);
                asm_add_imm(IP, FP, d);
            }
        } else {
            NIns merged;
            STR(rr, FP, d);
            // See if we can merge this store into an immediately following
            // one, by creating or extending a STM instruction.
            if (/* is it safe to poke _nIns[1] ? */
                does_next_instruction_exist(_nIns, codeStart, codeEnd,
                                            exitStart, exitEnd)
                && /* can we merge _nIns[0] into _nIns[1] ? */
                do_peep_2_1(&merged, _nIns[0], _nIns[1])) {
                _nIns[1] = merged;
                _nIns++;
                verbose_only( asm_output("merge next into STMDB"); )
            }
        }
    }
}
|
|
|
|
void
Assembler::asm_load64(LInsp ins)
{
    // Load a 64-bit float (LIR_ldf/ldfc), or a 32-bit float widened to
    // 64 bits (LIR_ld32f/ldc32f), from [base + offset] into either the
    // result's FP register or its stack slot.
    //asm_output("<<< load64");

    NanoAssert(ins->isF64());

    LIns* base = ins->oprnd1();
    int offset = ins->disp();

    Register rr = ins->deprecated_getReg();
    int d = deprecated_disp(ins);

    Register rb = findRegFor(base, GpRegs);
    NanoAssert(IsGpReg(rb));
    deprecated_freeRsrcOf(ins, false);

    //outputf("--- load64: Finished register allocation.");

    switch (ins->opcode()) {
        case LIR_ldf:
        case LIR_ldfc:
            if (_config.arm_vfp && isKnownReg(rr)) {
                // VFP is enabled and the result will go into a register.
                NanoAssert(IsFpReg(rr));

                // FLDD takes an 8-bit word offset, so spill to IP when the
                // offset is out of range or not word-aligned.
                if (!isS8(offset >> 2) || (offset&3) != 0) {
                    FLDD(rr,IP,0);
                    asm_add_imm(IP, rb, offset);
                } else {
                    FLDD(rr,rb,offset);
                }
            } else {
                // Either VFP is not available or the result needs to go into memory;
                // in either case, VFP instructions are not required. Note that the
                // result will never be loaded into registers if VFP is not available.
                NanoAssert(!isKnownReg(rr));
                NanoAssert(d != 0);

                // Check that the offset is 8-byte (64-bit) aligned.
                NanoAssert((d & 0x7) == 0);

                // *(uint64_t*)(FP+d) = *(uint64_t*)(rb+offset)
                asm_mmq(FP, d, rb, offset);
            }
            return;

        case LIR_ld32f:
        case LIR_ldc32f:
            if (_config.arm_vfp) {
                // Code is emitted backwards: the FLDS at the bottom of this
                // block executes first, loading the single into S14; then
                // FCVTDS widens it to a double in the destination.
                if (isKnownReg(rr)) {
                    NanoAssert(IsFpReg(rr));
                    FCVTDS(rr, S14);
                } else {
                    // Normally D7 isn't allowed to be used as an FP reg.
                    // In this case we make an explicit exception.
                    // NOTE(review): this uses isS8(d) where the other FSTD
                    // sites use isS8(d >> 2); it is merely more
                    // conservative, not incorrect -- confirm intent.
                    if (isS8(d)) {
                        FSTD_allowD7(D7, FP, d, true);
                    } else {
                        FSTD_allowD7(D7, IP, 0, true);
                        asm_add_imm(IP, FP, d);
                    }
                    FCVTDS_allowD7(D7, S14, true);
                }

                // always load into a VFP reg to do the conversion, and always use
                // our S14 scratch reg
                if (!isS8(offset >> 2) || (offset&3) != 0) {
                    FLDS(S14, IP, 0);
                    asm_add_imm(IP, rb, offset);
                } else {
                    FLDS(S14, rb, offset);
                }
            } else {
                NanoAssertMsg(0, "ld32f not supported with non-VFP, fix me");
            }
            return;

        default:
            NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
            return;
    }

    //asm_output(">>> load64");
}
|
|
|
|
void
|
|
Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
|
|
{
|
|
//asm_output("<<< store64 (dr: %d)", dr);
|
|
|
|
switch (op) {
|
|
case LIR_stfi:
|
|
if (_config.arm_vfp) {
|
|
Register rb = findRegFor(base, GpRegs);
|
|
|
|
if (value->isconstq()) {
|
|
underrunProtect(LD32_size*2 + 8);
|
|
|
|
// XXX use another reg, get rid of dependency
|
|
STR(IP, rb, dr);
|
|
asm_ld_imm(IP, value->imm64_0(), false);
|
|
STR(IP, rb, dr+4);
|
|
asm_ld_imm(IP, value->imm64_1(), false);
|
|
|
|
return;
|
|
}
|
|
|
|
Register rv = findRegFor(value, FpRegs);
|
|
|
|
NanoAssert(isKnownReg(rb));
|
|
NanoAssert(isKnownReg(rv));
|
|
|
|
Register baseReg = rb;
|
|
intptr_t baseOffset = dr;
|
|
|
|
if (!isS8(dr)) {
|
|
baseReg = IP;
|
|
baseOffset = 0;
|
|
}
|
|
|
|
FSTD(rv, baseReg, baseOffset);
|
|
|
|
if (!isS8(dr)) {
|
|
asm_add_imm(IP, rb, dr);
|
|
}
|
|
|
|
// if it's a constant, make sure our baseReg/baseOffset location
|
|
// has the right value
|
|
if (value->isconstq()) {
|
|
underrunProtect(4*4);
|
|
asm_quad_nochk(rv, value->imm64_0(), value->imm64_1());
|
|
}
|
|
} else {
|
|
int da = findMemFor(value);
|
|
Register rb = findRegFor(base, GpRegs);
|
|
// *(uint64_t*)(rb+dr) = *(uint64_t*)(FP+da)
|
|
asm_mmq(rb, dr, FP, da);
|
|
}
|
|
return;
|
|
|
|
case LIR_st32f:
|
|
if (_config.arm_vfp) {
|
|
Register rb = findRegFor(base, GpRegs);
|
|
|
|
if (value->isconstq()) {
|
|
underrunProtect(LD32_size*2 + 8);
|
|
|
|
// XXX use another reg, get rid of dependency
|
|
STR(IP, rb, dr);
|
|
asm_ld_imm(IP, value->imm64_0(), false);
|
|
STR(IP, rb, dr+4);
|
|
asm_ld_imm(IP, value->imm64_1(), false);
|
|
|
|
return;
|
|
}
|
|
|
|
Register rv = findRegFor(value, FpRegs);
|
|
|
|
NanoAssert(isKnownReg(rb));
|
|
NanoAssert(isKnownReg(rv));
|
|
|
|
Register baseReg = rb;
|
|
intptr_t baseOffset = dr;
|
|
|
|
if (!isS8(dr)) {
|
|
baseReg = IP;
|
|
baseOffset = 0;
|
|
}
|
|
|
|
FSTS(S14, baseReg, baseOffset);
|
|
|
|
if (!isS8(dr)) {
|
|
asm_add_imm(IP, rb, dr);
|
|
}
|
|
|
|
FCVTSD(S14, rv);
|
|
|
|
// if it's a constant, make sure our baseReg/baseOffset location
|
|
// has the right value
|
|
if (value->isconstq()) {
|
|
underrunProtect(4*4);
|
|
asm_quad_nochk(rv, value->imm64_0(), value->imm64_1());
|
|
}
|
|
} else {
|
|
NanoAssertMsg(0, "st32f not supported with non-VFP, fix me");
|
|
}
|
|
return;
|
|
default:
|
|
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
|
|
return;
|
|
}
|
|
|
|
//asm_output(">>> store64");
|
|
}
|
|
|
|
// Stick a quad (two 32-bit words: imm64_0 low, imm64_1 high) into
// register rr by embedding the constant in the instruction stream and
// loading it PC-relative. The caller must already have reserved at
// least four instruction words with underrunProtect.
void
Assembler::asm_quad_nochk(Register rr, int32_t imm64_0, int32_t imm64_1)
{
    // We're not going to use a slot, because it might be too far
    // away. Instead, we're going to stick a branch in the stream to
    // jump over the constants, and then load from a short PC relative
    // offset.

    // stream should look like:
    //      branch A
    //      imm64_0
    //      imm64_1
    // A:   FLDD PC-16

    FLDD(rr, PC, -16);

    // Emit the two constant words (remember: code is emitted backwards,
    // so these end up immediately before the FLDD).
    *(--_nIns) = (NIns) imm64_1;
    *(--_nIns) = (NIns) imm64_0;

    // Branch over the two embedded words to the FLDD.
    B_nochk(_nIns+2);
}
|
|
|
|
void
Assembler::asm_quad(LInsp ins)
{
    // Materialize a 64-bit (double) constant, either into its assigned
    // VFP register or directly into its stack slot.
    //asm_output(">>> asm_quad");

    int d = deprecated_disp(ins);
    Register rr = ins->deprecated_getReg();

    deprecated_freeRsrcOf(ins, false);

    if (_config.arm_vfp && isKnownReg(rr))
    {
        // Spill the register to the slot (if any) and then load the
        // constant into the register via an embedded literal.
        asm_spill(rr, d, false, true);

        underrunProtect(4*4);
        asm_quad_nochk(rr, ins->imm64_0(), ins->imm64_1());
    } else {
        NanoAssert(d);
        // asm_mmq might spill a reg, so don't call it;
        // instead do the equivalent directly.
        //asm_mmq(FP, d, PC, -16);

        // Store the two halves through IP (emitted backwards: each
        // asm_ld_imm executes before the STR above it).
        STR(IP, FP, d+4);
        asm_ld_imm(IP, ins->imm64_1());
        STR(IP, FP, d);
        asm_ld_imm(IP, ins->imm64_0());
    }

    //asm_output("<<< asm_quad");
}
|
|
|
|
void
|
|
Assembler::asm_nongp_copy(Register r, Register s)
|
|
{
|
|
if (_config.arm_vfp && IsFpReg(r) && IsFpReg(s)) {
|
|
// fp->fp
|
|
FCPYD(r, s);
|
|
} else {
|
|
// We can't move a double-precision FP register into a 32-bit GP
|
|
// register, so assert that no calling code is trying to do that.
|
|
NanoAssert(0);
|
|
}
|
|
}
|
|
|
|
Register
Assembler::asm_binop_rhs_reg(LInsp)
{
    // No binary operation on ARM prefers a particular register for its
    // right-hand operand, so never request one.
    return deprecated_UnknownReg;
}
|
|
|
|
/**
 * copy 64 bits: (rd+dd) <- (rs+ds)
 */
void
Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
{
    // The value is either a 64bit struct or maybe a float that isn't live in
    // an FPU reg. Either way, don't put it in an FPU reg just to load & store
    // it.
    // This operation becomes a simple 64-bit memcpy.

    // In order to make the operation optimal, we will require two GP
    // registers. We can't allocate a register here because the caller may have
    // called deprecated_freeRsrcOf, and allocating a register here may cause something
    // else to spill onto the stack which has just be conveniently freed by
    // deprecated_freeRsrcOf (resulting in stack corruption).
    //
    // Falling back to a single-register implementation of asm_mmq is better
    // than adjusting the callers' behaviour (to allow us to allocate another
    // register here) because spilling a register will end up being slower than
    // just using the same register twice anyway.
    //
    // Thus, if there is a free register which we can borrow, we will emit the
    // following code:
    //  LDR rr, [rs, #ds]
    //  LDR ip, [rs, #(ds+4)]
    //  STR rr, [rd, #dd]
    //  STR ip, [rd, #(dd+4)]
    // (Where rr is the borrowed register.)
    //
    // If there is no free register, don't spill an existing allocation. Just
    // do the following:
    //  LDR ip, [rs, #ds]
    //  STR ip, [rd, #dd]
    //  LDR ip, [rs, #(ds+4)]
    //  STR ip, [rd, #(dd+4)]
    //
    // (Because the assembler emits backwards, the STR/LDR calls below
    // appear in the reverse of the execution orders shown above.)

    // Ensure that the PC is not used as either base register. The instruction
    // generation macros call underrunProtect, and a side effect of this is
    // that we may be pushed onto another page, so the PC is not a reliable
    // base register.
    NanoAssert(rs != PC);
    NanoAssert(rd != PC);

    // Find the list of free registers from the allocator's free list and the
    // GpRegs mask. This excludes any floating-point registers that may be on
    // the free list.
    RegisterMask free = _allocator.free & AllowableFlagRegs;

    if (free) {
        // There is at least one register on the free list, so grab one for
        // temporary use. There is no need to allocate it explicitly because
        // we won't need it after this function returns.

        // The CountLeadingZeroes can be used to quickly find a set bit in the
        // free mask.
        Register rr = (Register)(31-CountLeadingZeroes(free));

        // Note: Not every register in GpRegs is usable here. However, these
        // registers will never appear on the free list.
        NanoAssert((free & rmask(PC)) == 0);
        NanoAssert((free & rmask(LR)) == 0);
        NanoAssert((free & rmask(SP)) == 0);
        NanoAssert((free & rmask(IP)) == 0);
        NanoAssert((free & rmask(FP)) == 0);

        // Emit the actual instruction sequence.

        STR(IP, rd, dd+4);
        STR(rr, rd, dd);
        LDR(IP, rs, ds+4);
        LDR(rr, rs, ds);
    } else {
        // There are no free registers, so fall back to using IP twice.
        STR(IP, rd, dd+4);
        LDR(IP, rs, ds+4);
        STR(IP, rd, dd);
        LDR(IP, rs, ds);
    }
}
|
|
|
|
// Increment the 32-bit profiling counter at pCtr, without
// changing any registers.
verbose_only(
void Assembler::asm_inc_m32(uint32_t* pCtr)
{
    // We need to temporarily free up two registers to do this, so
    // just push r0 and r1 on the stack. This assumes that the area
    // at r13 - 8 .. r13 - 1 isn't being used for anything else at
    // this point. This guaranteed us by the EABI; although the
    // situation with the legacy ABI I'm not sure of.
    //
    // Plan: emit the following bit of code. It's not efficient, but
    // this is for profiling debug builds only, and is self contained,
    // except for above comment re stack use.
    //
    // E92D0003                 push    {r0,r1}
    // E59F0000                 ldr     r0, [r15]   ; pCtr
    // EA000000                 b       .+8         ; jump over imm
    // 12345678                 .word   0x12345678  ; pCtr
    // E5901000                 ldr     r1, [r0]
    // E2811001                 add     r1, r1, #1
    // E5801000                 str     r1, [r0]
    // E8BD0003                 pop     {r0,r1}

    // We need keep the 4 words beginning at "ldr r0, [r15]"
    // together. Simplest to underrunProtect the whole thing.
    // Note: code is emitted backwards, so the IMM32 calls below are in
    // the reverse of the execution order shown above.
    underrunProtect(8*4);
    IMM32(0xE8BD0003);       //  pop     {r0,r1}
    IMM32(0xE5801000);       //  str     r1, [r0]
    IMM32(0xE2811001);       //  add     r1, r1, #1
    IMM32(0xE5901000);       //  ldr     r1, [r0]
    IMM32((uint32_t)pCtr);   //  .word   pCtr
    IMM32(0xEA000000);       //  b       .+8
    IMM32(0xE59F0000);       //  ldr     r0, [r15]
    IMM32(0xE92D0003);       //  push    {r0,r1}
}
)
|
|
|
|
void
Assembler::nativePageReset()
{
    // Reset the literal-pool cursors for both the normal and the exit
    // code chunks; nativePageSetup() will re-establish them when code
    // generation (re)starts.
    _nSlot = 0;
    _nExitSlot = 0;
}
|
|
|
|
void
Assembler::nativePageSetup()
{
    // Allocate an initial code chunk (if none exists yet) and establish
    // the literal-pool cursor for it.
    NanoAssert(!_inExit);
    if (!_nIns)
        codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));

    // constpool starts at top of page and goes down,
    // code starts at bottom of page and moves up
    if (!_nSlot)
        _nSlot = codeStart;
}
|
|
|
|
|
|
void
Assembler::underrunProtect(int bytes)
{
    // Guarantee that at least `bytes` bytes can be emitted (backwards,
    // at _nIns) without colliding with the literal pool growing up from
    // _nSlot. If not, allocate a fresh chunk and branch from it to the
    // code already emitted.
    NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
    NanoAssert(_nSlot != 0 && int(_nIns)-int(_nSlot) <= 4096);
    uintptr_t top = uintptr_t(_nSlot);
    uintptr_t pc = uintptr_t(_nIns);
    if (pc - bytes < top)
    {
        verbose_only(verbose_outputf("        %p:", _nIns);)
        NIns* target = _nIns;
        // This may be in a normal code chunk or an exit code chunk.
        codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));

        // Start a new literal pool at the bottom of the new chunk.
        _nSlot = codeStart;

        // _nSlot points to the first empty position in the new code block
        // _nIns points just past the last empty position.
        // Assume B_nochk won't ever try to write to _nSlot. See B_cond_chk macro.
        B_nochk(target);
    }
}
|
|
|
|
void
Assembler::JMP_far(NIns* addr)
{
    // Emit an unconditional, patchable jump to addr. Short targets get
    // "B addr; BKPT" (case 1a in nPatchBranch); long targets get
    // "LDR PC, [PC, #-4]; .word addr" (case 1b).
    // Even if a simple branch is all that is required, this function must emit
    // two words so that the branch can be arbitrarily patched later on.
    underrunProtect(8);

    intptr_t offs = PC_OFFSET_FROM(addr,_nIns-2);

    if (isS24(offs>>2)) {
        // Emit a BKPT to ensure that we reserve enough space for a full 32-bit
        // branch patch later on. The BKPT should never be executed.
        BKPT_nochk();

        asm_output("bkpt");

        // B [PC+offs]
        *(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((offs>>2) & 0xFFFFFF) );

        asm_output("b %p", (void*)addr);
    } else {
        // Insert the target address as a constant in the instruction stream.
        *(--_nIns) = (NIns)((addr));
        // ldr pc, [pc, #-4] // load the address into pc, reading it from [pc-4] (e.g.,
        // the next instruction)
        *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4));

        asm_output("ldr pc, =%p", (void*)addr);
    }
}
|
|
|
|
// Perform a branch with link, and ARM/Thumb exchange if necessary. The actual
// BLX instruction is only available from ARMv5 onwards, but as we don't
// support anything older than that this function will not attempt to output
// pre-ARMv5 sequences.
//
// Note: This function is not designed to be used with branches which will be
// patched later, though it will work if the patcher knows how to patch the
// generated instruction sequence.
void
Assembler::BranchWithLink(NIns* addr)
{
    // Most branches emitted by TM are loaded through a register, so always
    // reserve enough space for the LDR sequence. This should give us a slight
    // net gain over reserving the exact amount required for shorter branches.
    // This _must_ be called before PC_OFFSET_FROM as it can move _nIns!
    underrunProtect(4+LD32_size);

    // Calculate the offset from the instruction that is about to be
    // written (at _nIns-1) to the target.
    intptr_t offs = PC_OFFSET_FROM(addr,_nIns-1);

    // ARMv5 and above can use BL <imm> for branches within ±32MB of the
    // PC and BLX Rm for long branches.
    if (isS24(offs>>2)) {
        // the value we need to stick in the instruction; masked,
        // because it will be sign-extended back to 32 bits.
        intptr_t offs2 = (offs>>2) & 0xffffff;

        // Bit 0 of the target address distinguishes ARM (0) from
        // Thumb (1) targets.
        if (((intptr_t)addr & 1) == 0) {
            // The target is ARM, so just emit a BL.

            // BL target
            *(--_nIns) = (NIns)( (COND_AL) | (0xB<<24) | (offs2) );
            asm_output("bl %p", (void*)addr);
        } else {
            // The target is Thumb, so emit a BLX.

            // We need to emit an ARMv5+ instruction, so assert that we have a
            // suitable processor. Note that we don't support ARMv4(T), but
            // this serves as a useful sanity check.
            NanoAssert(_config.arm_arch >= 5);

            // The (pre-shifted) value of the "H" bit in the BLX encoding.
            uint32_t H = (offs & 0x2) << 23;

            // BLX addr
            *(--_nIns) = (NIns)( (0xF << 28) | (0x5<<25) | (H) | (offs2) );
            asm_output("blx %p", (void*)addr);
        }
    } else {
        // Load the target address into IP and branch to that. We've already
        // done underrunProtect, so we can skip that here.
        BLX(IP, false);

        // LDR IP, =addr
        asm_ld_imm(IP, (int32_t)addr, false);
    }
}
|
|
|
|
// This is identical to BranchWithLink(NIns*) but emits a branch to an address
// held in a register rather than a literal address. Set chk to false to skip
// the underrunProtect call (when the caller has already reserved space).
inline void
Assembler::BLX(Register addr, bool chk /* = true */)
{
    // We need to emit an ARMv5+ instruction, so assert that we have a suitable
    // processor. Note that we don't support ARMv4(T), but this serves as a
    // useful sanity check.
    NanoAssert(_config.arm_arch >= 5);

    NanoAssert(IsGpReg(addr));
    // There is a bug in the WinCE device emulator which stops "BLX LR" from
    // working as expected. Assert that we never do that!
    if (blx_lr_bug) { NanoAssert(addr != LR); }

    if (chk) {
        underrunProtect(4);
    }

    // BLX IP
    *(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) );
    asm_output("blx ip");
}
|
|
|
|
// Emit the code required to load a memory address into a register as follows:
// d = *(b+off)
// underrunProtect calls from this function can be disabled by setting chk to
// false. However, this function can use more than LD32_size bytes of space if
// the offset is out of the range of a LDR instruction; the maximum space this
// function requires for underrunProtect is 4+LD32_size.
void
Assembler::asm_ldr_chk(Register d, Register b, int32_t off, bool chk)
{
    // VFP destinations delegate to FLDD (double load).
    if (_config.arm_vfp && IsFpReg(d)) {
        FLDD_chk(d,b,off,chk);
        return;
    }

    NanoAssert(IsGpReg(d));
    NanoAssert(IsGpReg(b));

    // We can't use underrunProtect if the base register is the PC because
    // underrunProtect might move the PC if there isn't enough space on the
    // current page.
    NanoAssert((b != PC) || (!chk));

    if (isU12(off)) {
        // LDR d, b, #+off  (U bit set: add the offset)
        if (chk) underrunProtect(4);
        *(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (b<<16) | (d<<12) | off );
    } else if (isU12(-off)) {
        // LDR d, b, #-off  (U bit clear: subtract the offset)
        if (chk) underrunProtect(4);
        *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (b<<16) | (d<<12) | -off );
    } else {
        // The offset is over 4096 (and outside the range of LDR), so we need
        // to add a level of indirection to get the address into IP.

        // Because of that, we can't do a PC-relative load unless it fits within
        // the single-instruction forms above.

        NanoAssert(b != PC);
        NanoAssert(b != IP);

        if (chk) underrunProtect(4+LD32_size);

        // LDR d, [b + IP]  (register-offset form), preceded (in execution
        // order) by loading the offset into IP.
        *(--_nIns) = (NIns)( COND_AL | (0x79<<20) | (b<<16) | (d<<12) | IP );
        asm_ld_imm(IP, off, false);
    }

    asm_output("ldr %s, [%s, #%d]",gpn(d),gpn(b),(off));
}
|
|
|
|
// Emit the code required to load an immediate value (imm) into general-purpose
// register d. Optimal (MOV-based) mechanisms are used if the immediate can be
// encoded using ARM's operand 2 encoding. Otherwise, a slot is used on the
// literal pool and LDR is used to load the value.
//
// chk can be explicitly set to false in order to disable underrunProtect calls
// from this function; this allows the caller to perform the check manually.
// This function guarantees not to use more than LD32_size bytes of space.
void
Assembler::asm_ld_imm(Register d, int32_t imm, bool chk /* = true */)
{
    uint32_t    op2imm;

    NanoAssert(IsGpReg(d));

    // Attempt to encode the immediate using the second operand of MOV or MVN.
    // This is the simplest solution and generates the shortest and fastest
    // code, but can only encode a limited set of values.

    if (encOp2Imm(imm, &op2imm)) {
        // Use MOV to encode the literal.
        MOVis(d, op2imm, 0);
        return;
    }

    if (encOp2Imm(~imm, &op2imm)) {
        // Use MVN to encode the inverted literal.
        MVNis(d, op2imm, 0);
        return;
    }

    // Try to use simple MOV, MVN or MOV(W|T) instructions to load the
    // immediate. If this isn't possible, load it from memory.
    //  - We cannot use MOV(W|T) on cores older than the introduction of
    //    Thumb-2 or if the target register is the PC.
    //
    // (Note that we use Thumb-2 if arm_arch is ARMv7 or later; the only earlier
    // ARM core that provided Thumb-2 is ARMv6T2/ARM1156, which is a real-time
    // core that nanojit is unlikely to ever target.)
    if (_config.arm_arch >= 7 && (d != PC)) {
        // ARMv6T2 and above have MOVW and MOVT.
        uint32_t    high_h = (uint32_t)imm >> 16;
        uint32_t    low_h = imm & 0xffff;

        if (high_h != 0) {
            // Load the high half-word (if necessary).
            MOVTi_chk(d, high_h, chk);
        }
        // Load the low half-word. This also zeroes the high half-word, and
        // thus must execute _before_ MOVT, and is necessary even if low_h is 0
        // because MOVT will not change the existing low half-word.
        MOVWi_chk(d, low_h, chk);

        return;
    }

    // We couldn't encode the literal in the instruction stream, so load it
    // from memory.

    // Because the literal pool is on the same page as the generated code, it
    // will almost always be within the ±4096 range of a LDR. However, this may
    // not be the case if _nSlot is at the start of the page and _nIns is at
    // the end because the PC is 8 bytes ahead of _nIns. This is unlikely to
    // happen, but if it does occur we can simply waste a word or two of
    // literal space.

    // We must do the underrunProtect before PC_OFFSET_FROM as underrunProtect
    // can move the PC if there isn't enough space on the current page!
    if (chk) {
        underrunProtect(LD32_size);
    }

    int offset = PC_OFFSET_FROM(_nSlot, _nIns-1);
    // If the offset is out of range, waste literal space until it is in range.
    while (offset <= -4096) {
        ++_nSlot;
        offset += sizeof(_nSlot);
    }
    NanoAssert((isU12(-offset) || isU12(offset)) && (offset <= -8));

    // Write the literal.
    *(_nSlot++) = imm;
    asm_output("## imm= 0x%x", imm);

    // Load the literal (PC-relative).
    LDR_nochk(d,PC,offset);
    NanoAssert(uintptr_t(_nIns) + 8 + offset == uintptr_t(_nSlot-1));
    NanoAssert(*((int32_t*)_nSlot-1) == imm);
}
|
|
|
|
// Branch to target address _t with condition _c, doing underrun
|
|
// checks (_chk == 1) or skipping them (_chk == 0).
|
|
//
|
|
// Set the target address (_t) to 0 if the target is not yet known and the
|
|
// branch will be patched up later.
|
|
//
|
|
// If the jump is to a known address (with _t != 0) and it fits in a relative
|
|
// jump (±32MB), emit that.
|
|
// If the jump is unconditional, emit the dest address inline in
|
|
// the instruction stream and load it into pc.
|
|
// If the jump has a condition, but no one has mucked with _nIns and our _nSlot
|
|
// pointer is valid, stick the constant in the slot and emit a conditional
|
|
// load into pc.
|
|
// Otherwise, emit the conditional load into pc from a nearby constant,
|
|
// and emit a jump to jump over it in case the condition fails.
|
|
//
|
|
// NB: B_nochk depends on this not calling samepage() when _c == AL
|
|
    void
    Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk)
    {
        int32_t offs = PC_OFFSET_FROM(_t,_nIns-1);
        //nj_dprintf("B_cond_chk target: 0x%08x offset: %d @0x%08x\n", _t, offs, _nIns-1);

        // optimistically check if this will fit in 24 bits
        if (_chk && isS24(offs>>2) && (_t != 0)) {
            underrunProtect(4);
            // recalculate the offset, because underrunProtect may have
            // moved _nIns to a new page
            offs = PC_OFFSET_FROM(_t,_nIns-1);
        }

        // Emit one of the following patterns:
        //
        //  --- Short branch. This can never be emitted if the branch target is not
        //      known.
        //          B(cc)   ±32MB
        //
        //  --- Long unconditional branch.
        //          LDR     PC, #lit
        //  lit:    #target
        //
        //  --- Long conditional branch. Note that conditional branches will never
        //      be patched, so the nPatchBranch function doesn't need to know where
        //      the literal pool is located.
        //          LDRcc   PC, #lit
        //          ; #lit is in the literal pool at _nSlot
        //
        //  --- Long conditional branch (if the slot isn't on the same page as the instruction).
        //          LDRcc   PC, #lit
        //          B       skip        ; Jump over the literal data.
        //  lit:    #target
        //  skip:   [...]

        if (isS24(offs>>2) && (_t != 0)) {
            // Short branch: a single B(cc) instruction.
            // The underrunProtect for this was done above (if required by _chk).
            *(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) );
            asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
        } else if (_c == AL) {
            // Long unconditional branch: load PC from an inline literal placed
            // immediately after the LDR (PC+8-4 = the next word).
            if(_chk) underrunProtect(8);
            *(--_nIns) = (NIns)(_t);
            *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 );
            asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
        } else if (PC_OFFSET_FROM(_nSlot, _nIns-1) > -0x1000) {
            // Long conditional branch: the literal pool slot is within LDR
            // range, so place the target there.
            if(_chk) underrunProtect(8);
            *(_nSlot++) = (NIns)(_t);
            offs = PC_OFFSET_FROM(_nSlot-1,_nIns-1);
            NanoAssert(offs < 0);
            *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFF) );
            asm_output("ldr%s %s, [%s, #-%d]", condNames[_c], gpn(PC), gpn(PC), -offs);
            NanoAssert(uintptr_t(_nIns)+8+offs == uintptr_t(_nSlot-1));
        } else {
            // Long conditional branch with the literal pool out of range: emit
            // the literal inline and an unconditional B to hop over it.
            if(_chk) underrunProtect(12);
            // Emit a pointer to the target as a literal in the instruction stream.
            *(--_nIns) = (NIns)(_t);
            // Emit a branch to skip over the literal. The PC value is 8 bytes
            // ahead of the executing instruction, so to branch two instructions
            // forward this must branch 8-8=0 bytes.
            *(--_nIns) = (NIns)( COND_AL | (0xA<<24) | 0x0 );
            // Emit the conditional branch.
            *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 );
            asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
        }
    }
|
|
|
|
/*
|
|
* VFP
|
|
*/
|
|
|
|
void
|
|
Assembler::asm_i2f(LInsp ins)
|
|
{
|
|
Register rr = deprecated_prepResultReg(ins, FpRegs);
|
|
Register srcr = findRegFor(ins->oprnd1(), GpRegs);
|
|
|
|
// todo: support int value in memory, as per x86
|
|
NanoAssert(isKnownReg(srcr));
|
|
|
|
FSITOD(rr, S14);
|
|
FMSR(S14, srcr);
|
|
}
|
|
|
|
void
|
|
Assembler::asm_u2f(LInsp ins)
|
|
{
|
|
Register rr = deprecated_prepResultReg(ins, FpRegs);
|
|
Register sr = findRegFor(ins->oprnd1(), GpRegs);
|
|
|
|
// todo: support int value in memory, as per x86
|
|
NanoAssert(isKnownReg(sr));
|
|
|
|
FUITOD(rr, S14);
|
|
FMSR(S14, sr);
|
|
}
|
|
|
|
void Assembler::asm_f2i(LInsp ins)
|
|
{
|
|
// where our result goes
|
|
Register rr = deprecated_prepResultReg(ins, GpRegs);
|
|
Register sr = findRegFor(ins->oprnd1(), FpRegs);
|
|
|
|
FMRS(rr, S14);
|
|
FTOSID(S14, sr);
|
|
}
|
|
|
|
void
|
|
Assembler::asm_fneg(LInsp ins)
|
|
{
|
|
LInsp lhs = ins->oprnd1();
|
|
Register rr = deprecated_prepResultReg(ins, FpRegs);
|
|
|
|
Register sr = ( !lhs->isInReg()
|
|
? findRegFor(lhs, FpRegs)
|
|
: lhs->deprecated_getReg() );
|
|
|
|
FNEGD(rr, sr);
|
|
}
|
|
|
|
void
|
|
Assembler::asm_fop(LInsp ins)
|
|
{
|
|
LInsp lhs = ins->oprnd1();
|
|
LInsp rhs = ins->oprnd2();
|
|
LOpcode op = ins->opcode();
|
|
|
|
NanoAssert(op >= LIR_fadd && op <= LIR_fdiv);
|
|
|
|
// rr = ra OP rb
|
|
|
|
Register rr = deprecated_prepResultReg(ins, FpRegs);
|
|
|
|
Register ra = findRegFor(lhs, FpRegs);
|
|
Register rb = (rhs == lhs) ? ra : findRegFor(rhs, FpRegs & ~rmask(ra));
|
|
|
|
// XXX special-case 1.0 and 0.0
|
|
|
|
switch (op)
|
|
{
|
|
case LIR_fadd: FADDD(rr,ra,rb); break;
|
|
case LIR_fsub: FSUBD(rr,ra,rb); break;
|
|
case LIR_fmul: FMULD(rr,ra,rb); break;
|
|
case LIR_fdiv: FDIVD(rr,ra,rb); break;
|
|
default: NanoAssert(0); break;
|
|
}
|
|
}
|
|
|
|
void
|
|
Assembler::asm_fcmp(LInsp ins)
|
|
{
|
|
LInsp lhs = ins->oprnd1();
|
|
LInsp rhs = ins->oprnd2();
|
|
LOpcode op = ins->opcode();
|
|
|
|
NanoAssert(op >= LIR_feq && op <= LIR_fge);
|
|
|
|
Register ra, rb;
|
|
findRegFor2(FpRegs, lhs, ra, FpRegs, rhs, rb);
|
|
|
|
int e_bit = (op != LIR_feq);
|
|
|
|
// do the comparison and get results loaded in ARM status register
|
|
FMSTAT();
|
|
FCMPD(ra, rb, e_bit);
|
|
}
|
|
|
|
/* Call this with targ set to 0 if the target is not yet known and the branch
|
|
* will be patched up later.
|
|
*/
|
|
    NIns*
    Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ)
    {
        LOpcode condop = cond->opcode();
        NanoAssert(cond->isCond());
        // Floating-point conditions require VFP support.
        NanoAssert(_config.arm_vfp || ((condop < LIR_feq) || (condop > LIR_fge)));

        // The old "never" condition code has special meaning on newer ARM cores,
        // so use "always" as a sensible default code.
        ConditionCode cc = AL;

        // Detect whether or not this is a floating-point comparison.
        bool fp_cond;

        // Because MUL can't set the V flag, we use SMULL and CMP to set the Z flag
        // to detect overflow on multiply. Thus, if cond points to a LIR_ov which
        // in turn points to a LIR_mul, we must be conditional on !Z, not V.
        if ((condop == LIR_ov) && (cond->oprnd1()->isop(LIR_mul))) {
            condop = LIR_eq;
            branchOnFalse = !branchOnFalse;
        }

        // Select the appropriate ARM condition code to match the LIR instruction.
        switch (condop)
        {
            // Floating-point conditions. Note that the VFP LT/LE conditions
            // require use of the unsigned condition codes, even though
            // floating-point comparisons are always signed.
            case LIR_feq: cc = EQ; fp_cond = true; break;
            case LIR_flt: cc = LO; fp_cond = true; break;
            case LIR_fle: cc = LS; fp_cond = true; break;
            case LIR_fge: cc = GE; fp_cond = true; break;
            case LIR_fgt: cc = GT; fp_cond = true; break;

            // Standard signed and unsigned integer comparisons.
            case LIR_eq: cc = EQ; fp_cond = false; break;
            case LIR_ov: cc = VS; fp_cond = false; break;
            case LIR_lt: cc = LT; fp_cond = false; break;
            case LIR_le: cc = LE; fp_cond = false; break;
            case LIR_gt: cc = GT; fp_cond = false; break;
            case LIR_ge: cc = GE; fp_cond = false; break;
            case LIR_ult: cc = LO; fp_cond = false; break;
            case LIR_ule: cc = LS; fp_cond = false; break;
            case LIR_ugt: cc = HI; fp_cond = false; break;
            case LIR_uge: cc = HS; fp_cond = false; break;

            // Default case for invalid or unexpected LIR instructions.
            default: cc = AL; fp_cond = false; break;
        }

        // Invert the condition if required.
        if (branchOnFalse)
            cc = OppositeCond(cc);

        // Ensure that we got a sensible condition code.
        NanoAssert((cc != AL) && (cc != NV));

        // Ensure that we don't hit floating-point LIR codes if VFP is disabled.
        NanoAssert(_config.arm_vfp || !fp_cond);

        // Emit a suitable branch instruction.
        B_cond(cc, targ);

        // Store the address of the branch instruction so that we can return it.
        // asm_[f]cmp will move _nIns so we must do this now.
        NIns *at = _nIns;

        // Emit the comparison that sets the flags the branch tests. Because
        // code is emitted backwards, the compare executes before the branch
        // at run time.
        if (_config.arm_vfp && fp_cond)
            asm_fcmp(cond);
        else
            asm_cmp(cond);

        return at;
    }
|
|
|
|
void
|
|
Assembler::asm_cmp(LIns *cond)
|
|
{
|
|
LOpcode condop = cond->opcode();
|
|
|
|
// LIR_ov recycles the flags set by arithmetic ops
|
|
if ((condop == LIR_ov))
|
|
return;
|
|
|
|
LInsp lhs = cond->oprnd1();
|
|
LInsp rhs = cond->oprnd2();
|
|
|
|
NanoAssert(lhs->isI32() && rhs->isI32());
|
|
|
|
// ready to issue the compare
|
|
if (rhs->isconst()) {
|
|
int c = rhs->imm32();
|
|
Register r = findRegFor(lhs, GpRegs);
|
|
if (c == 0 && cond->isop(LIR_eq)) {
|
|
TST(r, r);
|
|
} else {
|
|
asm_cmpi(r, c);
|
|
}
|
|
} else {
|
|
Register ra, rb;
|
|
findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);
|
|
CMP(ra, rb);
|
|
}
|
|
}
|
|
|
|
void
|
|
Assembler::asm_cmpi(Register r, int32_t imm)
|
|
{
|
|
if (imm < 0) {
|
|
if (imm > -256) {
|
|
ALUi(AL, cmn, 1, 0, r, -imm);
|
|
} else {
|
|
underrunProtect(4 + LD32_size);
|
|
CMP(r, IP);
|
|
asm_ld_imm(IP, imm);
|
|
}
|
|
} else {
|
|
if (imm < 256) {
|
|
ALUi(AL, cmp, 1, 0, r, imm);
|
|
} else {
|
|
underrunProtect(4 + LD32_size);
|
|
CMP(r, IP);
|
|
asm_ld_imm(IP, imm);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
Assembler::asm_fcond(LInsp ins)
|
|
{
|
|
// only want certain regs
|
|
Register r = deprecated_prepResultReg(ins, AllowableFlagRegs);
|
|
|
|
switch (ins->opcode()) {
|
|
case LIR_feq: SETEQ(r); break;
|
|
case LIR_flt: SETLO(r); break; // } note: VFP LT/LE operations require use of
|
|
case LIR_fle: SETLS(r); break; // } unsigned LO/LS condition codes!
|
|
case LIR_fge: SETGE(r); break;
|
|
case LIR_fgt: SETGT(r); break;
|
|
default: NanoAssert(0); break;
|
|
}
|
|
|
|
asm_fcmp(ins);
|
|
}
|
|
|
|
void
|
|
Assembler::asm_cond(LInsp ins)
|
|
{
|
|
Register r = deprecated_prepResultReg(ins, AllowableFlagRegs);
|
|
LOpcode op = ins->opcode();
|
|
|
|
switch(op)
|
|
{
|
|
case LIR_eq: SETEQ(r); break;
|
|
case LIR_lt: SETLT(r); break;
|
|
case LIR_le: SETLE(r); break;
|
|
case LIR_gt: SETGT(r); break;
|
|
case LIR_ge: SETGE(r); break;
|
|
case LIR_ult: SETLO(r); break;
|
|
case LIR_ule: SETLS(r); break;
|
|
case LIR_ugt: SETHI(r); break;
|
|
case LIR_uge: SETHS(r); break;
|
|
case LIR_ov:
|
|
// Because MUL can't set the V flag, we use SMULL and CMP to set
|
|
// the Z flag to detect overflow on multiply. Thus, if ins points
|
|
// to a LIR_ov which in turn points to a LIR_mul, we must be
|
|
// conditional on !Z, not V.
|
|
if (!ins->oprnd1()->isop(LIR_mul)) {
|
|
SETVS(r);
|
|
} else {
|
|
SETNE(r);
|
|
}
|
|
break;
|
|
default: NanoAssert(0); break;
|
|
}
|
|
asm_cmp(ins);
|
|
}
|
|
|
|
    // Emit code for a binary integer arithmetic/logic/shift LIR instruction.
    // Instructions are emitted backwards: the last macro invoked executes
    // first at run time.
    void
    Assembler::asm_arith(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LInsp lhs = ins->oprnd1();
        LInsp rhs = ins->oprnd2();

        RegisterMask allow = GpRegs;

        // We always need the result register and the first operand register.
        Register rr = deprecated_prepResultReg(ins, allow);

        // If this is the last use of lhs in reg, we can re-use the result reg.
        // Else, lhs already has a register assigned.
        Register ra = ( !lhs->isInReg()
                      ? findSpecificRegFor(lhs, rr)
                      : lhs->deprecated_getReg() );

        // Don't re-use the registers we've already allocated.
        NanoAssert(isKnownReg(rr));
        NanoAssert(isKnownReg(ra));
        allow &= ~rmask(rr);
        allow &= ~rmask(ra);

        // If the rhs is constant, we can use the instruction-specific code to
        // determine if the value can be encoded in an ARM instruction. If the
        // value cannot be encoded, it will be loaded into a register.
        //
        // Note that the MUL instruction can never take an immediate argument so
        // even if the argument is constant, we must allocate a register for it.
        //
        // Note: It is possible to use a combination of the barrel shifter and the
        // basic arithmetic instructions to generate constant multiplications.
        // However, LIR_mul is never invoked with a constant during
        // trace-tests.js so it is very unlikely to be worthwhile implementing it.
        if (rhs->isconst() && op != LIR_mul)
        {
            if ((op == LIR_add || op == LIR_iaddp) && lhs->isop(LIR_ialloc)) {
                // Add alloc+const. The result should be the address of the
                // allocated space plus a constant.
                Register rs = deprecated_prepResultReg(ins, allow);
                int d = findMemFor(lhs) + rhs->imm32();

                NanoAssert(isKnownReg(rs));
                asm_add_imm(rs, FP, d);
            }
            // NOTE(review): the alloc+const branch above does not return, so the
            // switch below emits a second add for the same instruction. Because
            // code is emitted backwards, the FP-relative add emitted above
            // executes *last* at run time and so determines the final result;
            // the fall-through looks redundant but apparently harmless — confirm
            // against upstream nanojit.

            int32_t imm32 = rhs->imm32();

            switch (op)
            {
                case LIR_iaddp: asm_add_imm(rr, ra, imm32); break;
                case LIR_add: asm_add_imm(rr, ra, imm32, 1); break;
                case LIR_sub: asm_sub_imm(rr, ra, imm32, 1); break;
                case LIR_and: asm_and_imm(rr, ra, imm32); break;
                case LIR_or: asm_orr_imm(rr, ra, imm32); break;
                case LIR_xor: asm_eor_imm(rr, ra, imm32); break;
                case LIR_lsh: LSLi(rr, ra, imm32); break;
                case LIR_rsh: ASRi(rr, ra, imm32); break;
                case LIR_ush: LSRi(rr, ra, imm32); break;

                default:
                    NanoAssertMsg(0, "Unsupported");
                    break;
            }

            // We've already emitted an instruction, so return now.
            return;
        }

        // The rhs is either a register or cannot be encoded as a constant.

        Register rb;
        if (lhs == rhs) {
            rb = ra;
        } else {
            rb = asm_binop_rhs_reg(ins);
            if (!isKnownReg(rb))
                rb = findRegFor(rhs, allow);
            allow &= ~rmask(rb);
        }
        NanoAssert(isKnownReg(rb));

        // "Should be zero" placeholder for unused register fields.
        const Register SBZ = (Register)0;
        switch (op)
        {
            case LIR_iaddp: ADDs(rr, ra, rb, 0); break;
            case LIR_add: ADDs(rr, ra, rb, 1); break;
            case LIR_sub: SUBs(rr, ra, rb, 1); break;
            case LIR_and: ANDs(rr, ra, rb, 0); break;
            case LIR_or: ORRs(rr, ra, rb, 0); break;
            case LIR_xor: EORs(rr, ra, rb, 0); break;

            case LIR_mul:
                // ARMv5 and earlier cores cannot do a MUL where the first operand
                // is also the result, so we need a special case to handle that.
                //
                // We try to use rb as the first operand by default because it is
                // common for (rr == ra) and is thus likely to be the most
                // efficient method.

                if ((_config.arm_arch > 5) || (rr != rb)) {
                    // IP is used to temporarily store the high word of the result from
                    // SMULL, so we make use of this to perform an overflow check, as
                    // ARM's MUL instruction can't set the overflow flag by itself.
                    // We can check for overflow using the following:
                    //   SMULL  rr, ip, ra, rb
                    //   CMP    ip, rr, ASR #31
                    // An explanation can be found in bug 521161. This sets Z if we did
                    // _not_ overflow, and clears it if we did.
                    ALUr_shi(AL, cmp, 1, SBZ, IP, rr, ASR_imm, 31);
                    SMULL(rr, IP, rb, ra);
                } else {
                    // _config.arm_arch is ARMv5 (or below) and rr == rb, so we must
                    // find a different way to encode the instruction.

                    // If possible, swap the arguments to avoid the restriction.
                    if (rr != ra) {
                        // We know that rr == rb, so this will be something like
                        // rX = rY * rX.
                        // Other than swapping ra and rb, this works in the same as
                        // as the ARMv6+ case, above.
                        ALUr_shi(AL, cmp, 1, SBZ, IP, rr, ASR_imm, 31);
                        SMULL(rr, IP, ra, rb);
                    } else {
                        // We're trying to do rX = rX * rX, but we also need to
                        // check for overflow so we would need two extra registers
                        // on ARMv5 and below. We achieve this by observing the
                        // following:
                        //  - abs(rX)*abs(rX) = rX*rX, so we force the input to be
                        //    positive to simplify the detection logic.
                        //  - Any argument greater than 0xffff will _always_
                        //    overflow, and we can easily check that the top 16
                        //    bits are zero.
                        //  - Any argument lower than (or equal to) 0xffff that
                        //    also overflows is guaranteed to set output bit 31.
                        //
                        // Thus, we know we have _not_ overflowed if:
                        //   abs(rX)&0xffff0000 == 0 AND result[31] == 0
                        //
                        // The following instruction sequence will be emitted:
                        // MOVS     IP, rX      // Put abs(rX) into IP.
                        // RSBMI    IP, IP, #0  // ...
                        // MUL      rX, IP, IP  // Do the actual multiplication.
                        // MOVS     IP, IP, LSR #16 // Check that abs(arg)<=0xffff
                        // CMPEQ    IP, rX, ASR #31 // Check that result[31] == 0

                        NanoAssert(rr != IP);

                        // Remember: emitted backwards, so read bottom-up for
                        // the run-time order shown in the comment above.
                        ALUr_shi(AL, cmp, 1, SBZ, rr, rr, ASR_imm, 31);
                        ALUr_shi(AL, mov, 1, IP, SBZ, IP, LSR_imm, 16);
                        MUL(rr, IP, IP);
                        ALUi(MI, rsb, 0, IP, IP, 0);
                        ALUr(AL, mov, 1, IP, ra, ra);
                    }
                }
                break;

            // The shift operations need a mask to match the JavaScript
            // specification because the ARM architecture allows a greater shift
            // range than JavaScript.
            case LIR_lsh:
                LSL(rr, ra, IP);
                ANDi(IP, rb, 0x1f);
                break;
            case LIR_rsh:
                ASR(rr, ra, IP);
                ANDi(IP, rb, 0x1f);
                break;
            case LIR_ush:
                LSR(rr, ra, IP);
                ANDi(IP, rb, 0x1f);
                break;
            default:
                NanoAssertMsg(0, "Unsupported");
                break;
        }
    }
|
|
|
|
void
|
|
Assembler::asm_neg_not(LInsp ins)
|
|
{
|
|
LOpcode op = ins->opcode();
|
|
Register rr = deprecated_prepResultReg(ins, GpRegs);
|
|
|
|
LIns* lhs = ins->oprnd1();
|
|
// If this is the last use of lhs in reg, we can re-use result reg.
|
|
// Else, lhs already has a register assigned.
|
|
Register ra = ( !lhs->isInReg()
|
|
? findSpecificRegFor(lhs, rr)
|
|
: lhs->deprecated_getReg() );
|
|
NanoAssert(isKnownReg(ra));
|
|
|
|
if (op == LIR_not)
|
|
MVN(rr, ra);
|
|
else
|
|
RSBS(rr, ra);
|
|
}
|
|
|
|
    // Emit a 32-bit-or-narrower load for the given LIR load instruction.
    // When the displacement does not fit the instruction's addressing mode,
    // base+d is first computed into IP (emitted backwards, so the add runs
    // before the load at run time) and the load uses a zero offset.
    void
    Assembler::asm_load32(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LIns* base = ins->oprnd1();
        int d = ins->disp();

        Register rr = deprecated_prepResultReg(ins, GpRegs);
        Register ra = getBaseReg(base, d, GpRegs);

        switch (op) {
            case LIR_ldzb:
            case LIR_ldcb:
                // Zero-extending byte load; LDRB takes a 12-bit offset.
                if (isU12(-d) || isU12(d)) {
                    LDRB(rr, ra, d);
                } else {
                    LDRB(rr, IP, 0);
                    asm_add_imm(IP, ra, d);
                }
                return;
            case LIR_ldzs:
            case LIR_ldcs:
                // Some ARM machines require 2-byte alignment here.
                // Similar to the ldcb/ldzb case, but the max offset is smaller.
                if (isU8(-d) || isU8(d)) {
                    LDRH(rr, ra, d);
                } else {
                    LDRH(rr, IP, 0);
                    asm_add_imm(IP, ra, d);
                }
                return;
            case LIR_ld:
            case LIR_ldc:
                // Some ARM machines require 4-byte alignment here.
                if (isU12(-d) || isU12(d)) {
                    LDR(rr, ra, d);
                } else {
                    LDR(rr, IP, 0);
                    asm_add_imm(IP, ra, d);
                }
                return;
            case LIR_ldsb:
            case LIR_ldcsb:
                // Sign-extending byte load; LDRSB only has an 8-bit offset.
                if (isU8(-d) || isU8(d)) {
                    LDRSB(rr, ra, d);
                } else {
                    LDRSB(rr, IP, 0);
                    asm_add_imm(IP, ra, d);
                }
                return;
            case LIR_ldss:
            case LIR_ldcss:
                // Sign-extending half-word load; LDRSH only has an 8-bit offset.
                if (isU8(-d) || isU8(d)) {
                    LDRSH(rr, ra, d);
                } else {
                    LDRSH(rr, IP, 0);
                    asm_add_imm(IP, ra, d);
                }
                return;
            default:
                NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
                return;
        }
    }
|
|
|
|
    // Emit a conditional move: result = condval ? iftrue : iffalse.
    // Emitted backwards, so at run time the compare (asm_cmp below) executes
    // first, then rr is loaded with iftrue, and finally a conditional MOV
    // overwrites rr with iffalse when the condition does NOT hold — hence the
    // inverted condition codes in the switch.
    void
    Assembler::asm_cmov(LInsp ins)
    {
        LIns* condval = ins->oprnd1();
        LIns* iftrue = ins->oprnd2();
        LIns* iffalse = ins->oprnd3();

        NanoAssert(condval->isCmp());
        NanoAssert(ins->opcode() == LIR_cmov && iftrue->isI32() && iffalse->isI32());

        const Register rr = deprecated_prepResultReg(ins, GpRegs);

        // this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
        // (This is true on Intel, is it true on all architectures?)
        const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr));
        switch (condval->opcode()) {
            // note that these are all opposites...
            case LIR_eq: MOVNE(rr, iffalsereg); break;
            case LIR_lt: MOVGE(rr, iffalsereg); break;
            case LIR_le: MOVGT(rr, iffalsereg); break;
            case LIR_gt: MOVLE(rr, iffalsereg); break;
            case LIR_ge: MOVLT(rr, iffalsereg); break;
            case LIR_ult: MOVHS(rr, iffalsereg); break;
            case LIR_ule: MOVHI(rr, iffalsereg); break;
            case LIR_ugt: MOVLS(rr, iffalsereg); break;
            case LIR_uge: MOVLO(rr, iffalsereg); break;
            case LIR_ov:
                // Because MUL can't set the V flag, we use SMULL and CMP to set
                // the Z flag to detect overflow on multiply. Thus, if ins points
                // to a LIR_ov which in turn points to a LIR_mul, we must be
                // conditional on !Z, not V.
                if (!condval->oprnd1()->isop(LIR_mul)) {
                    MOVVC(rr, iffalsereg);
                } else {
                    MOVEQ(rr, iffalsereg);
                }
                break;
            default: debug_only( NanoAssert(0) ); break;
        }
        /*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
        asm_cmp(condval);
    }
|
|
|
|
void
|
|
Assembler::asm_qhi(LInsp ins)
|
|
{
|
|
Register rr = deprecated_prepResultReg(ins, GpRegs);
|
|
LIns *q = ins->oprnd1();
|
|
int d = findMemFor(q);
|
|
LDR(rr, FP, d+4);
|
|
}
|
|
|
|
void
|
|
Assembler::asm_qlo(LInsp ins)
|
|
{
|
|
Register rr = deprecated_prepResultReg(ins, GpRegs);
|
|
LIns *q = ins->oprnd1();
|
|
int d = findMemFor(q);
|
|
LDR(rr, FP, d);
|
|
}
|
|
|
|
    // Materialize an incoming parameter (kind 0) or a callee-saved register
    // parameter (kind 1) into the instruction's result register.
    void
    Assembler::asm_param(LInsp ins)
    {
        uint32_t a = ins->paramArg();
        uint32_t kind = ins->paramKind();
        if (kind == 0) {
            // ordinary param
            AbiKind abi = _thisfrag->lirbuf->abi;
            // Number of parameters this ABI passes in registers.
            uint32_t abi_regcount = abi == ABI_CDECL ? 4 : abi == ABI_FASTCALL ? 2 : abi == ABI_THISCALL ? 1 : 0;
            if (a < abi_regcount) {
                // incoming arg in register
                deprecated_prepResultReg(ins, rmask(argRegs[a]));
            } else {
                // incoming arg is on stack, and FP points nearby (see genPrologue)
                Register r = deprecated_prepResultReg(ins, GpRegs);
                // +8 assumes the first stack argument lives 8 bytes above FP
                // — confirm against genPrologue's frame layout.
                int d = (a - abi_regcount) * sizeof(intptr_t) + 8;
                LDR(r, FP, d);
            }
        } else {
            // saved param
            deprecated_prepResultReg(ins, rmask(savedRegs[a]));
        }
    }
|
|
|
|
void
|
|
Assembler::asm_int(LInsp ins)
|
|
{
|
|
Register rr = deprecated_prepResultReg(ins, GpRegs);
|
|
asm_ld_imm(rr, ins->imm32());
|
|
}
|
|
|
|
    // Emit a return sequence for LIR_ret/LIR_fret. Remember that code is
    // emitted backwards: at run time the return value is put in place first,
    // then the stack frame is popped and the epilogue runs.
    void
    Assembler::asm_ret(LIns *ins)
    {
        genEpilogue();

        // NB: our contract with genEpilogue is actually that the return value
        // we are intending for R0 is currently IP, not R0. This has to do with
        // the strange dual-nature of the patchable jump in a side-exit. See
        // nPatchBranch.

        MOV(IP, R0);

        // Pop the stack frame.
        MOV(SP,FP);

        releaseRegisters();
        assignSavedRegs();
        LIns *value = ins->oprnd1();
        if (ins->isop(LIR_ret)) {
            // Integer return: value goes in R0.
            findSpecificRegFor(value, R0);
        }
        else {
            NanoAssert(ins->isop(LIR_fret));
            if (_config.arm_vfp) {
                // Double return with VFP: move the double into R0:R1.
                Register reg = findRegFor(value, FpRegs);
                FMRRD(R0, R1, reg);
            } else {
                // Soft-float double return: the qjoin halves go in R0/R1.
                NanoAssert(value->isop(LIR_qjoin));
                findSpecificRegFor(value->oprnd1(), R0); // lo
                findSpecificRegFor(value->oprnd2(), R1); // hi
            }
        }
    }
|
|
|
|
void
|
|
Assembler::asm_jtbl(LIns* ins, NIns** table)
|
|
{
|
|
Register indexreg = findRegFor(ins->oprnd1(), GpRegs);
|
|
Register tmp = registerAllocTmp(GpRegs & ~rmask(indexreg));
|
|
LDR_scaled(PC, tmp, indexreg, 2); // LDR PC, [tmp + index*4]
|
|
asm_ld_imm(tmp, (int32_t)table); // tmp = #table
|
|
}
|
|
|
|
    // Swap the active (main) and exit code chunks, lazily allocating the exit
    // chunk and its literal-pool cursor on first use.
    void Assembler::swapCodeChunks() {
        if (!_nExitIns)
            codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
        if (!_nExitSlot)
            _nExitSlot = exitStart;
        SWAP(NIns*, _nIns, _nExitIns);
        SWAP(NIns*, _nSlot, _nExitSlot);        // this one is ARM-specific
        SWAP(NIns*, codeStart, exitStart);
        SWAP(NIns*, codeEnd, exitEnd);
        verbose_only( SWAP(size_t, codeBytes, exitBytes); )
    }
|
|
|
|
}
|
|
#endif /* FEATURE_NANOJIT */
|