Bug 913282: More Float32 operators: TruncateToInt32; p=dougc,bbouvier, r=jonco,jandem

dougc for the ARM parts, bbouvier for the rest.

--HG--
extra : rebase_source : ab8ab9bd043cb04e6a98d793688298c0e5c3a7e7
This commit is contained in:
Benjamin Bouvier 2013-07-18 16:45:16 -07:00
parent 97ce6e6236
commit 16bbccbc9c
23 changed files with 270 additions and 21 deletions

View File

@ -269,7 +269,8 @@ private:
OP_INT3 = 0xCC,
OP_GROUP2_Ev1 = 0xD1,
OP_GROUP2_EvCL = 0xD3,
OP_FPU6 = 0xDD,
OP_FPU6 = 0xDD,
OP_FLD32 = 0xD9,
OP_CALL_rel32 = 0xE8,
OP_JMP_rel32 = 0xE9,
PRE_SSE_F2 = 0xF2,
@ -723,6 +724,11 @@ public:
spew("fld %s0x%x(%s)", PRETTY_PRINT_OFFSET(offset), nameIReg(base));
m_formatter.oneByteOp(OP_FPU6, FPU6_OP_FLD, base, offset);
}
// Emits an x87 load of the memory operand at offset(base), encoded with the
// OP_FLD32 opcode byte and the FPU6_OP_FLD opcode extension. fld_m uses the
// same extension with the OP_FPU6 opcode byte instead.
// NOTE(review): the spew text below is the same "fld" mnemonic that fld_m
// prints, so the two encodings cannot be told apart in spew output.
void fld32_m(int offset, RegisterID base)
{
spew("fld %s0x%x(%s)", PRETTY_PRINT_OFFSET(offset), nameIReg(base));
m_formatter.oneByteOp(OP_FLD32, FPU6_OP_FLD, base, offset);
}
void fisttp_m(int offset, RegisterID base)
{
spew("fisttp %s0x%x(%s)", PRETTY_PRINT_OFFSET(offset), nameIReg(base));
@ -2408,14 +2414,19 @@ public:
#if WTF_CPU_X86_64
// Truncating double -> 64-bit integer conversion, register to register.
void cvttsd2sq_rr(XMMRegisterID src, RegisterID dst)
{
    // The "sq" suffix only exists in this API, to tell the 64-bit form apart
    // from the 32-bit one. Assemblers spell both "cvttsd2si" and select the
    // width from the destination register name, so that is what gets spewed.
    spew("cvttsd2si %s, %s", nameFPReg(src), nameIReg(dst));

    m_formatter.prefix(PRE_SSE_F2);
    m_formatter.twoByteOp64(OP2_CVTTSD2SI_GdWsd, dst, static_cast<RegisterID>(src));
}
// Truncating float32 -> 64-bit integer conversion, register to register.
void cvttss2sq_rr(XMMRegisterID src, RegisterID dst)
{
    // As with cvttsd2sq_rr, the spewed mnemonic is the assembler spelling
    // ("cvttss2si"), not this method's name.
    spew("cvttss2si %s, %s", nameFPReg(src), nameIReg(dst));

    // Same opcode constant as the double variant; the F3 prefix (rather than
    // F2) is what distinguishes this encoding from cvttsd2sq_rr.
    m_formatter.prefix(PRE_SSE_F3);
    m_formatter.twoByteOp64(OP2_CVTTSD2SI_GdWsd, dst, static_cast<RegisterID>(src));
}
#endif
void unpcklps_rr(XMMRegisterID src, XMMRegisterID dst)

View File

@ -2774,6 +2774,24 @@ class LTruncateDToInt32 : public LInstructionHelper<1, 1, 1>
}
};
// Convert a float32 to a truncated int32.
// Input: floating-point register
// Output: 32-bit integer
// Temp: one definition, exposed through tempFloat(). Lowering passes either a
// float temp or LDefinition::BogusTemp(), depending on the platform.
class LTruncateFToInt32 : public LInstructionHelper<1, 1, 1>
{
public:
LIR_HEADER(TruncateFToInt32)
// |in| becomes operand 0 and |temp| becomes temp 0.
LTruncateFToInt32(const LAllocation &in, const LDefinition &temp) {
setOperand(0, in);
setTemp(0, temp);
}
// Returns temp 0, i.e. the definition given to the constructor.
const LDefinition *tempFloat() {
return getTemp(0);
}
};
// Convert an integer hosted on one definition to a string with a function call.
class LIntToString : public LInstructionHelper<1, 1, 0>
{

View File

@ -130,6 +130,7 @@
_(DoubleToInt32) \
_(Float32ToInt32) \
_(TruncateDToInt32) \
_(TruncateFToInt32) \
_(IntToString) \
_(DoubleToString) \
_(Start) \

View File

@ -1736,6 +1736,9 @@ LIRGenerator::visitTruncateToInt32(MTruncateToInt32 *truncate)
case MIRType_Double:
return lowerTruncateDToInt32(truncate);
case MIRType_Float32:
return lowerTruncateFToInt32(truncate);
default:
// Objects might be effectful.
// Strings are complicated - we don't handle them yet.

View File

@ -800,9 +800,9 @@ IonBuilder::inlineMathImul(CallInfo &callInfo)
if (returnType != MIRType_Int32)
return InliningStatus_NotInlined;
if (!IsNumberType(callInfo.getArg(0)->type()) || callInfo.getArg(0)->type() == MIRType_Float32)
if (!IsNumberType(callInfo.getArg(0)->type()))
return InliningStatus_NotInlined;
if (!IsNumberType(callInfo.getArg(1)->type()) || callInfo.getArg(1)->type() == MIRType_Float32)
if (!IsNumberType(callInfo.getArg(1)->type()))
return InliningStatus_NotInlined;
callInfo.unwrapArgs();

View File

@ -2953,6 +2953,11 @@ class MTruncateToInt32 : public MUnaryInstruction
void computeRange();
bool isOperandTruncated(size_t index) const;
# ifdef DEBUG
// Unconditionally reports this instruction's Float32 operand use as
// consistent. Only compiled when DEBUG is defined.
// NOTE(review): presumably consulted by a debug-only Float32 consistency
// check; confirm against the caller.
bool isConsistentFloat32Use() const {
return true;
}
#endif
};
// Converts any type to a string

View File

@ -340,13 +340,6 @@ BitwisePolicy::adjustInputs(MInstruction *ins)
if (in->type() == MIRType_Object || in->type() == MIRType_String)
in = boxAt(ins, in);
if (in->type() == MIRType_Float32) {
MToDouble *replace = MToDouble::New(in);
ins->block()->insertBefore(ins, replace);
ins->replaceOperand(i, replace);
in = replace;
}
MInstruction *replace = MTruncateToInt32::New(in);
ins->block()->insertBefore(ins, replace);
ins->replaceOperand(i, replace);
@ -652,11 +645,6 @@ StoreTypedArrayPolicy::adjustValueInput(MInstruction *ins, int arrayType,
case ScalarTypeRepresentation::TYPE_INT32:
case ScalarTypeRepresentation::TYPE_UINT32:
if (value->type() != MIRType_Int32) {
// Workaround for bug 915903
if (value->type() == MIRType_Float32) {
value = MToDouble::New(value);
ins->block()->insertBefore(ins, value->toInstruction());
}
value = MTruncateToInt32::New(value);
ins->block()->insertBefore(ins, value->toInstruction());
}

View File

@ -1213,6 +1213,12 @@ CodeGeneratorARM::visitTruncateDToInt32(LTruncateDToInt32 *ins)
return emitTruncateDouble(ToFloatRegister(ins->input()), ToRegister(ins->output()));
}
bool
CodeGeneratorARM::visitTruncateFToInt32(LTruncateFToInt32 *ins)
{
    // ARM has no platform-specific handling here: resolve the allocated
    // registers and hand them to the shared float32 truncation helper.
    FloatRegister input = ToFloatRegister(ins->input());
    Register output = ToRegister(ins->output());
    return emitTruncateFloat32(input, output);
}
static const uint32_t FrameSizes[] = { 128, 256, 512, 1024 };
FrameSizeClass

View File

@ -105,6 +105,7 @@ class CodeGeneratorARM : public CodeGeneratorShared
virtual bool visitFloor(LFloor *lir);
virtual bool visitRound(LRound *lir);
virtual bool visitTruncateDToInt32(LTruncateDToInt32 *ins);
virtual bool visitTruncateFToInt32(LTruncateFToInt32 *ins);
// Out of line visitors.
bool visitOutOfLineBailout(OutOfLineBailout *ool);

View File

@ -521,6 +521,15 @@ LIRGeneratorARM::lowerTruncateDToInt32(MTruncateToInt32 *ins)
return define(new LTruncateDToInt32(useRegister(opd), LDefinition::BogusTemp()), ins);
}
bool
LIRGeneratorARM::lowerTruncateFToInt32(MTruncateToInt32 *ins)
{
    // Callers must only route Float32 inputs here.
    JS_ASSERT(ins->input()->type() == MIRType_Float32);

    // No real temp is requested on ARM, so the temp slot gets a bogus one.
    LTruncateFToInt32 *lir =
        new LTruncateFToInt32(useRegister(ins->input()), LDefinition::BogusTemp());
    return define(lir, ins);
}
bool
LIRGeneratorARM::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins)
{

View File

@ -61,6 +61,7 @@ class LIRGeneratorARM : public LIRGeneratorShared
bool lowerConstantDouble(double d, MInstruction *ins);
bool lowerConstantFloat32(float d, MInstruction *ins);
bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
bool lowerTruncateFToInt32(MTruncateToInt32 *ins);
bool lowerDivI(MDiv *div);
bool lowerModI(MMod *mod);
bool lowerMulI(MMul *mul, MDefinition *lhs, MDefinition *rhs);

View File

@ -485,7 +485,8 @@ class MacroAssemblerARMCompat : public MacroAssemblerARM
enum Result {
GENERAL,
DOUBLE
DOUBLE,
FLOAT
};
MacroAssemblerARMCompat()
@ -605,6 +606,9 @@ class MacroAssemblerARMCompat : public MacroAssemblerARM
// Pushes a general-purpose register via ma_push.
void push(const Register &reg) {
ma_push(reg);
}
// Pushes a floating-point register: wraps it in a VFPRegister and forwards
// to ma_vpush. Counterpart of pop(const FloatRegister &).
void push(const FloatRegister &reg) {
ma_vpush(VFPRegister(reg));
}
void pushWithPadding(const Register &reg, const Imm32 extraSpace) {
Imm32 totSpace = Imm32(extraSpace.value + 4);
ma_dtr(IsStore, sp, totSpace, reg, PreIndex);
@ -620,6 +624,9 @@ class MacroAssemblerARMCompat : public MacroAssemblerARM
// Pops into a general-purpose register via ma_pop.
void pop(const Register &reg) {
ma_pop(reg);
}
// Pops into a floating-point register: wraps it in a VFPRegister and
// forwards to ma_vpop. Counterpart of push(const FloatRegister &).
void pop(const FloatRegister &reg) {
ma_vpop(VFPRegister(reg));
}
void popN(const Register &reg, Imm32 extraSpace) {
Imm32 totSpace = Imm32(extraSpace.value + 4);

View File

@ -676,10 +676,11 @@ class OutOfLineTruncateSlow : public OutOfLineCodeBase<CodeGeneratorShared>
{
FloatRegister src_;
Register dest_;
bool needFloat32Conversion_;
public:
OutOfLineTruncateSlow(FloatRegister src, Register dest)
: src_(src), dest_(dest)
OutOfLineTruncateSlow(FloatRegister src, Register dest, bool needFloat32Conversion = false)
: src_(src), dest_(dest), needFloat32Conversion_(needFloat32Conversion)
{ }
bool accept(CodeGeneratorShared *codegen) {
@ -691,6 +692,10 @@ class OutOfLineTruncateSlow : public OutOfLineCodeBase<CodeGeneratorShared>
Register dest() const {
return dest_;
}
bool needFloat32Conversion() const {
return needFloat32Conversion_;
}
};
OutOfLineCode *
@ -714,6 +719,18 @@ CodeGeneratorShared::emitTruncateDouble(const FloatRegister &src, const Register
return true;
}
// Emits the inline float32 -> int32 truncation of |src| into |dest|, backed by
// an out-of-line slow path. Returns false if the out-of-line code could not be
// registered, true otherwise.
bool
CodeGeneratorShared::emitTruncateFloat32(const FloatRegister &src, const Register &dest)
{
// The third argument sets needFloat32Conversion on the out-of-line path.
OutOfLineTruncateSlow *ool = new OutOfLineTruncateSlow(src, dest, true);
if (!addOutOfLineCode(ool))
return false;
// Inline attempt; the out-of-line entry is passed as the branch target.
masm.branchTruncateFloat32(src, dest, ool->entry());
// The rejoin label is bound right after the inline attempt.
masm.bind(ool->rejoin());
return true;
}
bool
CodeGeneratorShared::visitOutOfLineTruncateSlow(OutOfLineTruncateSlow *ool)
{
@ -722,6 +739,11 @@ CodeGeneratorShared::visitOutOfLineTruncateSlow(OutOfLineTruncateSlow *ool)
saveVolatile(dest);
if (ool->needFloat32Conversion()) {
masm.push(src);
masm.convertFloatToDouble(src, src);
}
masm.setupUnalignedABICall(1, dest);
masm.passABIArg(src);
if (gen->compilingAsmJS())
@ -732,6 +754,9 @@ CodeGeneratorShared::visitOutOfLineTruncateSlow(OutOfLineTruncateSlow *ool)
restoreVolatile(dest);
if (ool->needFloat32Conversion())
masm.pop(src);
masm.jump(ool->rejoin());
return true;
}

View File

@ -285,6 +285,7 @@ class CodeGeneratorShared : public LInstructionVisitor
OutOfLineCode *oolTruncateDouble(const FloatRegister &src, const Register &dest);
bool emitTruncateDouble(const FloatRegister &src, const Register &dest);
bool emitTruncateFloat32(const FloatRegister &src, const Register &dest);
void emitPreBarrier(Register base, const LAllocation *index, MIRType type);
void emitPreBarrier(Address address, MIRType type);

View File

@ -294,3 +294,13 @@ LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32 *ins)
LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat();
return define(new LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
}
bool
LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins)
{
    // Callers must only route Float32 inputs here.
    JS_ASSERT(ins->input()->type() == MIRType_Float32);

    // A real float temp is only requested when SSE3 is unavailable; with SSE3
    // the temp slot is filled with a bogus definition.
    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat();

    LTruncateFToInt32 *lir = new LTruncateFToInt32(useRegister(ins->input()), maybeTemp);
    return define(lir, ins);
}

View File

@ -48,6 +48,7 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
bool lowerConstantDouble(double d, MInstruction *ins);
bool lowerConstantFloat32(float d, MInstruction *ins);
bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
bool lowerTruncateFToInt32(MTruncateToInt32 *ins);
};
} // namespace jit

View File

@ -665,6 +665,9 @@ class Assembler : public AssemblerX86Shared
// Forwards to the base assembler's cvttsd2sq_rr using the raw register codes.
void cvttsd2sq(const FloatRegister &src, const Register &dest) {
masm.cvttsd2sq_rr(src.code(), dest.code());
}
// Float32 counterpart of cvttsd2sq: forwards to the base assembler's
// cvttss2sq_rr using the raw register codes.
void cvttss2sq(const FloatRegister &src, const Register &dest) {
masm.cvttss2sq_rr(src.code(), dest.code());
}
// Forwards to the base assembler's cvtsq2sd_rr using the raw register codes.
void cvtsq2sd(const Register &src, const FloatRegister &dest) {
masm.cvtsq2sd_rr(src.code(), dest.code());
}

View File

@ -568,3 +568,15 @@ CodeGeneratorX64::visitTruncateDToInt32(LTruncateDToInt32 *ins)
// call a stub if it fails.
return emitTruncateDouble(input, output);
}
bool
CodeGeneratorX64::visitTruncateFToInt32(LTruncateFToInt32 *ins)
{
    // branchTruncateFloat32 on x64 is built on cvttss2sq, so, unlike on x86,
    // the inline attempt should succeed for most floats; the shared helper's
    // stub call covers the remaining cases.
    return emitTruncateFloat32(ToFloatRegister(ins->input()), ToRegister(ins->output()));
}

View File

@ -50,6 +50,7 @@ class CodeGeneratorX64 : public CodeGeneratorX86Shared
bool visitCompareV(LCompareV *lir);
bool visitCompareVAndBranch(LCompareVAndBranch *lir);
bool visitTruncateDToInt32(LTruncateDToInt32 *ins);
bool visitTruncateFToInt32(LTruncateFToInt32 *ins);
bool visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic *ins);
bool visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic *ins);
bool visitAsmJSLoadHeap(LAsmJSLoadHeap *ins);

View File

@ -1050,6 +1050,15 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
movl(dest, dest); // Zero upper 32-bits.
}
// Truncates the float32 in |src| to an integer in |dest|, jumping to |fail|
// when the 64-bit conversion did not succeed. On success only the low 32 bits
// of the result are kept.
void branchTruncateFloat32(const FloatRegister &src, const Register &dest, Label *fail) {
cvttss2sq(src, dest);
// Same trick as for Doubles
// A failed conversion leaves 0x8000000000000000 in |dest| (per the Intel
// manual's "integer indefinite" value). Comparing against 1 computes
// dest - 1, which sets the overflow flag only for that value.
cmpq(dest, Imm32(1));
j(Assembler::Overflow, fail);
movl(dest, dest); // Zero upper 32-bits.
}
Condition testInt32Truthy(bool truthy, const ValueOperand &operand) {
testl(operand.valueReg(), operand.valueReg());

View File

@ -268,6 +268,16 @@ class Assembler : public AssemblerX86Shared
return leal(src, dest);
}
// Emits fld32_m for |dest|. Only base+displacement memory operands are
// supported; any other operand kind is treated as unreachable.
void fld32(const Operand &dest) {
    if (dest.kind() == Operand::MEM_REG_DISP) {
        masm.fld32_m(dest.disp(), dest.base());
    } else {
        MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
    }
}
// Forwards to the base assembler's cmpl_ir, passing the word value first and
// the register code second.
void cmpl(const Register src, ImmWord ptr) {
masm.cmpl_ir(ptr.value, src.code());
}

View File

@ -26,6 +26,9 @@ using namespace js::jit;
using mozilla::DebugOnly;
using mozilla::DoubleExponentBias;
using mozilla::DoubleExponentShift;
using mozilla::FloatExponentBias;
using mozilla::FloatExponentShift;
using mozilla::FloatExponentBits;
using JS::GenericNaN;
CodeGeneratorX86::CodeGeneratorX86(MIRGenerator *gen, LIRGraph *graph, MacroAssembler *masm)
@ -790,6 +793,23 @@ class OutOfLineTruncate : public OutOfLineCodeBase<CodeGeneratorX86>
}
};
// Out-of-line code record for an LTruncateFToInt32 on x86. It only carries
// the LIR instruction; code generation is done by
// CodeGeneratorX86::visitOutOfLineTruncateFloat32, reached through accept().
class OutOfLineTruncateFloat32 : public OutOfLineCodeBase<CodeGeneratorX86>
{
    LTruncateFToInt32 *ins_;

  public:
    // explicit: a raw LTruncateFToInt32 pointer must never convert silently
    // into an out-of-line code object.
    explicit OutOfLineTruncateFloat32(LTruncateFToInt32 *ins)
      : ins_(ins)
    { }

    // Dispatches to the x86 code generator's visitor for this record.
    bool accept(CodeGeneratorX86 *codegen) {
        return codegen->visitOutOfLineTruncateFloat32(this);
    }

    // The LIR instruction this out-of-line path belongs to.
    LTruncateFToInt32 *ins() const {
        return ins_;
    }
};
} // namespace jit
} // namespace js
@ -808,6 +828,21 @@ CodeGeneratorX86::visitTruncateDToInt32(LTruncateDToInt32 *ins)
return true;
}
// Emits the inline float32 -> int32 truncation for x86, with an
// OutOfLineTruncateFloat32 fallback generated by visitOutOfLineTruncateFloat32.
// Returns false if the out-of-line code could not be registered.
bool
CodeGeneratorX86::visitTruncateFToInt32(LTruncateFToInt32 *ins)
{
FloatRegister input = ToFloatRegister(ins->input());
Register output = ToRegister(ins->output());
// The out-of-line record keeps |ins| so the fallback can read the input,
// the output and the temp from it.
OutOfLineTruncateFloat32 *ool = new OutOfLineTruncateFloat32(ins);
if (!addOutOfLineCode(ool))
return false;
// Inline attempt; the out-of-line entry is passed as the branch target.
masm.branchTruncateFloat32(input, output, ool->entry());
masm.bind(ool->rejoin());
return true;
}
bool
CodeGeneratorX86::visitOutOfLineTruncate(OutOfLineTruncate *ool)
{
@ -895,3 +930,92 @@ CodeGeneratorX86::visitOutOfLineTruncate(OutOfLineTruncate *ool)
masm.jump(ool->rejoin());
return true;
}
// Out-of-line fallback for visitTruncateFToInt32 on x86. It first tries a
// wider conversion (x87 fisttp when SSE3 is available, otherwise a biased
// SSE conversion), and as a last resort calls js::ToInt32 on the input
// converted to a double. Every path ends by jumping to ool->rejoin().
bool
CodeGeneratorX86::visitOutOfLineTruncateFloat32(OutOfLineTruncateFloat32 *ool)
{
LTruncateFToInt32 *ins = ool->ins();
FloatRegister input = ToFloatRegister(ins->input());
Register output = ToRegister(ins->output());
Label fail;
if (Assembler::HasSSE3()) {
// Reserve 64 bits of stack for the float32 (not just 32): the same slot
// receives the integer written back by fisttp below, which needs 64 bits.
masm.subl(Imm32(sizeof(uint64_t)), esp);
masm.storeFloat(input, Operand(esp, 0));
static const uint32_t EXPONENT_MASK = FloatExponentBits;
static const uint32_t EXPONENT_SHIFT = FloatExponentShift;
// Integers are still 64 bits long, so we can still test for an exponent > 63.
static const uint32_t TOO_BIG_EXPONENT = (FloatExponentBias + 63) << EXPONENT_SHIFT;
// Check exponent to avoid fp exceptions.
// |output| is used as scratch for the masked exponent bits here.
Label failPopFloat;
masm.movl(Operand(esp, 0), output);
masm.and32(Imm32(EXPONENT_MASK), output);
masm.branch32(Assembler::GreaterThanOrEqual, output, Imm32(TOO_BIG_EXPONENT), &failPopFloat);
// Load the float32, then truncate it to an integer stored back into the
// 64-bit stack slot reserved above.
masm.fld32(Operand(esp, 0));
masm.fisttp(Operand(esp, 0));
// Load low word, pop 64bits and jump back.
masm.movl(Operand(esp, 0), output);
masm.addl(Imm32(sizeof(uint64_t)), esp);
masm.jump(ool->rejoin());
// Exponent too big: release the stack slot before taking the slow path.
masm.bind(&failPopFloat);
masm.addl(Imm32(sizeof(uint64_t)), esp);
masm.jump(&fail);
} else {
FloatRegister temp = ToFloatRegister(ins->tempFloat());
// Try to convert float32 representing integers within 2^32 of a signed
// integer, by adding/subtracting 2^32 and then trying to convert to int32.
// This has to be an exact conversion, as otherwise the truncation works
// incorrectly on the modified value.
masm.xorps(ScratchFloatReg, ScratchFloatReg);
masm.ucomiss(input, ScratchFloatReg);
// Parity is set for an unordered comparison (NaN): take the slow path.
masm.j(Assembler::Parity, &fail);
{
// Pick the bias: -2^32 on the |positive| path, +2^32 otherwise.
Label positive;
masm.j(Assembler::Above, &positive);
masm.loadConstantFloat32(4294967296.f, temp);
Label skip;
masm.jmp(&skip);
masm.bind(&positive);
masm.loadConstantFloat32(-4294967296.f, temp);
masm.bind(&skip);
}
// temp = bias + input; convert it, then convert back and compare to check
// that the conversion was exact.
masm.addss(input, temp);
masm.cvttss2si(temp, output);
masm.cvtsi2ss(output, ScratchFloatReg);
masm.ucomiss(temp, ScratchFloatReg);
masm.j(Assembler::Parity, &fail);
masm.j(Assembler::Equal, ool->rejoin());
// Not equal: fall through to the slow path below.
}
masm.bind(&fail);
{
// Slow path: call js::ToInt32 on the input widened to a double.
saveVolatile(output);
// cvtss2sd below overwrites |input| in place, so save it first and
// restore it after the call.
masm.push(input);
masm.setupUnalignedABICall(1, output);
masm.cvtss2sd(input, input);
masm.passABIArg(input);
masm.callWithABI(JS_FUNC_TO_DATA_PTR(void *, js::ToInt32));
masm.storeCallResult(output);
masm.pop(input);
restoreVolatile(output);
}
masm.jump(ool->rejoin());
return true;
}

View File

@ -15,6 +15,7 @@ namespace jit {
class OutOfLineLoadTypedArrayOutOfBounds;
class OutOfLineTruncate;
class OutOfLineTruncateFloat32;
class CodeGeneratorX86 : public CodeGeneratorX86Shared
{
@ -64,6 +65,7 @@ class CodeGeneratorX86 : public CodeGeneratorX86Shared
bool visitCompareVAndBranch(LCompareVAndBranch *lir);
bool visitAsmJSUInt32ToDouble(LAsmJSUInt32ToDouble *lir);
bool visitTruncateDToInt32(LTruncateDToInt32 *ins);
bool visitTruncateFToInt32(LTruncateFToInt32 *ins);
bool visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic *ins);
bool visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic *ins);
bool visitAsmJSLoadHeap(LAsmJSLoadHeap *ins);
@ -75,6 +77,7 @@ class CodeGeneratorX86 : public CodeGeneratorX86Shared
bool visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds *ool);
bool visitOutOfLineTruncate(OutOfLineTruncate *ool);
bool visitOutOfLineTruncateFloat32(OutOfLineTruncateFloat32 *ool);
void postAsmJSCall(LAsmJSCall *lir);
};