Bug 1044256 - SIMD backend: implement unary arithmetic operations; a=ijibaja,bbouvier; r=sunfish

2024-09-13 09:24:08 -07:00 · 2014-07-25 14:37:34 -07:00 · 2014-07-25 14:37:34 -07:00 · 3ce9319009
commit 3ce9319009
parent b25afe6e84
13 changed files with 296 additions and 0 deletions
--- a/js/src/jit/IonTypes.h
+++ b/js/src/jit/IonTypes.h
@ -284,6 +284,11 @@ class SimdConstant {
        cst.fillInt32x4(array[0], array[1], array[2], array[3]);
        return cst;
    }
+    // Builds an Int32x4 constant whose four lanes all hold |v|.
+    static SimdConstant SplatX4(int32_t v) {
+        SimdConstant splat;
+        splat.fillInt32x4(v, v, v, v);
+        return splat;
+    }
    static SimdConstant CreateX4(float x, float y, float z, float w) {
        SimdConstant cst;
        cst.fillFloat32x4(x, y, z, w);
@ -294,6 +299,11 @@ class SimdConstant {
        cst.fillFloat32x4(array[0], array[1], array[2], array[3]);
        return cst;
    }
+    // Builds a Float32x4 constant whose four lanes all hold |v|.
+    static SimdConstant SplatX4(float v) {
+        SimdConstant splat;
+        splat.fillFloat32x4(v, v, v, v);
+        return splat;
+    }

    uint32_t length() const {
        MOZ_ASSERT(defined());
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@ -310,6 +310,34 @@ class LSimdBinaryArithFx4 : public LSimdBinaryArith
    LSimdBinaryArithFx4() : LSimdBinaryArith() {}
 };

+// Shared base of the unary SIMD arithmetic LIR instructions: it stores the
+// single input operand and exposes the operation chosen on the MIR node.
+class LSimdUnaryArith : public LInstructionHelper<1, 1, 0>
+{
+  public:
+    explicit LSimdUnaryArith(const LAllocation &in) {
+        setOperand(0, in);
+    }
+
+    // Operation recorded on the MSimdUnaryArith attached to this instruction.
+    MSimdUnaryArith::Operation operation() const {
+        const MSimdUnaryArith *unary = mir_->toSimdUnaryArith();
+        return unary->operation();
+    }
+};
+
+// Unary SIMD arithmetic operation on an Int32x4 operand.
+class LSimdUnaryArithIx4 : public LSimdUnaryArith
+{
+  public:
+    LIR_HEADER(SimdUnaryArithIx4);
+
+    explicit LSimdUnaryArithIx4(const LAllocation &in)
+      : LSimdUnaryArith(in)
+    {}
+};
+
+// Unary SIMD arithmetic operation on a Float32x4 operand.
+class LSimdUnaryArithFx4 : public LSimdUnaryArith
+{
+  public:
+    LIR_HEADER(SimdUnaryArithFx4);
+
+    explicit LSimdUnaryArithFx4(const LAllocation &in)
+      : LSimdUnaryArith(in)
+    {}
+};
+
 // Binary SIMD bitwise operation between two int32x4 or float32x4 operands
 class LSimdBinaryBitwiseX4 : public LInstructionHelper<1, 2, 0>
 {
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@ -24,6 +24,8 @@
    _(SimdInsertElementI)           \
    _(SimdInsertElementF)           \
    _(SimdSignMaskX4)               \
+    _(SimdUnaryArithIx4)            \
+    _(SimdUnaryArithFx4)            \
    _(SimdBinaryCompIx4)            \
    _(SimdBinaryCompFx4)            \
    _(SimdBinaryArithIx4)           \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@ -3801,6 +3801,28 @@ LIRGenerator::visitSimdSignMask(MSimdSignMask *ins)
    }
 }

+bool
+LIRGenerator::visitSimdUnaryArith(MSimdUnaryArith *ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    // Plain use() rather than an at-start use: the output is used as a
+    // temporary to store values while the input is still needed.
+    LUse in = use(ins->input());
+
+    // Pick the LIR node that matches the SIMD type of the result.
+    switch (ins->type()) {
+      case MIRType_Int32x4:
+        return define(new(alloc()) LSimdUnaryArithIx4(in), ins);
+      case MIRType_Float32x4:
+        return define(new(alloc()) LSimdUnaryArithFx4(in), ins);
+      default:
+        break;
+    }
+
+    MOZ_CRASH("Unknown SIMD kind for unary operation");
+    return false;
+}
+
 bool
 LIRGenerator::visitSimdBinaryComp(MSimdBinaryComp *ins)
 {
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@ -271,6 +271,7 @@ class LIRGenerator : public LIRGeneratorSpecific
    bool visitSimdExtractElement(MSimdExtractElement *ins);
    bool visitSimdInsertElement(MSimdInsertElement *ins);
    bool visitSimdSignMask(MSimdSignMask *ins);
+    bool visitSimdUnaryArith(MSimdUnaryArith *ins);
    bool visitSimdBinaryComp(MSimdBinaryComp *ins);
    bool visitSimdBinaryArith(MSimdBinaryArith *ins);
    bool visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@ -1563,6 +1563,52 @@ class MSimdSignMask : public MUnaryInstruction
    ALLOW_CLONE(MSimdSignMask)
 };

+// Unary operation applied to a SIMD vector. The result has the same SIMD type
+// as the operand; Int32x4 operands only accept neg and not_.
+class MSimdUnaryArith : public MUnaryInstruction
+{
+  public:
+    enum Operation {
+        abs,
+        neg,
+        not_,
+        reciprocal,
+        reciprocalSqrt,
+        sqrt
+    };
+
+  private:
+    Operation operation_;
+
+    MSimdUnaryArith(MDefinition *def, Operation op, MIRType type)
+      : MUnaryInstruction(def),
+        operation_(op)
+    {
+        MOZ_ASSERT(IsSimdType(type));
+        MOZ_ASSERT(def->type() == type);
+        // Integer vectors are restricted to negation and bitwise not.
+        MOZ_ASSERT_IF(type == MIRType_Int32x4, op == neg || op == not_);
+        setResultType(type);
+        setMovable();
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdUnaryArith);
+
+    static MSimdUnaryArith *
+    NewAsmJS(TempAllocator &alloc, MDefinition *def, Operation op, MIRType t)
+    {
+        return new(alloc) MSimdUnaryArith(def, op, t);
+    }
+
+    Operation operation() const {
+        return operation_;
+    }
+
+    // Reports an empty alias set.
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    // Two instances are congruent when congruentIfOperandsEqual() holds and
+    // they carry the same operation; the downcast of |ins| is only reached
+    // after that first check has succeeded.
+    bool congruentTo(const MDefinition *ins) const {
+        if (!congruentIfOperandsEqual(ins))
+            return false;
+        return operation() == ins->toSimdUnaryArith()->operation();
+    }
+
+    ALLOW_CLONE(MSimdUnaryArith);
+};
+
 // Compares each value of a SIMD vector to each corresponding lane's value of
 // another SIMD vector, and returns a int32x4 vector containing the results of
 // the comparison: all bits are set to 1 if the comparison is true, 0 otherwise.
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@ -20,6 +20,7 @@ namespace jit {
    _(SimdExtractElement)                                                   \
    _(SimdInsertElement)                                                    \
    _(SimdSignMask)                                                         \
+    _(SimdUnaryArith)                                                       \
    _(SimdBinaryComp)                                                       \
    _(SimdBinaryArith)                                                      \
    _(SimdBinaryBitwise)                                                    \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@ -119,6 +119,7 @@ class ParallelSafetyVisitor : public MDefinitionVisitor
    SAFE_OP(SimdExtractElement)
    SAFE_OP(SimdInsertElement)
    SAFE_OP(SimdSignMask)
+    SAFE_OP(SimdUnaryArith)
    SAFE_OP(SimdBinaryComp)
    SAFE_OP(SimdBinaryArith)
    SAFE_OP(SimdBinaryBitwise)
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@ -1600,6 +1600,54 @@ class AssemblerX86Shared : public AssemblerShared
            MOZ_CRASH("unexpected operand kind");
        }
    }
+    void rcpps(const Operand &src, FloatRegister dest) { // Emits rcpps, choosing the encoder from the kind of |src|.
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG: // Register source.
+            masm.rcpps_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP: // Memory source: base register plus displacement.
+            masm.rcpps_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32: // Memory source: absolute address.
+            masm.rcpps_mr(src.address(), dest.code());
+            break;
+          default: // No encoder is wired up here for any other operand kind.
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void sqrtps(const Operand &src, FloatRegister dest) { // Emits sqrtps, choosing the encoder from the kind of |src|.
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG: // Register source.
+            masm.sqrtps_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP: // Memory source: base register plus displacement.
+            masm.sqrtps_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32: // Memory source: absolute address.
+            masm.sqrtps_mr(src.address(), dest.code());
+            break;
+          default: // No encoder is wired up here for any other operand kind.
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void rsqrtps(const Operand &src, FloatRegister dest) { // Emits rsqrtps, choosing the encoder from the kind of |src|.
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG: // Register source.
+            masm.rsqrtps_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP: // Memory source: base register plus displacement.
+            masm.rsqrtps_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32: // Memory source: absolute address.
+            masm.rsqrtps_mr(src.address(), dest.code());
+            break;
+          default: // No encoder is wired up here for any other operand kind.
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
    void movd(Register src, FloatRegister dest) {
        MOZ_ASSERT(HasSSE2());
        masm.movd_rr(src.code(), dest.code());
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@ -326,6 +326,9 @@ private:
        OP2_MAXPS_VpsWps    = 0x5F,
        OP2_SQRTSD_VsdWsd   = 0x51,
        OP2_SQRTSS_VssWss   = 0x51,
+        OP2_SQRTPS_VpsWps   = 0x51,
+        OP2_RSQRTPS_VpsWps  = 0x52,
+        OP2_RCPPS_VpsWps    = 0x53,
        OP2_ANDPD_VpdWpd    = 0x54,
        OP2_ORPD_VpdWpd     = 0x56,
        OP2_XORPD_VpdWpd    = 0x57,
@ -2674,6 +2677,56 @@ public:
        m_formatter.immediate8(order);
    }

+    // rcpps with a register source: logs the instruction, then emits the
+    // two-byte opcode OP2_RCPPS_VpsWps with |dst| and |src|.
+    void rcpps_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("rcpps      %s, %s", nameFPReg(src), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_RCPPS_VpsWps, (RegisterID)dst, (RegisterID)src);
+    }
+    // rcpps with a memory source addressed as |offset|(|base|).
+    void rcpps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("rcpps      %s0x%x(%s), %s", PRETTY_PRINT_OFFSET(offset),
+             nameIReg(base), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_RCPPS_VpsWps, (RegisterID)dst, base, offset);
+    }
+    // rcpps with a memory source at the absolute |address|.
+    void rcpps_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("rcpps      %p, %s", address, nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_RCPPS_VpsWps, (RegisterID)dst, address);
+    }
+
+    // rsqrtps with a register source: logs the instruction, then emits the
+    // two-byte opcode OP2_RSQRTPS_VpsWps with |dst| and |src|.
+    void rsqrtps_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("rsqrtps    %s, %s", nameFPReg(src), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_RSQRTPS_VpsWps, (RegisterID)dst, (RegisterID)src);
+    }
+    // rsqrtps with a memory source addressed as |offset|(|base|).
+    void rsqrtps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("rsqrtps    %s0x%x(%s), %s", PRETTY_PRINT_OFFSET(offset),
+             nameIReg(base), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_RSQRTPS_VpsWps, (RegisterID)dst, base, offset);
+    }
+    // rsqrtps with a memory source at the absolute |address|.
+    void rsqrtps_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("rsqrtps    %p, %s", address, nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_RSQRTPS_VpsWps, (RegisterID)dst, address);
+    }
+
+    // sqrtps with a register source: logs the instruction, then emits the
+    // two-byte opcode OP2_SQRTPS_VpsWps with |dst| and |src|.
+    void sqrtps_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("sqrtps    %s, %s", nameFPReg(src), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_SQRTPS_VpsWps, (RegisterID)dst, (RegisterID)src);
+    }
+
+    // sqrtps with a memory source addressed as |offset|(|base|).
+    void sqrtps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("sqrtps    %s0x%x(%s), %s", PRETTY_PRINT_OFFSET(offset),
+             nameIReg(base), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_SQRTPS_VpsWps, (RegisterID)dst, base, offset);
+    }
+
+    // sqrtps with a memory source at the absolute |address|.
+    void sqrtps_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("sqrtps    %p, %s", address, nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_SQRTPS_VpsWps, (RegisterID)dst, address);
+    }
+
    void addsd_rr(XMMRegisterID src, XMMRegisterID dst)
    {
        spew("addsd      %s, %s",
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@ -2525,6 +2525,75 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
    MOZ_CRASH("unexpected SIMD op");
 }

+// Emits x86 code for a unary operation on an Int32x4 value. Only neg and not_
+// are implemented for integer vectors; any other operation crashes.
+bool
+CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4 *ins)
+{
+    Operand in = ToOperand(ins->input());
+    FloatRegister out = ToFloatRegister(ins->output());
+
+    // Every lane set to -1, i.e. all bits set. Built with SplatX4 like the
+    // constants of the Float32x4 variant.
+    static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
+
+    switch (ins->operation()) {
+      case MSimdUnaryArith::neg:
+        // out = 0 - in: clear the output, then subtract the input from it.
+        masm.pxor(out, out);
+        masm.packedSubInt32(in, out);
+        return true;
+      case MSimdUnaryArith::not_:
+        // out = allOnes ^ in, which flips every bit of the input.
+        masm.loadConstantInt32x4(allOnes, out);
+        masm.bitwiseXorX4(in, out);
+        return true;
+      case MSimdUnaryArith::abs:
+      case MSimdUnaryArith::reciprocal:
+      case MSimdUnaryArith::reciprocalSqrt:
+      case MSimdUnaryArith::sqrt:
+        // Not handled for Int32x4; fall out of the switch and crash.
+        break;
+    }
+    MOZ_CRASH("unexpected SIMD op");
+}
+
+// Emits x86 code for a unary operation on a Float32x4 value. abs, neg and not_
+// are bitwise operations against a constant loaded into the output register;
+// the remaining operations map onto a single packed instruction.
+bool
+CodeGeneratorX86Shared::visitSimdUnaryArithFx4(LSimdUnaryArithFx4 *ins)
+{
+    Operand in = ToOperand(ins->input());
+    FloatRegister out = ToFloatRegister(ins->output());
+
+    // All ones but the sign bit: AND-ing with it clears the sign.
+    static const SimdConstant clearSign =
+        SimdConstant::SplatX4(SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits));
+
+    // All ones including the sign bit: XOR-ing with it flips every bit.
+    static const SimdConstant allBits =
+        SimdConstant::SplatX4(SpecificNaN<float>(1, FloatingPoint<float>::kSignificandBits));
+
+    // All zeros but the sign bit: XOR-ing with it flips the sign.
+    static const SimdConstant signBit = SimdConstant::SplatX4(-0.f);
+
+    switch (ins->operation()) {
+      case MSimdUnaryArith::abs:
+        masm.loadConstantFloat32x4(clearSign, out);
+        masm.bitwiseAndX4(in, out);
+        return true;
+      case MSimdUnaryArith::neg:
+        masm.loadConstantFloat32x4(signBit, out);
+        masm.bitwiseXorX4(in, out);
+        return true;
+      case MSimdUnaryArith::not_:
+        masm.loadConstantFloat32x4(allBits, out);
+        masm.bitwiseXorX4(in, out);
+        return true;
+      case MSimdUnaryArith::reciprocal:
+        masm.packedReciprocalFloat32x4(in, out);
+        return true;
+      case MSimdUnaryArith::reciprocalSqrt:
+        masm.packedReciprocalSqrtFloat32x4(in, out);
+        return true;
+      case MSimdUnaryArith::sqrt:
+        masm.packedSqrtFloat32x4(in, out);
+        return true;
+    }
+    MOZ_CRASH("unexpected SIMD op");
+}
+
 bool
 CodeGeneratorX86Shared::visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *ins)
 {
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@ -219,6 +219,8 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
    bool visitSimdInsertElementI(LSimdInsertElementI *lir);
    bool visitSimdInsertElementF(LSimdInsertElementF *lir);
    bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
+    bool visitSimdUnaryArithIx4(LSimdUnaryArithIx4 *lir);
+    bool visitSimdUnaryArithFx4(LSimdUnaryArithFx4 *lir);
    bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
    bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
    bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@ -532,6 +532,19 @@ class MacroAssemblerX86Shared : public Assembler
    void packedSubInt32(const Operand &src, FloatRegister dest) {
        psubd(src, dest);
    }
+    void packedReciprocalFloat32x4(const Operand &src, FloatRegister dest) {
+        // rcpps only yields an approximation of the reciprocal; this may need a
+        // fix-up if the spec requires a given precision for this operation.
+        // TODO See also bug 1068028.
+        rcpps(src, dest);
+    }
+    void packedReciprocalSqrtFloat32x4(const Operand &src, FloatRegister dest) {
+        // TODO Approximate result as well; see the comment in packedReciprocalFloat32x4 and bug 1068028.
+        rsqrtps(src, dest);
+    }
+    void packedSqrtFloat32x4(const Operand &src, FloatRegister dest) {
+        sqrtps(src, dest); // Thin wrapper: forwards both operands unchanged to sqrtps.
+    }

    void packedLeftShiftByScalar(FloatRegister src, FloatRegister dest) {
        pslld(src, dest);