Bug 969722 - Part 2: Inline ForkJoinGetSlice. (r=nmatsakis)

Shu-yu Guo 2014-02-14 19:18:07 -08:00
parent 31a57bfaef
commit 6e0c6962f0
31 changed files with 451 additions and 5 deletions


@@ -1201,7 +1201,6 @@ public:
// Otherwise, [%base+offset] -> %eax.
spew("cmpxchg %s, %s0x%x(%s)",
nameIReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
m_formatter.oneByteOp(PRE_LOCK);
m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, src, base, offset);
}
@@ -1406,6 +1405,14 @@ public:
}
}
void cmpw_rr(RegisterID src, RegisterID dst)
{
spew("cmpw %s, %s",
nameIReg(2, src), nameIReg(2, dst));
m_formatter.prefix(PRE_OPERAND_SIZE);
m_formatter.oneByteOp(OP_CMP_EvGv, src, dst);
}
void cmpw_rm(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
{
FIXME_INSN_PRINTING;
@@ -2064,6 +2071,13 @@ public:
}
#endif
void movzwl_rr(RegisterID src, RegisterID dst)
{
spew("movzwl %s, %s",
nameIReg(2, src), nameIReg(4, dst));
m_formatter.twoByteOp(OP2_MOVZX_GvEw, dst, src);
}
void movzwl_mr(int offset, RegisterID base, RegisterID dst)
{
spew("movzwl %s0x%x(%s), %s",

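A note on the two assembler additions above: the 0x66 operand-size prefix is what narrows CMP to 16 bits, while MOVZX needs no prefix because only its source is 16-bit; the destination is the full 32-bit register. As a sketch of the expected encodings, worked out from the opcode names used above rather than verified against the assembler:

    // cmpw_rr(ecx, eax)   -> 66 39 c8   (PRE_OPERAND_SIZE, OP_CMP_EvGv, ModRM)
    // movzwl_rr(ecx, eax) -> 0f b7 c1   (OP2_MOVZX_GvEw, ModRM)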

@@ -79,6 +79,12 @@ CompileRuntime::addressOfInterruptPar()
}
#endif
const void *
CompileRuntime::addressOfThreadPool()
{
return &runtime()->threadPool;
}
const JitRuntime *
CompileRuntime::jitRuntime()
{


@@ -56,6 +56,8 @@ class CompileRuntime
const void *addressOfInterruptPar();
#endif
const void *addressOfThreadPool();
const JitRuntime *jitRuntime();
// Compilation does not occur off thread when the SPS profiler is enabled.


@@ -157,6 +157,7 @@ JitRuntime::JitRuntime()
parallelArgumentsRectifier_(nullptr),
invalidator_(nullptr),
debugTrapHandler_(nullptr),
forkJoinGetSliceStub_(nullptr),
functionWrappers_(nullptr),
osrTempData_(nullptr),
flusher_(nullptr),
@@ -288,6 +289,18 @@ JitRuntime::debugTrapHandler(JSContext *cx)
return debugTrapHandler_;
}
bool
JitRuntime::ensureForkJoinGetSliceStubExists(JSContext *cx)
{
if (!forkJoinGetSliceStub_) {
IonSpew(IonSpew_Codegen, "# Emitting ForkJoinGetSlice stub");
AutoLockForExclusiveAccess lock(cx);
AutoCompartment ac(cx, cx->runtime()->atomsCompartment());
forkJoinGetSliceStub_ = generateForkJoinGetSliceStub(cx);
}
return !!forkJoinGetSliceStub_;
}
uint8_t *
JitRuntime::allocateOsrTempData(size_t size)
{
@@ -1667,6 +1680,13 @@ IonCompile(JSContext *cx, JSScript *script,
if (!cx->compartment()->jitCompartment()->ensureIonStubsExist(cx))
return AbortReason_Alloc;
if (executionMode == ParallelExecution &&
LIRGenerator::allowInlineForkJoinGetSlice() &&
!cx->runtime()->jitRuntime()->ensureForkJoinGetSliceStubExists(cx))
{
return AbortReason_Alloc;
}
MIRGraph *graph = alloc->new_<MIRGraph>(temp);
if (!graph)
return AbortReason_Alloc;

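Note on ensureForkJoinGetSliceStubExists above: the stub is per-runtime rather than per-compartment, so it is generated at most once, under AutoLockForExclusiveAccess and in the runtime-wide atoms compartment, then cached in forkJoinGetSliceStub_. IonCompile only pays this cost for ParallelExecution, and only on backends that report allowInlineForkJoinGetSlice().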

@@ -674,6 +674,9 @@ class IonBuilder : public MIRGenerator
InliningStatus inlineUnsafeSetReservedSlot(CallInfo &callInfo);
InliningStatus inlineUnsafeGetReservedSlot(CallInfo &callInfo);
// ForkJoin intrinsics
InliningStatus inlineForkJoinGetSlice(CallInfo &callInfo);
// Utility intrinsics.
InliningStatus inlineIsCallable(CallInfo &callInfo);
InliningStatus inlineHaveSameClass(CallInfo &callInfo);


@@ -377,6 +377,10 @@ class MacroAssembler : public MacroAssemblerSpecific
rshiftPtr(Imm32(JSString::LENGTH_SHIFT), dest);
}
void loadSliceBounds(Register worker, Register dest) {
loadPtr(Address(worker, ThreadPoolWorker::offsetOfSliceBounds()), dest);
}
void loadJSContext(const Register &dest) {
loadPtr(AbsoluteAddress(GetIonContext()->runtime->addressOfJSContext()), dest);
}
@@ -833,6 +837,12 @@ class MacroAssembler : public MacroAssemblerSpecific
Push(ImmPtr(nullptr));
}
void loadThreadPool(Register pool) {
// JitRuntimes are tied to JSRuntimes and there is one ThreadPool per
// JSRuntime, so we can hardcode the ThreadPool address here.
movePtr(ImmPtr(GetIonContext()->runtime->addressOfThreadPool()), pool);
}
void loadForkJoinContext(Register cx, Register scratch);
void loadContext(Register cxReg, Register scratch, ExecutionMode executionMode);


@@ -189,6 +189,9 @@ class JitRuntime
// Thunk used by the debugger for breakpoint and step mode.
JitCode *debugTrapHandler_;
// Stub used to inline the ForkJoinGetSlice intrinsic.
JitCode *forkJoinGetSliceStub_;
// Map VMFunction addresses to the JitCode of the wrapper.
typedef WeakCache<const VMFunction *, JitCode *> VMWrapperMap;
VMWrapperMap *functionWrappers_;
@@ -219,6 +222,7 @@ class JitRuntime
JitCode *generateInvalidator(JSContext *cx);
JitCode *generatePreBarrier(JSContext *cx, MIRType type);
JitCode *generateDebugTrapHandler(JSContext *cx);
JitCode *generateForkJoinGetSliceStub(JSContext *cx);
JitCode *generateVMWrapper(JSContext *cx, const VMFunction &f);
JSC::ExecutableAllocator *createIonAlloc(JSContext *cx);
@@ -321,6 +325,11 @@ class JitRuntime
JitCode *shapePreBarrier() const {
return shapePreBarrier_;
}
bool ensureForkJoinGetSliceStubExists(JSContext *cx);
JitCode *forkJoinGetSliceStub() const {
return forkJoinGetSliceStub_;
}
};
class JitCompartment


@@ -4785,6 +4785,38 @@ class LForkJoinContext : public LCallInstructionHelper<1, 0, 1>
}
};
class LForkJoinGetSlice : public LInstructionHelper<1, 1, 4>
{
public:
LIR_HEADER(ForkJoinGetSlice);
LForkJoinGetSlice(const LAllocation &cx,
const LDefinition &temp1, const LDefinition &temp2,
const LDefinition &temp3, const LDefinition &temp4) {
setOperand(0, cx);
setTemp(0, temp1);
setTemp(1, temp2);
setTemp(2, temp3);
setTemp(3, temp4);
}
const LAllocation *forkJoinContext() {
return getOperand(0);
}
const LDefinition *temp1() {
return getTemp(0);
}
const LDefinition *temp2() {
return getTemp(1);
}
const LDefinition *temp3() {
return getTemp(2);
}
const LDefinition *temp4() {
return getTemp(3);
}
};
class LCallGetProperty : public LCallInstructionHelper<BOX_PIECES, BOX_PIECES, 0>
{
public:

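Reading the helper template: LInstructionHelper<1, 1, 4> declares one definition (the returned slice id), one operand (the ForkJoinContext), and four temporaries. The x86/x64 lowering further down pins all of them to fixed registers, since the instruction compiles to a bare call with a hand-rolled register convention.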

@@ -214,6 +214,7 @@
_(StoreFixedSlotT) \
_(FunctionEnvironment) \
_(ForkJoinContext) \
_(ForkJoinGetSlice) \
_(GetPropertyCacheV) \
_(GetPropertyCacheT) \
_(GetPropertyPolymorphicV) \


@@ -146,6 +146,8 @@ IonBuilder::inlineNativeCall(CallInfo &callInfo, JSNative native)
if (native == intrinsic_ShouldForceSequential ||
native == intrinsic_InParallelSection)
return inlineForceSequentialOrInParallelSection(callInfo);
if (native == intrinsic_ForkJoinGetSlice)
return inlineForkJoinGetSlice(callInfo);
// Utility intrinsics.
if (native == intrinsic_IsCallable)
@@ -1389,6 +1391,40 @@ IonBuilder::inlineForceSequentialOrInParallelSection(CallInfo &callInfo)
MOZ_ASSUME_UNREACHABLE("Invalid execution mode");
}
IonBuilder::InliningStatus
IonBuilder::inlineForkJoinGetSlice(CallInfo &callInfo)
{
if (info().executionMode() != ParallelExecution)
return InliningStatus_NotInlined;
// Assert the way the function is used instead of testing, as it is a
// self-hosted function which must be used in a particular fashion.
MOZ_ASSERT(callInfo.argc() == 1 && !callInfo.constructing());
MOZ_ASSERT(callInfo.getArg(0)->type() == MIRType_Int32);
MOZ_ASSERT(getInlineReturnType() == MIRType_Int32);
callInfo.setImplicitlyUsedUnchecked();
switch (info().executionMode()) {
case SequentialExecution:
case DefinitePropertiesAnalysis:
// ForkJoinGetSlice acts as identity for sequential execution.
current->push(callInfo.getArg(0));
return InliningStatus_Inlined;
case ParallelExecution:
if (LIRGenerator::allowInlineForkJoinGetSlice()) {
MForkJoinGetSlice *getSlice = MForkJoinGetSlice::New(alloc(),
graph().forkJoinContext());
current->add(getSlice);
current->push(getSlice);
return InliningStatus_Inlined;
}
return InliningStatus_NotInlined;
}
MOZ_ASSUME_UNREACHABLE("Invalid execution mode");
}
IonBuilder::InliningStatus
IonBuilder::inlineNewDenseArray(CallInfo &callInfo)
{

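To summarize the intrinsic's contract (a sketch; the -1 comes from the stub's noMoreWork path further down):

    // Sequential / DefinitePropertiesAnalysis: identity on the int32 argument.
    // ParallelExecution: emits MForkJoinGetSlice, which compiles to a call to
    // the ForkJoinGetSlice stub and yields the next slice id for this worker,
    // or -1 once the thread pool has no pending slices.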

@@ -7489,6 +7489,33 @@ class MForkJoinContext
}
};
// Calls the ForkJoinGetSlice stub, used for inlining the eponymous intrinsic.
// Only applicable in ParallelExecution.
class MForkJoinGetSlice
: public MUnaryInstruction
{
MForkJoinGetSlice(MDefinition *cx)
: MUnaryInstruction(cx)
{
setResultType(MIRType_Int32);
}
public:
INSTRUCTION_HEADER(ForkJoinGetSlice);
static MForkJoinGetSlice *New(TempAllocator &alloc, MDefinition *cx) {
return new(alloc) MForkJoinGetSlice(cx);
}
MDefinition *forkJoinContext() {
return getOperand(0);
}
bool possiblyCalls() const {
return true;
}
};
// Store to vp[slot] (slots that are not inline in an object).
class MStoreSlot
: public MBinaryInstruction,

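The possiblyCalls() override is presumably what keeps the optimizer from treating this as a pure int32 operation: the instruction ultimately calls the stub, so it should not be hoisted or reordered the way an ordinary arithmetic node could be.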

@@ -214,6 +214,7 @@ namespace jit {
_(LambdaPar) \
_(RestPar) \
_(ForkJoinContext) \
_(ForkJoinGetSlice) \
_(GuardThreadExclusive) \
_(InterruptCheckPar) \
_(RecompileCheck)


@@ -268,6 +268,7 @@ class ParallelSafetyVisitor : public MInstructionVisitor
UNSAFE_OP(InstanceOf)
CUSTOM_OP(InterruptCheck)
SAFE_OP(ForkJoinContext)
SAFE_OP(ForkJoinGetSlice)
SAFE_OP(NewPar)
SAFE_OP(NewDenseArrayPar)
SAFE_OP(NewCallObjectPar)


@@ -2310,3 +2310,15 @@ CodeGeneratorARM::visitNegF(LNegF *ins)
masm.ma_vneg_f32(input, ToFloatRegister(ins->output()));
return true;
}
bool
CodeGeneratorARM::visitForkJoinGetSlice(LForkJoinGetSlice *ins)
{
MOZ_ASSUME_UNREACHABLE("NYI");
}
JitCode *
JitRuntime::generateForkJoinGetSliceStub(JSContext *cx)
{
MOZ_ASSUME_UNREACHABLE("NYI");
}


@@ -170,9 +170,10 @@ class CodeGeneratorARM : public CodeGeneratorShared
bool visitAsmJSStoreGlobalVar(LAsmJSStoreGlobalVar *ins);
bool visitAsmJSLoadFuncPtr(LAsmJSLoadFuncPtr *ins);
bool visitAsmJSLoadFFIFunc(LAsmJSLoadFFIFunc *ins);
bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
bool generateInvalidateEpilogue();
protected:
void postAsmJSCall(LAsmJSCall *lir) {


@@ -544,4 +544,10 @@ LIRGeneratorARM::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic
MOZ_ASSUME_UNREACHABLE("NYI");
}
bool
LIRGeneratorARM::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
{
MOZ_ASSUME_UNREACHABLE("NYI");
}
//__aeabi_uidiv


@@ -88,6 +88,7 @@ class LIRGeneratorARM : public LIRGeneratorShared
bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
static bool allowFloat32Optimizations() {
return true;


@@ -525,6 +525,9 @@ class AssemblerX86Shared
}
void movzwl(const Operand &src, const Register &dest) {
switch (src.kind()) {
case Operand::REG:
masm.movzwl_rr(src.reg(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.movzwl_mr(src.disp(), src.base(), dest.code());
break;
@@ -535,7 +538,9 @@ class AssemblerX86Shared
MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
}
}
void movzwl(const Register &src, const Register &dest) {
masm.movzwl_rr(src.code(), dest.code());
}
void movw(const Register &src, const Operand &dest) {
switch (dest.kind()) {
case Operand::MEM_REG_DISP:
@@ -858,6 +863,9 @@ class AssemblerX86Shared
void cmpl(const Operand &op, ImmPtr imm) {
cmpl(op, ImmWord(uintptr_t(imm.value)));
}
void cmpw(const Register &lhs, const Register &rhs) {
masm.cmpw_rr(lhs.code(), rhs.code());
}
void setCC(Condition cond, const Register &r) {
masm.setCC_r(static_cast<JSC::X86Assembler::Condition>(cond), r.code());
}
@@ -866,6 +874,9 @@ class AssemblerX86Shared
JS_ASSERT(GeneralRegisterSet(Registers::SingleByteRegs).has(rhs));
masm.testb_rr(rhs.code(), lhs.code());
}
void testw(const Register &lhs, const Register &rhs) {
masm.testw_rr(rhs.code(), lhs.code());
}
void testl(const Register &lhs, const Register &rhs) {
masm.testl_rr(rhs.code(), lhs.code());
}


@@ -9,7 +9,10 @@
#include "mozilla/DebugOnly.h"
#include "mozilla/MathAlgorithms.h"
#include "jsmath.h"
#include "jit/IonFrames.h" #include "jit/IonFrames.h"
#include "jit/IonLinker.h"
#include "jit/JitCompartment.h" #include "jit/JitCompartment.h"
#include "jit/RangeAnalysis.h" #include "jit/RangeAnalysis.h"
@@ -1768,5 +1771,177 @@ CodeGeneratorX86Shared::visitNegF(LNegF *ins)
return true;
}
bool
CodeGeneratorX86Shared::visitForkJoinGetSlice(LForkJoinGetSlice *ins)
{
MOZ_ASSERT(gen->info().executionMode() == ParallelExecution);
MOZ_ASSERT(ToRegister(ins->forkJoinContext()) == ForkJoinGetSliceReg_cx);
MOZ_ASSERT(ToRegister(ins->temp1()) == eax);
MOZ_ASSERT(ToRegister(ins->temp2()) == edx);
MOZ_ASSERT(ToRegister(ins->temp3()) == ForkJoinGetSliceReg_temp0);
MOZ_ASSERT(ToRegister(ins->temp4()) == ForkJoinGetSliceReg_temp1);
MOZ_ASSERT(ToRegister(ins->output()) == ForkJoinGetSliceReg_output);
masm.call(gen->jitRuntime()->forkJoinGetSliceStub());
return true;
}
JitCode *
JitRuntime::generateForkJoinGetSliceStub(JSContext *cx)
{
#ifdef JS_THREADSAFE
MacroAssembler masm(cx);
// We need two fixed temps: eax for cmpxchg, and edx for div.
Register cxReg = ForkJoinGetSliceReg_cx, worker = cxReg;
Register pool = ForkJoinGetSliceReg_temp0;
Register bounds = ForkJoinGetSliceReg_temp1;
Register output = ForkJoinGetSliceReg_output;
MOZ_ASSERT(worker != eax && worker != edx);
MOZ_ASSERT(pool != eax && pool != edx);
MOZ_ASSERT(bounds != eax && bounds != edx);
MOZ_ASSERT(output != eax && output != edx);
Label stealWork, noMoreWork, gotSlice;
Operand workerSliceBounds(Address(worker, ThreadPoolWorker::offsetOfSliceBounds()));
// Clobber cx to load the worker.
masm.push(cxReg);
masm.loadPtr(Address(cxReg, ForkJoinContext::offsetOfWorker()), worker);
// Load the thread pool, which is used in all cases below.
masm.loadThreadPool(pool);
{
// Try to get a slice from the current thread.
Label getOwnSliceLoopHead;
masm.bind(&getOwnSliceLoopHead);
// Load the slice bounds for the current thread.
masm.loadSliceBounds(worker, bounds);
// The slice bounds is a uint32 composed from two uint16s:
// [ from          , to            ]
//   ^~~~            ^~
//   upper 16 bits   lower 16 bits
masm.move32(bounds, output);
masm.shrl(Imm32(16), output);
// If we don't have any slices left ourselves, move on to stealing.
masm.branch16(Assembler::Equal, output, bounds, &stealWork);
// If we still have work, try to CAS [ from+1, to ].
masm.move32(bounds, edx);
masm.add32(Imm32(0x10000), edx);
masm.move32(bounds, eax);
masm.atomic_cmpxchg32(edx, workerSliceBounds, eax);
masm.j(Assembler::NonZero, &getOwnSliceLoopHead);
// If the CAS succeeded, return |from| in output.
masm.jump(&gotSlice);
}
// Try to steal work.
masm.bind(&stealWork);
// It's not technically correct to test whether work-stealing is turned on
// only during stub-generation time, but it's a DEBUG only thing.
if (cx->runtime()->threadPool.workStealing()) {
Label stealWorkLoopHead;
masm.bind(&stealWorkLoopHead);
// Check if we have work.
masm.branch32(Assembler::Equal,
Address(pool, ThreadPool::offsetOfPendingSlices()),
Imm32(0), &noMoreWork);
// Get an id at random. The following is an inline of
// the 32-bit xorshift in ThreadPoolWorker::randomWorker().
{
// Reload the current worker.
masm.loadPtr(Address(StackPointer, 0), cxReg);
masm.loadPtr(Address(cxReg, ForkJoinContext::offsetOfWorker()), worker);
// Perform the xorshift to get a random number in eax, using edx
// as a temp.
Address rngState(worker, ThreadPoolWorker::offsetOfSchedulerRNGState());
masm.load32(rngState, eax);
masm.move32(eax, edx);
masm.shll(Imm32(ThreadPoolWorker::XORSHIFT_A), eax);
masm.xor32(edx, eax);
masm.move32(eax, edx);
masm.shrl(Imm32(ThreadPoolWorker::XORSHIFT_B), eax);
masm.xor32(edx, eax);
masm.move32(eax, edx);
masm.shll(Imm32(ThreadPoolWorker::XORSHIFT_C), eax);
masm.xor32(edx, eax);
masm.store32(eax, rngState);
// Compute the random worker id by computing % numWorkers. Reuse
// output as a temp.
masm.move32(Imm32(0), edx);
masm.move32(Imm32(cx->runtime()->threadPool.numWorkers()), output);
masm.udiv(output);
}
// Load the worker from the workers array.
masm.loadPtr(Address(pool, ThreadPool::offsetOfWorkers()), worker);
masm.loadPtr(BaseIndex(worker, edx, ScalePointer), worker);
// Try to get a slice from the designated victim worker.
Label stealSliceFromWorkerLoopHead;
masm.bind(&stealSliceFromWorkerLoopHead);
// Load the slice bounds and decompose for the victim worker.
masm.loadSliceBounds(worker, bounds);
masm.move32(bounds, eax);
masm.shrl(Imm32(16), eax);
// If the victim worker has no more slices left, find another worker.
masm.branch16(Assembler::Equal, eax, bounds, &stealWorkLoopHead);
// If the victim worker still has work, try to CAS [ from, to-1 ].
masm.move32(bounds, output);
masm.sub32(Imm32(1), output);
masm.move32(bounds, eax);
masm.atomic_cmpxchg32(output, workerSliceBounds, eax);
masm.j(Assembler::NonZero, &stealSliceFromWorkerLoopHead);
// If the CAS succeeded, return |to-1| in output.
#ifdef DEBUG
masm.atomic_inc32(Operand(Address(pool, ThreadPool::offsetOfStolenSlices())));
#endif
// Copies lower 16 bits only.
masm.movzwl(output, output);
}
// If we successfully got a slice, decrement pool->pendingSlices_ and
// return the slice.
masm.bind(&gotSlice);
masm.atomic_dec32(Operand(Address(pool, ThreadPool::offsetOfPendingSlices())));
masm.pop(cxReg);
masm.ret();
// There are no more slices to give out; return -1.
masm.bind(&noMoreWork);
masm.move32(Imm32(-1), output);
masm.pop(cxReg);
masm.ret();
Linker linker(masm);
JitCode *code = linker.newCode<NoGC>(cx, JSC::OTHER_CODE);
#ifdef JS_ION_PERF
writePerfSpewerJitCodeProfile(code, "ForkJoinGetSliceStub");
#endif
return code;
#else
return nullptr;
#endif // JS_THREADSAFE
}
} // namespace jit
} // namespace js

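For readers who would rather not trace the x86 above, here is a minimal C++ sketch of the algorithm the stub implements, with std::atomic standing in for the raw cmpxchg loops. It only restates the assembly: the names (sliceBounds, rngState) are stand-ins for the fields the offsetOf* accessors point at, and the XORSHIFT_* values are placeholders, not the real ThreadPoolWorker constants.

    #include <stdint.h>
    #include <atomic>

    // Placeholder shift constants; the real ones are
    // ThreadPoolWorker::XORSHIFT_{A,B,C}.
    enum { XORSHIFT_A = 13, XORSHIFT_B = 17, XORSHIFT_C = 5 };

    // Slice bounds pack two uint16s into a uint32: |from| in the upper 16
    // bits, |to| in the lower 16. A worker owns slices [from, to).
    static bool getOwnSlice(std::atomic<uint32_t> &sliceBounds, uint16_t *sliceId)
    {
        uint32_t bounds = sliceBounds.load();
        while (true) {
            uint16_t from = bounds >> 16;
            uint16_t to = bounds & 0xffff;
            if (from == to)
                return false;                 // nothing left; go steal
            // Claim |from| by CASing in [from+1, to] -- the add32 of 0x10000.
            if (sliceBounds.compare_exchange_strong(bounds, bounds + 0x10000)) {
                *sliceId = from;              // the stub returns |from|
                return true;
            }
            // CAS failure reloads |bounds|; loop like getOwnSliceLoopHead.
        }
    }

    // Stealing claims from the other end: CAS in [from, to-1], take |to-1|.
    static bool stealSlice(std::atomic<uint32_t> &victimBounds, uint16_t *sliceId)
    {
        uint32_t bounds = victimBounds.load();
        while (true) {
            uint16_t from = bounds >> 16;
            uint16_t to = bounds & 0xffff;
            if (from == to)
                return false;                 // victim is empty; pick another
            if (victimBounds.compare_exchange_strong(bounds, bounds - 1)) {
                *sliceId = to - 1;
                return true;
            }
        }
    }

    // Victim selection: Marsaglia's 32-bit xorshift, inlined by the stub;
    // the subsequent udiv's remainder (edx) picks the victim index.
    static uint32_t nextRandom(uint32_t &rngState)
    {
        uint32_t x = rngState;
        x ^= x << XORSHIFT_A;
        x ^= x >> XORSHIFT_B;
        x ^= x << XORSHIFT_C;
        return rngState = x;
    }

The stub additionally checks pool->pendingSlices_ before every steal attempt and decrements it on each successful acquisition, which is what bounds the steal loop.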

@@ -122,6 +122,8 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
virtual bool visitUDivOrMod(LUDivOrMod *ins);
virtual bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
bool visitNegI(LNegI *lir);
bool visitNegD(LNegD *lir);
bool visitNegF(LNegF *lir);


@@ -191,6 +191,11 @@ class LIRGeneratorShared : public MInstructionVisitorWithDefaults
static bool allowFloat32Optimizations() {
return false;
}
// Whether we can inline ForkJoinGetSlice.
static bool allowInlineForkJoinGetSlice() {
return false;
}
};
} // namespace jit


@@ -295,3 +295,16 @@ LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins)
LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
return define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
}
bool
LIRGeneratorX86Shared::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
{
// We fix eax and edx for cmpxchg and div.
LForkJoinGetSlice *lir = new(alloc())
LForkJoinGetSlice(useFixed(ins->forkJoinContext(), ForkJoinGetSliceReg_cx),
tempFixed(eax),
tempFixed(edx),
tempFixed(ForkJoinGetSliceReg_temp0),
tempFixed(ForkJoinGetSliceReg_temp1));
return defineFixed(lir, ins, LAllocation(AnyRegister(ForkJoinGetSliceReg_output)));
}

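On the lowering above: tempFixed(eax) and tempFixed(edx) reserve the two registers the stub implicitly clobbers (cmpxchg's accumulator and udiv's edx:eax pair), while useFixed and defineFixed pin the context and the result to the ForkJoinGetSliceReg_* convention, so visitForkJoinGetSlice in the code generator can emit the call with no register shuffling.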

@@ -47,6 +47,7 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
bool lowerConstantFloat32(float d, MInstruction *ins);
bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
bool lowerTruncateFToInt32(MTruncateToInt32 *ins);
bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
};
} // namespace jit


@@ -111,6 +111,9 @@ class MacroAssemblerX86Shared : public Assembler
void and32(const Imm32 &imm, const Address &dest) {
andl(imm, Operand(dest));
}
void or32(const Register &src, const Register &dest) {
orl(src, dest);
}
void or32(const Imm32 &imm, const Register &dest) {
orl(imm, dest);
}
@@ -156,6 +159,9 @@ class MacroAssemblerX86Shared : public Assembler
void xor32(Imm32 imm, Register dest) {
xorl(imm, dest);
}
void xor32(Register src, Register dest) {
xorl(src, dest);
}
void not32(Register reg) {
notl(reg);
}
@@ -177,6 +183,10 @@ class MacroAssemblerX86Shared : public Assembler
lock_cmpxchg32(src, addr);
}
void branch16(Condition cond, const Register &lhs, const Register &rhs, Label *label) {
cmpw(lhs, rhs);
j(cond, label);
}
void branch32(Condition cond, const Operand &lhs, const Register &rhs, Label *label) {
cmpl(lhs, rhs);
j(cond, label);
@@ -201,6 +211,10 @@ class MacroAssemblerX86Shared : public Assembler
cmpl(lhs, rhs);
j(cond, label);
}
void branchTest16(Condition cond, const Register &lhs, const Register &rhs, Label *label) {
testw(lhs, rhs);
j(cond, label);
}
void branchTest32(Condition cond, const Register &lhs, const Register &rhs, Label *label) {
testl(lhs, rhs);
j(cond, label);


@@ -129,6 +129,13 @@ static MOZ_CONSTEXPR_VAR uint32_t NumFloatArgRegs = 8;
static MOZ_CONSTEXPR_VAR FloatRegister FloatArgRegs[NumFloatArgRegs] = { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 };
#endif
// The convention used by the ForkJoinGetSlice stub. None of these can be rax
// or rdx, which the stub also needs for cmpxchg and div, respectively.
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_cx = rdi;
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_temp0 = rbx;
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_temp1 = rcx;
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_output = rsi;
class ABIArgGenerator
{
#if defined(XP_WIN)


@@ -55,6 +55,10 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared
static bool allowFloat32Optimizations() {
return true;
}
static bool allowInlineForkJoinGetSlice() {
return true;
}
};
typedef LIRGeneratorX64 LIRGeneratorSpecific;


@@ -55,6 +55,13 @@ static MOZ_CONSTEXPR_VAR Register CallTempReg3 = ecx;
static MOZ_CONSTEXPR_VAR Register CallTempReg4 = esi;
static MOZ_CONSTEXPR_VAR Register CallTempReg5 = edx;
// The convention used by the ForkJoinGetSlice stub. None of these can be eax
// or edx, which the stub also needs for cmpxchg and div, respectively.
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_cx = edi;
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_temp0 = ebx;
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_temp1 = ecx;
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_output = esi;
// We have no arg regs, so our NonArgRegs are just our CallTempReg*
static MOZ_CONSTEXPR_VAR Register CallTempNonArgRegs[] = { edi, eax, ebx, ecx, esi, edx };
static const uint32_t NumCallTempNonArgRegs =


@@ -66,6 +66,10 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared
static bool allowFloat32Optimizations() {
return true;
}
static bool allowInlineForkJoinGetSlice() {
return true;
}
};
typedef LIRGeneratorX86 LIRGeneratorSpecific;


@@ -1422,8 +1422,8 @@ ForkJoinShared::execute()
}
#ifdef DEBUG
Spew(SpewOps, "Completed parallel job [slices %d, threads: %d (+1), stolen: %d (work stealing:%s)]",
sliceTo_ - sliceFrom_,
Spew(SpewOps, "Completed parallel job [slices: %d, threads: %d, stolen: %d (work stealing:%s)]",
sliceTo_ - sliceFrom_ + 1,
threadPool_->numWorkers(),
threadPool_->stolenSlices(),
threadPool_->workStealing() ? "ON" : "OFF");

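Two fixes land in this spew line: the slice count becomes sliceTo_ - sliceFrom_ + 1 (the bounds are evidently inclusive), and the "(+1)" annotation on the thread count is dropped, consistent with the ThreadPool change below where workers_[0] is the main-thread worker, so numWorkers() already counts it.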

@@ -406,6 +406,11 @@ class ForkJoinContext : public ThreadSafeContext
// Initializes the thread-local state.
static bool initialize();
// Used in inlining ForkJoinGetSlice.
static size_t offsetOfWorker() {
return offsetof(ForkJoinContext, worker_);
}
private:
friend class AutoSetForkJoinContext;


@@ -99,6 +99,10 @@ class ThreadPoolWorker
static size_t offsetOfSliceBounds() {
return offsetof(ThreadPoolWorker, sliceBounds_);
}
static size_t offsetOfSchedulerRNGState() {
return offsetof(ThreadPoolWorker, schedulerRNGState_);
}
};
/////////////////////////////////////////////////////////////////////////////
@@ -192,6 +196,18 @@ class ThreadPool : public Monitor
ThreadPoolWorker *mainThreadWorker() { return workers_[0]; }
public:
#ifdef DEBUG
static size_t offsetOfStolenSlices() {
return offsetof(ThreadPool, stolenSlices_);
}
#endif
static size_t offsetOfPendingSlices() {
return offsetof(ThreadPool, pendingSlices_);
}
static size_t offsetOfWorkers() {
return offsetof(ThreadPool, workers_);
}
ThreadPool(JSRuntime *rt);
~ThreadPool();