mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 969722 - Part 2: Inline ForkJoinGetSlice. (r=nmatsakis)
This commit is contained in:
parent
31a57bfaef
commit
6e0c6962f0
@ -1201,7 +1201,6 @@ public:
|
||||
// Otherwise, [%base+offset] -> %eax.
|
||||
spew("cmpxchg %s, %s0x%x(%s)",
|
||||
nameIReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
|
||||
m_formatter.oneByteOp(PRE_LOCK);
|
||||
m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, src, base, offset);
|
||||
}
|
||||
|
||||
@ -1406,6 +1405,14 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Emit a register-to-register `cmpw`: the operand-size prefix is written
// first, followed by the one-byte OP_CMP_EvGv opcode.
void cmpw_rr(RegisterID src, RegisterID dst)
{
    spew("cmpw %s, %s", nameIReg(2, src), nameIReg(2, dst));

    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_CMP_EvGv, src, dst);
}
|
||||
|
||||
void cmpw_rm(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
|
||||
{
|
||||
FIXME_INSN_PRINTING;
|
||||
@ -2064,6 +2071,13 @@ public:
|
||||
}
|
||||
#endif
|
||||
|
||||
// Emit a register-to-register `movzwl` via the two-byte OP2_MOVZX_GvEw
// opcode. The spew names |src| as a 2-byte register and |dst| as a 4-byte
// register.
void movzwl_rr(RegisterID src, RegisterID dst)
{
    spew("movzwl %s, %s", nameIReg(2, src), nameIReg(4, dst));
    m_formatter.twoByteOp(OP2_MOVZX_GvEw, dst, src);
}
|
||||
|
||||
void movzwl_mr(int offset, RegisterID base, RegisterID dst)
|
||||
{
|
||||
spew("movzwl %s0x%x(%s), %s",
|
||||
|
@ -79,6 +79,12 @@ CompileRuntime::addressOfInterruptPar()
|
||||
}
|
||||
#endif
|
||||
|
||||
const void *
|
||||
CompileRuntime::addressOfThreadPool()
|
||||
{
|
||||
return &runtime()->threadPool;
|
||||
}
|
||||
|
||||
const JitRuntime *
|
||||
CompileRuntime::jitRuntime()
|
||||
{
|
||||
|
@ -56,6 +56,8 @@ class CompileRuntime
|
||||
const void *addressOfInterruptPar();
|
||||
#endif
|
||||
|
||||
const void *addressOfThreadPool();
|
||||
|
||||
const JitRuntime *jitRuntime();
|
||||
|
||||
// Compilation does not occur off thread when the SPS profiler is enabled.
|
||||
|
@ -157,6 +157,7 @@ JitRuntime::JitRuntime()
|
||||
parallelArgumentsRectifier_(nullptr),
|
||||
invalidator_(nullptr),
|
||||
debugTrapHandler_(nullptr),
|
||||
forkJoinGetSliceStub_(nullptr),
|
||||
functionWrappers_(nullptr),
|
||||
osrTempData_(nullptr),
|
||||
flusher_(nullptr),
|
||||
@ -288,6 +289,18 @@ JitRuntime::debugTrapHandler(JSContext *cx)
|
||||
return debugTrapHandler_;
|
||||
}
|
||||
|
||||
bool
|
||||
JitRuntime::ensureForkJoinGetSliceStubExists(JSContext *cx)
|
||||
{
|
||||
if (!forkJoinGetSliceStub_) {
|
||||
IonSpew(IonSpew_Codegen, "# Emitting ForkJoinGetSlice stub");
|
||||
AutoLockForExclusiveAccess lock(cx);
|
||||
AutoCompartment ac(cx, cx->runtime()->atomsCompartment());
|
||||
forkJoinGetSliceStub_ = generateForkJoinGetSliceStub(cx);
|
||||
}
|
||||
return !!forkJoinGetSliceStub_;
|
||||
}
|
||||
|
||||
uint8_t *
|
||||
JitRuntime::allocateOsrTempData(size_t size)
|
||||
{
|
||||
@ -1667,6 +1680,13 @@ IonCompile(JSContext *cx, JSScript *script,
|
||||
if (!cx->compartment()->jitCompartment()->ensureIonStubsExist(cx))
|
||||
return AbortReason_Alloc;
|
||||
|
||||
if (executionMode == ParallelExecution &&
|
||||
LIRGenerator::allowInlineForkJoinGetSlice() &&
|
||||
!cx->runtime()->jitRuntime()->ensureForkJoinGetSliceStubExists(cx))
|
||||
{
|
||||
return AbortReason_Alloc;
|
||||
}
|
||||
|
||||
MIRGraph *graph = alloc->new_<MIRGraph>(temp);
|
||||
if (!graph)
|
||||
return AbortReason_Alloc;
|
||||
|
@ -674,6 +674,9 @@ class IonBuilder : public MIRGenerator
|
||||
InliningStatus inlineUnsafeSetReservedSlot(CallInfo &callInfo);
|
||||
InliningStatus inlineUnsafeGetReservedSlot(CallInfo &callInfo);
|
||||
|
||||
// ForkJoin intrinsics
|
||||
InliningStatus inlineForkJoinGetSlice(CallInfo &callInfo);
|
||||
|
||||
// Utility intrinsics.
|
||||
InliningStatus inlineIsCallable(CallInfo &callInfo);
|
||||
InliningStatus inlineHaveSameClass(CallInfo &callInfo);
|
||||
|
@ -377,6 +377,10 @@ class MacroAssembler : public MacroAssemblerSpecific
|
||||
rshiftPtr(Imm32(JSString::LENGTH_SHIFT), dest);
|
||||
}
|
||||
|
||||
// Load |worker|'s slice bounds into |dest|.
//
// The slice bounds are a uint32 packing two uint16s (|from| in the upper 16
// bits, |to| in the lower 16), so use a 32-bit load. A pointer-sized load
// would read 8 bytes from this 4-byte field on 64-bit targets.
void loadSliceBounds(Register worker, Register dest) {
    load32(Address(worker, ThreadPoolWorker::offsetOfSliceBounds()), dest);
}
|
||||
|
||||
void loadJSContext(const Register &dest) {
|
||||
loadPtr(AbsoluteAddress(GetIonContext()->runtime->addressOfJSContext()), dest);
|
||||
}
|
||||
@ -833,6 +837,12 @@ class MacroAssembler : public MacroAssemblerSpecific
|
||||
Push(ImmPtr(nullptr));
|
||||
}
|
||||
|
||||
// Materialize the ThreadPool address in |pool| as an immediate.
void loadThreadPool(Register pool) {
    // The address can be baked into generated code: JitRuntimes are tied to
    // JSRuntimes, and each JSRuntime has exactly one ThreadPool.
    const void *poolAddress = GetIonContext()->runtime->addressOfThreadPool();
    movePtr(ImmPtr(poolAddress), pool);
}
|
||||
|
||||
void loadForkJoinContext(Register cx, Register scratch);
|
||||
void loadContext(Register cxReg, Register scratch, ExecutionMode executionMode);
|
||||
|
||||
|
@ -189,6 +189,9 @@ class JitRuntime
|
||||
// Thunk used by the debugger for breakpoint and step mode.
|
||||
JitCode *debugTrapHandler_;
|
||||
|
||||
// Stub used to inline the ForkJoinGetSlice intrinsic.
|
||||
JitCode *forkJoinGetSliceStub_;
|
||||
|
||||
// Map VMFunction addresses to the JitCode of the wrapper.
|
||||
typedef WeakCache<const VMFunction *, JitCode *> VMWrapperMap;
|
||||
VMWrapperMap *functionWrappers_;
|
||||
@ -219,6 +222,7 @@ class JitRuntime
|
||||
JitCode *generateInvalidator(JSContext *cx);
|
||||
JitCode *generatePreBarrier(JSContext *cx, MIRType type);
|
||||
JitCode *generateDebugTrapHandler(JSContext *cx);
|
||||
JitCode *generateForkJoinGetSliceStub(JSContext *cx);
|
||||
JitCode *generateVMWrapper(JSContext *cx, const VMFunction &f);
|
||||
|
||||
JSC::ExecutableAllocator *createIonAlloc(JSContext *cx);
|
||||
@ -321,6 +325,11 @@ class JitRuntime
|
||||
// Accessor for the shapePreBarrier_ member.
JitCode *shapePreBarrier() const
{
    return shapePreBarrier_;
}
|
||||
|
||||
bool ensureForkJoinGetSliceStubExists(JSContext *cx);
|
||||
// Accessor for the ForkJoinGetSlice stub. The member is initialized to
// nullptr and set by ensureForkJoinGetSliceStubExists().
JitCode *forkJoinGetSliceStub() const
{
    return forkJoinGetSliceStub_;
}
|
||||
};
|
||||
|
||||
class JitCompartment
|
||||
|
@ -4785,6 +4785,38 @@ class LForkJoinContext : public LCallInstructionHelper<1, 0, 1>
|
||||
}
|
||||
};
|
||||
|
||||
// LIR node for ForkJoinGetSlice. It has one definition, one operand (the
// ForkJoinContext), and four temps.
class LForkJoinGetSlice : public LInstructionHelper<1, 1, 4>
{
  public:
    LIR_HEADER(ForkJoinGetSlice);

    LForkJoinGetSlice(const LAllocation &cx,
                      const LDefinition &temp1, const LDefinition &temp2,
                      const LDefinition &temp3, const LDefinition &temp4)
    {
        setOperand(0, cx);

        // tempN is stored in temp slot N-1.
        setTemp(0, temp1);
        setTemp(1, temp2);
        setTemp(2, temp3);
        setTemp(3, temp4);
    }

    // The single operand: the ForkJoinContext.
    const LAllocation *forkJoinContext() { return getOperand(0); }

    // Temp accessors, in the order the constructor received them.
    const LDefinition *temp1() { return getTemp(0); }
    const LDefinition *temp2() { return getTemp(1); }
    const LDefinition *temp3() { return getTemp(2); }
    const LDefinition *temp4() { return getTemp(3); }
};
|
||||
|
||||
class LCallGetProperty : public LCallInstructionHelper<BOX_PIECES, BOX_PIECES, 0>
|
||||
{
|
||||
public:
|
||||
|
@ -214,6 +214,7 @@
|
||||
_(StoreFixedSlotT) \
|
||||
_(FunctionEnvironment) \
|
||||
_(ForkJoinContext) \
|
||||
_(ForkJoinGetSlice) \
|
||||
_(GetPropertyCacheV) \
|
||||
_(GetPropertyCacheT) \
|
||||
_(GetPropertyPolymorphicV) \
|
||||
|
@ -146,6 +146,8 @@ IonBuilder::inlineNativeCall(CallInfo &callInfo, JSNative native)
|
||||
if (native == intrinsic_ShouldForceSequential ||
|
||||
native == intrinsic_InParallelSection)
|
||||
return inlineForceSequentialOrInParallelSection(callInfo);
|
||||
if (native == intrinsic_ForkJoinGetSlice)
|
||||
return inlineForkJoinGetSlice(callInfo);
|
||||
|
||||
// Utility intrinsics.
|
||||
if (native == intrinsic_IsCallable)
|
||||
@ -1389,6 +1391,40 @@ IonBuilder::inlineForceSequentialOrInParallelSection(CallInfo &callInfo)
|
||||
MOZ_ASSUME_UNREACHABLE("Invalid execution mode");
|
||||
}
|
||||
|
||||
// Try to inline a call to the ForkJoinGetSlice intrinsic as an
// MForkJoinGetSlice instruction.
//
// Returns InliningStatus_Inlined when the instruction was added and pushed,
// and InliningStatus_NotInlined when the compilation is not in
// ParallelExecution mode or the platform's LIRGenerator does not allow
// inlining ForkJoinGetSlice.
IonBuilder::InliningStatus
IonBuilder::inlineForkJoinGetSlice(CallInfo &callInfo)
{
    if (info().executionMode() != ParallelExecution)
        return InliningStatus_NotInlined;

    // Assert the way the function is used instead of testing, as it is a
    // self-hosted function which must be used in a particular fashion.
    MOZ_ASSERT(callInfo.argc() == 1 && !callInfo.constructing());
    MOZ_ASSERT(callInfo.getArg(0)->type() == MIRType_Int32);
    MOZ_ASSERT(getInlineReturnType() == MIRType_Int32);

    callInfo.setImplicitlyUsedUnchecked();

    // The guard at the top leaves ParallelExecution as the only possible
    // mode here, so no per-mode dispatch is needed.
    if (!LIRGenerator::allowInlineForkJoinGetSlice())
        return InliningStatus_NotInlined;

    MForkJoinGetSlice *getSlice = MForkJoinGetSlice::New(alloc(),
                                                         graph().forkJoinContext());
    current->add(getSlice);
    current->push(getSlice);
    return InliningStatus_Inlined;
}
|
||||
|
||||
IonBuilder::InliningStatus
|
||||
IonBuilder::inlineNewDenseArray(CallInfo &callInfo)
|
||||
{
|
||||
|
@ -7489,6 +7489,33 @@ class MForkJoinContext
|
||||
}
|
||||
};
|
||||
|
||||
// Calls the ForkJoinGetSlice stub, used for inlining the eponymous intrinsic.
|
||||
// Only applicable in ParallelExecution.
|
||||
class MForkJoinGetSlice
|
||||
: public MUnaryInstruction
|
||||
{
|
||||
MForkJoinGetSlice(MDefinition *cx)
|
||||
: MUnaryInstruction(cx)
|
||||
{
|
||||
setResultType(MIRType_Int32);
|
||||
}
|
||||
|
||||
public:
|
||||
INSTRUCTION_HEADER(ForkJoinGetSlice);
|
||||
|
||||
static MForkJoinGetSlice *New(TempAllocator &alloc, MDefinition *cx) {
|
||||
return new(alloc) MForkJoinGetSlice(cx);
|
||||
}
|
||||
|
||||
MDefinition *forkJoinContext() {
|
||||
return getOperand(0);
|
||||
}
|
||||
|
||||
bool possiblyCalls() const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Store to vp[slot] (slots that are not inline in an object).
|
||||
class MStoreSlot
|
||||
: public MBinaryInstruction,
|
||||
|
@ -214,6 +214,7 @@ namespace jit {
|
||||
_(LambdaPar) \
|
||||
_(RestPar) \
|
||||
_(ForkJoinContext) \
|
||||
_(ForkJoinGetSlice) \
|
||||
_(GuardThreadExclusive) \
|
||||
_(InterruptCheckPar) \
|
||||
_(RecompileCheck)
|
||||
|
@ -268,6 +268,7 @@ class ParallelSafetyVisitor : public MInstructionVisitor
|
||||
UNSAFE_OP(InstanceOf)
|
||||
CUSTOM_OP(InterruptCheck)
|
||||
SAFE_OP(ForkJoinContext)
|
||||
SAFE_OP(ForkJoinGetSlice)
|
||||
SAFE_OP(NewPar)
|
||||
SAFE_OP(NewDenseArrayPar)
|
||||
SAFE_OP(NewCallObjectPar)
|
||||
|
@ -2310,3 +2310,15 @@ CodeGeneratorARM::visitNegF(LNegF *ins)
|
||||
masm.ma_vneg_f32(input, ToFloatRegister(ins->output()));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CodeGeneratorARM::visitForkJoinGetSlice(LForkJoinGetSlice *ins)
|
||||
{
|
||||
MOZ_ASSUME_UNREACHABLE("NYI");
|
||||
}
|
||||
|
||||
// Not yet implemented on ARM.
JitCode *
JitRuntime::generateForkJoinGetSliceStub(JSContext *cx)
{
    MOZ_ASSUME_UNREACHABLE("NYI");
}
|
||||
|
@ -170,9 +170,10 @@ class CodeGeneratorARM : public CodeGeneratorShared
|
||||
bool visitAsmJSStoreGlobalVar(LAsmJSStoreGlobalVar *ins);
|
||||
bool visitAsmJSLoadFuncPtr(LAsmJSLoadFuncPtr *ins);
|
||||
bool visitAsmJSLoadFFIFunc(LAsmJSLoadFFIFunc *ins);
|
||||
|
||||
bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
|
||||
|
||||
bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
|
||||
|
||||
bool generateInvalidateEpilogue();
|
||||
protected:
|
||||
void postAsmJSCall(LAsmJSCall *lir) {
|
||||
|
@ -544,4 +544,10 @@ LIRGeneratorARM::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic
|
||||
MOZ_ASSUME_UNREACHABLE("NYI");
|
||||
}
|
||||
|
||||
bool
|
||||
LIRGeneratorARM::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
|
||||
{
|
||||
MOZ_ASSUME_UNREACHABLE("NYI");
|
||||
}
|
||||
|
||||
//__aeabi_uidiv
|
||||
|
@ -88,6 +88,7 @@ class LIRGeneratorARM : public LIRGeneratorShared
|
||||
bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
|
||||
bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
|
||||
bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
|
||||
bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
|
||||
|
||||
static bool allowFloat32Optimizations() {
|
||||
return true;
|
||||
|
@ -525,6 +525,9 @@ class AssemblerX86Shared
|
||||
}
|
||||
void movzwl(const Operand &src, const Register &dest) {
|
||||
switch (src.kind()) {
|
||||
case Operand::REG:
|
||||
masm.movzwl_rr(src.reg(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.movzwl_mr(src.disp(), src.base(), dest.code());
|
||||
break;
|
||||
@ -535,7 +538,9 @@ class AssemblerX86Shared
|
||||
MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
|
||||
// Register-to-register form: forwards both register codes to movzwl_rr.
void movzwl(const Register &src, const Register &dest)
{
    masm.movzwl_rr(src.code(), dest.code());
}
|
||||
void movw(const Register &src, const Operand &dest) {
|
||||
switch (dest.kind()) {
|
||||
case Operand::MEM_REG_DISP:
|
||||
@ -858,6 +863,9 @@ class AssemblerX86Shared
|
||||
// Compare |op| against a pointer immediate by forwarding its value as an
// ImmWord.
void cmpl(const Operand &op, ImmPtr imm)
{
    cmpl(op, ImmWord(uintptr_t(imm.value)));
}
|
||||
// 16-bit register compare: forwards (lhs, rhs) register codes to cmpw_rr.
void cmpw(const Register &lhs, const Register &rhs)
{
    masm.cmpw_rr(lhs.code(), rhs.code());
}
|
||||
// Emit setCC on register |r|, converting |cond| to the underlying
// assembler's Condition type.
void setCC(Condition cond, const Register &r)
{
    masm.setCC_r(static_cast<JSC::X86Assembler::Condition>(cond), r.code());
}
|
||||
@ -866,6 +874,9 @@ class AssemblerX86Shared
|
||||
JS_ASSERT(GeneralRegisterSet(Registers::SingleByteRegs).has(rhs));
|
||||
masm.testb_rr(rhs.code(), lhs.code());
|
||||
}
|
||||
// 16-bit register test. Note that |rhs| is passed first to testw_rr.
void testw(const Register &lhs, const Register &rhs)
{
    masm.testw_rr(rhs.code(), lhs.code());
}
|
||||
// 32-bit register test. Note that |rhs| is passed first to testl_rr.
void testl(const Register &lhs, const Register &rhs)
{
    masm.testl_rr(rhs.code(), lhs.code());
}
|
||||
|
@ -9,7 +9,10 @@
|
||||
#include "mozilla/DebugOnly.h"
|
||||
#include "mozilla/MathAlgorithms.h"
|
||||
|
||||
#include "jsmath.h"
|
||||
|
||||
#include "jit/IonFrames.h"
|
||||
#include "jit/IonLinker.h"
|
||||
#include "jit/JitCompartment.h"
|
||||
#include "jit/RangeAnalysis.h"
|
||||
|
||||
@ -1768,5 +1771,177 @@ CodeGeneratorX86Shared::visitNegF(LNegF *ins)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CodeGeneratorX86Shared::visitForkJoinGetSlice(LForkJoinGetSlice *ins)
|
||||
{
|
||||
MOZ_ASSERT(gen->info().executionMode() == ParallelExecution);
|
||||
MOZ_ASSERT(ToRegister(ins->forkJoinContext()) == ForkJoinGetSliceReg_cx);
|
||||
MOZ_ASSERT(ToRegister(ins->temp1()) == eax);
|
||||
MOZ_ASSERT(ToRegister(ins->temp2()) == edx);
|
||||
MOZ_ASSERT(ToRegister(ins->temp3()) == ForkJoinGetSliceReg_temp0);
|
||||
MOZ_ASSERT(ToRegister(ins->temp4()) == ForkJoinGetSliceReg_temp1);
|
||||
MOZ_ASSERT(ToRegister(ins->output()) == ForkJoinGetSliceReg_output);
|
||||
|
||||
masm.call(gen->jitRuntime()->forkJoinGetSliceStub());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Generate the stub that the inlined ForkJoinGetSlice calls.
//
// Register convention: the ForkJoinContext arrives in ForkJoinGetSliceReg_cx
// (saved on entry and restored before returning); eax, edx,
// ForkJoinGetSliceReg_temp0 and ForkJoinGetSliceReg_temp1 are clobbered; the
// result is left in ForkJoinGetSliceReg_output.
//
// The stub first tries to take a slice from the front of the current
// worker's own bounds. If the worker has none left and work stealing is
// enabled, it tries to steal one from the back of a randomly chosen worker.
// It returns the slice id, or -1 when there is no more work.
//
// Returns the generated code, or nullptr when built without JS_THREADSAFE.
JitCode *
JitRuntime::generateForkJoinGetSliceStub(JSContext *cx)
{
#ifdef JS_THREADSAFE
    MacroAssembler masm(cx);

    // We need two fixed temps. We need to fix eax for cmpxchg, and edx for
    // div.
    Register cxReg = ForkJoinGetSliceReg_cx, worker = cxReg;
    Register pool = ForkJoinGetSliceReg_temp0;
    Register bounds = ForkJoinGetSliceReg_temp1;
    Register output = ForkJoinGetSliceReg_output;

    MOZ_ASSERT(worker != eax && worker != edx);
    MOZ_ASSERT(pool != eax && pool != edx);
    MOZ_ASSERT(bounds != eax && bounds != edx);
    MOZ_ASSERT(output != eax && output != edx);

    Label stealWork, noMoreWork, gotSlice;
    Operand workerSliceBounds(Address(worker, ThreadPoolWorker::offsetOfSliceBounds()));

    // Clobber cx to load the worker.
    masm.push(cxReg);
    masm.loadPtr(Address(cxReg, ForkJoinContext::offsetOfWorker()), worker);

    // Load the thread pool, which is used in all cases below.
    masm.loadThreadPool(pool);

    {
        // Try to get a slice from the current thread.
        Label getOwnSliceLoopHead;
        masm.bind(&getOwnSliceLoopHead);

        // Load the slice bounds for the current thread.
        masm.loadSliceBounds(worker, bounds);

        // The slice bounds is a uint32 composed from two uint16s:
        //   |from| in the upper 16 bits, |to| in the lower 16 bits.
        // Extract |from| into output.
        masm.move32(bounds, output);
        masm.shrl(Imm32(16), output);

        // If we don't have any slices left ourselves, move on to stealing.
        masm.branch16(Assembler::Equal, output, bounds, &stealWork);

        // If we still have work, try to CAS [ from+1, to ].
        masm.move32(bounds, edx);
        masm.add32(Imm32(0x10000), edx);
        masm.move32(bounds, eax);
        masm.atomic_cmpxchg32(edx, workerSliceBounds, eax);
        masm.j(Assembler::NonZero, &getOwnSliceLoopHead);

        // If the CAS succeeded, return |from| in output.
        masm.jump(&gotSlice);
    }

    // Try to steal work.
    masm.bind(&stealWork);

    // It's not technically correct to test whether work-stealing is turned on
    // only during stub-generation time, but it's a DEBUG only thing.
    if (cx->runtime()->threadPool.workStealing()) {
        Label stealWorkLoopHead;
        masm.bind(&stealWorkLoopHead);

        // Check if we have work.
        masm.branch32(Assembler::Equal,
                      Address(pool, ThreadPool::offsetOfPendingSlices()),
                      Imm32(0), &noMoreWork);

        // Get an id at random. The following is an inline of
        // the 32-bit xorshift in ThreadPoolWorker::randomWorker().
        {
            // Reload the current worker.
            masm.loadPtr(Address(StackPointer, 0), cxReg);
            masm.loadPtr(Address(cxReg, ForkJoinContext::offsetOfWorker()), worker);

            // Perform the xorshift to get a random number in eax, using edx
            // as a temp.
            Address rngState(worker, ThreadPoolWorker::offsetOfSchedulerRNGState());
            masm.load32(rngState, eax);
            masm.move32(eax, edx);
            masm.shll(Imm32(ThreadPoolWorker::XORSHIFT_A), eax);
            masm.xor32(edx, eax);
            masm.move32(eax, edx);
            masm.shrl(Imm32(ThreadPoolWorker::XORSHIFT_B), eax);
            masm.xor32(edx, eax);
            masm.move32(eax, edx);
            masm.shll(Imm32(ThreadPoolWorker::XORSHIFT_C), eax);
            masm.xor32(edx, eax);
            masm.store32(eax, rngState);

            // Compute the random worker id by computing % numWorkers. Reuse
            // output as a temp.
            masm.move32(Imm32(0), edx);
            masm.move32(Imm32(cx->runtime()->threadPool.numWorkers()), output);
            masm.udiv(output);
        }

        // Load the worker from the workers array.
        masm.loadPtr(Address(pool, ThreadPool::offsetOfWorkers()), worker);
        masm.loadPtr(BaseIndex(worker, edx, ScalePointer), worker);

        // Try to get a slice from the designated victim worker.
        Label stealSliceFromWorkerLoopHead;
        masm.bind(&stealSliceFromWorkerLoopHead);

        // Load the slice bounds and decompose for the victim worker.
        masm.loadSliceBounds(worker, bounds);
        masm.move32(bounds, eax);
        masm.shrl(Imm32(16), eax);

        // If the victim worker has no more slices left, find another worker.
        masm.branch16(Assembler::Equal, eax, bounds, &stealWorkLoopHead);

        // If the victim worker still has work, try to CAS [ from, to-1 ].
        masm.move32(bounds, output);
        masm.sub32(Imm32(1), output);
        masm.move32(bounds, eax);
        masm.atomic_cmpxchg32(output, workerSliceBounds, eax);
        masm.j(Assembler::NonZero, &stealSliceFromWorkerLoopHead);

        // If the CAS succeeded, return |to-1| in output.
#ifdef DEBUG
        masm.atomic_inc32(Operand(Address(pool, ThreadPool::offsetOfStolenSlices())));
#endif
        // Copies lower 16 bits only.
        masm.movzwl(output, output);
    } else {
        // Work stealing is off and the current worker has no slices left.
        // Without this jump, control would fall through into gotSlice below,
        // decrementing pendingSlices and returning a slice that was never
        // acquired.
        masm.jump(&noMoreWork);
    }

    // If we successfully got a slice, decrement pool->pendingSlices_ and
    // return the slice.
    masm.bind(&gotSlice);
    masm.atomic_dec32(Operand(Address(pool, ThreadPool::offsetOfPendingSlices())));
    masm.pop(cxReg);
    masm.ret();

    // There are no more slices to give out, return -1.
    masm.bind(&noMoreWork);
    masm.move32(Imm32(-1), output);
    masm.pop(cxReg);
    masm.ret();

    Linker linker(masm);
    JitCode *code = linker.newCode<NoGC>(cx, JSC::OTHER_CODE);

#ifdef JS_ION_PERF
    writePerfSpewerJitCodeProfile(code, "ForkJoinGetSliceStub");
#endif

    return code;
#else
    return nullptr;
#endif // JS_THREADSAFE
}
|
||||
|
||||
} // namespace jit
|
||||
} // namespace js
|
||||
|
@ -122,6 +122,8 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
|
||||
virtual bool visitUDivOrMod(LUDivOrMod *ins);
|
||||
virtual bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
|
||||
|
||||
bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
|
||||
|
||||
bool visitNegI(LNegI *lir);
|
||||
bool visitNegD(LNegD *lir);
|
||||
bool visitNegF(LNegF *lir);
|
||||
|
@ -191,6 +191,11 @@ class LIRGeneratorShared : public MInstructionVisitorWithDefaults
|
||||
// Shared default: Float32 optimizations are not allowed.
static bool allowFloat32Optimizations()
{
    return false;
}
|
||||
|
||||
// Whether ForkJoinGetSlice can be inlined. The shared default is no.
static bool allowInlineForkJoinGetSlice()
{
    return false;
}
|
||||
};
|
||||
|
||||
} // namespace jit
|
||||
|
@ -295,3 +295,16 @@ LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32 *ins)
|
||||
LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
|
||||
return define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
|
||||
}
|
||||
|
||||
bool
|
||||
LIRGeneratorX86Shared::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
|
||||
{
|
||||
// We fix eax and edx for cmpxchg and div.
|
||||
LForkJoinGetSlice *lir = new(alloc())
|
||||
LForkJoinGetSlice(useFixed(ins->forkJoinContext(), ForkJoinGetSliceReg_cx),
|
||||
tempFixed(eax),
|
||||
tempFixed(edx),
|
||||
tempFixed(ForkJoinGetSliceReg_temp0),
|
||||
tempFixed(ForkJoinGetSliceReg_temp1));
|
||||
return defineFixed(lir, ins, LAllocation(AnyRegister(ForkJoinGetSliceReg_output)));
|
||||
}
|
||||
|
@ -47,6 +47,7 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
|
||||
bool lowerConstantFloat32(float d, MInstruction *ins);
|
||||
bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
|
||||
bool lowerTruncateFToInt32(MTruncateToInt32 *ins);
|
||||
bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
|
||||
};
|
||||
|
||||
} // namespace jit
|
||||
|
@ -111,6 +111,9 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
// 32-bit AND of an immediate into a memory destination.
void and32(const Imm32 &imm, const Address &dest)
{
    andl(imm, Operand(dest));
}
|
||||
// 32-bit OR of |src| into |dest|; forwards to orl.
void or32(const Register &src, const Register &dest)
{
    orl(src, dest);
}
|
||||
// 32-bit OR of an immediate into |dest|; forwards to orl.
void or32(const Imm32 &imm, const Register &dest)
{
    orl(imm, dest);
}
|
||||
@ -156,6 +159,9 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
// 32-bit XOR of an immediate into |dest|; forwards to xorl.
void xor32(Imm32 imm, Register dest)
{
    xorl(imm, dest);
}
|
||||
// 32-bit XOR of |src| into |dest|; forwards to xorl.
void xor32(Register src, Register dest)
{
    xorl(src, dest);
}
|
||||
// 32-bit NOT of |reg|; forwards to notl.
void not32(Register reg)
{
    notl(reg);
}
|
||||
@ -177,6 +183,10 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
lock_cmpxchg32(src, addr);
|
||||
}
|
||||
|
||||
// Compare two registers with cmpw and jump to |label| when |cond| holds.
void branch16(Condition cond, const Register &lhs, const Register &rhs, Label *label)
{
    cmpw(lhs, rhs);
    j(cond, label);
}
|
||||
void branch32(Condition cond, const Operand &lhs, const Register &rhs, Label *label) {
|
||||
cmpl(lhs, rhs);
|
||||
j(cond, label);
|
||||
@ -201,6 +211,10 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
cmpl(lhs, rhs);
|
||||
j(cond, label);
|
||||
}
|
||||
// Test two registers with testw and jump to |label| when |cond| holds.
void branchTest16(Condition cond, const Register &lhs, const Register &rhs, Label *label)
{
    testw(lhs, rhs);
    j(cond, label);
}
|
||||
void branchTest32(Condition cond, const Register &lhs, const Register &rhs, Label *label) {
|
||||
testl(lhs, rhs);
|
||||
j(cond, label);
|
||||
|
@ -129,6 +129,13 @@ static MOZ_CONSTEXPR_VAR uint32_t NumFloatArgRegs = 8;
|
||||
static MOZ_CONSTEXPR_VAR FloatRegister FloatArgRegs[NumFloatArgRegs] = { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 };
|
||||
#endif
|
||||
|
||||
// The convention used by the ForkJoinGetSlice stub. None of these can be rax
|
||||
// or rdx, which the stub also needs for cmpxchg and div, respectively.
|
||||
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_cx = rdi;
|
||||
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_temp0 = rbx;
|
||||
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_temp1 = rcx;
|
||||
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_output = rsi;
|
||||
|
||||
class ABIArgGenerator
|
||||
{
|
||||
#if defined(XP_WIN)
|
||||
|
@ -55,6 +55,10 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared
|
||||
// x64 allows Float32 optimizations.
static bool allowFloat32Optimizations()
{
    return true;
}
|
||||
|
||||
// x64 allows inlining ForkJoinGetSlice.
static bool allowInlineForkJoinGetSlice()
{
    return true;
}
|
||||
};
|
||||
|
||||
typedef LIRGeneratorX64 LIRGeneratorSpecific;
|
||||
|
@ -55,6 +55,13 @@ static MOZ_CONSTEXPR_VAR Register CallTempReg3 = ecx;
|
||||
static MOZ_CONSTEXPR_VAR Register CallTempReg4 = esi;
|
||||
static MOZ_CONSTEXPR_VAR Register CallTempReg5 = edx;
|
||||
|
||||
// The convention used by the ForkJoinGetSlice stub. None of these can be eax
|
||||
// or edx, which the stub also needs for cmpxchg and div, respectively.
|
||||
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_cx = edi;
|
||||
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_temp0 = ebx;
|
||||
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_temp1 = ecx;
|
||||
static MOZ_CONSTEXPR_VAR Register ForkJoinGetSliceReg_output = esi;
|
||||
|
||||
// We have no arg regs, so our NonArgRegs are just our CallTempReg*
|
||||
static MOZ_CONSTEXPR_VAR Register CallTempNonArgRegs[] = { edi, eax, ebx, ecx, esi, edx };
|
||||
static const uint32_t NumCallTempNonArgRegs =
|
||||
|
@ -66,6 +66,10 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared
|
||||
// x86 allows Float32 optimizations.
static bool allowFloat32Optimizations()
{
    return true;
}
|
||||
|
||||
// x86 allows inlining ForkJoinGetSlice.
static bool allowInlineForkJoinGetSlice()
{
    return true;
}
|
||||
};
|
||||
|
||||
typedef LIRGeneratorX86 LIRGeneratorSpecific;
|
||||
|
@ -1422,8 +1422,8 @@ ForkJoinShared::execute()
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
Spew(SpewOps, "Completed parallel job [slices %d, threads: %d (+1), stolen: %d (work stealing:%s)]",
|
||||
sliceTo_ - sliceFrom_,
|
||||
Spew(SpewOps, "Completed parallel job [slices: %d, threads: %d, stolen: %d (work stealing:%s)]",
|
||||
sliceTo_ - sliceFrom_ + 1,
|
||||
threadPool_->numWorkers(),
|
||||
threadPool_->stolenSlices(),
|
||||
threadPool_->workStealing() ? "ON" : "OFF");
|
||||
|
@ -406,6 +406,11 @@ class ForkJoinContext : public ThreadSafeContext
|
||||
// Initializes the thread-local state.
|
||||
static bool initialize();
|
||||
|
||||
// Used in inlining GetForkJoinSlice.
|
||||
static size_t offsetOfWorker() {
|
||||
return offsetof(ForkJoinContext, worker_);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class AutoSetForkJoinContext;
|
||||
|
||||
|
@ -99,6 +99,10 @@ class ThreadPoolWorker
|
||||
static size_t offsetOfSliceBounds() {
|
||||
return offsetof(ThreadPoolWorker, sliceBounds_);
|
||||
}
|
||||
|
||||
static size_t offsetOfSchedulerRNGState() {
|
||||
return offsetof(ThreadPoolWorker, schedulerRNGState_);
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
@ -192,6 +196,18 @@ class ThreadPool : public Monitor
|
||||
// The main thread's worker is the first entry of workers_.
ThreadPoolWorker *mainThreadWorker()
{
    return workers_[0];
}
|
||||
|
||||
public:
|
||||
#ifdef DEBUG
|
||||
static size_t offsetOfStolenSlices() {
|
||||
return offsetof(ThreadPool, stolenSlices_);
|
||||
}
|
||||
#endif
|
||||
static size_t offsetOfPendingSlices() {
|
||||
return offsetof(ThreadPool, pendingSlices_);
|
||||
}
|
||||
static size_t offsetOfWorkers() {
|
||||
return offsetof(ThreadPool, workers_);
|
||||
}
|
||||
|
||||
ThreadPool(JSRuntime *rt);
|
||||
~ThreadPool();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user