Bug 958370 - Unify chunks and slices; self-host the scheduler's slice processing loop. (r=nmatsakis)

This commit is contained in:
Shu-yu Guo 2014-02-07 14:40:29 -08:00
parent b674035d7c
commit c6165cfa30
11 changed files with 561 additions and 711 deletions

View File

@ -569,20 +569,6 @@ function ArrayKeys() {
#define ASSERT_SEQUENTIAL_IS_OK(MODE) \
do { if (MODE) AssertSequentialIsOK(MODE) } while(false)
/* Slice array: see ComputeAllSliceBounds() */
#define SLICE_INFO(START, END) START, END, START, 0
#define SLICE_START(ID) ((ID << 2) + 0)
#define SLICE_END(ID) ((ID << 2) + 1)
#define SLICE_POS(ID) ((ID << 2) + 2)
/*
* How many items at a time do we do recomputation for parallel execution.
* Note that filter currently assumes that this is no greater than 32
* in order to make use of a bitset.
*/
#define CHUNK_SHIFT 5
#define CHUNK_SIZE 32
/* Safe versions of ARRAY.push(ELEMENT) */
#define ARRAY_PUSH(ARRAY, ELEMENT) \
callFunction(std_Array_push, ARRAY, ELEMENT);
@ -597,74 +583,94 @@ function ArrayKeys() {
#define ParallelSpew(args)
#endif
/**
* Determine the number of chunks of size CHUNK_SIZE;
* note that the final chunk may be smaller than CHUNK_SIZE.
*/
function ComputeNumChunks(length) {
var chunks = length >>> CHUNK_SHIFT;
if (chunks << CHUNK_SHIFT === length)
return chunks;
return chunks + 1;
}
#define SLICES_PER_WORKER 8
#define MAX_SLICE_SHIFT 6
#define MAX_SLICE_SIZE 64
#define MAX_SLICES_PER_WORKER 8
/**
* Compute the number of slices given an array length and the number of
* chunks. Used in tandem with the workstealing scheduler.
* Determine the number and size of slices.
*/
function ComputeNumSlices(workers, length, chunks) {
if (length !== 0) {
var slices = workers * SLICES_PER_WORKER;
if (chunks < slices)
return workers;
return slices;
}
return workers;
function ComputeSlicesInfo(length) {
var count = length >>> MAX_SLICE_SHIFT;
var numWorkers = ForkJoinNumWorkers();
if (count < numWorkers)
count = numWorkers;
else if (count >= numWorkers * MAX_SLICES_PER_WORKER)
count = numWorkers * MAX_SLICES_PER_WORKER;
// Round the slice size to be a power of 2.
var shift = std_Math_max(std_Math_log2(length / count) | 0, 1);
// Recompute count with the rounded size.
count = length >>> shift;
if (count << shift !== length)
count += 1;
return { shift: shift, statuses: new Uint8Array(count), lastSequentialId: 0 };
}
/**
* Computes the bounds for slice |sliceIndex| of |numItems| items,
* assuming |numSlices| total slices. If numItems is not evenly
* divisible by numSlices, then the final thread may have a bit of
* extra work.
* Macros to help compute the start and end indices of slices based on id. Use
* with the object returned by ComputeSliceInfo.
*/
function ComputeSliceBounds(numItems, sliceIndex, numSlices) {
var sliceWidth = (numItems / numSlices) | 0;
var extraChunks = (numItems % numSlices) | 0;
#define SLICE_START(info, id) \
(id << info.shift)
#define SLICE_END(info, start, length) \
std_Math_min(start + (1 << info.shift), length)
#define SLICE_COUNT(info) \
info.statuses.length
var startIndex = sliceWidth * sliceIndex + std_Math_min(extraChunks, sliceIndex);
var endIndex = startIndex + sliceWidth;
if (sliceIndex < extraChunks)
endIndex += 1;
return [startIndex, endIndex];
/**
* ForkJoinGetSlice acts as identity when we are not in a parallel section, so
* pass in the next sequential value when we are in sequential mode. The
* reason for this odd API is because intrinsics *need* to be called during
* ForkJoin's warmup to fill the TI info.
*/
#define GET_SLICE(info, id) \
((id = ForkJoinGetSlice(InParallelSection() ? -1 : NextSequentialSliceId(info, -1))) >= 0)
#define SLICE_STATUS_DONE 1
/**
* Macro to mark a slice as completed in the info object.
*/
#define MARK_SLICE_DONE(info, id) \
UnsafePutElements(info.statuses, id, SLICE_STATUS_DONE)
/**
* Reset the status array of the slices info object.
*/
function SlicesInfoClearStatuses(info) {
var statuses = info.statuses;
var length = statuses.length;
for (var i = 0; i < length; i++)
UnsafePutElements(statuses, i, 0);
info.lastSequentialId = 0;
}
/**
* Divides |numItems| items amongst |numSlices| slices. The result
* is an array containing multiple values per slice: the start
* index, end index, current position, and some padding. The
* current position is initially the same as the start index. To
* access the values for a particular slice, use the macros
* SLICE_START() and so forth.
* Compute the slice such that all slices before it (but not including it) are
* completed.
*/
function ComputeAllSliceBounds(numItems, numSlices) {
// FIXME(bug 844890): Use typed arrays here.
var sliceWidth = (numItems / numSlices) | 0;
var extraChunks = (numItems % numSlices) | 0;
var counter = 0;
var info = [];
var i = 0;
for (; i < extraChunks; i++) {
ARRAY_PUSH(info, SLICE_INFO(counter, counter + sliceWidth + 1));
counter += sliceWidth + 1;
function NextSequentialSliceId(info, doneMarker) {
var statuses = info.statuses;
var length = statuses.length;
for (var i = info.lastSequentialId; i < length; i++) {
if (statuses[i] === SLICE_STATUS_DONE)
continue;
info.lastSequentialId = i;
return i;
}
for (; i < numSlices; i++) {
ARRAY_PUSH(info, SLICE_INFO(counter, counter + sliceWidth));
counter += sliceWidth;
}
return info;
return doneMarker == undefined ? length : doneMarker;
}
/**
* Determinism-preserving bounds function.
*/
function ShrinkLeftmost(info) {
return function () {
return [NextSequentialSliceId(info), SLICE_COUNT(info)]
};
}
/**
@ -691,41 +697,28 @@ function ArrayMapPar(func, mode) {
if (!TRY_PARALLEL(mode))
break parallel;
var chunks = ComputeNumChunks(length);
var numWorkers = ForkJoinNumWorkers();
var numSlices = ComputeNumSlices(numWorkers, length, chunks);
var info = ComputeAllSliceBounds(chunks, numSlices);
ForkJoin(mapSlice, ForkJoinMode(mode), numSlices);
var slicesInfo = ComputeSlicesInfo(length);
ForkJoin(mapThread, ShrinkLeftmost(slicesInfo), ForkJoinMode(mode));
return buffer;
}
// Sequential fallback:
ASSERT_SEQUENTIAL_IS_OK(mode);
for (var i = 0; i < length; i++) {
// Note: Unlike JS arrays, parallel arrays cannot have holes.
var v = func(self[i], i, self);
UnsafePutElements(buffer, i, v);
}
for (var i = 0; i < length; i++)
UnsafePutElements(buffer, i, func(self[i], i, self));
return buffer;
function mapSlice(sliceId, warmup) {
var chunkPos = info[SLICE_POS(sliceId)];
var chunkEnd = info[SLICE_END(sliceId)];
if (warmup && chunkEnd > chunkPos + 1)
chunkEnd = chunkPos + 1;
while (chunkPos < chunkEnd) {
var indexStart = chunkPos << CHUNK_SHIFT;
var indexEnd = std_Math_min(indexStart + CHUNK_SIZE, length);
function mapThread(warmup) {
var sliceId;
while (GET_SLICE(slicesInfo, sliceId)) {
var indexStart = SLICE_START(slicesInfo, sliceId);
var indexEnd = SLICE_END(slicesInfo, indexStart, length);
for (var i = indexStart; i < indexEnd; i++)
UnsafePutElements(buffer, i, func(self[i], i, self));
UnsafePutElements(info, SLICE_POS(sliceId), ++chunkPos);
MARK_SLICE_DONE(slicesInfo, sliceId);
if (warmup)
return;
}
return chunkEnd === info[SLICE_END(sliceId)];
}
return undefined;
@ -751,15 +744,12 @@ function ArrayReducePar(func, mode) {
if (!TRY_PARALLEL(mode))
break parallel;
var chunks = ComputeNumChunks(length);
var numWorkers = ForkJoinNumWorkers();
if (chunks < numWorkers)
break parallel;
var numSlices = ComputeNumSlices(numWorkers, length, chunks);
var info = ComputeAllSliceBounds(chunks, numSlices);
var slicesInfo = ComputeSlicesInfo(length);
var numSlices = SLICE_COUNT(slicesInfo);
var subreductions = NewDenseArray(numSlices);
ForkJoin(reduceSlice, ForkJoinMode(mode), numSlices);
ForkJoin(reduceThread, ShrinkLeftmost(slicesInfo), ForkJoinMode(mode));
var accumulator = subreductions[0];
for (var i = 1; i < numSlices; i++)
accumulator = func(accumulator, subreductions[i]);
@ -773,46 +763,19 @@ function ArrayReducePar(func, mode) {
accumulator = func(accumulator, self[i]);
return accumulator;
function reduceSlice(sliceId, warmup) {
var chunkStart = info[SLICE_START(sliceId)];
var chunkPos = info[SLICE_POS(sliceId)];
var chunkEnd = info[SLICE_END(sliceId)];
// (*) This function is carefully designed so that the warmup
// (which executes with chunkStart === chunkPos) will execute all
// potential loads and stores. In particular, the warmup run
// processes two chunks rather than one. Moreover, it stores
// accumulator into subreductions and then loads it again to
// ensure that the load is executed during the warmup, as it will
// certainly be executed during subsequent runs.
if (warmup && chunkEnd > chunkPos + 2)
chunkEnd = chunkPos + 2;
if (chunkStart === chunkPos) {
var indexPos = chunkStart << CHUNK_SHIFT;
var accumulator = reduceChunk(self[indexPos], indexPos + 1, indexPos + CHUNK_SIZE);
UnsafePutElements(subreductions, sliceId, accumulator, // see (*) above
info, SLICE_POS(sliceId), ++chunkPos);
function reduceThread(warmup) {
var sliceId;
while (GET_SLICE(slicesInfo, sliceId)) {
var indexStart = SLICE_START(slicesInfo, sliceId);
var indexEnd = SLICE_END(slicesInfo, indexStart, length);
var accumulator = self[indexStart];
for (var i = indexStart + 1; i < indexEnd; i++)
accumulator = func(accumulator, self[i]);
UnsafePutElements(subreductions, sliceId, accumulator);
MARK_SLICE_DONE(slicesInfo, sliceId);
if (warmup)
return;
}
var accumulator = subreductions[sliceId]; // see (*) above
while (chunkPos < chunkEnd) {
var indexPos = chunkPos << CHUNK_SHIFT;
accumulator = reduceChunk(accumulator, indexPos, indexPos + CHUNK_SIZE);
UnsafePutElements(subreductions, sliceId, accumulator, info, SLICE_POS(sliceId), ++chunkPos);
}
return chunkEnd === info[SLICE_END(sliceId)];
}
function reduceChunk(accumulator, from, to) {
to = std_Math_min(to, length);
for (var i = from; i < to; i++)
accumulator = func(accumulator, self[i]);
return accumulator;
}
return undefined;
@ -841,16 +804,11 @@ function ArrayScanPar(func, mode) {
if (!TRY_PARALLEL(mode))
break parallel;
var chunks = ComputeNumChunks(length);
var numWorkers = ForkJoinNumWorkers();
if (chunks < numWorkers)
break parallel;
var numSlices = ComputeNumSlices(numWorkers, length, chunks);
var info = ComputeAllSliceBounds(chunks, numSlices);
var slicesInfo = ComputeSlicesInfo(length);
var numSlices = SLICE_COUNT(slicesInfo);
// Scan slices individually (see comment on phase1()).
ForkJoin(phase1, ForkJoinMode(mode), numSlices);
ForkJoin(phase1, ShrinkLeftmost(slicesInfo), ForkJoinMode(mode));
// Compute intermediates array (see comment on phase2()).
var intermediates = [];
@ -861,16 +819,14 @@ function ArrayScanPar(func, mode) {
ARRAY_PUSH(intermediates, accumulator);
}
// Reset the current position information for each slice, but
// convert from chunks to indices (see comment on phase2()).
for (var i = 0; i < numSlices; i++) {
info[SLICE_POS(i)] = info[SLICE_START(i)] << CHUNK_SHIFT;
info[SLICE_END(i)] = info[SLICE_END(i)] << CHUNK_SHIFT;
}
info[SLICE_END(numSlices - 1)] = std_Math_min(info[SLICE_END(numSlices - 1)], length);
// Clear the slices' statuses in between phases.
SlicesInfoClearStatuses(slicesInfo);
// There is no work to be done for slice 0, so mark it as done.
MARK_SLICE_DONE(slicesInfo, 0);
// Complete each slice using intermediates array (see comment on phase2()).
ForkJoin(phase2, ForkJoinMode(mode), numSlices);
ForkJoin(phase2, ShrinkLeftmost(slicesInfo), ForkJoinMode(mode));
return buffer;
}
@ -904,46 +860,23 @@ function ArrayScanPar(func, mode) {
*
* Read on in phase2 to see what we do next!
*/
function phase1(sliceId, warmup) {
var chunkStart = info[SLICE_START(sliceId)];
var chunkPos = info[SLICE_POS(sliceId)];
var chunkEnd = info[SLICE_END(sliceId)];
if (warmup && chunkEnd > chunkPos + 2)
chunkEnd = chunkPos + 2;
if (chunkPos === chunkStart) {
// For the first chunk, the accumulator begins as the value in
// the input at the start of the chunk.
var indexStart = chunkPos << CHUNK_SHIFT;
var indexEnd = std_Math_min(indexStart + CHUNK_SIZE, length);
function phase1(warmup) {
var sliceId;
while (GET_SLICE(slicesInfo, sliceId)) {
var indexStart = SLICE_START(slicesInfo, sliceId);
var indexEnd = SLICE_END(slicesInfo, indexStart, length);
scan(self[indexStart], indexStart, indexEnd);
UnsafePutElements(info, SLICE_POS(sliceId), ++chunkPos);
MARK_SLICE_DONE(slicesInfo, sliceId);
if (warmup)
return;
}
while (chunkPos < chunkEnd) {
// For each subsequent chunk, the accumulator begins as the
// combination of the final value of prev chunk and the value in
// the input at the start of this chunk. Note that this loop is
// written as simple as possible, at the cost of an extra read
// from the buffer per iteration.
var indexStart = chunkPos << CHUNK_SHIFT;
var indexEnd = std_Math_min(indexStart + CHUNK_SIZE, length);
var accumulator = func(buffer[indexStart - 1], self[indexStart]);
scan(accumulator, indexStart, indexEnd);
UnsafePutElements(info, SLICE_POS(sliceId), ++chunkPos);
}
return chunkEnd === info[SLICE_END(sliceId)];
}
/**
* Computes the index of the final element computed by the slice |sliceId|.
*/
function finalElement(sliceId) {
var chunkEnd = info[SLICE_END(sliceId)]; // last chunk written by |sliceId| is endChunk - 1
var indexStart = std_Math_min(chunkEnd << CHUNK_SHIFT, length);
return indexStart - 1;
return SLICE_END(slicesInfo, SLICE_START(slicesInfo, sliceId), length) - 1;
}
/**
@ -978,32 +911,21 @@ function ArrayScanPar(func, mode) {
* |intermediates[1-1]|, which is |A+B+C|, so that the final
* result is [(A+B+C)+D, (A+B+C)+(D+E), (A+B+C)+(D+E+F)]. Again I
* am using parentheses to clarify how these results were reduced.
*
* SUBTLE: Because we are mutating |buffer| in place, we have to
* be very careful about bailouts! We cannot checkpoint a chunk
* at a time as we do elsewhere because that assumes it is safe to
* replay the portion of a chunk which was already processed.
* Therefore, in this phase, we track the current position at an
* index granularity, although this requires two memory writes per
* index.
*/
function phase2(sliceId, warmup) {
if (sliceId === 0)
return true; // No work to do for the 0th slice.
function phase2(warmup) {
var sliceId;
while (GET_SLICE(slicesInfo, sliceId)) {
var indexPos = SLICE_START(slicesInfo, sliceId);
var indexEnd = SLICE_END(slicesInfo, indexPos, length);
var indexPos = info[SLICE_POS(sliceId)];
var indexEnd = info[SLICE_END(sliceId)];
var intermediate = intermediates[sliceId - 1];
for (; indexPos < indexEnd; indexPos++)
UnsafePutElements(buffer, indexPos, func(intermediate, buffer[indexPos]));
if (warmup)
indexEnd = std_Math_min(indexEnd, indexPos + CHUNK_SIZE);
var intermediate = intermediates[sliceId - 1];
for (; indexPos < indexEnd; indexPos++) {
UnsafePutElements(buffer, indexPos, func(intermediate, buffer[indexPos]),
info, SLICE_POS(sliceId), indexPos + 1);
MARK_SLICE_DONE(slicesInfo, sliceId);
if (warmup)
return;
}
return indexEnd === info[SLICE_END(sliceId)];
}
return undefined;
@ -1039,210 +961,17 @@ function ArrayScatterPar(targets, defaultValue, conflictFunc, length, mode) {
if (length === undefined)
length = self.length;
// The Divide-Scatter-Vector strategy:
// 1. Slice |targets| array of indices ("scatter-vector") into N
// parts.
// 2. Each of the N threads prepares an output buffer and a
// write-log.
// 3. Each thread scatters according to one of the N parts into its
// own output buffer, tracking written indices in the write-log
// and resolving any resulting local collisions in parallel.
// 4. Merge the parts (either in parallel or sequentially), using
// the write-logs as both the basis for finding merge-inputs and
// for detecting collisions.
// The Divide-Output-Range strategy:
// 1. Slice the range of indices [0..|length|-1] into N parts.
// Allocate a single shared output buffer of length |length|.
// 2. Each of the N threads scans (the entirety of) the |targets|
// array, seeking occurrences of indices from that thread's part
// of the range, and writing the results into the shared output
// buffer.
// 3. Since each thread has its own portion of the output range,
// every collision that occurs can be handled thread-locally.
// SO:
//
// If |targets.length| >> |length|, Divide-Scatter-Vector seems like
// a clear win over Divide-Output-Range, since for the latter, the
// expense of redundantly scanning the |targets| will diminish the
// gain from processing |length| in parallel, while for the former,
// the total expense of building separate output buffers and the
// merging post-process is small compared to the gain from
// processing |targets| in parallel.
//
// If |targets.length| << |length|, then Divide-Output-Range seems
// like it *could* win over Divide-Scatter-Vector. (But when is
// |targets.length| << |length| or even |targets.length| < |length|?
// Seems like an odd situation and an uncommon case at best.)
//
// The unanswered question is which strategy performs better when
// |targets.length| approximately equals |length|, especially for
// special cases like collision-free scatters and permutations.
var targetsLength = std_Math_min(targets.length, self.length);
if (!IS_UINT32(targetsLength) || !IS_UINT32(length))
ThrowError(JSMSG_BAD_ARRAY_LENGTH);
parallel: for (;;) { // see ArrayMapPar() to explain why for(;;) etc
if (ShouldForceSequential())
break parallel;
if (!TRY_PARALLEL(mode))
break parallel;
if (forceDivideScatterVector())
return parDivideScatterVector();
else if (forceDivideOutputRange())
return parDivideOutputRange();
else if (conflictFunc === undefined && targetsLength < length)
return parDivideOutputRange();
return parDivideScatterVector();
}
// FIXME: Bug 965609: Find a better parallel strategy for scatter.
// Sequential fallback:
ASSERT_SEQUENTIAL_IS_OK(mode);
return seq();
function forceDivideScatterVector() {
return mode && mode.strategy && mode.strategy === "divide-scatter-vector";
}
function forceDivideOutputRange() {
return mode && mode.strategy && mode.strategy === "divide-output-range";
}
function collide(elem1, elem2) {
if (conflictFunc === undefined)
ThrowError(JSMSG_PAR_ARRAY_SCATTER_CONFLICT);
return conflictFunc(elem1, elem2);
}
function parDivideOutputRange() {
var chunks = ComputeNumChunks(targetsLength);
var numSlices = ComputeNumSlices(ForkJoinNumWorkers(), length, chunks);
var checkpoints = NewDenseArray(numSlices);
for (var i = 0; i < numSlices; i++)
UnsafePutElements(checkpoints, i, 0);
var buffer = NewDenseArray(length);
var conflicts = NewDenseArray(length);
for (var i = 0; i < length; i++) {
UnsafePutElements(buffer, i, defaultValue);
UnsafePutElements(conflicts, i, false);
}
ForkJoin(fill, ForkJoinMode(mode), numSlices);
return buffer;
function fill(sliceId, warmup) {
var indexPos = checkpoints[sliceId];
var indexEnd = targetsLength;
if (warmup)
indexEnd = std_Math_min(indexEnd, indexPos + CHUNK_SIZE);
// Range in the output for which we are responsible:
var [outputStart, outputEnd] = ComputeSliceBounds(length, sliceId, numSlices);
for (; indexPos < indexEnd; indexPos++) {
var x = self[indexPos];
var t = checkTarget(indexPos, targets[indexPos]);
if (t < outputStart || t >= outputEnd)
continue;
if (conflicts[t])
x = collide(x, buffer[t]);
UnsafePutElements(buffer, t, x, conflicts, t, true, checkpoints, sliceId, indexPos + 1);
}
return indexEnd === targetsLength;
}
return undefined;
}
function parDivideScatterVector() {
// Subtle: because we will be mutating the localBuffers and
// conflict arrays in place, we can never replay an entry in the
// target array for fear of inducing a conflict where none existed
// before. Therefore, we must proceed not by chunks but rather by
// individual indices.
var numSlices = ComputeNumSlices(ForkJoinNumWorkers(), length, ComputeNumChunks(length));
var info = ComputeAllSliceBounds(targetsLength, numSlices);
// FIXME(bug 844890): Use typed arrays here.
var localBuffers = NewDenseArray(numSlices);
for (var i = 0; i < numSlices; i++)
UnsafePutElements(localBuffers, i, NewDenseArray(length));
var localConflicts = NewDenseArray(numSlices);
for (var i = 0; i < numSlices; i++) {
var conflicts_i = NewDenseArray(length);
for (var j = 0; j < length; j++)
UnsafePutElements(conflicts_i, j, false);
UnsafePutElements(localConflicts, i, conflicts_i);
}
// Initialize the 0th buffer, which will become the output. For
// the other buffers, we track which parts have been written to
// using the conflict buffer so they do not need to be
// initialized.
var outputBuffer = localBuffers[0];
for (var i = 0; i < length; i++)
UnsafePutElements(outputBuffer, i, defaultValue);
ForkJoin(fill, ForkJoinMode(mode), numSlices);
mergeBuffers();
return outputBuffer;
function fill(sliceId, warmup) {
var indexPos = info[SLICE_POS(sliceId)];
var indexEnd = info[SLICE_END(sliceId)];
if (warmup)
indexEnd = std_Math_min(indexEnd, indexPos + CHUNK_SIZE);
var localbuffer = localBuffers[sliceId];
var conflicts = localConflicts[sliceId];
while (indexPos < indexEnd) {
var x = self[indexPos];
var t = checkTarget(indexPos, targets[indexPos]);
if (conflicts[t])
x = collide(x, localbuffer[t]);
UnsafePutElements(localbuffer, t, x, conflicts, t, true,
info, SLICE_POS(sliceId), ++indexPos);
}
return indexEnd === info[SLICE_END(sliceId)];
}
/**
* Merge buffers 1..NUMSLICES into buffer 0. In principle, we could
* parallelize the merge work as well. But for this first cut,
* just do the merge sequentially.
*/
function mergeBuffers() {
var buffer = localBuffers[0];
var conflicts = localConflicts[0];
for (var i = 1; i < numSlices; i++) {
var otherbuffer = localBuffers[i];
var otherconflicts = localConflicts[i];
for (var j = 0; j < length; j++) {
if (otherconflicts[j]) {
if (conflicts[j]) {
buffer[j] = collide(otherbuffer[j], buffer[j]);
} else {
buffer[j] = otherbuffer[j];
conflicts[j] = true;
}
}
}
}
}
return undefined;
}
function seq() {
var buffer = NewDenseArray(length);
var conflicts = NewDenseArray(length);
@ -1294,13 +1023,7 @@ function ArrayFilterPar(func, mode) {
if (!TRY_PARALLEL(mode))
break parallel;
var chunks = ComputeNumChunks(length);
var numWorkers = ForkJoinNumWorkers();
if (chunks < numWorkers * 2)
break parallel;
var numSlices = ComputeNumSlices(numWorkers, length, chunks);
var info = ComputeAllSliceBounds(chunks, numSlices);
var slicesInfo = ComputeSlicesInfo(length);
// Step 1. Compute which items from each slice of the result
// buffer should be preserved. When we're done, we have an array
@ -1310,11 +1033,15 @@ function ArrayFilterPar(func, mode) {
// preserved from within one slice.
//
// FIXME(bug 844890): Use typed arrays here.
var numSlices = SLICE_COUNT(slicesInfo);
var counts = NewDenseArray(numSlices);
for (var i = 0; i < numSlices; i++)
UnsafePutElements(counts, i, 0);
var survivors = NewDenseArray(chunks);
ForkJoin(findSurvivorsInSlice, ForkJoinMode(mode), numSlices);
var survivors = NewDenseArray(computeNum32BitChunks(length));
ForkJoin(findSurvivorsThread, ShrinkLeftmost(slicesInfo), ForkJoinMode(mode));
// Clear the slices' statuses in between phases.
SlicesInfoClearStatuses(slicesInfo);
// Step 2. Compress the slices into one contiguous set.
var count = 0;
@ -1322,7 +1049,7 @@ function ArrayFilterPar(func, mode) {
count += counts[i];
var buffer = NewDenseArray(count);
if (count > 0)
ForkJoin(copySurvivorsInSlice, ForkJoinMode(mode), numSlices);
ForkJoin(copySurvivorsThread, ShrinkLeftmost(slicesInfo), ForkJoinMode(mode));
return buffer;
}
@ -1337,80 +1064,97 @@ function ArrayFilterPar(func, mode) {
}
return buffer;
/**
* Determine the number of 32-bit chunks for use with the survivors bitset.
*/
function computeNum32BitChunks(length) {
var chunks = length >>> 5;
if (chunks << 5 === length)
return chunks;
return chunks + 1;
}
/**
* As described above, our goal is to determine which items we
* will preserve from a given slice. We do this one chunk at a
* time. When we finish a chunk, we record our current count and
* the next chunk sliceId, lest we should bail.
*/
function findSurvivorsInSlice(sliceId, warmup) {
var chunkPos = info[SLICE_POS(sliceId)];
var chunkEnd = info[SLICE_END(sliceId)];
if (warmup && chunkEnd > chunkPos)
chunkEnd = chunkPos + 1;
var count = counts[sliceId];
while (chunkPos < chunkEnd) {
var indexStart = chunkPos << CHUNK_SHIFT;
var indexEnd = std_Math_min(indexStart + CHUNK_SIZE, length);
var chunkBits = 0;
for (var bit = 0; indexStart + bit < indexEnd; bit++) {
var keep = !!func(self[indexStart + bit], indexStart + bit, self);
chunkBits |= keep << bit;
count += keep;
function findSurvivorsThread(warmup) {
var sliceId;
while (GET_SLICE(slicesInfo, sliceId)) {
var count = 0;
var indexStart = SLICE_START(slicesInfo, sliceId);
var indexEnd = SLICE_END(slicesInfo, indexStart, length);
var chunkStart = computeNum32BitChunks(indexStart);
var chunkEnd = computeNum32BitChunks(indexEnd);
for (var chunkPos = chunkStart; chunkPos < chunkEnd; chunkPos++, indexStart += 32) {
var chunkBits = 0;
for (var bit = 0, indexPos = indexStart; bit < 32 && indexPos < indexEnd; bit++, indexPos++) {
var keep = !!func(self[indexPos], indexPos, self);
chunkBits |= keep << bit;
count += keep;
}
UnsafePutElements(survivors, chunkPos, chunkBits);
}
UnsafePutElements(counts, sliceId, count);
UnsafePutElements(survivors, chunkPos, chunkBits,
counts, sliceId, count,
info, SLICE_POS(sliceId), ++chunkPos);
MARK_SLICE_DONE(slicesInfo, sliceId);
if (warmup)
return;
}
return chunkEnd === info[SLICE_END(sliceId)];
}
function copySurvivorsInSlice(sliceId, warmup) {
// Copies the survivors from this slice into the correct position.
// Note that this is an idempotent operation that does not invoke
// user code. Therefore, we don't expect bailouts and make an
// effort to proceed chunk by chunk or avoid duplicating work.
function copySurvivorsThread(warmup) {
var sliceId;
while (GET_SLICE(slicesInfo, sliceId)) {
// Copies the survivors from this slice into the correct position.
// Note that this is an idempotent operation that does not invoke
// user code. Therefore, we don't expect bailouts and make an
// effort to proceed chunk by chunk or avoid duplicating work.
// Total up the items preserved by previous slices.
var count = 0;
if (sliceId > 0) { // FIXME(#819219)---work around a bug in Ion's range checks
for (var i = 0; i < sliceId; i++)
count += counts[i];
}
// Total up the items preserved by previous slices.
var total = 0;
for (var i = 0; i < sliceId + 1; i++)
total += counts[i];
// Compute the final index we expect to write.
var total = count + counts[sliceId];
if (count === total)
return true;
// Iterate over the chunks assigned to us. Read the bitset for
// each chunk. Copy values where a 1 appears until we have
// written all the values that we expect to. We can just iterate
// from 0...CHUNK_SIZE without fear of a truncated final chunk
// because we are already checking for when count==total.
var chunkStart = info[SLICE_START(sliceId)];
var chunkEnd = info[SLICE_END(sliceId)];
for (var chunk = chunkStart; chunk < chunkEnd; chunk++) {
var chunkBits = survivors[chunk];
if (!chunkBits)
// Compute the final index we expect to write.
var count = total - counts[sliceId];
if (count === total) {
MARK_SLICE_DONE(slicesInfo, sliceId);
continue;
var indexStart = chunk << CHUNK_SHIFT;
for (var i = 0; i < CHUNK_SIZE; i++) {
if (chunkBits & (1 << i)) {
UnsafePutElements(buffer, count++, self[indexStart + i]);
if (count === total)
break;
}
}
}
return true;
// Iterate over the chunks assigned to us. Read the bitset for
// each chunk. Copy values where a 1 appears until we have
// written all the values that we expect to. We can just iterate
// from 0...CHUNK_SIZE without fear of a truncated final chunk
// because we are already checking for when count==total.
var indexStart = SLICE_START(slicesInfo, sliceId);
var indexEnd = SLICE_END(slicesInfo, indexStart, length);
var chunkStart = computeNum32BitChunks(indexStart);
var chunkEnd = computeNum32BitChunks(indexEnd);
for (var chunkPos = chunkStart; chunkPos < chunkEnd; chunkPos++, indexStart += 32) {
var chunkBits = survivors[chunkPos];
if (!chunkBits)
continue;
for (var i = 0; i < 32; i++) {
if (chunkBits & (1 << i)) {
UnsafePutElements(buffer, count++, self[indexStart + i]);
if (count === total)
break;
}
}
if (count == total)
break;
}
MARK_SLICE_DONE(slicesInfo, sliceId);
if (warmup)
return;
}
}
return undefined;
@ -1452,39 +1196,28 @@ function ArrayStaticBuildPar(length, func, mode) {
if (!TRY_PARALLEL(mode))
break parallel;
var chunks = ComputeNumChunks(length);
var numWorkers = ForkJoinNumWorkers();
var numSlices = ComputeNumSlices(numWorkers, length, chunks);
var info = ComputeAllSliceBounds(chunks, numSlices);
ForkJoin(constructSlice, ForkJoinMode(mode), numSlices);
var slicesInfo = ComputeSlicesInfo(length);
ForkJoin(constructThread, ShrinkLeftmost(slicesInfo), ForkJoinMode(mode));
return buffer;
}
// Sequential fallback:
ASSERT_SEQUENTIAL_IS_OK(mode);
fill(0, length);
for (var i = 0; i < length; i++)
UnsafePutElements(buffer, i, func(i));
return buffer;
function constructSlice(sliceId, warmup) {
var chunkPos = info[SLICE_POS(sliceId)];
var chunkEnd = info[SLICE_END(sliceId)];
if (warmup && chunkEnd > chunkPos)
chunkEnd = chunkPos + 1;
while (chunkPos < chunkEnd) {
var indexStart = chunkPos << CHUNK_SHIFT;
var indexEnd = std_Math_min(indexStart + CHUNK_SIZE, length);
fill(indexStart, indexEnd);
UnsafePutElements(info, SLICE_POS(sliceId), ++chunkPos);
function constructThread(warmup) {
var sliceId;
while (GET_SLICE(slicesInfo, sliceId)) {
var indexStart = SLICE_START(slicesInfo, sliceId);
var indexEnd = SLICE_END(slicesInfo, indexStart, length);
for (var i = indexStart; i < indexEnd; i++)
UnsafePutElements(buffer, i, func(i));
MARK_SLICE_DONE(slicesInfo, sliceId);
if (warmup)
return;
}
return chunkEnd === info[SLICE_END(sliceId)];
}
function fill(indexStart, indexEnd) {
for (var i = indexStart; i < indexEnd; i++)
UnsafePutElements(buffer, i, func(i));
}
return undefined;

View File

@ -57,6 +57,7 @@ var std_Math_floor = Math.floor;
var std_Math_max = Math.max;
var std_Math_min = Math.min;
var std_Math_imul = Math.imul;
var std_Math_log2 = Math.log2;
var std_Number_valueOf = Number.prototype.valueOf;
var std_Number_POSITIVE_INFINITY = Number.POSITIVE_INFINITY;
var std_Object_create = Object.create;

View File

@ -100,7 +100,7 @@ function assertEqArray(a, b) {
try {
assertStructuralEq(a[i], b[i]);
} catch (e) {
print("...in index ", i, " of ", l);
print("...in index", i, "of", l);
throw e;
}
}

View File

@ -17,5 +17,6 @@ function test() {
}
}
if (getBuildConfiguration().parallelJS)
test();
// FIXME: Bug 949296. Broken due to all interrupt triggers aborting PJS.
//if (getBuildConfiguration().parallelJS)
// test();

View File

@ -143,7 +143,8 @@ IonBuilder::inlineNativeCall(CallInfo &callInfo, JSNative native)
return inlineUnsafeGetReservedSlot(callInfo);
// Parallel intrinsics.
if (native == intrinsic_ShouldForceSequential)
if (native == intrinsic_ShouldForceSequential ||
native == intrinsic_InParallelSection)
return inlineForceSequentialOrInParallelSection(callInfo);
// Utility intrinsics.

View File

@ -1020,6 +1020,8 @@ bool intrinsic_IsPackedArray(JSContext *cx, unsigned argc, Value *vp);
bool intrinsic_ShouldForceSequential(JSContext *cx, unsigned argc, Value *vp);
bool intrinsic_NewParallelArray(JSContext *cx, unsigned argc, Value *vp);
bool intrinsic_ForkJoinGetSlice(JSContext *cx, unsigned argc, Value *vp);
bool intrinsic_InParallelSection(JSContext *cx, unsigned argc, Value *vp);
class AutoLockForExclusiveAccess
{

View File

@ -48,17 +48,14 @@ using mozilla::ThreadLocal;
// altogether.
static bool
ExecuteSequentially(JSContext *cx_, HandleValue funVal, bool *complete,
uint16_t sliceStart, uint16_t numSlices);
ExecuteSequentially(JSContext *cx_, HandleValue funVal);
#if !defined(JS_THREADSAFE) || !defined(JS_ION)
bool
js::ForkJoin(JSContext *cx, CallArgs &args)
{
RootedValue argZero(cx, args[0]);
bool complete = false; // since warmup is false, will always complete
uint32_t numSlices = args[2].toInt32();
return ExecuteSequentially(cx, argZero, &complete, 0, numSlices);
return ExecuteSequentially(cx, argZero);
}
JSContext *
@ -169,25 +166,16 @@ JS_JITINFO_NATIVE_PARALLEL(js::intrinsic_SetForkJoinTargetRegionInfo,
// Some code that is shared between degenerate and parallel configurations.
static bool
ExecuteSequentially(JSContext *cx, HandleValue funVal, bool *complete,
uint16_t sliceStart, uint16_t numSlices)
ExecuteSequentially(JSContext *cx, HandleValue funVal)
{
bool allComplete = true;
for (uint16_t i = sliceStart; i < numSlices; i++) {
FastInvokeGuard fig(cx, funVal);
InvokeArgs &args = fig.args();
if (!args.init(2))
return false;
args.setCallee(funVal);
args.setThis(UndefinedValue());
args[0].setInt32(i);
args[1].setBoolean(!!cx->runtime()->parallelWarmup);
if (!fig.invoke(cx))
return false;
allComplete = allComplete & args.rval().toBoolean();
}
*complete = allComplete;
return true;
FastInvokeGuard fig(cx, funVal);
InvokeArgs &args = fig.args();
if (!args.init(1))
return false;
args.setCallee(funVal);
args.setThis(UndefinedValue());
args[0].setBoolean(!!cx->runtime()->parallelWarmup);
return fig.invoke(cx);
}
ThreadLocal<ForkJoinContext*> ForkJoinContext::tlsForkJoinContext;
@ -260,7 +248,8 @@ class ForkJoinOperation
RootedScript bailoutScript;
jsbytecode *bailoutBytecode;
ForkJoinOperation(JSContext *cx, HandleObject fun, ForkJoinMode mode, uint16_t numSlices);
ForkJoinOperation(JSContext *cx, HandleFunction fun, HandleFunction boundsFun,
ForkJoinMode mode);
ExecutionStatus apply();
private:
@ -298,18 +287,16 @@ class ForkJoinOperation
};
JSContext *cx_;
HandleObject fun_;
HandleFunction fun_;
HandleFunction boundsFun_;
Vector<ParallelBailoutRecord, 16> bailoutRecords_;
AutoScriptVector worklist_;
Vector<WorklistData, 16> worklistData_;
ForkJoinMode mode_;
uint16_t warmupSlice_;
uint16_t numSlices_;
TrafficLight enqueueInitialScript(ExecutionStatus *status);
TrafficLight compileForParallelExecution(ExecutionStatus *status);
TrafficLight warmupExecution(bool stopIfComplete,
ExecutionStatus *status);
TrafficLight warmupExecution(bool stopIfComplete, ExecutionStatus *status);
TrafficLight parallelExecution(ExecutionStatus *status);
TrafficLight sequentialExecution(bool disqualified, ExecutionStatus *status);
TrafficLight recoverFromBailout(ExecutionStatus *status);
@ -318,12 +305,12 @@ class ForkJoinOperation
bool invalidateBailedOutScripts();
ExecutionStatus sequentialExecution(bool disqualified);
TrafficLight appendCallTargetsToWorklist(uint32_t index,
ExecutionStatus *status);
TrafficLight appendCallTargetToWorklist(HandleScript script,
ExecutionStatus *status);
TrafficLight appendCallTargetsToWorklist(uint32_t index, ExecutionStatus *status);
TrafficLight appendCallTargetToWorklist(HandleScript script, ExecutionStatus *status);
bool addToWorklist(HandleScript script);
inline bool hasScript(Vector<types::RecompileInfo> &scripts, JSScript *script);
bool computeBounds(uint16_t *start, uint16_t *end);
}; // class ForkJoinOperation
class ForkJoinShared : public ParallelJob, public Monitor
@ -331,12 +318,13 @@ class ForkJoinShared : public ParallelJob, public Monitor
/////////////////////////////////////////////////////////////////////////
// Constant fields
JSContext *const cx_; // Current context
ThreadPool *const threadPool_; // The thread pool.
HandleObject fun_; // The JavaScript function to execute.
uint16_t numSlices_; // Total number of slices. Dynamically changed
PRLock *cxLock_; // Locks cx_ for parallel VM calls.
ParallelBailoutRecord *const records_; // Bailout records for each slice
JSContext *const cx_; // Current context
ThreadPool *const threadPool_; // The thread pool
HandleFunction fun_; // The JavaScript function to execute
uint16_t sliceFrom_; // The starting slice id.
uint16_t sliceTo_; // The ending slice id + 1.
PRLock *cxLock_; // Locks cx_ for parallel VM calls
ParallelBailoutRecord *const records_; // Bailout records for each worker
/////////////////////////////////////////////////////////////////////////
// Per-thread arenas
@ -369,8 +357,9 @@ class ForkJoinShared : public ParallelJob, public Monitor
public:
ForkJoinShared(JSContext *cx,
ThreadPool *threadPool,
HandleObject fun,
uint16_t numSlices,
HandleFunction fun,
uint16_t sliceFrom,
uint16_t sliceTo,
ParallelBailoutRecord *records);
~ForkJoinShared();
@ -379,14 +368,13 @@ class ForkJoinShared : public ParallelJob, public Monitor
ParallelResult execute();
// Invoked from parallel worker threads:
virtual bool executeFromWorker(uint16_t sliceId, uint32_t workerId,
uintptr_t stackLimit) MOZ_OVERRIDE;
virtual bool executeFromWorker(uint32_t workerId, uintptr_t stackLimit) MOZ_OVERRIDE;
// Invoked only from the main thread:
virtual bool executeFromMainThread(uint16_t sliceId) MOZ_OVERRIDE;
virtual bool executeFromMainThread() MOZ_OVERRIDE;
// Executes slice |sliceId| either from a worker or the main thread.
void executePortion(PerThreadData *perThread, uint16_t sliceId, uint32_t workerId);
// Executes the user-supplied function a worker or the main thread.
void executePortion(PerThreadData *perThread, uint32_t workerId);
// Moves all the per-thread arenas into the main compartment and processes
// any pending requests for a GC. This can only safely be invoked on the
@ -489,16 +477,16 @@ js::ForkJoin(JSContext *cx, CallArgs &args)
JS_ASSERT(args.length() == 3); // else the self-hosted code is wrong
JS_ASSERT(args[0].isObject());
JS_ASSERT(args[0].toObject().is<JSFunction>());
JS_ASSERT(args[1].isInt32());
JS_ASSERT(args[1].toInt32() < NumForkJoinModes);
JS_ASSERT(args[1].isObject());
JS_ASSERT(args[1].toObject().is<JSFunction>());
JS_ASSERT(args[2].isInt32());
JS_ASSERT(args[2].toInt32() < NumForkJoinModes);
RootedObject fun(cx, &args[0].toObject());
ForkJoinMode mode = (ForkJoinMode) args[1].toInt32();
uint32_t numSlices = args[2].toInt32();
MOZ_ASSERT(uint32_t(uint16_t(numSlices)) == numSlices);
RootedFunction fun(cx, &args[0].toObject().as<JSFunction>());
RootedFunction boundsFun(cx, &args[1].toObject().as<JSFunction>());
ForkJoinMode mode = (ForkJoinMode) args[2].toInt32();
ForkJoinOperation op(cx, fun, mode, numSlices);
ForkJoinOperation op(cx, fun, boundsFun, mode);
ExecutionStatus status = op.apply();
if (status == ExecutionFatal)
return false;
@ -557,24 +545,23 @@ ForkJoinModeString(ForkJoinMode mode) {
return "???";
}
js::ForkJoinOperation::ForkJoinOperation(JSContext *cx, HandleObject fun, ForkJoinMode mode,
uint16_t numSlices)
ForkJoinOperation::ForkJoinOperation(JSContext *cx, HandleFunction fun, HandleFunction boundsFun,
ForkJoinMode mode)
: bailouts(0),
bailoutCause(ParallelBailoutNone),
bailoutScript(cx),
bailoutBytecode(nullptr),
cx_(cx),
fun_(fun),
boundsFun_(boundsFun),
bailoutRecords_(cx),
worklist_(cx),
worklistData_(cx),
mode_(mode),
warmupSlice_(0),
numSlices_(numSlices)
mode_(mode)
{ }
ExecutionStatus
js::ForkJoinOperation::apply()
ForkJoinOperation::apply()
{
ExecutionStatus status;
@ -660,8 +647,8 @@ js::ForkJoinOperation::apply()
return SpewEndOp(sequentialExecution(true));
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::enqueueInitialScript(ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::enqueueInitialScript(ExecutionStatus *status)
{
// GreenLight: script successfully enqueued if necessary
// RedLight: fatal error or fell back to sequential
@ -698,8 +685,8 @@ js::ForkJoinOperation::enqueueInitialScript(ExecutionStatus *status)
return GreenLight;
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::compileForParallelExecution(ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::compileForParallelExecution(ExecutionStatus *status)
{
// GreenLight: all scripts compiled
// RedLight: fatal error or completed work via warmups or fallback
@ -889,8 +876,8 @@ js::ForkJoinOperation::compileForParallelExecution(ExecutionStatus *status)
return GreenLight;
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::appendCallTargetsToWorklist(uint32_t index, ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::appendCallTargetsToWorklist(uint32_t index, ExecutionStatus *status)
{
// GreenLight: call targets appended
// RedLight: fatal error or completed work via warmups or fallback
@ -918,8 +905,8 @@ js::ForkJoinOperation::appendCallTargetsToWorklist(uint32_t index, ExecutionStat
return GreenLight;
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::appendCallTargetToWorklist(HandleScript script, ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::appendCallTargetToWorklist(HandleScript script, ExecutionStatus *status)
{
// GreenLight: call target appended if necessary
// RedLight: fatal error or completed work via warmups or fallback
@ -949,7 +936,7 @@ js::ForkJoinOperation::appendCallTargetToWorklist(HandleScript script, Execution
}
bool
js::ForkJoinOperation::addToWorklist(HandleScript script)
ForkJoinOperation::addToWorklist(HandleScript script)
{
for (uint32_t i = 0; i < worklist_.length(); i++) {
if (worklist_[i] == script) {
@ -977,8 +964,8 @@ js::ForkJoinOperation::addToWorklist(HandleScript script)
return true;
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::sequentialExecution(bool disqualified, ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::sequentialExecution(bool disqualified, ExecutionStatus *status)
{
// RedLight: fatal error or completed work
@ -987,26 +974,21 @@ js::ForkJoinOperation::sequentialExecution(bool disqualified, ExecutionStatus *s
}
ExecutionStatus
js::ForkJoinOperation::sequentialExecution(bool disqualified)
ForkJoinOperation::sequentialExecution(bool disqualified)
{
// XXX use disqualified to set parallelIon to ION_DISABLED_SCRIPT?
Spew(SpewOps, "Executing sequential execution (disqualified=%d).",
disqualified);
bool complete = false;
RootedValue funVal(cx_, ObjectValue(*fun_));
if (!ExecuteSequentially(cx_, funVal, &complete, 0, numSlices_))
if (!ExecuteSequentially(cx_, funVal))
return ExecutionFatal;
// When invoked without the warmup flag set to true, the kernel
// function OUGHT to complete successfully, barring an exception.
JS_ASSERT(complete);
return ExecutionSequential;
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::fatalError(ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::fatalError(ExecutionStatus *status)
{
// RedLight: fatal error
@ -1058,7 +1040,7 @@ BailoutExplanation(ParallelBailoutCause cause)
}
void
js::ForkJoinOperation::determineBailoutCause()
ForkJoinOperation::determineBailoutCause()
{
bailoutCause = ParallelBailoutNone;
for (uint32_t i = 0; i < bailoutRecords_.length(); i++) {
@ -1096,7 +1078,7 @@ js::ForkJoinOperation::determineBailoutCause()
}
bool
js::ForkJoinOperation::invalidateBailedOutScripts()
ForkJoinOperation::invalidateBailedOutScripts()
{
Vector<types::RecompileInfo> invalid(cx_);
for (uint32_t i = 0; i < bailoutRecords_.length(); i++) {
@ -1148,48 +1130,52 @@ js::ForkJoinOperation::invalidateBailedOutScripts()
return true;
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::warmupExecution(bool stopIfComplete, ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::warmupExecution(bool stopIfComplete, ExecutionStatus *status)
{
// GreenLight: warmup succeeded, still more work to do
// RedLight: fatal error or warmup completed all work (check status)
Spew(SpewOps, "Executing warmup of slice %u.", warmupSlice_);
AutoEnterWarmup warmup(cx_->runtime());
RootedValue funVal(cx_, ObjectValue(*fun_));
bool complete;
uint32_t warmupTo = Min<uint16_t>(warmupSlice_ + 1, numSlices_);
if (!ExecuteSequentially(cx_, funVal, &complete, warmupSlice_, warmupTo)) {
uint16_t from, to;
if (!computeBounds(&from, &to)) {
*status = ExecutionFatal;
return RedLight;
}
if (complete) {
warmupSlice_ = warmupTo;
if (warmupSlice_ == numSlices_) {
if (stopIfComplete) {
Spew(SpewOps, "Warmup execution finished all the work.");
*status = ExecutionWarmup;
return RedLight;
}
if (from == to) {
Spew(SpewOps, "Warmup execution finished all the work.");
// If we finished all slices in warmup, be sure to check the
// interrupt flag. This is because we won't be running more JS
// code, and thus no more automatic checking of the interrupt
// flag.
if (!js_HandleExecutionInterrupt(cx_)) {
*status = ExecutionFatal;
return RedLight;
}
if (stopIfComplete) {
*status = ExecutionWarmup;
return RedLight;
}
// If we finished all slices in warmup, be sure to check the
// interrupt flag. This is because we won't be running more JS
// code, and thus no more automatic checking of the interrupt
// flag.
if (!js_HandleExecutionInterrupt(cx_)) {
*status = ExecutionFatal;
return RedLight;
}
return GreenLight;
}
Spew(SpewOps, "Executing warmup.");
AutoEnterWarmup warmup(cx_->runtime());
RootedValue funVal(cx_, ObjectValue(*fun_));
if (!ExecuteSequentially(cx_, funVal)) {
*status = ExecutionFatal;
return RedLight;
}
return GreenLight;
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::parallelExecution(ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::parallelExecution(ExecutionStatus *status)
{
// GreenLight: bailout occurred, keep trying
// RedLight: fatal error or all work completed
@ -1201,10 +1187,20 @@ js::ForkJoinOperation::parallelExecution(ExecutionStatus *status)
ForkJoinActivation activation(cx_);
ThreadPool *threadPool = &cx_->runtime()->threadPool;
uint16_t from, to;
if (!computeBounds(&from, &to)) {
*status = ExecutionFatal;
return RedLight;
}
RootedObject rootedFun(cx_, fun_);
ForkJoinShared shared(cx_, threadPool, rootedFun, numSlices_, &bailoutRecords_[0]);
if (from == to) {
Spew(SpewOps, "Warmup execution finished all the work.");
*status = ExecutionWarmup;
return RedLight;
}
ThreadPool *threadPool = &cx_->runtime()->threadPool;
ForkJoinShared shared(cx_, threadPool, fun_, from, to, &bailoutRecords_[0]);
if (!shared.init()) {
*status = ExecutionFatal;
return RedLight;
@ -1227,8 +1223,8 @@ js::ForkJoinOperation::parallelExecution(ExecutionStatus *status)
return GreenLight;
}
js::ForkJoinOperation::TrafficLight
js::ForkJoinOperation::recoverFromBailout(ExecutionStatus *status)
ForkJoinOperation::TrafficLight
ForkJoinOperation::recoverFromBailout(ExecutionStatus *status)
{
// GreenLight: bailout recovered, try to compile-and-run again
// RedLight: fatal error
@ -1240,7 +1236,7 @@ js::ForkJoinOperation::recoverFromBailout(ExecutionStatus *status)
// After any bailout, we always scan over callee list of main
// function, if nothing else
RootedScript mainScript(cx_, fun_->as<JSFunction>().nonLazyScript());
RootedScript mainScript(cx_, fun_->nonLazyScript());
if (!addToWorklist(mainScript))
return fatalError(status);
@ -1256,7 +1252,7 @@ js::ForkJoinOperation::recoverFromBailout(ExecutionStatus *status)
}
bool
js::ForkJoinOperation::hasScript(Vector<types::RecompileInfo> &scripts, JSScript *script)
ForkJoinOperation::hasScript(Vector<types::RecompileInfo> &scripts, JSScript *script)
{
for (uint32_t i = 0; i < scripts.length(); i++) {
if (scripts[i] == script->parallelIonScript()->recompileInfo())
@ -1265,6 +1261,36 @@ js::ForkJoinOperation::hasScript(Vector<types::RecompileInfo> &scripts, JSScript
return false;
}
bool
ForkJoinOperation::computeBounds(uint16_t *start, uint16_t *end)
{
RootedValue funVal(cx_, ObjectValue(*boundsFun_));
FastInvokeGuard fig(cx_, funVal);
InvokeArgs &args = fig.args();
if (!args.init(0))
return false;
args.setCallee(funVal);
args.setThis(UndefinedValue());
if (!fig.invoke(cx_))
return false;
MOZ_ASSERT(args.rval().toObject().is<ArrayObject>());
MOZ_ASSERT(args.rval().toObject().getDenseInitializedLength() == 2);
int32_t start32 = args.rval().toObject().getDenseElement(0).toInt32();
int32_t end32 = args.rval().toObject().getDenseElement(1).toInt32();
MOZ_ASSERT(int32_t(uint16_t(start32)) == start32);
MOZ_ASSERT(int32_t(uint16_t(end32)) == end32);
*start = uint16_t(start32);
*end = uint16_t(end32);
return true;
}
// Can only enter callees with a valid IonScript.
template <uint32_t maxArgc>
class ParallelIonInvoke
@ -1312,13 +1338,15 @@ class ParallelIonInvoke
ForkJoinShared::ForkJoinShared(JSContext *cx,
ThreadPool *threadPool,
HandleObject fun,
uint16_t numSlices,
HandleFunction fun,
uint16_t sliceFrom,
uint16_t sliceTo,
ParallelBailoutRecord *records)
: cx_(cx),
threadPool_(threadPool),
fun_(fun),
numSlices_(numSlices),
sliceFrom_(sliceFrom),
sliceTo_(sliceTo),
cxLock_(nullptr),
records_(records),
allocators_(cx),
@ -1388,7 +1416,7 @@ ForkJoinShared::execute()
AutoUnlockMonitor unlock(*this);
// Push parallel tasks and wait until they're all done.
jobResult = threadPool_->executeJob(cx_, this, numSlices_);
jobResult = threadPool_->executeJob(cx_, this, sliceFrom_, sliceTo_);
if (jobResult == TP_FATAL)
return TP_FATAL;
}
@ -1404,7 +1432,7 @@ ForkJoinShared::execute()
#ifdef DEBUG
Spew(SpewOps, "Completed parallel job [slices %d, threads: %d (+1), stolen: %d (work stealing:%s)]",
numSlices_,
sliceTo_ - sliceFrom_,
threadPool_->numWorkers(),
threadPool_->stolenSlices(),
threadPool_->workStealing() ? "ON" : "OFF");
@ -1432,10 +1460,8 @@ ForkJoinShared::transferArenasToCompartmentAndProcessGCRequests()
}
bool
ForkJoinShared::executeFromWorker(uint16_t sliceId, uint32_t workerId, uintptr_t stackLimit)
ForkJoinShared::executeFromWorker(uint32_t workerId, uintptr_t stackLimit)
{
JS_ASSERT(sliceId <= numSlices_);
PerThreadData thisThread(cx_->runtime());
if (!thisThread.init()) {
setAbortFlag(true);
@ -1450,21 +1476,21 @@ ForkJoinShared::executeFromWorker(uint16_t sliceId, uint32_t workerId, uintptr_t
// Don't use setIonStackLimit() because that acquires the ionStackLimitLock, and the
// lock has not been initialized in these cases.
thisThread.ionStackLimit = stackLimit;
executePortion(&thisThread, sliceId, workerId);
executePortion(&thisThread, workerId);
TlsPerThreadData.set(nullptr);
return !abort_;
}
bool
ForkJoinShared::executeFromMainThread(uint16_t sliceId)
ForkJoinShared::executeFromMainThread()
{
executePortion(&cx_->mainThread(), sliceId, threadPool_->numWorkers());
executePortion(&cx_->mainThread(), threadPool_->numWorkers());
return !abort_;
}
void
ForkJoinShared::executePortion(PerThreadData *perThread, uint16_t sliceId, uint32_t workerId)
ForkJoinShared::executePortion(PerThreadData *perThread, uint32_t workerId)
{
// WARNING: This code runs ON THE PARALLEL WORKER THREAD.
// Be careful when accessing cx_.
@ -1475,16 +1501,15 @@ ForkJoinShared::executePortion(PerThreadData *perThread, uint16_t sliceId, uint3
JS::AutoAssertNoGC nogc(runtime());
Allocator *allocator = allocators_[workerId];
ForkJoinContext cx(perThread, sliceId, workerId, allocator, this, &records_[workerId]);
ForkJoinContext cx(perThread, workerId, allocator, this, &records_[workerId]);
AutoSetForkJoinContext autoContext(&cx);
#ifdef DEBUG
// Set the maximum worker and slice number for prettier spewing.
cx.maxSliceId = numSlices_ - 1;
cx.maxWorkerId = threadPool_->numWorkers();
#endif
Spew(SpewOps, "Slice up");
Spew(SpewOps, "Up");
// Make a new IonContext for the slice, which is needed if we need to
// re-enter the VM.
@ -1494,10 +1519,7 @@ ForkJoinShared::executePortion(PerThreadData *perThread, uint16_t sliceId, uint3
JS_ASSERT(cx.bailoutRecord->topScript == nullptr);
RootedObject fun(perThread, fun_);
JS_ASSERT(fun->is<JSFunction>());
RootedFunction callee(perThread, &fun->as<JSFunction>());
if (!callee->nonLazyScript()->hasParallelIonScript()) {
if (!fun_->nonLazyScript()->hasParallelIonScript()) {
// Sometimes, particularly with GCZeal, the parallel ion
// script can be collected between starting the parallel
// op and reaching this point. In that case, we just fail
@ -1506,10 +1528,9 @@ ForkJoinShared::executePortion(PerThreadData *perThread, uint16_t sliceId, uint3
cx.bailoutRecord->setCause(ParallelBailoutMainScriptNotPresent);
setAbortFlag(false);
} else {
ParallelIonInvoke<2> fii(cx_->runtime(), callee, 2);
ParallelIonInvoke<2> fii(cx_->runtime(), fun_, 1);
fii.args[0] = Int32Value(cx.sliceId);
fii.args[1] = BooleanValue(false);
fii.args[0] = BooleanValue(false);
bool ok = fii.invoke(perThread);
JS_ASSERT(ok == !cx.bailoutRecord->topScript);
@ -1517,7 +1538,7 @@ ForkJoinShared::executePortion(PerThreadData *perThread, uint16_t sliceId, uint3
setAbortFlag(false);
}
Spew(SpewOps, "Slice down");
Spew(SpewOps, "Down");
}
bool
@ -1595,12 +1616,10 @@ ForkJoinShared::requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason)
// ForkJoinContext
//
ForkJoinContext::ForkJoinContext(PerThreadData *perThreadData,
uint16_t sliceId, uint32_t workerId,
ForkJoinContext::ForkJoinContext(PerThreadData *perThreadData, uint32_t workerId,
Allocator *allocator, ForkJoinShared *shared,
ParallelBailoutRecord *bailoutRecord)
: ThreadSafeContext(shared->runtime(), perThreadData, Context_ForkJoin),
sliceId(sliceId),
workerId(workerId),
bailoutRecord(bailoutRecord),
targetRegionStart(nullptr),
@ -1884,12 +1903,12 @@ class ParallelSpewer
if (ForkJoinContext *cx = ForkJoinContext::current()) {
// Print the format first into a buffer to right-justify the
// worker and slice ids.
// worker ids.
char bufbuf[BufferSize];
JS_snprintf(bufbuf, BufferSize, "[%%sParallel:%%0%du(%%0%du)%%s] ",
NumberOfDigits(cx->maxWorkerId), NumberOfDigits(cx->maxSliceId));
JS_snprintf(bufbuf, BufferSize, "[%%sParallel:%%0%du%%s] ",
NumberOfDigits(cx->maxWorkerId));
JS_snprintf(buf, BufferSize, bufbuf, workerColor(cx->workerId),
cx->workerId, cx->sliceId, reset());
cx->workerId, reset());
} else {
JS_snprintf(buf, BufferSize, "[Parallel:M] ");
}

View File

@ -30,38 +30,49 @@
// to enable parallel execution. At the top-level, it consists of a native
// function (exposed as the ForkJoin intrinsic) that is used like so:
//
// ForkJoin(func, feedback, N)
// ForkJoin(func, boundsFunc, mode)
//
// The intention of this statement is to start |N| copies of |func()|
// running in parallel. Each copy will then do more or less 1/Nth of
// the total work, depending on workstealing-based load balancing.
// The intention of this statement is to start some number (usually the
// number of hardware threads) of copies of |func()| running in parallel. Each
// copy will then do a portion of the total work, depending on
// workstealing-based load balancing.
//
// Typically, each of the N slices runs in a different worker thread,
// but that is not something you should rely upon---if work-stealing
// is enabled it could be that a single worker thread winds up
// handling multiple slices.
// Typically, each of the N slices runs in a different worker thread, but that
// is not something you should rely upon---if work-stealing is enabled it
// could be that a single worker thread winds up handling multiple slices.
//
// The second argument, |feedback|, is an optional callback that will
// receiver information about how execution proceeded. This is
// intended for use in unit testing but also for providing feedback to
// users. Note that gathering the data to provide to |feedback| is
// not free and so execution will run somewhat slower if |feedback| is
// provided.
// The second argument, |boundsFunc|, is a function that must return an array
// of exactly two integers. This function is called before every attempt at
// execution: warmup, sequential, or parallel. The bounds are taken from a
// function call instead of taken as two static integers so that the bounds
// may be shrunk when recovering from bailout.
//
// The third argument, |mode|, is an internal mode integer giving finer
// control over the behavior of ForkJoin. See the |ForkJoinMode| enum.
//
// func() should expect the following arguments:
//
// func(id, n, warmup)
// func(warmup)
//
// Here, |id| is the slice id. |n| is the total number of slices. The
// parameter |warmup| is true for a *warmup or recovery phase*.
// Warmup phases are discussed below in more detail, but the general
// idea is that if |warmup| is true, |func| should only do a fixed
// amount of work. If |warmup| is false, |func| should try to do all
// remaining work it is assigned.
// The parameter |warmup| is true for a *warmup or recovery phase*. Warmup
// phases are discussed below in more detail, but the general idea is that if
// |warmup| is true, |func| should only do a fixed amount of work. If |warmup|
// is false, |func| should try to do all remaining work it is assigned.
//
// Note that we implicitly assume that |func| is tracking how much
// work it has accomplished thus far; some techniques for doing this
// are discussed in |ParallelArray.js|.
// |func| can keep asking for more work from the scheduler by calling the
// intrinsic |ForkJoinGetSlice(id)|. When there are no more slices to hand
// out, -1 is returned as a sentinel value. By exposing this function as an
// intrinsic, we reduce the number of JS-C++ boundary crossings incurred by
// workstealing, which may have many slices.
//
// |func| MUST PROCESS ALL SLICES BEFORE RETURNING! Not doing so is an error
// and is protected by debug asserts in ThreadPool.
//
// Note well that there is a separation of concern between *scheduling* slices
// and *interpreting* slices. ForkJoin only schedules slices by handing out
// slice ids; it does not interpret what slice ids mean. Instead, |func|
// should track how much work it has accomplished thus far; consult |Array.js|
// for some examples.
//
// Warmups and Sequential Fallbacks
// --------------------------------
@ -301,9 +312,6 @@ struct ForkJoinShared;
class ForkJoinContext : public ThreadSafeContext
{
public:
// The slice that is being processed.
const uint16_t sliceId;
// The worker that is doing the work.
const uint32_t workerId;
@ -314,8 +322,7 @@ class ForkJoinContext : public ThreadSafeContext
// Records the last instr. to execute on this thread.
IonLIRTraceData traceData;
// The maximum worker and slice id.
uint16_t maxSliceId;
// The maximum worker id.
uint32_t maxWorkerId;
#endif
@ -336,10 +343,18 @@ class ForkJoinContext : public ThreadSafeContext
uint8_t *targetRegionStart;
uint8_t *targetRegionEnd;
ForkJoinContext(PerThreadData *perThreadData, uint16_t sliceId, uint32_t workerId,
ForkJoinContext(PerThreadData *perThreadData, uint32_t workerId,
Allocator *allocator, ForkJoinShared *shared,
ParallelBailoutRecord *bailoutRecord);
// Get a slice of work for the worker associated with the context.
bool getSlice(uint16_t *sliceId) {
ThreadPool &pool = runtime()->threadPool;
return (isMainThread()
? pool.getSliceForMainThread(sliceId)
: pool.getSliceForWorker(workerId, sliceId));
}
// True if this is the main thread, false if it is one of the parallel workers.
bool isMainThread() const;

View File

@ -316,6 +316,46 @@ intrinsic_ForkJoinNumWorkers(JSContext *cx, unsigned argc, Value *vp)
return true;
}
/*
* ForkJoinGetSlice(id): Returns the id of the next slice to be worked
* on.
*
* Acts as the identity function when called from outside of a ForkJoin
* thread. This odd API is because intrinsics must be called during the
* parallel warm up phase to populate observed type sets, so we must call it
* even during sequential execution. But since there is no thread pool during
sequential execution, the self-hosted code is responsible for computing the
* next sequential slice id and passing it in itself.
*/
// Sequential version of ForkJoinGetSlice(id). Outside of a parallel
// section there is no thread pool scheduler, so this simply echoes back
// the slice id that the self-hosted caller computed itself (identity).
bool
js::intrinsic_ForkJoinGetSlice(JSContext *cx, unsigned argc, Value *vp)
{
    CallArgs callArgs = CallArgsFromVp(argc, vp);
    MOZ_ASSERT(callArgs.length() == 1);
    MOZ_ASSERT(callArgs[0].isInt32());
    callArgs.rval().set(callArgs[0]);
    return true;
}
// Parallel version of ForkJoinGetSlice(id): asks the thread pool for the
// next slice id, returning -1 as the sentinel once no slices remain.
static bool
intrinsic_ForkJoinGetSlicePar(ForkJoinContext *cx, unsigned argc, Value *vp)
{
    CallArgs callArgs = CallArgsFromVp(argc, vp);
    MOZ_ASSERT(callArgs.length() == 1);
    MOZ_ASSERT(callArgs[0].isInt32());
    uint16_t nextSlice;
    int32_t result = cx->getSlice(&nextSlice) ? int32_t(nextSlice) : -1;
    callArgs.rval().setInt32(result);
    return true;
}
JS_JITINFO_NATIVE_PARALLEL(intrinsic_ForkJoinGetSlice_jitInfo,
intrinsic_ForkJoinGetSlicePar);
/*
* NewDenseArray(length): Allocates and returns a new dense array with
* the given length where all values are initialized to holes.
@ -573,6 +613,25 @@ js::intrinsic_ShouldForceSequential(JSContext *cx, unsigned argc, Value *vp)
return true;
}
// Sequential version of InParallelSection(): by definition we are not in
// a parallel section here, so always answer false.
bool
js::intrinsic_InParallelSection(JSContext *cx, unsigned argc, Value *vp)
{
    CallArgs callArgs = CallArgsFromVp(argc, vp);
    callArgs.rval().setBoolean(false);
    return true;
}
// Parallel version of InParallelSection(): this entry point only runs
// inside a parallel section, so always answer true.
static bool
intrinsic_InParallelSectionPar(ForkJoinContext *cx, unsigned argc, Value *vp)
{
    CallArgs callArgs = CallArgsFromVp(argc, vp);
    callArgs.rval().setBoolean(true);
    return true;
}
JS_JITINFO_NATIVE_PARALLEL(intrinsic_InParallelSection_jitInfo,
intrinsic_InParallelSectionPar);
/**
* Returns the default locale as a well-formed, but not necessarily canonicalized,
* BCP-47 language tag.
@ -630,6 +689,12 @@ static const JSFunctionSpec intrinsic_functions[] = {
JS_FNINFO("SetForkJoinTargetRegion",
intrinsic_SetForkJoinTargetRegion,
&intrinsic_SetForkJoinTargetRegionInfo, 2, 0),
JS_FNINFO("ForkJoinGetSlice",
intrinsic_ForkJoinGetSlice,
&intrinsic_ForkJoinGetSlice_jitInfo, 1, 0),
JS_FNINFO("InParallelSection",
intrinsic_InParallelSection,
&intrinsic_InParallelSection_jitInfo, 0, 0),
// See builtin/TypedObject.h for descriptors of the typedobj functions.
JS_FN("NewTypedHandle",

View File

@ -68,7 +68,6 @@ class js::ThreadPoolBaseWorker
void submitSlices(uint16_t sliceFrom, uint16_t sliceTo) {
MOZ_ASSERT(!hasWork());
MOZ_ASSERT(sliceFrom < sliceTo);
sliceBounds_ = ComposeSliceBounds(sliceFrom, sliceTo);
}
@ -98,16 +97,16 @@ class js::ThreadPoolWorker : public ThreadPoolBaseWorker
static void ThreadMain(void *arg);
void run();
// Get a slice of work, from ourself or steal work from other workers
// (or from the main thread).
bool getSlice(uint16_t *sliceId);
public:
ThreadPoolWorker(uint32_t workerId, ThreadPool *pool)
: ThreadPoolBaseWorker(workerId, pool),
state_(CREATED)
{ }
// Get a slice of work, from ourself or steal work from other workers
// (or from the main thread).
bool getSlice(uint16_t *sliceId);
// Invoked from main thread; signals worker to start.
bool start();
@ -124,9 +123,6 @@ class js::ThreadPoolMainWorker : public ThreadPoolBaseWorker
{
friend class ThreadPoolWorker;
// Get a slice of work, from ourself or steal work from other workers.
bool getSlice(uint16_t *sliceId);
public:
bool isActive;
@ -135,6 +131,9 @@ class js::ThreadPoolMainWorker : public ThreadPoolBaseWorker
isActive(false)
{ }
// Get a slice of work, from ourself or steal work from other workers.
bool getSlice(uint16_t *sliceId);
// Execute a job on the main thread.
void executeJob();
};
@ -287,14 +286,8 @@ ThreadPoolWorker::run()
pool_->activeWorkers_++;
}
ParallelJob *job = pool_->job();
uint16_t sliceId;
while (getSlice(&sliceId)) {
if (!job->executeFromWorker(sliceId, workerId_, stackLimit)) {
pool_->abortJob();
break;
}
}
if (!pool_->job()->executeFromWorker(workerId_, stackLimit))
pool_->abortJob();
// Join the pool.
{
@ -315,14 +308,8 @@ ThreadPoolWorker::terminate(AutoLockMonitor &lock)
void
ThreadPoolMainWorker::executeJob()
{
ParallelJob *job = pool_->job();
uint16_t sliceId;
while (getSlice(&sliceId)) {
if (!job->executeFromMainThread(sliceId)) {
pool_->abortJob();
return;
}
}
if (!pool_->job()->executeFromMainThread())
pool_->abortJob();
}
bool
@ -514,8 +501,9 @@ ThreadPool::waitForWorkers(AutoLockMonitor &lock)
}
ParallelResult
ThreadPool::executeJob(JSContext *cx, ParallelJob *job, uint16_t numSlices)
ThreadPool::executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceFrom, uint16_t sliceMax)
{
MOZ_ASSERT(sliceFrom < sliceMax);
MOZ_ASSERT(CurrentThreadCanAccessRuntime(runtime_));
MOZ_ASSERT(activeWorkers_ == 0);
MOZ_ASSERT(!hasWork());
@ -533,10 +521,10 @@ ThreadPool::executeJob(JSContext *cx, ParallelJob *job, uint16_t numSlices)
return TP_FATAL;
// Evenly distribute slices to the workers.
uint16_t numSlices = sliceMax - sliceFrom;
uint16_t slicesPerWorker = numSlices / (numWorkers() + 1);
uint16_t leftover = numSlices % slicesPerWorker;
uint16_t sliceFrom = 0;
uint16_t sliceTo = 0;
uint16_t leftover = numSlices % (numWorkers() + 1);
uint16_t sliceTo = sliceFrom;
for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
if (leftover > 0) {
sliceTo += slicesPerWorker + 1;
@ -573,10 +561,28 @@ ThreadPool::executeJob(JSContext *cx, ParallelJob *job, uint16_t numSlices)
waitForWorkers(lock);
}
// Guard against errors in the self-hosted slice processing function. If
// we still have work at this point, it is the user function's fault.
MOZ_ASSERT(!hasWork(), "User function did not process all the slices!");
// Everything went swimmingly. Give yourself a pat on the back.
return TP_SUCCESS;
}
bool
ThreadPool::getSliceForWorker(uint32_t workerId, uint16_t *sliceId)
{
// Hand out the next slice for the given worker; the worker's own
// getSlice may steal a slice from a peer when its local supply is
// exhausted. Returns false when no slices remain anywhere.
MOZ_ASSERT(workers_[workerId]);
return workers_[workerId]->getSlice(sliceId);
}
bool
ThreadPool::getSliceForMainThread(uint16_t *sliceId)
{
// Same as getSliceForWorker, but for the dedicated main-thread worker.
// Returns false when no slices remain to hand out.
MOZ_ASSERT(mainWorker_);
return mainWorker_->getSlice(sliceId);
}
void
ThreadPool::abortJob()
{

View File

@ -26,14 +26,15 @@ class ThreadPoolWorker;
class ThreadPoolMainWorker;
// A ParallelJob is the main runnable abstraction in the ThreadPool.
// ParallelJobs are composed of one or more slices. Each slice is executed by
// the pool by calling one of the execute method with the unique |sliceId|
// as argument. The pool executes multiple slices in parallel.
//
// The unit of work here is in terms of threads, *not* slices. The
// user-provided function has the responsibility of getting slices of work via
// the |ForkJoinGetSlice| intrinsic.
class ParallelJob
{
public:
virtual bool executeFromWorker(uint16_t sliceId, uint32_t workerId, uintptr_t stackLimit) = 0;
virtual bool executeFromMainThread(uint16_t sliceId) = 0;
virtual bool executeFromWorker(uint32_t workerId, uintptr_t stackLimit) = 0;
virtual bool executeFromMainThread() = 0;
};
// ThreadPool used for parallel JavaScript execution. Unless you are building
@ -147,7 +148,13 @@ class ThreadPool : public Monitor
// Execute the given ParallelJob using the main thread and any available worker.
// Blocks until the main thread has completed execution.
ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t numSlices);
ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart,
uint16_t numSlices);
// Get the next slice, stealing work from other workers when work
// stealing is enabled. Returns false if there are no more slices to hand out.
bool getSliceForWorker(uint32_t workerId, uint16_t *sliceId);
bool getSliceForMainThread(uint16_t *sliceId);
// Abort the current job.
void abortJob();