2019-03-19 03:18:21 +00:00
|
|
|
//===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===//
|
|
|
|
|
//
|
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
//
|
|
|
|
|
// This file contains functions to create parallel loops as LLVM-IR.
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
#include "polly/CodeGen/LoopGeneratorsKMP.h"
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were originally created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses, which seemed
to work until the new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist but are marked with a FIXME in the code. Changing them would
possibly cause this patch to no longer be NFC.
2024-08-10 14:25:15 +02:00
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2019-03-19 03:18:21 +00:00
|
|
|
#include "llvm/IR/Dominators.h"
|
|
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
using namespace polly;
|
|
|
|
|
|
|
|
|
|
// Emit a call to the runtime function "__kmpc_fork_call".
//
// The callee is looked up in the module by name; if it is missing, it is
// declared with external linkage as a variadic function taking
// (ptr, i32, ptr, ...) and returning void.
//
// SubFn is cast to a pointer in address space 0 and passed as the task.
// LB, UB, Stride and SubFnParam are forwarded, in this order, as the four
// variadic arguments. The emitted call is tagged with DLGenerated.
void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn,
                                                      Value *SubFnParam,
                                                      Value *LB, Value *UB,
                                                      Value *Stride) {
  const std::string Name = "__kmpc_fork_call";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it.
  if (!F) {
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
    Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty(),
                      Builder.getPtrTy(0)};

    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, true);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  // The task is passed as a plain pointer, so no dedicated function type for
  // the microtask is required here.
  Value *Task =
      Builder.CreatePointerBitCastOrAddrSpaceCast(SubFn, Builder.getPtrTy(0));

  Value *Args[] = {SourceLocationInfo,
                   Builder.getInt32(4) /* Number of arguments (w/o Task) */,
                   Task,
                   LB,
                   UB,
                   Stride,
                   SubFnParam};

  CallInst *Call = Builder.CreateCall(F, Args);
  Call->setDebugLoc(DLGenerated);
}
|
|
|
|
|
|
2020-04-03 14:57:12 -07:00
|
|
|
void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn,
|
2019-03-19 03:18:21 +00:00
|
|
|
Value *SubFnParam,
|
|
|
|
|
Value *LB, Value *UB,
|
|
|
|
|
Value *Stride) {
|
|
|
|
|
// Inform OpenMP runtime about the number of threads if greater than zero
|
|
|
|
|
if (PollyNumThreads > 0) {
|
|
|
|
|
Value *GlobalThreadID = createCallGlobalThreadNum();
|
|
|
|
|
createCallPushNumThreads(GlobalThreadID, Builder.getInt32(PollyNumThreads));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Tell the runtime we start a parallel loop
|
|
|
|
|
createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create the (still empty) outlined function for F and name its arguments.
//
// The new function has internal linkage, returns void, is named
// "<name of F>_polly_subfn" and takes six parameters:
// (ptr, ptr, LongType, LongType, LongType, ptr).
Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const {
  Type *PtrTy = Builder.getPtrTy(0);
  Type *ParamTys[] = {PtrTy,    PtrTy,    LongType,
                      LongType, LongType, Builder.getPtrTy()};

  FunctionType *SubFnTy =
      FunctionType::get(Builder.getVoidTy(), ParamTys, /*isVarArg=*/false);
  Function *SubFn = Function::Create(SubFnTy, Function::InternalLinkage,
                                     F->getName() + "_polly_subfn", M);

  // One name per parameter, in declaration order.
  static const char *const ArgNames[] = {
      "polly.kmpc.global_tid", "polly.kmpc.bound_tid", "polly.kmpc.lb",
      "polly.kmpc.ub",         "polly.kmpc.inc",       "polly.kmpc.shared"};

  const char *const *NextName = ArgNames;
  for (Argument &Arg : SubFn->args())
    Arg.setName(*NextName++);

  return SubFn;
}
|
|
|
|
|
|
|
|
|
|
// Create a subfunction of the following (preliminary) structure:
|
|
|
|
|
//
|
2020-02-11 12:46:31 -06:00
|
|
|
// PrevBB
|
|
|
|
|
// |
|
|
|
|
|
// v
|
|
|
|
|
// HeaderBB
|
|
|
|
|
// / | _____
|
|
|
|
|
// / v v |
|
|
|
|
|
// / PreHeaderBB |
|
|
|
|
|
// | | |
|
|
|
|
|
// | v |
|
|
|
|
|
// | CheckNextBB |
|
|
|
|
|
// \ | \_____/
|
|
|
|
|
// \ |
|
|
|
|
|
// v v
|
|
|
|
|
// ExitBB
|
2019-03-19 03:18:21 +00:00
|
|
|
//
|
|
|
|
|
// HeaderBB will hold allocations, loading of variables and kmp-init calls.
|
2020-02-11 12:46:31 -06:00
|
|
|
// CheckNextBB will check for more work (dynamic / static chunked) or will be
|
|
|
|
|
// empty (static non chunked).
|
2019-03-19 03:18:21 +00:00
|
|
|
// If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
|
|
|
|
|
// PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
|
2020-02-11 12:46:31 -06:00
|
|
|
// Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non
|
|
|
|
|
// chunked scheduling case. ExitBB marks the end of the parallel execution.
|
2019-03-19 03:18:21 +00:00
|
|
|
// The possibly empty BasicBlocks will automatically be removed.
|
|
|
|
|
std::tuple<Value *, Function *>
|
2020-02-11 12:46:31 -06:00
|
|
|
ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
|
2019-03-19 03:18:21 +00:00
|
|
|
AllocaInst *StructData,
|
|
|
|
|
SetVector<Value *> Data, ValueMapT &Map) {
|
|
|
|
|
Function *SubFn = createSubFnDefinition();
|
|
|
|
|
LLVMContext &Context = SubFn->getContext();
|
|
|
|
|
|
|
|
|
|
// Create basic blocks.
|
|
|
|
|
BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
2024-08-10 14:25:15 +02:00
|
|
|
SubFnDT = std::make_unique<DominatorTree>(*SubFn);
|
|
|
|
|
SubFnLI = std::make_unique<LoopInfo>(*SubFnDT);
|
|
|
|
|
|
2019-03-19 03:18:21 +00:00
|
|
|
BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
|
|
|
|
|
BasicBlock *CheckNextBB =
|
|
|
|
|
BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
|
|
|
|
|
BasicBlock *PreHeaderBB =
|
|
|
|
|
BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
|
|
|
|
|
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
2024-08-10 14:25:15 +02:00
|
|
|
SubFnDT->addNewBlock(ExitBB, HeaderBB);
|
|
|
|
|
SubFnDT->addNewBlock(CheckNextBB, HeaderBB);
|
|
|
|
|
SubFnDT->addNewBlock(PreHeaderBB, HeaderBB);
|
2019-03-19 03:18:21 +00:00
|
|
|
|
|
|
|
|
// Fill up basic block HeaderBB.
|
|
|
|
|
Builder.SetInsertPoint(HeaderBB);
|
|
|
|
|
Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
|
|
|
|
|
Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
|
|
|
|
|
Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
|
|
|
|
|
"polly.par.lastIterPtr");
|
|
|
|
|
Value *StridePtr =
|
|
|
|
|
Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr");
|
|
|
|
|
|
|
|
|
|
// Get iterator for retrieving the previously defined parameters.
|
|
|
|
|
Function::arg_iterator AI = SubFn->arg_begin();
|
|
|
|
|
// First argument holds "global thread ID".
|
|
|
|
|
Value *IDPtr = &*AI;
|
|
|
|
|
// Skip "bound thread ID" since it is not used (but had to be defined).
|
|
|
|
|
std::advance(AI, 2);
|
|
|
|
|
// Move iterator to: LB, UB, Stride, Shared variable struct.
|
|
|
|
|
Value *LB = &*AI;
|
|
|
|
|
std::advance(AI, 1);
|
|
|
|
|
Value *UB = &*AI;
|
|
|
|
|
std::advance(AI, 1);
|
|
|
|
|
Value *Stride = &*AI;
|
|
|
|
|
std::advance(AI, 1);
|
|
|
|
|
Value *Shared = &*AI;
|
|
|
|
|
|
2023-03-17 15:58:52 +01:00
|
|
|
extractValuesFromStruct(Data, StructData->getAllocatedType(), Shared, Map);
|
2019-03-19 03:18:21 +00:00
|
|
|
|
2020-01-23 16:18:34 +01:00
|
|
|
const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4);
|
2021-02-12 00:07:23 +01:00
|
|
|
Value *ID = Builder.CreateAlignedLoad(Builder.getInt32Ty(), IDPtr, Alignment,
|
|
|
|
|
"polly.par.global_tid");
|
2019-03-19 03:18:21 +00:00
|
|
|
|
|
|
|
|
Builder.CreateAlignedStore(LB, LBPtr, Alignment);
|
|
|
|
|
Builder.CreateAlignedStore(UB, UBPtr, Alignment);
|
|
|
|
|
Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment);
|
|
|
|
|
Builder.CreateAlignedStore(Stride, StridePtr, Alignment);
|
|
|
|
|
|
|
|
|
|
// Subtract one as the upper bound provided by openmp is a < comparison
|
|
|
|
|
// whereas the codegenForSequential function creates a <= comparison.
|
|
|
|
|
Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1),
|
|
|
|
|
"polly.indvar.UBAdjusted");
|
|
|
|
|
|
|
|
|
|
Value *ChunkSize =
|
|
|
|
|
ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1));
|
|
|
|
|
|
2020-02-11 12:46:31 -06:00
|
|
|
OMPGeneralSchedulingType Scheduling =
|
|
|
|
|
getSchedType(PollyChunkSize, PollyScheduling);
|
|
|
|
|
|
|
|
|
|
switch (Scheduling) {
|
2019-03-19 03:18:21 +00:00
|
|
|
case OMPGeneralSchedulingType::Dynamic:
|
|
|
|
|
case OMPGeneralSchedulingType::Guided:
|
|
|
|
|
case OMPGeneralSchedulingType::Runtime:
|
|
|
|
|
// "DYNAMIC" scheduling types are handled below (including 'runtime')
|
|
|
|
|
{
|
|
|
|
|
UB = AdjustedUB;
|
|
|
|
|
createCallDispatchInit(ID, LB, UB, Stride, ChunkSize);
|
|
|
|
|
Value *HasWork =
|
|
|
|
|
createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
|
|
|
|
|
Value *HasIteration =
|
|
|
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
|
|
|
|
|
Builder.getInt32(1), "polly.hasIteration");
|
|
|
|
|
Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
|
|
|
|
|
|
|
|
Builder.SetInsertPoint(CheckNextBB);
|
|
|
|
|
HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
|
|
|
|
|
HasIteration =
|
|
|
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
|
|
|
|
|
Builder.getInt32(1), "polly.hasWork");
|
|
|
|
|
Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
|
|
|
|
|
|
|
|
Builder.SetInsertPoint(PreHeaderBB);
|
2021-02-12 00:07:23 +01:00
|
|
|
LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
|
|
|
|
"polly.indvar.LB");
|
|
|
|
|
UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
|
|
|
|
"polly.indvar.UB");
|
2019-03-19 03:18:21 +00:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case OMPGeneralSchedulingType::StaticChunked:
|
|
|
|
|
case OMPGeneralSchedulingType::StaticNonChunked:
|
|
|
|
|
// "STATIC" scheduling types are handled below
|
|
|
|
|
{
|
2020-02-11 12:46:31 -06:00
|
|
|
Builder.CreateAlignedStore(AdjustedUB, UBPtr, Alignment);
|
2019-03-19 03:18:21 +00:00
|
|
|
createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize);
|
|
|
|
|
|
2021-02-12 00:07:23 +01:00
|
|
|
Value *ChunkedStride = Builder.CreateAlignedLoad(
|
|
|
|
|
LongType, StridePtr, Alignment, "polly.kmpc.stride");
|
2020-02-11 12:46:31 -06:00
|
|
|
|
2021-02-12 00:07:23 +01:00
|
|
|
LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
|
|
|
|
"polly.indvar.LB");
|
|
|
|
|
UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
|
|
|
|
"polly.indvar.UB.temp");
|
2019-03-19 03:18:21 +00:00
|
|
|
|
2020-02-11 12:46:31 -06:00
|
|
|
Value *UBInRange =
|
|
|
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, UB, AdjustedUB,
|
|
|
|
|
"polly.indvar.UB.inRange");
|
|
|
|
|
UB = Builder.CreateSelect(UBInRange, UB, AdjustedUB, "polly.indvar.UB");
|
2019-03-19 03:18:21 +00:00
|
|
|
Builder.CreateAlignedStore(UB, UBPtr, Alignment);
|
|
|
|
|
|
|
|
|
|
Value *HasIteration = Builder.CreateICmp(
|
|
|
|
|
llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration");
|
|
|
|
|
Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
|
|
|
|
2020-02-11 12:46:31 -06:00
|
|
|
if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
|
|
|
|
|
Builder.SetInsertPoint(PreHeaderBB);
|
2021-02-12 00:07:23 +01:00
|
|
|
LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
2020-02-11 12:46:31 -06:00
|
|
|
"polly.indvar.LB.entry");
|
2021-02-12 00:07:23 +01:00
|
|
|
UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
2020-02-11 12:46:31 -06:00
|
|
|
"polly.indvar.UB.entry");
|
|
|
|
|
}
|
|
|
|
|
|
2019-03-19 03:18:21 +00:00
|
|
|
Builder.SetInsertPoint(CheckNextBB);
|
2020-02-11 12:46:31 -06:00
|
|
|
|
|
|
|
|
if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
|
|
|
|
|
Value *NextLB =
|
|
|
|
|
Builder.CreateAdd(LB, ChunkedStride, "polly.indvar.nextLB");
|
|
|
|
|
Value *NextUB = Builder.CreateAdd(UB, ChunkedStride);
|
|
|
|
|
|
|
|
|
|
Value *NextUBOutOfBounds =
|
|
|
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SGT, NextUB,
|
|
|
|
|
AdjustedUB, "polly.indvar.nextUB.outOfBounds");
|
|
|
|
|
NextUB = Builder.CreateSelect(NextUBOutOfBounds, AdjustedUB, NextUB,
|
|
|
|
|
"polly.indvar.nextUB");
|
|
|
|
|
|
|
|
|
|
Builder.CreateAlignedStore(NextLB, LBPtr, Alignment);
|
|
|
|
|
Builder.CreateAlignedStore(NextUB, UBPtr, Alignment);
|
|
|
|
|
|
|
|
|
|
Value *HasWork =
|
|
|
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, NextLB,
|
|
|
|
|
AdjustedUB, "polly.hasWork");
|
|
|
|
|
Builder.CreateCondBr(HasWork, PreHeaderBB, ExitBB);
|
|
|
|
|
} else {
|
|
|
|
|
Builder.CreateBr(ExitBB);
|
|
|
|
|
}
|
2019-03-19 03:18:21 +00:00
|
|
|
|
|
|
|
|
Builder.SetInsertPoint(PreHeaderBB);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Builder.CreateBr(CheckNextBB);
|
2025-04-24 18:31:48 +05:30
|
|
|
Builder.SetInsertPoint(--Builder.GetInsertPoint());
|
2019-03-19 03:18:21 +00:00
|
|
|
BasicBlock *AfterBB;
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
2024-08-10 14:25:15 +02:00
|
|
|
Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, *SubFnLI,
|
|
|
|
|
*SubFnDT, AfterBB, ICmpInst::ICMP_SLE, nullptr, true,
|
2019-03-19 03:18:21 +00:00
|
|
|
/* UseGuard */ false);
|
|
|
|
|
|
|
|
|
|
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
|
|
|
|
|
|
|
|
|
|
// Add code to terminate this subfunction.
|
|
|
|
|
Builder.SetInsertPoint(ExitBB);
|
|
|
|
|
// Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call
|
2020-02-11 12:46:31 -06:00
|
|
|
if (Scheduling == OMPGeneralSchedulingType::StaticChunked ||
|
|
|
|
|
Scheduling == OMPGeneralSchedulingType::StaticNonChunked) {
|
2019-03-19 03:18:21 +00:00
|
|
|
createCallStaticFini(ID);
|
|
|
|
|
}
|
|
|
|
|
Builder.CreateRetVoid();
|
2025-04-24 18:31:48 +05:30
|
|
|
Builder.SetInsertPoint(LoopBody);
|
2019-03-19 03:18:21 +00:00
|
|
|
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
2024-08-10 14:25:15 +02:00
|
|
|
// FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the
|
|
|
|
|
// DominatorTree/LoopInfo has been created correctly. Alternatively, recreate
|
|
|
|
|
// from scratch since it is not needed here directly.
|
|
|
|
|
|
2019-03-19 03:18:21 +00:00
|
|
|
return std::make_tuple(IV, SubFn);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Emit a call to "__kmpc_global_thread_num" and return the call instruction.
//
// The callee is declared on demand as i32(ptr) with external linkage. The only
// argument passed is SourceLocationInfo.
Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
  const char *RuntimeFnName = "__kmpc_global_thread_num";
  Function *RuntimeFn = M->getFunction(RuntimeFnName);

  // Declare the runtime function if the module does not know it yet.
  if (RuntimeFn == nullptr) {
    Type *ParamTys[] = {Builder.getPtrTy(0)};
    FunctionType *FnTy =
        FunctionType::get(Builder.getInt32Ty(), ParamTys, /*isVarArg=*/false);
    RuntimeFn =
        Function::Create(FnTy, Function::ExternalLinkage, RuntimeFnName, M);
  }

  CallInst *ThreadNumCall = Builder.CreateCall(RuntimeFn, {SourceLocationInfo});
  ThreadNumCall->setDebugLoc(DLGenerated);
  return ThreadNumCall;
}
|
|
|
|
|
|
|
|
|
|
// Emit a call to "__kmpc_push_num_threads".
//
// The callee is declared on demand as void(ptr, i32, i32) with external
// linkage. It receives SourceLocationInfo, GlobalThreadID and NumThreads.
void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
                                                        Value *NumThreads) {
  const char *RuntimeFnName = "__kmpc_push_num_threads";
  Function *RuntimeFn = M->getFunction(RuntimeFnName);

  // Declare the runtime function if the module does not know it yet.
  if (RuntimeFn == nullptr) {
    Type *Int32Ty = Builder.getInt32Ty();
    Type *ParamTys[] = {Builder.getPtrTy(0), Int32Ty, Int32Ty};
    FunctionType *FnTy =
        FunctionType::get(Builder.getVoidTy(), ParamTys, /*isVarArg=*/false);
    RuntimeFn =
        Function::Create(FnTy, Function::ExternalLinkage, RuntimeFnName, M);
  }

  Value *CallArgs[] = {SourceLocationInfo, GlobalThreadID, NumThreads};

  CallInst *PushCall = Builder.CreateCall(RuntimeFn, CallArgs);
  PushCall->setDebugLoc(DLGenerated);
}
|
|
|
|
|
|
|
|
|
|
// Emit a call to "__kmpc_for_static_init_8" (when is64BitArch() holds) or
// "__kmpc_for_static_init_4" (otherwise).
//
// The callee is declared on demand as
// void(ptr, i32, i32, ptr, ptr, ptr, ptr, LongType, LongType) with external
// linkage. The scheduling kind passed is the value of
// getSchedType(PollyChunkSize, PollyScheduling); the increment argument is the
// constant 1.
void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
                                                    Value *IsLastPtr,
                                                    Value *LBPtr, Value *UBPtr,
                                                    Value *StridePtr,
                                                    Value *ChunkSize) {
  const char *RuntimeFnName = "__kmpc_for_static_init_4";
  if (is64BitArch())
    RuntimeFnName = "__kmpc_for_static_init_8";
  Function *RuntimeFn = M->getFunction(RuntimeFnName);

  // Declare the runtime function if the module does not know it yet.
  if (RuntimeFn == nullptr) {
    Type *PtrTy = Builder.getPtrTy(0);
    Type *Int32Ty = Builder.getInt32Ty();
    Type *ParamTys[] = {PtrTy, Int32Ty, Int32Ty,  PtrTy,   PtrTy,
                        PtrTy, PtrTy,   LongType, LongType};

    FunctionType *FnTy =
        FunctionType::get(Builder.getVoidTy(), ParamTys, /*isVarArg=*/false);
    RuntimeFn =
        Function::Create(FnTy, Function::ExternalLinkage, RuntimeFnName, M);
  }

  // The parameter 'ChunkSize' will hold strictly positive integer values,
  // regardless of PollyChunkSize's value.
  Value *SchedKind = Builder.getInt32(
      static_cast<int>(getSchedType(PollyChunkSize, PollyScheduling)));
  Value *UnitIncrement = ConstantInt::get(LongType, 1);

  Value *CallArgs[] = {SourceLocationInfo, GlobalThreadID, SchedKind,
                       IsLastPtr,          LBPtr,          UBPtr,
                       StridePtr,          UnitIncrement,  ChunkSize};

  CallInst *InitCall = Builder.CreateCall(RuntimeFn, CallArgs);
  InitCall->setDebugLoc(DLGenerated);
}
|
|
|
|
|
|
|
|
|
|
// Emit a call to "__kmpc_for_static_fini".
//
// The callee is declared on demand as void(ptr, i32) with external linkage.
// It receives SourceLocationInfo and GlobalThreadID.
void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
  const char *RuntimeFnName = "__kmpc_for_static_fini";
  Function *RuntimeFn = M->getFunction(RuntimeFnName);

  // Declare the runtime function if the module does not know it yet.
  if (RuntimeFn == nullptr) {
    Type *ParamTys[] = {Builder.getPtrTy(0), Builder.getInt32Ty()};
    FunctionType *FnTy =
        FunctionType::get(Builder.getVoidTy(), ParamTys, /*isVarArg=*/false);
    RuntimeFn =
        Function::Create(FnTy, Function::ExternalLinkage, RuntimeFnName, M);
  }

  Value *CallArgs[] = {SourceLocationInfo, GlobalThreadID};

  CallInst *FiniCall = Builder.CreateCall(RuntimeFn, CallArgs);
  FiniCall->setDebugLoc(DLGenerated);
}
|
|
|
|
|
|
|
|
|
|
// Emit a call to "__kmpc_dispatch_init_8" (when is64BitArch() holds) or
// "__kmpc_dispatch_init_4" (otherwise).
//
// The callee is declared on demand as
// void(ptr, i32, i32, LongType, LongType, LongType, LongType) with external
// linkage. The scheduling kind passed is the value of
// getSchedType(PollyChunkSize, PollyScheduling).
void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
                                                      Value *LB, Value *UB,
                                                      Value *Inc,
                                                      Value *ChunkSize) {
  const char *RuntimeFnName = "__kmpc_dispatch_init_4";
  if (is64BitArch())
    RuntimeFnName = "__kmpc_dispatch_init_8";
  Function *RuntimeFn = M->getFunction(RuntimeFnName);

  // Declare the runtime function if the module does not know it yet.
  if (RuntimeFn == nullptr) {
    Type *Int32Ty = Builder.getInt32Ty();
    Type *ParamTys[] = {Builder.getPtrTy(0), Int32Ty,  Int32Ty, LongType,
                        LongType,            LongType, LongType};

    FunctionType *FnTy =
        FunctionType::get(Builder.getVoidTy(), ParamTys, /*isVarArg=*/false);
    RuntimeFn =
        Function::Create(FnTy, Function::ExternalLinkage, RuntimeFnName, M);
  }

  // The parameter 'ChunkSize' will hold strictly positive integer values,
  // regardless of PollyChunkSize's value.
  Value *SchedKind = Builder.getInt32(
      static_cast<int>(getSchedType(PollyChunkSize, PollyScheduling)));

  Value *CallArgs[] = {SourceLocationInfo, GlobalThreadID, SchedKind, LB, UB,
                       Inc,                ChunkSize};

  CallInst *InitCall = Builder.CreateCall(RuntimeFn, CallArgs);
  InitCall->setDebugLoc(DLGenerated);
}
|
|
|
|
|
|
|
|
|
|
// Emit a call to "__kmpc_dispatch_next_8" (when is64BitArch() holds) or
// "__kmpc_dispatch_next_4" (otherwise) and return the call instruction, whose
// result type is i32.
//
// The callee is declared on demand as i32(ptr, i32, ptr, ptr, ptr, ptr) with
// external linkage.
Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
                                                        Value *IsLastPtr,
                                                        Value *LBPtr,
                                                        Value *UBPtr,
                                                        Value *StridePtr) {
  const char *RuntimeFnName = "__kmpc_dispatch_next_4";
  if (is64BitArch())
    RuntimeFnName = "__kmpc_dispatch_next_8";
  Function *RuntimeFn = M->getFunction(RuntimeFnName);

  // Declare the runtime function if the module does not know it yet.
  if (RuntimeFn == nullptr) {
    Type *PtrTy = Builder.getPtrTy(0);
    Type *ParamTys[] = {PtrTy, Builder.getInt32Ty(), PtrTy, PtrTy, PtrTy,
                        PtrTy};

    FunctionType *FnTy =
        FunctionType::get(Builder.getInt32Ty(), ParamTys, /*isVarArg=*/false);
    RuntimeFn =
        Function::Create(FnTy, Function::ExternalLinkage, RuntimeFnName, M);
  }

  Value *CallArgs[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr,
                       LBPtr,              UBPtr,          StridePtr};

  CallInst *NextCall = Builder.CreateCall(RuntimeFn, CallArgs);
  NextCall->setDebugLoc(DLGenerated);
  return NextCall;
}
|
|
|
|
|
|
|
|
|
|
// TODO: This function currently creates a source location dummy. It might be
|
|
|
|
|
// necessary to (actually) provide information, in the future.
|
|
|
|
|
// Return the module's ".loc.dummy" global, creating it on first use.
//
// When the global does not exist yet, this creates:
//  - the struct type "struct.ident_t" { i32, i32, i32, i32, ptr } (unless a
//    type of that name is already registered in the context),
//  - a private constant ".str.ident" holding "Source location dummy." with a
//    terminating NUL, and
//  - the private constant ".loc.dummy" of the ident type, whose four integer
//    fields are zero and whose pointer field refers to the string.
GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() {
  const std::string LocName = ".loc.dummy";
  GlobalVariable *SourceLocDummy = M->getGlobalVariable(LocName);

  if (SourceLocDummy == nullptr) {
    const std::string StructName = "struct.ident_t";
    StructType *IdentTy =
        StructType::getTypeByName(M->getContext(), StructName);

    // If the ident_t StructType is not available, declare it.
    // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* }
    if (!IdentTy) {
      Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(),
                            Builder.getInt32Ty(), Builder.getInt32Ty(),
                            Builder.getPtrTy()};

      IdentTy =
          StructType::create(M->getContext(), LocMembers, StructName, false);
    }

    // 22 characters of text plus the terminating NUL.
    llvm::ArrayType *StrTy =
        llvm::ArrayType::get(Builder.getInt8Ty(), /* Length */ 23);

    // Global Variable Definitions
    GlobalVariable *StrVar =
        new GlobalVariable(*M, StrTy, true, GlobalValue::PrivateLinkage,
                           nullptr, ".str.ident");
    StrVar->setAlignment(llvm::Align(1));

    SourceLocDummy = new GlobalVariable(
        *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName);
    SourceLocDummy->setAlignment(llvm::Align(8));

    // Constant Definitions
    Constant *InitStr = ConstantDataArray::getString(
        M->getContext(), "Source location dummy.", true);

    // The struct initializer needs a Constant. cast<> verifies (in builds with
    // assertions) that the builder returned a constant for this GEP instead of
    // silently reinterpreting an instruction.
    Constant *StrPtr = cast<Constant>(Builder.CreateInBoundsGEP(
        StrTy, StrVar, {Builder.getInt32(0), Builder.getInt32(0)}));

    Constant *LocInitStruct = ConstantStruct::get(
        IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0),
                  Builder.getInt32(0), StrPtr});

    // Initialize variables
    StrVar->setInitializer(InitStr);
    SourceLocDummy->setInitializer(LocInitStruct);
  }

  return SourceLocDummy;
}
|
|
|
|
|
|
|
|
|
|
bool ParallelLoopGeneratorKMP::is64BitArch() {
|
|
|
|
|
return (LongType->getIntegerBitWidth() == 64);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Map the requested scheduling to the one that is actually used: static
// chunked scheduling with a chunk size of zero becomes static non-chunked;
// every other combination is returned unchanged.
OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType(
    int ChunkSize, OMPGeneralSchedulingType Scheduling) const {
  const bool IsStaticWithoutChunks =
      Scheduling == OMPGeneralSchedulingType::StaticChunked && ChunkSize == 0;

  return IsStaticWithoutChunks ? OMPGeneralSchedulingType::StaticNonChunked
                               : Scheduling;
}
|