You've already forked linux-packaging-mono
							
							
		
			
	
	
		
			154 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			154 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
|   | //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
 | ||
|  | //
 | ||
|  | //                     The LLVM Compiler Infrastructure
 | ||
|  | //
 | ||
|  | // This file is distributed under the University of Illinois Open Source
 | ||
|  | // License. See LICENSE.TXT for details.
 | ||
|  | //
 | ||
|  | //===----------------------------------------------------------------------===//
 | ||
|  | //
 | ||
|  | // \file
 | ||
|  | // Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
 | ||
|  | // the size is large or is not a compile-time constant.
 | ||
|  | //
 | ||
|  | //===----------------------------------------------------------------------===//
 | ||
|  | 
 | ||
|  | #include "NVPTXLowerAggrCopies.h"
 | ||
|  | #include "llvm/Analysis/TargetTransformInfo.h"
 | ||
|  | #include "llvm/CodeGen/StackProtector.h"
 | ||
|  | #include "llvm/IR/Constants.h"
 | ||
|  | #include "llvm/IR/DataLayout.h"
 | ||
|  | #include "llvm/IR/Function.h"
 | ||
|  | #include "llvm/IR/IRBuilder.h"
 | ||
|  | #include "llvm/IR/Instructions.h"
 | ||
|  | #include "llvm/IR/IntrinsicInst.h"
 | ||
|  | #include "llvm/IR/Intrinsics.h"
 | ||
|  | #include "llvm/IR/LLVMContext.h"
 | ||
|  | #include "llvm/IR/Module.h"
 | ||
|  | #include "llvm/Support/Debug.h"
 | ||
|  | #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 | ||
|  | #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
 | ||
|  | 
 | ||
|  | #define DEBUG_TYPE "nvptx"
 | ||
|  | 
 | ||
|  | using namespace llvm; | ||
|  | 
 | ||
|  | namespace { | ||
|  | 
 | ||
|  | // actual analysis class, which is a functionpass
 | ||
|  | struct NVPTXLowerAggrCopies : public FunctionPass { | ||
|  |   static char ID; | ||
|  | 
 | ||
|  |   NVPTXLowerAggrCopies() : FunctionPass(ID) {} | ||
|  | 
 | ||
|  |   void getAnalysisUsage(AnalysisUsage &AU) const override { | ||
|  |     AU.addPreserved<StackProtector>(); | ||
|  |     AU.addRequired<TargetTransformInfoWrapperPass>(); | ||
|  |   } | ||
|  | 
 | ||
|  |   bool runOnFunction(Function &F) override; | ||
|  | 
 | ||
|  |   static const unsigned MaxAggrCopySize = 128; | ||
|  | 
 | ||
|  |   StringRef getPassName() const override { | ||
|  |     return "Lower aggregate copies/intrinsics into loops"; | ||
|  |   } | ||
|  | }; | ||
|  | 
 | ||
|  | char NVPTXLowerAggrCopies::ID = 0; | ||
|  | 
 | ||
|  | bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { | ||
|  |   SmallVector<LoadInst *, 4> AggrLoads; | ||
|  |   SmallVector<MemIntrinsic *, 4> MemCalls; | ||
|  | 
 | ||
|  |   const DataLayout &DL = F.getParent()->getDataLayout(); | ||
|  |   LLVMContext &Context = F.getParent()->getContext(); | ||
|  |   const TargetTransformInfo &TTI = | ||
|  |       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); | ||
|  | 
 | ||
|  |   // Collect all aggregate loads and mem* calls.
 | ||
|  |   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { | ||
|  |     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; | ||
|  |          ++II) { | ||
|  |       if (LoadInst *LI = dyn_cast<LoadInst>(II)) { | ||
|  |         if (!LI->hasOneUse()) | ||
|  |           continue; | ||
|  | 
 | ||
|  |         if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize) | ||
|  |           continue; | ||
|  | 
 | ||
|  |         if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) { | ||
|  |           if (SI->getOperand(0) != LI) | ||
|  |             continue; | ||
|  |           AggrLoads.push_back(LI); | ||
|  |         } | ||
|  |       } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) { | ||
|  |         // Convert intrinsic calls with variable size or with constant size
 | ||
|  |         // larger than the MaxAggrCopySize threshold.
 | ||
|  |         if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) { | ||
|  |           if (LenCI->getZExtValue() >= MaxAggrCopySize) { | ||
|  |             MemCalls.push_back(IntrCall); | ||
|  |           } | ||
|  |         } else { | ||
|  |           MemCalls.push_back(IntrCall); | ||
|  |         } | ||
|  |       } | ||
|  |     } | ||
|  |   } | ||
|  | 
 | ||
|  |   if (AggrLoads.size() == 0 && MemCalls.size() == 0) { | ||
|  |     return false; | ||
|  |   } | ||
|  | 
 | ||
|  |   //
 | ||
|  |   // Do the transformation of an aggr load/copy/set to a loop
 | ||
|  |   //
 | ||
|  |   for (LoadInst *LI : AggrLoads) { | ||
|  |     StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin()); | ||
|  |     Value *SrcAddr = LI->getOperand(0); | ||
|  |     Value *DstAddr = SI->getOperand(1); | ||
|  |     unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); | ||
|  |     ConstantInt *CopyLen = | ||
|  |         ConstantInt::get(Type::getInt32Ty(Context), NumLoads); | ||
|  | 
 | ||
|  |     createMemCpyLoopKnownSize(/* ConvertedInst */ SI, | ||
|  |                               /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, | ||
|  |                               /* CopyLen */ CopyLen, | ||
|  |                               /* SrcAlign */ LI->getAlignment(), | ||
|  |                               /* DestAlign */ SI->getAlignment(), | ||
|  |                               /* SrcIsVolatile */ LI->isVolatile(), | ||
|  |                               /* DstIsVolatile */ SI->isVolatile(), TTI); | ||
|  | 
 | ||
|  |     SI->eraseFromParent(); | ||
|  |     LI->eraseFromParent(); | ||
|  |   } | ||
|  | 
 | ||
|  |   // Transform mem* intrinsic calls.
 | ||
|  |   for (MemIntrinsic *MemCall : MemCalls) { | ||
|  |     if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) { | ||
|  |       expandMemCpyAsLoop(Memcpy, TTI); | ||
|  |     } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) { | ||
|  |       expandMemMoveAsLoop(Memmove); | ||
|  |     } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) { | ||
|  |       expandMemSetAsLoop(Memset); | ||
|  |     } | ||
|  |     MemCall->eraseFromParent(); | ||
|  |   } | ||
|  | 
 | ||
|  |   return true; | ||
|  | } | ||
|  | 
 | ||
|  | } // namespace
 | ||
|  | 
 | ||
|  | namespace llvm { | ||
|  | void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); | ||
|  | } | ||
|  | 
 | ||
|  | INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies", | ||
|  |                 "Lower aggregate copies, and llvm.mem* intrinsics into loops", | ||
|  |                 false, false) | ||
|  | 
 | ||
|  | FunctionPass *llvm::createLowerAggrCopies() { | ||
|  |   return new NVPTXLowerAggrCopies(); | ||
|  | } |