| 
									
										
										
										
											2019-04-12 14:10:50 +00:00
										 |  |  | //===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //                     The LLVM Compiler Infrastructure
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This file is distributed under the University of Illinois Open Source
 | 
					
						
							|  |  |  | // License. See LICENSE.TXT for details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //===----------------------------------------------------------------------===//
 | 
					
						
							|  |  |  | /// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
 | 
					
						
							|  |  |  | /// that may inhibit the HW prefetching.  This is done in two steps.  Before
 | 
					
						
							|  |  |  | /// ISel, we mark strided loads (i.e. those that will likely benefit from
 | 
					
						
							|  |  |  | /// prefetching) with metadata.  Then, after opcodes have been finalized, we
 | 
					
						
							|  |  |  | /// insert MOVs and re-write loads to prevent unintnentional tag collisions.
 | 
					
						
							|  |  |  | // ===---------------------------------------------------------------------===//
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "AArch64.h"
 | 
					
						
							|  |  |  | #include "AArch64InstrInfo.h"
 | 
					
						
							|  |  |  | #include "AArch64Subtarget.h"
 | 
					
						
							|  |  |  | #include "AArch64TargetMachine.h"
 | 
					
						
							|  |  |  | #include "llvm/ADT/DenseMap.h"
 | 
					
						
							|  |  |  | #include "llvm/ADT/DepthFirstIterator.h"
 | 
					
						
							|  |  |  | #include "llvm/ADT/None.h"
 | 
					
						
							|  |  |  | #include "llvm/ADT/Optional.h"
 | 
					
						
							|  |  |  | #include "llvm/ADT/SmallVector.h"
 | 
					
						
							|  |  |  | #include "llvm/ADT/Statistic.h"
 | 
					
						
							|  |  |  | #include "llvm/Analysis/LoopInfo.h"
 | 
					
						
							|  |  |  | #include "llvm/Analysis/ScalarEvolution.h"
 | 
					
						
							|  |  |  | #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/LiveRegUnits.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/MachineBasicBlock.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/MachineFunction.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/MachineFunctionPass.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/MachineInstr.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/MachineInstrBuilder.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/MachineLoopInfo.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/MachineOperand.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/MachineRegisterInfo.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/TargetPassConfig.h"
 | 
					
						
							|  |  |  | #include "llvm/CodeGen/TargetRegisterInfo.h"
 | 
					
						
							|  |  |  | #include "llvm/IR/DebugLoc.h"
 | 
					
						
							|  |  |  | #include "llvm/IR/Dominators.h"
 | 
					
						
							|  |  |  | #include "llvm/IR/Function.h"
 | 
					
						
							|  |  |  | #include "llvm/IR/Instruction.h"
 | 
					
						
							|  |  |  | #include "llvm/IR/Instructions.h"
 | 
					
						
							|  |  |  | #include "llvm/IR/Metadata.h"
 | 
					
						
							|  |  |  | #include "llvm/Pass.h"
 | 
					
						
							|  |  |  | #include "llvm/Support/Casting.h"
 | 
					
						
							|  |  |  | #include "llvm/Support/Debug.h"
 | 
					
						
							| 
									
										
										
										
											2019-07-26 19:53:28 +00:00
										 |  |  | #include "llvm/Support/DebugCounter.h"
 | 
					
						
							| 
									
										
										
										
											2019-04-12 14:10:50 +00:00
										 |  |  | #include "llvm/Support/raw_ostream.h"
 | 
					
						
							|  |  |  | #include <cassert>
 | 
					
						
							|  |  |  | #include <iterator>
 | 
					
						
							|  |  |  | #include <utility>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | using namespace llvm; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define DEBUG_TYPE "falkor-hwpf-fix"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked"); | 
					
						
							|  |  |  | STATISTIC(NumCollisionsAvoided, | 
					
						
							|  |  |  |           "Number of HW prefetch tag collisions avoided"); | 
					
						
							|  |  |  | STATISTIC(NumCollisionsNotAvoided, | 
					
						
							|  |  |  |           "Number of HW prefetch tag collisions not avoided due to lack of regsiters"); | 
					
						
							| 
									
										
										
										
											2019-07-26 19:53:28 +00:00
										 |  |  | DEBUG_COUNTER(FixCounter, "falkor-hwpf", | 
					
						
							|  |  |  |               "Controls which tag collisions are avoided"); | 
					
						
							| 
									
										
										
										
											2019-04-12 14:10:50 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | namespace { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class FalkorMarkStridedAccesses { | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |   FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE) | 
					
						
							|  |  |  |       : LI(LI), SE(SE) {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   bool run(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | private: | 
					
						
							|  |  |  |   bool runOnLoop(Loop &L); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   LoopInfo &LI; | 
					
						
							|  |  |  |   ScalarEvolution &SE; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class FalkorMarkStridedAccessesLegacy : public FunctionPass { | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |   static char ID; // Pass ID, replacement for typeid
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) { | 
					
						
							|  |  |  |     initializeFalkorMarkStridedAccessesLegacyPass( | 
					
						
							|  |  |  |         *PassRegistry::getPassRegistry()); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void getAnalysisUsage(AnalysisUsage &AU) const override { | 
					
						
							|  |  |  |     AU.addRequired<TargetPassConfig>(); | 
					
						
							|  |  |  |     AU.addPreserved<DominatorTreeWrapperPass>(); | 
					
						
							|  |  |  |     AU.addRequired<LoopInfoWrapperPass>(); | 
					
						
							|  |  |  |     AU.addPreserved<LoopInfoWrapperPass>(); | 
					
						
							|  |  |  |     AU.addRequired<ScalarEvolutionWrapperPass>(); | 
					
						
							|  |  |  |     AU.addPreserved<ScalarEvolutionWrapperPass>(); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   bool runOnFunction(Function &F) override; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } // end anonymous namespace
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | char FalkorMarkStridedAccessesLegacy::ID = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE, | 
					
						
							|  |  |  |                       "Falkor HW Prefetch Fix", false, false) | 
					
						
							|  |  |  | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) | 
					
						
							|  |  |  | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) | 
					
						
							|  |  |  | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) | 
					
						
							|  |  |  | INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE, | 
					
						
							|  |  |  |                     "Falkor HW Prefetch Fix", false, false) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | FunctionPass *llvm::createFalkorMarkStridedAccessesPass() { | 
					
						
							|  |  |  |   return new FalkorMarkStridedAccessesLegacy(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) { | 
					
						
							|  |  |  |   TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); | 
					
						
							|  |  |  |   const AArch64Subtarget *ST = | 
					
						
							|  |  |  |       TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F); | 
					
						
							|  |  |  |   if (ST->getProcFamily() != AArch64Subtarget::Falkor) | 
					
						
							|  |  |  |     return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (skipFunction(F)) | 
					
						
							|  |  |  |     return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | 
					
						
							|  |  |  |   ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   FalkorMarkStridedAccesses LDP(LI, SE); | 
					
						
							|  |  |  |   return LDP.run(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | bool FalkorMarkStridedAccesses::run() { | 
					
						
							|  |  |  |   bool MadeChange = false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for (Loop *L : LI) | 
					
						
							|  |  |  |     for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt) | 
					
						
							|  |  |  |       MadeChange |= runOnLoop(**LIt); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return MadeChange; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) { | 
					
						
							|  |  |  |   // Only mark strided loads in the inner-most loop
 | 
					
						
							|  |  |  |   if (!L.empty()) | 
					
						
							|  |  |  |     return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   bool MadeChange = false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for (BasicBlock *BB : L.blocks()) { | 
					
						
							|  |  |  |     for (Instruction &I : *BB) { | 
					
						
							|  |  |  |       LoadInst *LoadI = dyn_cast<LoadInst>(&I); | 
					
						
							|  |  |  |       if (!LoadI) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       Value *PtrValue = LoadI->getPointerOperand(); | 
					
						
							|  |  |  |       if (L.isLoopInvariant(PtrValue)) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       const SCEV *LSCEV = SE.getSCEV(PtrValue); | 
					
						
							|  |  |  |       const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); | 
					
						
							|  |  |  |       if (!LSCEVAddRec || !LSCEVAddRec->isAffine()) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD, | 
					
						
							|  |  |  |                          MDNode::get(LoadI->getContext(), {})); | 
					
						
							|  |  |  |       ++NumStridedLoadsMarked; | 
					
						
							|  |  |  |       DEBUG(dbgs() << "Load: " << I << " marked as strided\n"); | 
					
						
							|  |  |  |       MadeChange = true; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return MadeChange; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class FalkorHWPFFix : public MachineFunctionPass { | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |   static char ID; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   FalkorHWPFFix() : MachineFunctionPass(ID) { | 
					
						
							|  |  |  |     initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry()); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   bool runOnMachineFunction(MachineFunction &Fn) override; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void getAnalysisUsage(AnalysisUsage &AU) const override { | 
					
						
							|  |  |  |     AU.addRequired<MachineLoopInfo>(); | 
					
						
							|  |  |  |     MachineFunctionPass::getAnalysisUsage(AU); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   MachineFunctionProperties getRequiredProperties() const override { | 
					
						
							|  |  |  |     return MachineFunctionProperties().set( | 
					
						
							|  |  |  |         MachineFunctionProperties::Property::NoVRegs); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | private: | 
					
						
							|  |  |  |   void runOnLoop(MachineLoop &L, MachineFunction &Fn); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const AArch64InstrInfo *TII; | 
					
						
							|  |  |  |   const TargetRegisterInfo *TRI; | 
					
						
							|  |  |  |   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> TagMap; | 
					
						
							|  |  |  |   bool Modified; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// Bits from load opcodes used to compute HW prefetcher instruction tags.
 | 
					
						
							|  |  |  | struct LoadInfo { | 
					
						
							|  |  |  |   LoadInfo() = default; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   unsigned DestReg = 0; | 
					
						
							|  |  |  |   unsigned BaseReg = 0; | 
					
						
							|  |  |  |   int BaseRegIdx = -1; | 
					
						
							|  |  |  |   const MachineOperand *OffsetOpnd = nullptr; | 
					
						
							|  |  |  |   bool IsPrePost = false; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } // end anonymous namespace
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | char FalkorHWPFFix::ID = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "falkor-hwpf-fix-late", | 
					
						
							|  |  |  |                       "Falkor HW Prefetch Fix Late Phase", false, false) | 
					
						
							|  |  |  | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) | 
					
						
							|  |  |  | INITIALIZE_PASS_END(FalkorHWPFFix, "falkor-hwpf-fix-late", | 
					
						
							|  |  |  |                     "Falkor HW Prefetch Fix Late Phase", false, false) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) { | 
					
						
							|  |  |  |   return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { | 
					
						
							|  |  |  |   int DestRegIdx; | 
					
						
							|  |  |  |   int BaseRegIdx; | 
					
						
							|  |  |  |   int OffsetIdx; | 
					
						
							|  |  |  |   bool IsPrePost; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   switch (MI.getOpcode()) { | 
					
						
							|  |  |  |   default: | 
					
						
							|  |  |  |     return None; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LD1i64: | 
					
						
							|  |  |  |   case AArch64::LD2i64: | 
					
						
							|  |  |  |     DestRegIdx = 0; | 
					
						
							|  |  |  |     BaseRegIdx = 3; | 
					
						
							|  |  |  |     OffsetIdx = -1; | 
					
						
							|  |  |  |     IsPrePost = false; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LD1i8: | 
					
						
							|  |  |  |   case AArch64::LD1i16: | 
					
						
							|  |  |  |   case AArch64::LD1i32: | 
					
						
							|  |  |  |   case AArch64::LD2i8: | 
					
						
							|  |  |  |   case AArch64::LD2i16: | 
					
						
							|  |  |  |   case AArch64::LD2i32: | 
					
						
							|  |  |  |   case AArch64::LD3i8: | 
					
						
							|  |  |  |   case AArch64::LD3i16: | 
					
						
							|  |  |  |   case AArch64::LD3i32: | 
					
						
							|  |  |  |   case AArch64::LD3i64: | 
					
						
							|  |  |  |   case AArch64::LD4i8: | 
					
						
							|  |  |  |   case AArch64::LD4i16: | 
					
						
							|  |  |  |   case AArch64::LD4i32: | 
					
						
							|  |  |  |   case AArch64::LD4i64: | 
					
						
							|  |  |  |     DestRegIdx = -1; | 
					
						
							|  |  |  |     BaseRegIdx = 3; | 
					
						
							|  |  |  |     OffsetIdx = -1; | 
					
						
							|  |  |  |     IsPrePost = false; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LD1Onev1d: | 
					
						
							|  |  |  |   case AArch64::LD1Onev2s: | 
					
						
							|  |  |  |   case AArch64::LD1Onev4h: | 
					
						
							|  |  |  |   case AArch64::LD1Onev8b: | 
					
						
							|  |  |  |   case AArch64::LD1Onev2d: | 
					
						
							|  |  |  |   case AArch64::LD1Onev4s: | 
					
						
							|  |  |  |   case AArch64::LD1Onev8h: | 
					
						
							|  |  |  |   case AArch64::LD1Onev16b: | 
					
						
							|  |  |  |   case AArch64::LD1Rv1d: | 
					
						
							|  |  |  |   case AArch64::LD1Rv2s: | 
					
						
							|  |  |  |   case AArch64::LD1Rv4h: | 
					
						
							|  |  |  |   case AArch64::LD1Rv8b: | 
					
						
							|  |  |  |   case AArch64::LD1Rv2d: | 
					
						
							|  |  |  |   case AArch64::LD1Rv4s: | 
					
						
							|  |  |  |   case AArch64::LD1Rv8h: | 
					
						
							|  |  |  |   case AArch64::LD1Rv16b: | 
					
						
							|  |  |  |     DestRegIdx = 0; | 
					
						
							|  |  |  |     BaseRegIdx = 1; | 
					
						
							|  |  |  |     OffsetIdx = -1; | 
					
						
							|  |  |  |     IsPrePost = false; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LD1Twov1d: | 
					
						
							|  |  |  |   case AArch64::LD1Twov2s: | 
					
						
							|  |  |  |   case AArch64::LD1Twov4h: | 
					
						
							|  |  |  |   case AArch64::LD1Twov8b: | 
					
						
							|  |  |  |   case AArch64::LD1Twov2d: | 
					
						
							|  |  |  |   case AArch64::LD1Twov4s: | 
					
						
							|  |  |  |   case AArch64::LD1Twov8h: | 
					
						
							|  |  |  |   case AArch64::LD1Twov16b: | 
					
						
							|  |  |  |   case AArch64::LD1Threev1d: | 
					
						
							|  |  |  |   case AArch64::LD1Threev2s: | 
					
						
							|  |  |  |   case AArch64::LD1Threev4h: | 
					
						
							|  |  |  |   case AArch64::LD1Threev8b: | 
					
						
							|  |  |  |   case AArch64::LD1Threev2d: | 
					
						
							|  |  |  |   case AArch64::LD1Threev4s: | 
					
						
							|  |  |  |   case AArch64::LD1Threev8h: | 
					
						
							|  |  |  |   case AArch64::LD1Threev16b: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv1d: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv2s: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv4h: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv8b: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv2d: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv4s: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv8h: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv16b: | 
					
						
							|  |  |  |   case AArch64::LD2Twov2s: | 
					
						
							|  |  |  |   case AArch64::LD2Twov4s: | 
					
						
							|  |  |  |   case AArch64::LD2Twov8b: | 
					
						
							|  |  |  |   case AArch64::LD2Twov2d: | 
					
						
							|  |  |  |   case AArch64::LD2Twov4h: | 
					
						
							|  |  |  |   case AArch64::LD2Twov8h: | 
					
						
							|  |  |  |   case AArch64::LD2Twov16b: | 
					
						
							|  |  |  |   case AArch64::LD2Rv1d: | 
					
						
							|  |  |  |   case AArch64::LD2Rv2s: | 
					
						
							|  |  |  |   case AArch64::LD2Rv4s: | 
					
						
							|  |  |  |   case AArch64::LD2Rv8b: | 
					
						
							|  |  |  |   case AArch64::LD2Rv2d: | 
					
						
							|  |  |  |   case AArch64::LD2Rv4h: | 
					
						
							|  |  |  |   case AArch64::LD2Rv8h: | 
					
						
							|  |  |  |   case AArch64::LD2Rv16b: | 
					
						
							|  |  |  |   case AArch64::LD3Threev2s: | 
					
						
							|  |  |  |   case AArch64::LD3Threev4h: | 
					
						
							|  |  |  |   case AArch64::LD3Threev8b: | 
					
						
							|  |  |  |   case AArch64::LD3Threev2d: | 
					
						
							|  |  |  |   case AArch64::LD3Threev4s: | 
					
						
							|  |  |  |   case AArch64::LD3Threev8h: | 
					
						
							|  |  |  |   case AArch64::LD3Threev16b: | 
					
						
							|  |  |  |   case AArch64::LD3Rv1d: | 
					
						
							|  |  |  |   case AArch64::LD3Rv2s: | 
					
						
							|  |  |  |   case AArch64::LD3Rv4h: | 
					
						
							|  |  |  |   case AArch64::LD3Rv8b: | 
					
						
							|  |  |  |   case AArch64::LD3Rv2d: | 
					
						
							|  |  |  |   case AArch64::LD3Rv4s: | 
					
						
							|  |  |  |   case AArch64::LD3Rv8h: | 
					
						
							|  |  |  |   case AArch64::LD3Rv16b: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv2s: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv4h: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv8b: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv2d: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv4s: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv8h: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv16b: | 
					
						
							|  |  |  |   case AArch64::LD4Rv1d: | 
					
						
							|  |  |  |   case AArch64::LD4Rv2s: | 
					
						
							|  |  |  |   case AArch64::LD4Rv4h: | 
					
						
							|  |  |  |   case AArch64::LD4Rv8b: | 
					
						
							|  |  |  |   case AArch64::LD4Rv2d: | 
					
						
							|  |  |  |   case AArch64::LD4Rv4s: | 
					
						
							|  |  |  |   case AArch64::LD4Rv8h: | 
					
						
							|  |  |  |   case AArch64::LD4Rv16b: | 
					
						
							|  |  |  |     DestRegIdx = -1; | 
					
						
							|  |  |  |     BaseRegIdx = 1; | 
					
						
							|  |  |  |     OffsetIdx = -1; | 
					
						
							|  |  |  |     IsPrePost = false; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LD1i64_POST: | 
					
						
							|  |  |  |   case AArch64::LD2i64_POST: | 
					
						
							|  |  |  |     DestRegIdx = 1; | 
					
						
							|  |  |  |     BaseRegIdx = 4; | 
					
						
							|  |  |  |     OffsetIdx = 5; | 
					
						
							|  |  |  |     IsPrePost = true; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LD1i8_POST: | 
					
						
							|  |  |  |   case AArch64::LD1i16_POST: | 
					
						
							|  |  |  |   case AArch64::LD1i32_POST: | 
					
						
							|  |  |  |   case AArch64::LD2i8_POST: | 
					
						
							|  |  |  |   case AArch64::LD2i16_POST: | 
					
						
							|  |  |  |   case AArch64::LD2i32_POST: | 
					
						
							|  |  |  |   case AArch64::LD3i8_POST: | 
					
						
							|  |  |  |   case AArch64::LD3i16_POST: | 
					
						
							|  |  |  |   case AArch64::LD3i32_POST: | 
					
						
							|  |  |  |   case AArch64::LD3i64_POST: | 
					
						
							|  |  |  |   case AArch64::LD4i8_POST: | 
					
						
							|  |  |  |   case AArch64::LD4i16_POST: | 
					
						
							|  |  |  |   case AArch64::LD4i32_POST: | 
					
						
							|  |  |  |   case AArch64::LD4i64_POST: | 
					
						
							|  |  |  |     DestRegIdx = -1; | 
					
						
							|  |  |  |     BaseRegIdx = 4; | 
					
						
							|  |  |  |     OffsetIdx = 5; | 
					
						
							|  |  |  |     IsPrePost = true; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LD1Onev1d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Onev2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Onev4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Onev8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Onev2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Onev4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Onev8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Onev16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Rv1d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Rv2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Rv4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Rv8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Rv2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Rv4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Rv8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Rv16b_POST: | 
					
						
							|  |  |  |     DestRegIdx = 1; | 
					
						
							|  |  |  |     BaseRegIdx = 2; | 
					
						
							|  |  |  |     OffsetIdx = 3; | 
					
						
							|  |  |  |     IsPrePost = true; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LD1Twov1d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Twov2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Twov4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Twov8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Twov2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Twov4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Twov8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Twov16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Threev1d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Threev2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Threev4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Threev8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Threev2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Threev4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Threev8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Threev16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv1d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD1Fourv16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Twov2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Twov4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Twov8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Twov2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Twov4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Twov8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Twov16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Rv1d_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Rv2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Rv4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Rv8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Rv2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Rv4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Rv8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD2Rv16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Threev2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Threev4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Threev8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Threev2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Threev4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Threev8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Threev16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Rv1d_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Rv2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Rv4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Rv8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Rv2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Rv4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Rv8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD3Rv16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Fourv16b_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Rv1d_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Rv2s_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Rv4h_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Rv8b_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Rv2d_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Rv4s_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Rv8h_POST: | 
					
						
							|  |  |  |   case AArch64::LD4Rv16b_POST: | 
					
						
							|  |  |  |     DestRegIdx = -1; | 
					
						
							|  |  |  |     BaseRegIdx = 2; | 
					
						
							|  |  |  |     OffsetIdx = 3; | 
					
						
							|  |  |  |     IsPrePost = true; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LDRBBroW: | 
					
						
							|  |  |  |   case AArch64::LDRBBroX: | 
					
						
							|  |  |  |   case AArch64::LDRBBui: | 
					
						
							|  |  |  |   case AArch64::LDRBroW: | 
					
						
							|  |  |  |   case AArch64::LDRBroX: | 
					
						
							|  |  |  |   case AArch64::LDRBui: | 
					
						
							|  |  |  |   case AArch64::LDRDl: | 
					
						
							|  |  |  |   case AArch64::LDRDroW: | 
					
						
							|  |  |  |   case AArch64::LDRDroX: | 
					
						
							|  |  |  |   case AArch64::LDRDui: | 
					
						
							|  |  |  |   case AArch64::LDRHHroW: | 
					
						
							|  |  |  |   case AArch64::LDRHHroX: | 
					
						
							|  |  |  |   case AArch64::LDRHHui: | 
					
						
							|  |  |  |   case AArch64::LDRHroW: | 
					
						
							|  |  |  |   case AArch64::LDRHroX: | 
					
						
							|  |  |  |   case AArch64::LDRHui: | 
					
						
							|  |  |  |   case AArch64::LDRQl: | 
					
						
							|  |  |  |   case AArch64::LDRQroW: | 
					
						
							|  |  |  |   case AArch64::LDRQroX: | 
					
						
							|  |  |  |   case AArch64::LDRQui: | 
					
						
							|  |  |  |   case AArch64::LDRSBWroW: | 
					
						
							|  |  |  |   case AArch64::LDRSBWroX: | 
					
						
							|  |  |  |   case AArch64::LDRSBWui: | 
					
						
							|  |  |  |   case AArch64::LDRSBXroW: | 
					
						
							|  |  |  |   case AArch64::LDRSBXroX: | 
					
						
							|  |  |  |   case AArch64::LDRSBXui: | 
					
						
							|  |  |  |   case AArch64::LDRSHWroW: | 
					
						
							|  |  |  |   case AArch64::LDRSHWroX: | 
					
						
							|  |  |  |   case AArch64::LDRSHWui: | 
					
						
							|  |  |  |   case AArch64::LDRSHXroW: | 
					
						
							|  |  |  |   case AArch64::LDRSHXroX: | 
					
						
							|  |  |  |   case AArch64::LDRSHXui: | 
					
						
							|  |  |  |   case AArch64::LDRSWl: | 
					
						
							|  |  |  |   case AArch64::LDRSWroW: | 
					
						
							|  |  |  |   case AArch64::LDRSWroX: | 
					
						
							|  |  |  |   case AArch64::LDRSWui: | 
					
						
							|  |  |  |   case AArch64::LDRSl: | 
					
						
							|  |  |  |   case AArch64::LDRSroW: | 
					
						
							|  |  |  |   case AArch64::LDRSroX: | 
					
						
							|  |  |  |   case AArch64::LDRSui: | 
					
						
							|  |  |  |   case AArch64::LDRWl: | 
					
						
							|  |  |  |   case AArch64::LDRWroW: | 
					
						
							|  |  |  |   case AArch64::LDRWroX: | 
					
						
							|  |  |  |   case AArch64::LDRWui: | 
					
						
							|  |  |  |   case AArch64::LDRXl: | 
					
						
							|  |  |  |   case AArch64::LDRXroW: | 
					
						
							|  |  |  |   case AArch64::LDRXroX: | 
					
						
							|  |  |  |   case AArch64::LDRXui: | 
					
						
							|  |  |  |   case AArch64::LDURBBi: | 
					
						
							|  |  |  |   case AArch64::LDURBi: | 
					
						
							|  |  |  |   case AArch64::LDURDi: | 
					
						
							|  |  |  |   case AArch64::LDURHHi: | 
					
						
							|  |  |  |   case AArch64::LDURHi: | 
					
						
							|  |  |  |   case AArch64::LDURQi: | 
					
						
							|  |  |  |   case AArch64::LDURSBWi: | 
					
						
							|  |  |  |   case AArch64::LDURSBXi: | 
					
						
							|  |  |  |   case AArch64::LDURSHWi: | 
					
						
							|  |  |  |   case AArch64::LDURSHXi: | 
					
						
							|  |  |  |   case AArch64::LDURSWi: | 
					
						
							|  |  |  |   case AArch64::LDURSi: | 
					
						
							|  |  |  |   case AArch64::LDURWi: | 
					
						
							|  |  |  |   case AArch64::LDURXi: | 
					
						
							|  |  |  |     DestRegIdx = 0; | 
					
						
							|  |  |  |     BaseRegIdx = 1; | 
					
						
							|  |  |  |     OffsetIdx = 2; | 
					
						
							|  |  |  |     IsPrePost = false; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LDRBBpost: | 
					
						
							|  |  |  |   case AArch64::LDRBBpre: | 
					
						
							|  |  |  |   case AArch64::LDRBpost: | 
					
						
							|  |  |  |   case AArch64::LDRBpre: | 
					
						
							|  |  |  |   case AArch64::LDRDpost: | 
					
						
							|  |  |  |   case AArch64::LDRDpre: | 
					
						
							|  |  |  |   case AArch64::LDRHHpost: | 
					
						
							|  |  |  |   case AArch64::LDRHHpre: | 
					
						
							|  |  |  |   case AArch64::LDRHpost: | 
					
						
							|  |  |  |   case AArch64::LDRHpre: | 
					
						
							|  |  |  |   case AArch64::LDRQpost: | 
					
						
							|  |  |  |   case AArch64::LDRQpre: | 
					
						
							|  |  |  |   case AArch64::LDRSBWpost: | 
					
						
							|  |  |  |   case AArch64::LDRSBWpre: | 
					
						
							|  |  |  |   case AArch64::LDRSBXpost: | 
					
						
							|  |  |  |   case AArch64::LDRSBXpre: | 
					
						
							|  |  |  |   case AArch64::LDRSHWpost: | 
					
						
							|  |  |  |   case AArch64::LDRSHWpre: | 
					
						
							|  |  |  |   case AArch64::LDRSHXpost: | 
					
						
							|  |  |  |   case AArch64::LDRSHXpre: | 
					
						
							|  |  |  |   case AArch64::LDRSWpost: | 
					
						
							|  |  |  |   case AArch64::LDRSWpre: | 
					
						
							|  |  |  |   case AArch64::LDRSpost: | 
					
						
							|  |  |  |   case AArch64::LDRSpre: | 
					
						
							|  |  |  |   case AArch64::LDRWpost: | 
					
						
							|  |  |  |   case AArch64::LDRWpre: | 
					
						
							|  |  |  |   case AArch64::LDRXpost: | 
					
						
							|  |  |  |   case AArch64::LDRXpre: | 
					
						
							|  |  |  |     DestRegIdx = 1; | 
					
						
							|  |  |  |     BaseRegIdx = 2; | 
					
						
							|  |  |  |     OffsetIdx = 3; | 
					
						
							|  |  |  |     IsPrePost = true; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LDNPDi: | 
					
						
							|  |  |  |   case AArch64::LDNPQi: | 
					
						
							|  |  |  |   case AArch64::LDNPSi: | 
					
						
							|  |  |  |   case AArch64::LDPQi: | 
					
						
							|  |  |  |   case AArch64::LDPDi: | 
					
						
							|  |  |  |   case AArch64::LDPSi: | 
					
						
							|  |  |  |     DestRegIdx = -1; | 
					
						
							|  |  |  |     BaseRegIdx = 2; | 
					
						
							|  |  |  |     OffsetIdx = 3; | 
					
						
							|  |  |  |     IsPrePost = false; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LDPSWi: | 
					
						
							|  |  |  |   case AArch64::LDPWi: | 
					
						
							|  |  |  |   case AArch64::LDPXi: | 
					
						
							|  |  |  |     DestRegIdx = 0; | 
					
						
							|  |  |  |     BaseRegIdx = 2; | 
					
						
							|  |  |  |     OffsetIdx = 3; | 
					
						
							|  |  |  |     IsPrePost = false; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LDPQpost: | 
					
						
							|  |  |  |   case AArch64::LDPQpre: | 
					
						
							|  |  |  |   case AArch64::LDPDpost: | 
					
						
							|  |  |  |   case AArch64::LDPDpre: | 
					
						
							|  |  |  |   case AArch64::LDPSpost: | 
					
						
							|  |  |  |   case AArch64::LDPSpre: | 
					
						
							|  |  |  |     DestRegIdx = -1; | 
					
						
							|  |  |  |     BaseRegIdx = 3; | 
					
						
							|  |  |  |     OffsetIdx = 4; | 
					
						
							|  |  |  |     IsPrePost = true; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   case AArch64::LDPSWpost: | 
					
						
							|  |  |  |   case AArch64::LDPSWpre: | 
					
						
							|  |  |  |   case AArch64::LDPWpost: | 
					
						
							|  |  |  |   case AArch64::LDPWpre: | 
					
						
							|  |  |  |   case AArch64::LDPXpost: | 
					
						
							|  |  |  |   case AArch64::LDPXpre: | 
					
						
							|  |  |  |     DestRegIdx = 1; | 
					
						
							|  |  |  |     BaseRegIdx = 3; | 
					
						
							|  |  |  |     OffsetIdx = 4; | 
					
						
							|  |  |  |     IsPrePost = true; | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // Loads from the stack pointer don't get prefetched.
 | 
					
						
							|  |  |  |   unsigned BaseReg = MI.getOperand(BaseRegIdx).getReg(); | 
					
						
							|  |  |  |   if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP) | 
					
						
							|  |  |  |     return None; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   LoadInfo LI; | 
					
						
							|  |  |  |   LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg(); | 
					
						
							|  |  |  |   LI.BaseReg = BaseReg; | 
					
						
							|  |  |  |   LI.BaseRegIdx = BaseRegIdx; | 
					
						
							|  |  |  |   LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx); | 
					
						
							|  |  |  |   LI.IsPrePost = IsPrePost; | 
					
						
							|  |  |  |   return LI; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static Optional<unsigned> getTag(const TargetRegisterInfo *TRI, | 
					
						
							|  |  |  |                                  const MachineInstr &MI, const LoadInfo &LI) { | 
					
						
							|  |  |  |   unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0; | 
					
						
							|  |  |  |   unsigned Base = TRI->getEncodingValue(LI.BaseReg); | 
					
						
							|  |  |  |   unsigned Off; | 
					
						
							|  |  |  |   if (LI.OffsetOpnd == nullptr) | 
					
						
							|  |  |  |     Off = 0; | 
					
						
							|  |  |  |   else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() || | 
					
						
							|  |  |  |            LI.OffsetOpnd->isCPI()) | 
					
						
							|  |  |  |     return None; | 
					
						
							|  |  |  |   else if (LI.OffsetOpnd->isReg()) | 
					
						
							|  |  |  |     Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg()); | 
					
						
							|  |  |  |   else | 
					
						
							|  |  |  |     Off = LI.OffsetOpnd->getImm() >> 2; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return makeTag(Dest, Base, Off); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { | 
					
						
							|  |  |  |   // Build the initial tag map for the whole loop.
 | 
					
						
							|  |  |  |   TagMap.clear(); | 
					
						
							|  |  |  |   for (MachineBasicBlock *MBB : L.getBlocks()) | 
					
						
							|  |  |  |     for (MachineInstr &MI : *MBB) { | 
					
						
							|  |  |  |       Optional<LoadInfo> LInfo = getLoadInfo(MI); | 
					
						
							|  |  |  |       if (!LInfo) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  |       Optional<unsigned> Tag = getTag(TRI, MI, *LInfo); | 
					
						
							|  |  |  |       if (!Tag) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  |       TagMap[*Tag].push_back(&MI); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   bool AnyCollisions = false; | 
					
						
							|  |  |  |   for (auto &P : TagMap) { | 
					
						
							|  |  |  |     auto Size = P.second.size(); | 
					
						
							|  |  |  |     if (Size > 1) { | 
					
						
							|  |  |  |       for (auto *MI : P.second) { | 
					
						
							|  |  |  |         if (TII->isStridedAccess(*MI)) { | 
					
						
							|  |  |  |           AnyCollisions = true; | 
					
						
							|  |  |  |           break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (AnyCollisions) | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   // Nothing to fix.
 | 
					
						
							|  |  |  |   if (!AnyCollisions) | 
					
						
							|  |  |  |     return; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   MachineRegisterInfo &MRI = Fn.getRegInfo(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // Go through all the basic blocks in the current loop and fix any streaming
 | 
					
						
							|  |  |  |   // loads to avoid collisions with any other loads.
 | 
					
						
							|  |  |  |   LiveRegUnits LR(*TRI); | 
					
						
							|  |  |  |   for (MachineBasicBlock *MBB : L.getBlocks()) { | 
					
						
							|  |  |  |     LR.clear(); | 
					
						
							|  |  |  |     LR.addLiveOuts(*MBB); | 
					
						
							|  |  |  |     for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) { | 
					
						
							|  |  |  |       MachineInstr &MI = *I; | 
					
						
							|  |  |  |       if (!TII->isStridedAccess(MI)) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       Optional<LoadInfo> OptLdI = getLoadInfo(MI); | 
					
						
							|  |  |  |       if (!OptLdI) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  |       LoadInfo LdI = *OptLdI; | 
					
						
							|  |  |  |       Optional<unsigned> OptOldTag = getTag(TRI, MI, LdI); | 
					
						
							|  |  |  |       if (!OptOldTag) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  |       auto &OldCollisions = TagMap[*OptOldTag]; | 
					
						
							|  |  |  |       if (OldCollisions.size() <= 1) | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       bool Fixed = false; | 
					
						
							|  |  |  |       DEBUG(dbgs() << "Attempting to fix tag collision: " << MI); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-26 19:53:28 +00:00
										 |  |  |       if (!DebugCounter::shouldExecute(FixCounter)) { | 
					
						
							|  |  |  |         DEBUG(dbgs() << "Skipping fix due to debug counter:\n  " << MI); | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       // Add the non-base registers of MI as live so we don't use them as
 | 
					
						
							|  |  |  |       // scratch registers.
 | 
					
						
							|  |  |  |       for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) { | 
					
						
							|  |  |  |         if (OpI == static_cast<unsigned>(LdI.BaseRegIdx)) | 
					
						
							|  |  |  |           continue; | 
					
						
							|  |  |  |         MachineOperand &MO = MI.getOperand(OpI); | 
					
						
							|  |  |  |         if (MO.isReg() && MO.readsReg()) | 
					
						
							|  |  |  |           LR.addReg(MO.getReg()); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-04-12 14:10:50 +00:00
										 |  |  |       for (unsigned ScratchReg : AArch64::GPR64RegClass) { | 
					
						
							|  |  |  |         if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg)) | 
					
						
							|  |  |  |           continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         LoadInfo NewLdI(LdI); | 
					
						
							|  |  |  |         NewLdI.BaseReg = ScratchReg; | 
					
						
							|  |  |  |         unsigned NewTag = *getTag(TRI, MI, NewLdI); | 
					
						
							|  |  |  |         // Scratch reg tag would collide too, so don't use it.
 | 
					
						
							|  |  |  |         if (TagMap.count(NewTag)) | 
					
						
							|  |  |  |           continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         DEBUG(dbgs() << "Changing base reg to: " << printReg(ScratchReg, TRI) | 
					
						
							|  |  |  |                      << '\n'); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Rewrite:
 | 
					
						
							|  |  |  |         //   Xd = LOAD Xb, off
 | 
					
						
							|  |  |  |         // to:
 | 
					
						
							|  |  |  |         //   Xc = MOV Xb
 | 
					
						
							|  |  |  |         //   Xd = LOAD Xc, off
 | 
					
						
							|  |  |  |         DebugLoc DL = MI.getDebugLoc(); | 
					
						
							|  |  |  |         BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg) | 
					
						
							|  |  |  |             .addReg(AArch64::XZR) | 
					
						
							|  |  |  |             .addReg(LdI.BaseReg) | 
					
						
							|  |  |  |             .addImm(0); | 
					
						
							|  |  |  |         MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx); | 
					
						
							|  |  |  |         BaseOpnd.setReg(ScratchReg); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // If the load does a pre/post increment, then insert a MOV after as
 | 
					
						
							|  |  |  |         // well to update the real base register.
 | 
					
						
							|  |  |  |         if (LdI.IsPrePost) { | 
					
						
							|  |  |  |           DEBUG(dbgs() << "Doing post MOV of incremented reg: " | 
					
						
							|  |  |  |                        << printReg(ScratchReg, TRI) << '\n'); | 
					
						
							|  |  |  |           MI.getOperand(0).setReg( | 
					
						
							|  |  |  |               ScratchReg); // Change tied operand pre/post update dest.
 | 
					
						
							|  |  |  |           BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL, | 
					
						
							|  |  |  |                   TII->get(AArch64::ORRXrs), LdI.BaseReg) | 
					
						
							|  |  |  |               .addReg(AArch64::XZR) | 
					
						
							|  |  |  |               .addReg(ScratchReg) | 
					
						
							|  |  |  |               .addImm(0); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for (int I = 0, E = OldCollisions.size(); I != E; ++I) | 
					
						
							|  |  |  |           if (OldCollisions[I] == &MI) { | 
					
						
							|  |  |  |             std::swap(OldCollisions[I], OldCollisions[E - 1]); | 
					
						
							|  |  |  |             OldCollisions.pop_back(); | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |           } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Update TagMap to reflect instruction changes to reduce the number
 | 
					
						
							|  |  |  |         // of later MOVs to be inserted.  This needs to be done after
 | 
					
						
							|  |  |  |         // OldCollisions is updated since it may be relocated by this
 | 
					
						
							|  |  |  |         // insertion.
 | 
					
						
							|  |  |  |         TagMap[NewTag].push_back(&MI); | 
					
						
							|  |  |  |         ++NumCollisionsAvoided; | 
					
						
							|  |  |  |         Fixed = true; | 
					
						
							|  |  |  |         Modified = true; | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       if (!Fixed) | 
					
						
							|  |  |  |         ++NumCollisionsNotAvoided; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { | 
					
						
							|  |  |  |   auto &ST = static_cast<const AArch64Subtarget &>(Fn.getSubtarget()); | 
					
						
							|  |  |  |   if (ST.getProcFamily() != AArch64Subtarget::Falkor) | 
					
						
							|  |  |  |     return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (skipFunction(Fn.getFunction())) | 
					
						
							|  |  |  |     return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo()); | 
					
						
							|  |  |  |   TRI = ST.getRegisterInfo(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   assert(TRI->trackLivenessAfterRegAlloc(Fn) && | 
					
						
							|  |  |  |          "Register liveness not available!"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   Modified = false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for (MachineLoop *I : LI) | 
					
						
							|  |  |  |     for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) | 
					
						
							|  |  |  |       // Only process inner-loops
 | 
					
						
							|  |  |  |       if (L->empty()) | 
					
						
							|  |  |  |         runOnLoop(**L, Fn); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return Modified; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); } |