# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
#
# Runs only the si-insert-waits pass over the machine function below and
# verifies where the inserted S_WAITCNT ends up relative to an EXP_DONE whose
# source VGPRs are overwritten right afterwards.
--- |
  ; IR module backing the machine function. The two leading unnamed arguments
  ; take value numbers %0 and %1, so the unnamed entry block is %2, which is
  ; what `%ir-block.2` in the MIR body refers to.
  define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) #0 {
    %a = load volatile float, float addrspace(1)* undef
    %b = load volatile float, float addrspace(1)* undef
    %c = load volatile float, float addrspace(1)* undef
    %d = load volatile float, float addrspace(1)* undef
    call void @llvm.amdgcn.exp.f32(i32 15, i32 1, float %a, float %b, float %c, float %d, i1 true, i1 false)
    ; The returned constants mirror the immediates moved into %vgpr0-%vgpr3
    ; in the MIR body (0.5, 1.0, 2.0, 4.0).
    ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
  }

  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

  attributes #0 = { nounwind }

...
---

# Expected output: the S_WAITCNT must be on the line directly after EXP_DONE,
# and the four V_MOV writes to %vgpr0-%vgpr3 are matched after it, in order.
# NOTE(review): 3855 is 0xF0F; presumably this is a wait on the export counter
# only, with the other counters left at their maximum -- confirm against the
# s_waitcnt operand encoding for this target.
# CHECK-LABEL: name: exp_done_waitcnt{{$}}
# CHECK: EXP_DONE
# CHECK-NEXT: S_WAITCNT 3855
# CHECK: %vgpr0 = V_MOV_B32
# CHECK: %vgpr1 = V_MOV_B32
# CHECK: %vgpr2 = V_MOV_B32
# CHECK: %vgpr3 = V_MOV_B32
name:            exp_done_waitcnt
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.2):
    ; Fill in the upper two dwords of the 128-bit resource operand used by the
    ; buffer loads below.
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1

    ; Four volatile dword loads, one per export source register. The last one
    ; kills the resource operand.
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %vgpr2 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)

    ; The export reads %vgpr0-%vgpr3; the wait under test is expected right
    ; after this instruction.
    EXP_DONE 0, killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3, -1, -1, 15, implicit %exec

    ; Overwrite the just-exported registers with the return value. The
    ; immediates are the IEEE-754 single-precision bit patterns of
    ; 0.5 (0x3F000000), 1.0 (0x3F800000), 2.0 (0x40000000), 4.0 (0x40800000).
    %vgpr0 = V_MOV_B32_e32 1056964608, implicit %exec
    %vgpr1 = V_MOV_B32_e32 1065353216, implicit %exec
    %vgpr2 = V_MOV_B32_e32 1073741824, implicit %exec
    %vgpr3 = V_MOV_B32_e32 1082130432, implicit %exec
    SI_RETURN_TO_EPILOG killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3

...