64ac736ec5
Former-commit-id: f3cc9b82f3e5bd8f0fd3ebc098f789556b44e9cd
197 lines
5.7 KiB
LLVM
197 lines
5.7 KiB
LLVM
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST
; Check latencies of vmul/vfma accumulate chains.
|
|
|
|
; Scalar multiply-accumulate chain: f1 * f2 + f3 * f4 + f5 * f6.
; The default run expects the accumulate steps to be VMLAS; the
; -fp-contract=fast run expects VFMAS in the same positions.
define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
; CHECK: ********** MI Scheduling **********
; CHECK: Test1:%bb.0

; CHECK: VMULS
; > VMULS common latency = 5
; CHECK: Latency : 5
; CHECK: Successors:
; CHECK: Data
; > VMULS read-advanced latency to VMLAS = 0
; CHECK-SAME: Latency=0

; CHECK-DEFAULT: VMLAS
; CHECK-FAST: VFMAS
; > VMLAS common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VMLAS read-advanced latency to the next VMLAS = 4
; CHECK-SAME: Latency=4

; CHECK-DEFAULT: VMLAS
; CHECK-FAST: VFMAS
; > second VMLAS common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VMLAS not-optimized latency to VMOVRS = 9
; CHECK-SAME: Latency=9

; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLAS, VMLAS
  %mul1 = fmul float %f1, %f2
  %mul2 = fmul float %f3, %f4
  %mul3 = fmul float %f5, %f6
  %add1 = fadd float %mul1, %mul2
  %add2 = fadd float %add1, %mul3
  ret float %add2
}
|
|
|
|
; ASIMD form
|
|
; Vector (<2 x float>) multiply-accumulate chain: f1 * f2 + f3 * f4 + f5 * f6.
; The default run expects the accumulate steps to be VMLAfd; the
; -fp-contract=fast run expects VFMAfd in the same positions.
define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) {
; CHECK: ********** MI Scheduling **********
; CHECK: Test2:%bb.0

; CHECK: VMULfd
; > VMULfd common latency = 5
; CHECK: Latency : 5
; CHECK: Successors:
; CHECK: Data
; > VMULfd read-advanced latency to VMLAfd = 0
; CHECK-SAME: Latency=0

; CHECK-DEFAULT: VMLAfd
; CHECK-FAST: VFMAfd
; > VMLAfd common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VMLAfd read-advanced latency to the next VMLAfd = 4
; CHECK-SAME: Latency=4

; CHECK-DEFAULT: VMLAfd
; CHECK-FAST: VFMAfd
; > second VMLAfd common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VMLAfd not-optimized latency to VMOVRRD = 9
; CHECK-SAME: Latency=9

; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULfd, VMLAfd, VMLAfd
  %mul1 = fmul <2 x float> %f1, %f2
  %mul2 = fmul <2 x float> %f3, %f4
  %mul3 = fmul <2 x float> %f5, %f6
  %add1 = fadd <2 x float> %mul1, %mul2
  %add2 = fadd <2 x float> %add1, %mul3
  ret <2 x float> %add2
}
|
|
|
|
; Scalar multiply-subtract chain: f1 * f2 - f3 * f4 - f5 * f6.
; The default run expects the subtract steps to be VMLSS; the
; -fp-contract=fast run expects VFMSS in the same positions.
define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
; CHECK: ********** MI Scheduling **********
; CHECK: Test3:%bb.0

; CHECK: VMULS
; > VMULS common latency = 5
; CHECK: Latency : 5
; CHECK: Successors:
; CHECK: Data
; > VMULS read-advanced latency to VMLSS = 0
; CHECK-SAME: Latency=0

; CHECK-DEFAULT: VMLSS
; CHECK-FAST: VFMSS
; > VMLSS common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VMLSS read-advanced latency to the next VMLSS = 4
; CHECK-SAME: Latency=4

; CHECK-DEFAULT: VMLSS
; CHECK-FAST: VFMSS
; > second VMLSS common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VMLSS not-optimized latency to VMOVRS = 9
; CHECK-SAME: Latency=9

; f1 * f2 - f3 * f4 - f5 * f6 ==> VMULS, VMLSS, VMLSS
  %mul1 = fmul float %f1, %f2
  %mul2 = fmul float %f3, %f4
  %mul3 = fmul float %f5, %f6
  %sub1 = fsub float %mul1, %mul2
  %sub2 = fsub float %sub1, %mul3
  ret float %sub2
}
|
|
|
|
; ASIMD form
|
|
; Vector (<2 x float>) multiply-subtract chain: f1 * f2 - f3 * f4 - f5 * f6.
; The default run expects the subtract steps to be VMLSfd; the
; -fp-contract=fast run expects VFMSfd in the same positions.
define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) {
; CHECK: ********** MI Scheduling **********
; CHECK: Test4:%bb.0

; CHECK: VMULfd
; > VMULfd common latency = 5
; CHECK: Latency : 5
; CHECK: Successors:
; CHECK: Data
; > VMULfd read-advanced latency to VMLSfd = 0
; CHECK-SAME: Latency=0

; CHECK-DEFAULT: VMLSfd
; CHECK-FAST: VFMSfd
; > VMLSfd common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VMLSfd read-advanced latency to the next VMLSfd = 4
; CHECK-SAME: Latency=4

; CHECK-DEFAULT: VMLSfd
; CHECK-FAST: VFMSfd
; > second VMLSfd common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VMLSfd not-optimized latency to VMOVRRD = 9
; CHECK-SAME: Latency=9

; f1 * f2 - f3 * f4 - f5 * f6 ==> VMULfd, VMLSfd, VMLSfd
  %mul1 = fmul <2 x float> %f1, %f2
  %mul2 = fmul <2 x float> %f3, %f4
  %mul3 = fmul <2 x float> %f5, %f6
  %sub1 = fsub <2 x float> %mul1, %mul2
  %sub2 = fsub <2 x float> %sub1, %mul3
  ret <2 x float> %sub2
}
|
|
|
|
; Single scalar multiply followed by a subtract of a third operand:
; f1 * f2 - f3. The default run expects VNMLS; the -fp-contract=fast
; run expects VFNMS.
define float @Test5(float %f1, float %f2, float %f3) {
; CHECK: ********** MI Scheduling **********
; CHECK: Test5:%bb.0

; CHECK-DEFAULT: VNMLS
; CHECK-FAST: VFNMS
; > VNMLS/VFNMS common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VNMLS/VFNMS not-optimized latency to VMOVRS = 9
; CHECK-SAME: Latency=9

; f1 * f2 - f3 ==> VNMLS/VFNMS
  %mul = fmul float %f1, %f2
  %sub = fsub float %mul, %f3
  ret float %sub
}
|
|
|
|
|
|
; Negated scalar multiply followed by a subtract: (-0.0 - f1 * f2) - f2.
; The default run expects VNMLA; the -fp-contract=fast run expects VFNMA.
; NOTE(review): the body subtracts %f2, and %f3 is never used. This may be
; a typo for %f3, but the IR is kept as-is so the scheduled output the
; directives match is not disturbed -- confirm before changing.
define float @Test6(float %f1, float %f2, float %f3) {
; CHECK: ********** MI Scheduling **********
; CHECK: Test6:%bb.0

; CHECK-DEFAULT: VNMLA
; CHECK-FAST: VFNMA
; > VNMLA/VFNMA common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: Data
; > VNMLA/VFNMA not-optimized latency to VMOVRS = 9
; CHECK-SAME: Latency=9

; -(f1 * f2) - f2 ==> VNMLA/VFNMA
  %mul = fmul float %f1, %f2
  %sub1 = fsub float -0.0, %mul
  %sub2 = fsub float %sub1, %f2
  ret float %sub2
}
|