Imported Upstream version 5.18.0.247

Former-commit-id: 2d6af2e4ed0eda5cbdc2946446ef7718456ad190
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2019-01-25 08:19:26 +00:00
parent 279aa8f685
commit ce8e504569
28478 changed files with 39 additions and 3866962 deletions

View File

@@ -1,33 +0,0 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT
target triple = "nvptx64-unknown-cuda"
;; Make sure we are generating proper instruction sequences for fused ops
;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
;; add.f32 otherwise. Without an explicit rounding mode on add.f32, ptxas
;; is free to fuse with a multiply if it is able. If fusion is not allowed,
;; we do not form fma.rn at the PTX level and explicitly generate add.rn
;; for all adds to prevent ptxas from fusing the ops.
;; FAST-LABEL: @t0
;; DEFAULT-LABEL: @t0
define float @t0(float %a, float %b, float %c) {
;; A multiply whose result feeds directly into an add: the candidate pattern
;; for contraction. The run with -fp-contract=fast is expected to show a
;; single fma.rn.f32, while the default run is expected to show a separate
;; mul.rn.f32 followed by an add.rn.f32.
;; FAST: fma.rn.f32
;; DEFAULT: mul.rn.f32
;; DEFAULT: add.rn.f32
  %prod = fmul float %a, %b
  %sum = fadd float %prod, %c
  ret float %sum
}
;; FAST-LABEL: @t1
;; DEFAULT-LABEL: @t1
define float @t1(float %a, float %b) {
;; A lone add with no multiply feeding it, so there is nothing to turn into
;; an fma. The default run must still carry the explicit rounding mode
;; (add.rn.f32) so that ptxas does not fuse it with anything else; the run
;; with -fp-contract=fast is expected to show the plain add.f32 form.
;; FAST: add.f32
;; DEFAULT: add.rn.f32
  %sum = fadd float %a, %b
  ret float %sum
}