You've already forked linux-packaging-mono
							
							
		
			
	
	
		
			167 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
		
		
			
		
	
	
			167 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; SSE4.1 insertps intrinsic exercised by every test in this file.
; Operands: the vector being inserted into, the vector supplying the inserted
; element, and the 8-bit control byte.
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
; The control byte here is the function argument %c instead of a constant, and
; the call is expected to come out of instcombine unchanged.

define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
; CHECK-LABEL: @insertps_non_const_imm(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
  ret <4 x float> %res

}

; If all zero mask bits are set, return a zero vector regardless of the other control bits.
; Control byte 0x0f (15): the low four (zero mask) bits are all set and the
; upper four bits are clear, so the whole call folds to zeroinitializer.

define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0f(
; CHECK-NEXT:    ret <4 x float> zeroinitializer
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  ret <4 x float> %res

}
; Control byte 0xff (255): all four zero mask bits are set and so are the upper
; four control bits. The nonzero upper bits must not block the fold; the call
; still becomes zeroinitializer.
define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xff(
; CHECK-NEXT:    ret <4 x float> zeroinitializer
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  ret <4 x float> %res

}

; If some zero mask bits are set that do not override the insertion, we do not change anything.
; Control byte 0x0c (12, 0b00_00_1100): element 0 of %v2 goes to lane 0 while
; the zero mask clears lanes 2 and 3. With two distinct inputs the call is
; expected to be left as is.

define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0c(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ret <4 x float> %res

}

; A partially set zero mask normally leaves the call alone, unless both input
; vectors are the same operand; then the call becomes a single shuffle.
; Control byte 0x15 (21, 0b00_01_0101): element 0 of %v1 goes to lane 1 and the
; zero mask clears lanes 0 and 2, giving <0.0, %v1[0], 0.0, %v1[3]>. The zeros
; come from the constant second shuffle operand (mask indices 4 and 6).

define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x15_single_input(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  ret <4 x float> %res

}

; The zero mask overrides the insertion lane.
; Control byte 0x1a (26, 0b00_01_1010): the insertion targets lane 1, but the
; zero mask clears lanes 1 and 3, so the inserted element never shows up. With
; %v1 as both inputs the result is <%v1[0], 0.0, %v1[2], 0.0>, expressed as a
; shuffle of %v1 with a constant vector.

define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x1a_single_input(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  ret <4 x float> %res

}

; The zero mask overrides the insertion lane, so the second input vector is not used.
; Control byte 0xc1 (193, 0b11_00_0001): element 3 of %v2 would go to lane 0,
; but the zero mask clears lane 0. The expected result is simply %v1 with 0.0
; written into lane 0; %v2 does not appear in the output.

define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc1(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  ret <4 x float> %res

}

; If no zero mask bits are set, convert to a shuffle.
; Control byte 0x00: element 0 of %v2 is inserted into lane 0 of %v1. In the
; shuffle mask, indices 0-3 pick from %v1 and 4-7 pick from %v2, so index 4 in
; position 0 is that insertion.

define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x00(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  ret <4 x float> %res

}

; Control byte 0x10 (16, 0b00_01_0000), no zero mask bits: element 0 of %v2 is
; inserted into lane 1 of %v1 (shuffle index 4 in position 1).
define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x10(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  ret <4 x float> %res

}

; Control byte 0x20 (32, 0b00_10_0000), no zero mask bits: element 0 of %v2 is
; inserted into lane 2 of %v1 (shuffle index 4 in position 2).
define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x20(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  ret <4 x float> %res

}

; Control byte 0x30 (48, 0b00_11_0000), no zero mask bits: element 0 of %v2 is
; inserted into lane 3 of %v1 (shuffle index 4 in position 3).
define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x30(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
  ret <4 x float> %res

}

; Control byte 0xc0 (192, 0b11_00_0000), no zero mask bits: element 3 of %v2 is
; inserted into lane 0 of %v1 (shuffle index 7 in position 0).
define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc0(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
  ret <4 x float> %res

}

; Control byte 0xd0 (208, 0b11_01_0000), no zero mask bits: element 3 of %v2 is
; inserted into lane 1 of %v1 (shuffle index 7 in position 1).
define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xd0(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
  ret <4 x float> %res

}

; Control byte 0xe0 (224, 0b11_10_0000), no zero mask bits: element 3 of %v2 is
; inserted into lane 2 of %v1 (shuffle index 7 in position 2).
define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xe0(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
  ret <4 x float> %res

}

; Control byte 0xf0 (240, 0b11_11_0000), no zero mask bits: element 3 of %v2 is
; inserted into lane 3 of %v1 (shuffle index 7 in position 3).
define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xf0(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
  ret <4 x float> %res

}
