You've already forked linux-packaging-mono
							
							
		
			
	
	
		
			270 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
		
		
			
		
	
	
			270 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
|   | ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 | ||
|  | 
 | ||
|  | ; GCN-LABEL: {{^}}simple_nested_if:
 | ||
|  | ; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: s_cbranch_execz [[ENDIF]]
 | ||
|  | ; GCN:      s_and_b64 exec, exec, vcc
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF]]
 | ||
|  | ; GCN-NEXT: {{^BB[0-9_]+}}:
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: {{^}}[[ENDIF]]:
 | ||
|  | ; GCN-NEXT: s_endpgm
 | ||
|  | define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) { | ||
|  | bb: | ||
|  |   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() | ||
|  |   %tmp1 = icmp ugt i32 %tmp, 1 | ||
|  |   br i1 %tmp1, label %bb.outer.then, label %bb.outer.end | ||
|  | 
 | ||
|  | bb.outer.then:                                    ; preds = %bb
 | ||
|  |   %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp | ||
|  |   store i32 0, i32 addrspace(1)* %tmp4, align 4 | ||
|  |   %tmp5 = icmp eq i32 %tmp, 2 | ||
|  |   br i1 %tmp5, label %bb.outer.end, label %bb.inner.then | ||
|  | 
 | ||
|  | bb.inner.then:                                    ; preds = %bb.outer.then
 | ||
|  |   %tmp7 = add i32 %tmp, 1 | ||
|  |   %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7 | ||
|  |   store i32 1, i32 addrspace(1)* %tmp9, align 4 | ||
|  |   br label %bb.outer.end | ||
|  | 
 | ||
|  | bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
 | ||
|  |   ret void | ||
|  | } | ||
|  | 
 | ||
|  | ; GCN-LABEL: {{^}}uncollapsable_nested_if:
 | ||
|  | ; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
 | ||
|  | ; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: {{^BB[0-9_]+}}:
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
 | ||
|  | ; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
 | ||
|  | ; GCN-NEXT: s_endpgm
 | ||
|  | define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) { | ||
|  | bb: | ||
|  |   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() | ||
|  |   %tmp1 = icmp ugt i32 %tmp, 1 | ||
|  |   br i1 %tmp1, label %bb.outer.then, label %bb.outer.end | ||
|  | 
 | ||
|  | bb.outer.then:                                    ; preds = %bb
 | ||
|  |   %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp | ||
|  |   store i32 0, i32 addrspace(1)* %tmp4, align 4 | ||
|  |   %tmp5 = icmp eq i32 %tmp, 2 | ||
|  |   br i1 %tmp5, label %bb.inner.end, label %bb.inner.then | ||
|  | 
 | ||
|  | bb.inner.then:                                    ; preds = %bb.outer.then
 | ||
|  |   %tmp7 = add i32 %tmp, 1 | ||
|  |   %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7 | ||
|  |   store i32 1, i32 addrspace(1)* %tmp8, align 4 | ||
|  |   br label %bb.inner.end | ||
|  | 
 | ||
|  | bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
 | ||
|  |   %tmp9 = add i32 %tmp, 2 | ||
|  |   %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9 | ||
|  |   store i32 2, i32 addrspace(1)* %tmp10, align 4 | ||
|  |   br label %bb.outer.end | ||
|  | 
 | ||
|  | bb.outer.end:                                     ; preds = %bb.inner.then, %bb
 | ||
|  |   ret void | ||
|  | } | ||
|  | 
 | ||
|  | ; GCN-LABEL: {{^}}nested_if_if_else:
 | ||
|  | ; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
 | ||
|  | ; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: {{^BB[0-9_]+}}:
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: {{^}}[[THEN_INNER]]:
 | ||
|  | ; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
 | ||
|  | ; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
 | ||
|  | ; GCN-NEXT: s_endpgm
 | ||
|  | define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) { | ||
|  | bb: | ||
|  |   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() | ||
|  |   %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp | ||
|  |   store i32 0, i32 addrspace(1)* %tmp1, align 4 | ||
|  |   %tmp2 = icmp ugt i32 %tmp, 1 | ||
|  |   br i1 %tmp2, label %bb.outer.then, label %bb.outer.end | ||
|  | 
 | ||
|  | bb.outer.then:                                       ; preds = %bb
 | ||
|  |   %tmp5 = icmp eq i32 %tmp, 2 | ||
|  |   br i1 %tmp5, label %bb.then, label %bb.else | ||
|  | 
 | ||
|  | bb.then:                                             ; preds = %bb.outer.then
 | ||
|  |   %tmp3 = add i32 %tmp, 1 | ||
|  |   %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3 | ||
|  |   store i32 1, i32 addrspace(1)* %tmp4, align 4 | ||
|  |   br label %bb.outer.end | ||
|  | 
 | ||
|  | bb.else:                                             ; preds = %bb.outer.then
 | ||
|  |   %tmp7 = add i32 %tmp, 2 | ||
|  |   %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7 | ||
|  |   store i32 2, i32 addrspace(1)* %tmp9, align 4 | ||
|  |   br label %bb.outer.end | ||
|  | 
 | ||
|  | bb.outer.end:                                        ; preds = %bb, %bb.then, %bb.else
 | ||
|  |   ret void | ||
|  | } | ||
|  | 
 | ||
|  | ; GCN-LABEL: {{^}}nested_if_else_if:
 | ||
|  | ; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
 | ||
|  | ; GCN-NEXT: {{^BB[0-9_]+}}:
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: {{^BB[0-9_]+}}:
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
 | ||
|  | ; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
 | ||
|  | ; GCN-NEXT: {{^}}[[THEN_OUTER]]:
 | ||
|  | ; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
 | ||
|  | ; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
 | ||
|  | ; GCN-NEXT: {{^BB[0-9_]+}}:
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
 | ||
|  | ; GCN-NEXT: {{^BB[0-9_]+}}:
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
 | ||
|  | ; GCN-NEXT: s_endpgm
 | ||
|  | define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) { | ||
|  | bb: | ||
|  |   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() | ||
|  |   %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp | ||
|  |   store i32 0, i32 addrspace(1)* %tmp1, align 4 | ||
|  |   %cc1 = icmp ugt i32 %tmp, 1 | ||
|  |   br i1 %cc1, label %bb.outer.then, label %bb.outer.else | ||
|  | 
 | ||
|  | bb.outer.then: | ||
|  |   %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1 | ||
|  |   store i32 1, i32 addrspace(1)* %tmp2, align 4 | ||
|  |   %cc2 = icmp eq i32 %tmp, 2 | ||
|  |   br i1 %cc2, label %bb.inner.then, label %bb.outer.end | ||
|  | 
 | ||
|  | bb.inner.then: | ||
|  |   %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2 | ||
|  |   store i32 2, i32 addrspace(1)* %tmp3, align 4 | ||
|  |   br label %bb.outer.end | ||
|  | 
 | ||
|  | bb.outer.else: | ||
|  |   %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3 | ||
|  |   store i32 3, i32 addrspace(1)* %tmp4, align 4 | ||
|  |   %cc3 = icmp eq i32 %tmp, 2 | ||
|  |   br i1 %cc3, label %bb.inner.then2, label %bb.outer.end | ||
|  | 
 | ||
|  | bb.inner.then2: | ||
|  |   %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4 | ||
|  |   store i32 4, i32 addrspace(1)* %tmp5, align 4 | ||
|  |   br label %bb.outer.end | ||
|  | 
 | ||
|  | bb.outer.end: | ||
|  |   ret void | ||
|  | } | ||
|  | 
 | ||
|  | ; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
 | ||
|  | ; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
 | ||
|  | ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
 | ||
|  | ; GCN-NEXT: {{^BB[0-9_]+}}:
 | ||
|  | ; GCN:      store_dword
 | ||
|  | ; GCN-NEXT: {{^}}[[ENDIF]]:
 | ||
|  | ; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
 | ||
|  | ; GCN:      s_barrier
 | ||
|  | ; GCN-NEXT: s_endpgm
 | ||
|  | define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) { | ||
|  | bb: | ||
|  |   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() | ||
|  |   %tmp1 = icmp ugt i32 %tmp, 1 | ||
|  |   br i1 %tmp1, label %bb.then, label %bb.end | ||
|  | 
 | ||
|  | bb.then:                                          ; preds = %bb
 | ||
|  |   %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp | ||
|  |   store i32 0, i32 addrspace(1)* %tmp4, align 4 | ||
|  |   br label %bb.end | ||
|  | 
 | ||
|  | bb.end:                                           ; preds = %bb.then, %bb
 | ||
|  |   call void @llvm.amdgcn.s.barrier() | ||
|  |   ret void | ||
|  | } | ||
|  | 
 | ||
|  | ; Make sure scc liveness is updated if sor_b64 is removed
 | ||
|  | ; GCN-LABEL: {{^}}scc_liveness:
 | ||
|  | 
 | ||
|  | ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
 | ||
|  | ; GCN: s_andn2_b64 exec, exec,
 | ||
|  | ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
 | ||
|  | 
 | ||
|  | ; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
 | ||
|  | ; GCN: s_and_b64 exec, exec, vcc
 | ||
|  | 
 | ||
|  | ; GCN-NOT: s_or_b64 exec, exec
 | ||
|  | 
 | ||
|  | ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
 | ||
|  | ; GCN: s_andn2_b64
 | ||
|  | ; GCN-NEXT: s_cbranch_execnz
 | ||
|  | 
 | ||
|  | ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
 | ||
|  | ; GCN: buffer_store_dword
 | ||
|  | ; GCN: buffer_store_dword
 | ||
|  | ; GCN: buffer_store_dword
 | ||
|  | ; GCN: buffer_store_dword
 | ||
|  | ; GCN: s_setpc_b64
 | ||
|  | define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { | ||
|  | bb: | ||
|  |   br label %bb1 | ||
|  | 
 | ||
|  | bb1:                                              ; preds = %Flow1, %bb1, %bb
 | ||
|  |   %tmp = icmp slt i32 %arg, 519 | ||
|  |   br i1 %tmp, label %bb2, label %bb1 | ||
|  | 
 | ||
|  | bb2:                                              ; preds = %bb1
 | ||
|  |   %tmp3 = icmp eq i32 %arg, 0 | ||
|  |   br i1 %tmp3, label %bb4, label %bb10 | ||
|  | 
 | ||
|  | bb4:                                              ; preds = %bb2
 | ||
|  |   %tmp6 = load float, float* undef | ||
|  |   %tmp7 = fcmp olt float %tmp6, 0.0 | ||
|  |   br i1 %tmp7, label %bb8, label %Flow | ||
|  | 
 | ||
|  | bb8:                                              ; preds = %bb4
 | ||
|  |   %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1 | ||
|  |   br label %Flow | ||
|  | 
 | ||
|  | Flow:                                             ; preds = %bb8, %bb4
 | ||
|  |   %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ] | ||
|  |   br label %bb10 | ||
|  | 
 | ||
|  | bb10:                                             ; preds = %Flow, %bb2
 | ||
|  |   %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ] | ||
|  |   br i1 %tmp3, label %bb12, label %Flow1 | ||
|  | 
 | ||
|  | Flow1:                                            ; preds = %bb10
 | ||
|  |   br label %bb1 | ||
|  | 
 | ||
|  | bb12:                                             ; preds = %bb10
 | ||
|  |   store volatile <4 x float> %tmp11, <4 x float>* undef, align 16 | ||
|  |   ret void | ||
|  | } | ||
|  | 
 | ||
|  | declare i32 @llvm.amdgcn.workitem.id.x() #0 | ||
|  | declare void @llvm.amdgcn.s.barrier() #1 | ||
|  | 
 | ||
|  | attributes #0 = { nounwind readnone speculatable } | ||
|  | attributes #1 = { nounwind convergent } | ||
|  | attributes #2 = { nounwind } |