Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions

View File

@@ -0,0 +1,52 @@
#include <stdio.h>
#define N 1536
float A[N][N];
float B[N][N];
float C[N][N];
/*
 * Fill the global input matrices A and B with the same deterministic
 * pattern: element (row, col) receives (1 + (row * col) % 1024) / 2.0,
 * narrowed to float when it is stored.
 */
void init_array()
{
    int row = 0;

    while (row < N) {
        int col;

        for (col = 0; col < N; ++col) {
            /* Evaluated in double, then narrowed once; both matrices
             * receive the identical float value. */
            const float value = (1 + (row * col) % 1024) / 2.0;

            A[row][col] = value;
            B[row][col] = value;
        }
        ++row;
    }
}
/*
 * Dump the result matrix C to stdout, one matrix row at a time.
 * Each element is printed with "%lf " and a line break is inserted
 * after every 80th element of a row, plus one more at the end of the row.
 */
void print_array()
{
    int row, col;

    for (row = 0; row < N; ++row) {
        const float *line = C[row];

        for (col = 0; col < N; ++col) {
            fprintf(stdout, "%lf ", line[col]);
            /* col is never negative, so this matches col % 80 == 79. */
            if ((col + 1) % 80 == 0) {
                fprintf(stdout, "\n");
            }
        }
        fprintf(stdout, "\n");
    }
}
/*
 * Benchmark driver: initialise A and B, then compute C = A * B with the
 * textbook i/j/k triple loop. When built with -DTEST the result matrix is
 * printed to stdout so that differently optimised builds can be compared.
 *
 * Always returns 0.
 */
int main()
{
    /* The unused timing locals (t_start, t_end) were removed: nothing read
     * or wrote them and they only produced unused-variable warnings. */
    int i, j, k;

    init_array();

    /* The loop nest is kept in its original form on purpose: it is the
     * kernel whose compiled output is being studied. */
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            C[i][j] = 0;
            for (k = 0; k < N; k++)
                C[i][j] = C[i][j] + A[i][k] * B[k][j];
        }
    }

#ifdef TEST
    print_array();
#endif
    return 0;
}

View File

@@ -0,0 +1,274 @@
.file "matmul.normalopt.ll"
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
# init_array (x86-64, AT&T syntax, AVX scalar instructions)
# Fills A and B with the same values:
#   A[i][j] = B[i][j] = (float)((1 + (i*j) % 1024) * 0.5)   for i, j in [0, 1536)
# Takes no arguments and returns nothing.
# Register roles: r8 = i, rcx = j, xmm0 = the double constant at .LCPI0_0,
#                 xmm1 = value being converted/stored, rsi = byte offset of row i.
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
# i = 0; load the multiplier once, outside both loops.
xorl %r8d, %r8d
vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
.LBB0_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
# j = 0 at the start of every row.
xorl %ecx, %ecx
.align 16, 0x90
.LBB0_2: # %for.body3
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
# edx = i * j (32-bit product).
movl %ecx, %edx
imull %r8d, %edx
# Signed remainder by 1024 without a divide: add a bias of 1023 when the
# product is negative, clear the low ten bits, negate; the result in esi is
# minus the multiple of 1024 nearest to the product in the direction of zero.
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
# rax = i * 2048; multiplied by three below to give the row stride of 6144 bytes.
movq %r8, %rax
shlq $11, %rax
# edi = 1 + (i*j) % 1024
leal 1(%rdx,%rsi), %edi
# rsi = i * 6144 = byte offset of row i (1536 floats per row).
leaq (%rax,%rax,2), %rsi
# rdx = j + 1; the comparison result is consumed by the jne at the bottom of
# the loop (only vector instructions and a mov execute in between).
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
# Convert to double, scale by xmm0, narrow to float.
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
# Store the same float into A[i][j] and B[i][j].
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB0_2
# BB#3: # %for.inc17
# in Loop: Header=BB0_1 Depth=1
# Next row; stop after 1536 rows.
incq %r8
cmpq $1536, %r8 # imm = 0x600
jne .LBB0_1
# BB#4: # %for.end19
popq %rbp
ret
.Ltmp5:
.size init_array, .Ltmp5-init_array
.cfi_endproc
# print_array (x86-64, AT&T syntax)
# Prints every element of C to stdout using the format string at .L.str,
# emitting a newline (character 10) after every 80th element of a row and
# once more after each complete row. Takes no arguments, returns nothing.
# Register roles (all callee-saved, so they survive the library calls):
#   r14 = row counter, r15 = address of the current row of C,
#   rbx = column counter, r12 = address of the current element.
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp9:
.cfi_def_cfa_offset 16
.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp11:
.cfi_def_cfa_register %rbp
# Save the callee-saved registers used as loop state below.
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
.Ltmp12:
.cfi_offset %rbx, -48
.Ltmp13:
.cfi_offset %r12, -40
.Ltmp14:
.cfi_offset %r14, -32
.Ltmp15:
.cfi_offset %r15, -24
# row = 0; r15 = address of C.
# NOTE(review): the symbol address is loaded as a 32-bit absolute immediate,
# which presumably requires a non-PIC link -- confirm against the build flags.
xorl %r14d, %r14d
movl $C, %r15d
.align 16, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
# rax carries the current stdout pointer into the inner loop.
movq stdout(%rip), %rax
movq %r15, %r12
xorl %ebx, %ebx
.align 16, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
# Load one float and widen it to double for the variadic call.
vmovss (%r12), %xmm0
vcvtss2sd %xmm0, %xmm0, %xmm0
# fprintf(stdout, .L.str, value); al = 1 because one vector register
# (xmm0) carries a variadic argument.
movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
# eax = column % 80, computed with a multiply-and-shift instead of a divide:
# (column * 0x66666667) >> 37, plus the sign bit, gives column / 80.
movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
sarq $37, %rcx
addl %edx, %ecx
imull $80, %ecx, %ecx
subl %ecx, %eax
# Only the 80th, 160th, ... element of a row is followed by a newline.
cmpl $79, %eax
jne .LBB1_4
# BB#3: # %if.then
# in Loop: Header=BB1_2 Depth=2
# fputc(10, stdout)
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
# Advance to the next float; stdout is re-read from memory after the calls.
addq $4, %r12
incq %rbx
movq stdout(%rip), %rax
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# BB#5: # %for.end
# in Loop: Header=BB1_1 Depth=1
# End of row: fputc(10, stdout), then step r15 by one row (6144 bytes).
movl $10, %edi
movq %rax, %rsi
callq fputc
addq $6144, %r15 # imm = 0x1800
incq %r14
cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
# BB#6: # %for.end12
# Restore callee-saved registers in reverse push order.
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
.Ltmp16:
.size print_array, .Ltmp16-print_array
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
.quad 4602678819172646912 # double 0.5
.text
# main (x86-64, AT&T syntax, AVX scalar instructions) -- unoptimised loop order
# Phase one: an inlined copy of the init_array loops fills A and B.
# Phase two: C[i][j] = sum over k of A[i][k] * B[k][j], in i / j / k order,
#            accumulating each dot product in xmm0 before storing it.
# Returns 0 in eax. No calls are made, so only rbp is saved.
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp19:
.cfi_def_cfa_offset 16
.Ltmp20:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp21:
.cfi_def_cfa_register %rbp
# --- Phase one: inlined initialisation (r8 = i, rcx = j, xmm0 = constant at .LCPI2_0) ---
xorl %r8d, %r8d
vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
.LBB2_1: # %for.cond1.preheader.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB2_2: # %for.body3.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
# edx = i * j
movl %ecx, %edx
imull %r8d, %edx
# esi = -(i*j rounded toward zero to a multiple of 1024); used for the remainder.
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %r8, %rax
shlq $11, %rax
# edi = 1 + (i*j) % 1024; rsi = i * 6144 (row byte offset); rdx = j + 1.
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
# Flags from this compare are consumed by the jne at the loop bottom.
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB2_2
# BB#3: # %for.inc17.i
# in Loop: Header=BB2_1 Depth=1
incq %r8
cmpq $1536, %r8 # imm = 0x600
jne .LBB2_1
# BB#4:
# --- Phase two: matrix product (r8 = i, rdi = j, r9 = address of row i of A) ---
xorl %r8d, %r8d
movl $A, %r9d
.align 16, 0x90
.LBB2_5: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB2_6 Depth 2
# Child Loop BB2_7 Depth 3
# rdx = i * 6144 (row byte offset); rsi = address of C[i][0]; j = 0.
leaq (%r8,%r8,2), %rdx
shlq $11, %rdx
leaq C(%rdx), %rsi
xorl %edi, %edi
.align 16, 0x90
.LBB2_6: # %for.body3
# Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_7 Depth 3
# Zero C[i][j] in memory and the accumulator in xmm0.
movl $0, (%rsi)
vxorps %xmm0, %xmm0, %xmm0
# rax starts at minus the size of B and climbs by one row per iteration, so
# the k loop needs no separate counter: it ends when rax reaches zero.
movq $-9437184, %rax # imm = 0xFFFFFFFFFF700000
movq %r9, %rcx
.align 16, 0x90
.LBB2_7: # %for.body8
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_6 Depth=2
# => This Inner Loop Header: Depth=3
# xmm0 += A[i][k] * B[k][j]; A is walked by 4 bytes, B by a full row (6144 bytes).
vmovss (%rcx), %xmm1
vmulss B+9437184(%rax,%rdi,4), %xmm1, %xmm1
vaddss %xmm1, %xmm0, %xmm0
addq $4, %rcx
addq $6144, %rax # imm = 0x1800
jne .LBB2_7
# BB#8: # %for.inc25
# in Loop: Header=BB2_6 Depth=2
# Store the finished dot product, then point rsi at C[i][j+1].
vmovss %xmm0, (%rsi)
leaq C+4(%rdx,%rdi,4), %rsi
incq %rdi
cmpq $1536, %rdi # imm = 0x600
jne .LBB2_6
# BB#9: # %for.inc28
# in Loop: Header=BB2_5 Depth=1
# Next row of A and C.
addq $6144, %r9 # imm = 0x1800
incq %r8
cmpq $1536, %r8 # imm = 0x600
jne .LBB2_5
# BB#10: # %for.end30
# return 0
xorl %eax, %eax
popq %rbp
ret
.Ltmp22:
.size main, .Ltmp22-main
.cfi_endproc
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
.comm B,9437184,16
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
.section ".note.GNU-stack","",@progbits

View File

@@ -0,0 +1,396 @@
.file "matmul.polly.interchanged+tiled+vector.ll"
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
# init_array (x86-64, AT&T syntax, AVX scalar instructions)
# Fills A and B with the same values:
#   A[i][j] = B[i][j] = (float)((1 + (i*j) % 1024) * 0.5)   for i, j in [0, 1536)
# Takes no arguments and returns nothing.
# Register roles: r8 = i, rcx = j, xmm0 = the double constant at .LCPI0_0,
#                 xmm1 = value being converted/stored, rsi = byte offset of row i.
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
# i = 0; load the multiplier once, outside both loops.
xorl %r8d, %r8d
vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
.LBB0_1: # %polly.loop_preheader3
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
# j = 0 at the start of every row.
xorl %ecx, %ecx
.align 16, 0x90
.LBB0_2: # %polly.loop_header2
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
# edx = i * j (32-bit product).
movl %ecx, %edx
imull %r8d, %edx
# Signed remainder by 1024 without a divide: add a bias of 1023 when the
# product is negative, clear the low ten bits, negate; the result in esi is
# minus the multiple of 1024 nearest to the product in the direction of zero.
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
# rax = i * 2048; multiplied by three below to give the row stride of 6144 bytes.
movq %r8, %rax
shlq $11, %rax
# edi = 1 + (i*j) % 1024
leal 1(%rdx,%rsi), %edi
# rsi = i * 6144 = byte offset of row i (1536 floats per row).
leaq (%rax,%rax,2), %rsi
# rdx = j + 1; the comparison result is consumed by the jne at the bottom of
# the loop (only vector instructions and a mov execute in between).
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
# Convert to double, scale by xmm0, narrow to float.
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
# Store the same float into A[i][j] and B[i][j].
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB0_2
# BB#3: # %polly.loop_exit4
# in Loop: Header=BB0_1 Depth=1
# Next row; stop after 1536 rows.
incq %r8
cmpq $1536, %r8 # imm = 0x600
jne .LBB0_1
# BB#4: # %polly.loop_exit
popq %rbp
ret
.Ltmp5:
.size init_array, .Ltmp5-init_array
.cfi_endproc
# print_array (x86-64, AT&T syntax)
# Prints every element of C to stdout using the format string at .L.str,
# emitting a newline (character 10) after every 80th element of a row and
# once more after each complete row. Takes no arguments, returns nothing.
# Register roles (all callee-saved, so they survive the library calls):
#   r14 = row counter, r15 = address of the current row of C,
#   rbx = column counter, r12 = address of the current element.
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp9:
.cfi_def_cfa_offset 16
.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp11:
.cfi_def_cfa_register %rbp
# Save the callee-saved registers used as loop state below.
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
.Ltmp12:
.cfi_offset %rbx, -48
.Ltmp13:
.cfi_offset %r12, -40
.Ltmp14:
.cfi_offset %r14, -32
.Ltmp15:
.cfi_offset %r15, -24
# row = 0; r15 = address of C.
# NOTE(review): the symbol address is loaded as a 32-bit absolute immediate,
# which presumably requires a non-PIC link -- confirm against the build flags.
xorl %r14d, %r14d
movl $C, %r15d
.align 16, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
# rax carries the current stdout pointer into the inner loop.
movq stdout(%rip), %rax
movq %r15, %r12
xorl %ebx, %ebx
.align 16, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
# Load one float and widen it to double for the variadic call.
vmovss (%r12), %xmm0
vcvtss2sd %xmm0, %xmm0, %xmm0
# fprintf(stdout, .L.str, value); al = 1 because one vector register
# (xmm0) carries a variadic argument.
movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
# eax = column % 80, computed with a multiply-and-shift instead of a divide:
# (column * 0x66666667) >> 37, plus the sign bit, gives column / 80.
movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
sarq $37, %rcx
addl %edx, %ecx
imull $80, %ecx, %ecx
subl %ecx, %eax
# Only the 80th, 160th, ... element of a row is followed by a newline.
cmpl $79, %eax
jne .LBB1_4
# BB#3: # %if.then
# in Loop: Header=BB1_2 Depth=2
# fputc(10, stdout)
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
# Advance to the next float; stdout is re-read from memory after the calls.
addq $4, %r12
incq %rbx
movq stdout(%rip), %rax
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# BB#5: # %for.end
# in Loop: Header=BB1_1 Depth=1
# End of row: fputc(10, stdout), then step r15 by one row (6144 bytes).
movl $10, %edi
movq %rax, %rsi
callq fputc
addq $6144, %r15 # imm = 0x1800
incq %r14
cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
# BB#6: # %for.end12
# Restore callee-saved registers in reverse push order.
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
.Ltmp16:
.size print_array, .Ltmp16-print_array
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
.quad 4602678819172646912 # double 0.5
.text
# main (x86-64, AT&T syntax, AVX) -- interchanged, tiled and vectorised variant
# Phase one: an inlined copy of the init_array loops fills A and B.
# Phase two: C is cleared with memset, then updated in place as
#            C[i][j..j+3] += A[i][k] * B[k][j..j+3]
#            by a six-deep loop nest: three tile loops stepping by 64 (i, j, k)
#            around three element loops (i, k, and j in steps of four floats).
# Returns 0 in eax.
# Stack slots (relative to rbp):
#   -56  i-tile base                      -72  i-tile last index (base | 63)
#   -48  (i-tile last index) - 1          -80  j-tile base
#   -88  C + 16 bytes, advanced by 64 rows per i tile
#   -64  address of the first C element of the current (i tile, j tile)
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp20:
.cfi_def_cfa_offset 16
.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp22:
.cfi_def_cfa_register %rbp
# Five callee-saved pushes (40 bytes) plus 56 bytes of spill space put rsp
# 96 bytes below rbp.
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $56, %rsp
.Ltmp23:
.cfi_offset %rbx, -56
.Ltmp24:
.cfi_offset %r12, -48
.Ltmp25:
.cfi_offset %r13, -40
.Ltmp26:
.cfi_offset %r14, -32
.Ltmp27:
.cfi_offset %r15, -24
# --- Phase one: inlined initialisation (rbx = i, rcx = j, xmm0 = constant at .LCPI2_0) ---
xorl %ebx, %ebx
vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
# edx = i * j
movl %ecx, %edx
imull %ebx, %edx
# esi = -(i*j rounded toward zero to a multiple of 1024); used for the remainder.
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %rbx, %rax
shlq $11, %rax
# edi = 1 + (i*j) % 1024; rsi = i * 6144 (row byte offset); rdx = j + 1.
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
# Flags from this compare are consumed by the jne at the loop bottom.
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB2_2
# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
incq %rbx
cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
# BB#4: # %polly.loop_preheader3.preheader
# --- Phase two ---
# memset(C, 0, 9437184): clear the whole result matrix up front, because the
# interchanged loops accumulate into C instead of into a register.
movl $C, %edi
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset
# rsi = i-tile base = 0. The C pointer is kept with a 16-byte bias that
# cancels against the -4 element bias of r15 computed in the j-tile header.
xorl %esi, %esi
movl $C+16, %eax
movq %rax, -88(%rbp) # 8-byte Spill
.align 16, 0x90
.LBB2_5: # %polly.loop_preheader17
# =>This Loop Header: Depth=1
# Child Loop BB2_15 Depth 2
# Child Loop BB2_8 Depth 3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
# i-tile loop header: record the tile base, its last index (base | 63) and
# that index minus one (the bound used by the compare-before-increment at
# .LBB2_12). rdx = j-tile base = 0.
movq %rsi, -56(%rbp) # 8-byte Spill
movq %rsi, %rax
orq $63, %rax
movq %rax, -72(%rbp) # 8-byte Spill
leaq -1(%rax), %rax
movq %rax, -48(%rbp) # 8-byte Spill
xorl %edx, %edx
.align 16, 0x90
.LBB2_15: # %polly.loop_preheader24
# Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_8 Depth 3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
# j-tile loop header. r15 = (j-tile base - 1), or (base - 4) when that is
# negative, rounded toward zero to a multiple of four; for the bases this
# loop produces (multiples of 64) that works out to base - 4.
movq %rdx, -80(%rbp) # 8-byte Spill
leaq -4(%rdx), %rcx
movq %rdx, %rax
decq %rax
cmovsq %rcx, %rax
movq %rax, %r15
sarq $63, %r15
shrq $62, %r15
addq %rax, %r15
andq $-4, %r15
# r13 = last j of the tile (base | 63); rdx is reused as r13 - 4, the bound
# of the vector loop. r10 = k-tile base = 0.
movq %rdx, %r13
orq $63, %r13
leaq -4(%r13), %rdx
xorl %r10d, %r10d
# -64(%rbp) = biased C pointer + 4*r15; rbx = B + 16 + 4*r15 (first B element
# of this j tile in row 0); r12 = r15 + 4.
movq -88(%rbp), %rax # 8-byte Reload
leaq (%rax,%r15,4), %rax
movq %rax, -64(%rbp) # 8-byte Spill
leaq B+16(,%r15,4), %rbx
leaq 4(%r15), %r12
.align 16, 0x90
.LBB2_8: # %polly.loop_header23
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# => This Loop Header: Depth=3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
# k-tile loop header. Skip the tile body if the i range is empty.
cmpq -72(%rbp), %rsi # 8-byte Folded Reload
jg .LBB2_13
# BB#9: # %polly.loop_header30.preheader
# in Loop: Header=BB2_8 Depth=3
# rax = last k of the tile (base | 63); skip if the k range is empty.
movq %r10, %rax
orq $63, %rax
cmpq %rax, %r10
jg .LBB2_13
# BB#10: # in Loop: Header=BB2_8 Depth=3
# rax = last k - 1 (bound for the compare-before-increment in BB#16);
# r14 = C pointer for the first row of the tile; r11 = i.
decq %rax
movq -64(%rbp), %r14 # 8-byte Reload
movq -56(%rbp), %r11 # 8-byte Reload
.align 16, 0x90
.LBB2_11: # %polly.loop_header37.preheader
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# => This Loop Header: Depth=4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
# Per-row (i) loop header: restart the B pointer (r8) and k (rsi) at the
# start of the k tile; skip the row if the j range is empty. The two movq
# instructions sit between cmpq and jg and leave the flags untouched.
cmpq %r13, %r12
movq %rbx, %r8
movq %r10, %rsi
jg .LBB2_12
.align 16, 0x90
.LBB2_17: # %polly.loop_header46.preheader
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# Parent Loop BB2_11 Depth=4
# => This Loop Header: Depth=5
# Child Loop BB2_18 Depth 6
# Per-k loop header: rcx = i * 6144, then broadcast A[i][k] to all four
# lanes of xmm0. rdi walks the C row, r9 the B row, rcx counts j from r15.
leaq (%r11,%r11,2), %rcx
shlq $11, %rcx
vbroadcastss A(%rcx,%rsi,4), %xmm0
movq %r14, %rdi
movq %r8, %r9
movq %r15, %rcx
.LBB2_18: # %polly.loop_header46
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# Parent Loop BB2_11 Depth=4
# Parent Loop BB2_17 Depth=5
# => This Inner Loop Header: Depth=6
# Four floats per iteration: C[i][j..j+3] += A[i][k] * B[k][j..j+3].
# The store uses the aligned form; C is declared with 16-byte alignment and
# every offset added here is a multiple of 16 bytes.
vmulps (%r9), %xmm0, %xmm1
vaddps (%rdi), %xmm1, %xmm1
vmovaps %xmm1, (%rdi)
addq $16, %rdi
addq $16, %r9
addq $4, %rcx
cmpq %rdx, %rcx
jle .LBB2_18
# BB#16: # %polly.loop_exit48
# in Loop: Header=BB2_17 Depth=5
# Next row of B; the compare uses the old k, the leaq increments it without
# disturbing the flags.
addq $6144, %r8 # imm = 0x1800
cmpq %rax, %rsi
leaq 1(%rsi), %rsi
jle .LBB2_17
.align 16, 0x90
.LBB2_12: # %polly.loop_exit39
# in Loop: Header=BB2_11 Depth=4
# Next row of C; same compare-then-increment pattern for i.
addq $6144, %r14 # imm = 0x1800
cmpq -48(%rbp), %r11 # 8-byte Folded Reload
leaq 1(%r11), %r11
jle .LBB2_11
.align 16, 0x90
.LBB2_13: # %polly.loop_exit32
# in Loop: Header=BB2_8 Depth=3
# Next k tile: move the B pointer down 64 rows (393216 bytes) and continue
# while the old k base is below 1472. rsi is restored to the i-tile base.
addq $393216, %rbx # imm = 0x60000
cmpq $1472, %r10 # imm = 0x5C0
leaq 64(%r10), %r10
movq -56(%rbp), %rsi # 8-byte Reload
jl .LBB2_8
# BB#14: # %polly.loop_exit25
# in Loop: Header=BB2_15 Depth=2
# Next j tile (base += 64) while the old base is below 1472.
movq -80(%rbp), %rdx # 8-byte Reload
cmpq $1472, %rdx # imm = 0x5C0
leaq 64(%rdx), %rdx
jl .LBB2_15
# BB#6: # %polly.loop_exit18
# in Loop: Header=BB2_5 Depth=1
# Next i tile: move the biased C pointer down 64 rows and continue while the
# old base is below 1472.
addq $393216, -88(%rbp) # 8-byte Folded Spill
# imm = 0x60000
cmpq $1472, %rsi # imm = 0x5C0
leaq 64(%rsi), %rsi
jl .LBB2_5
# BB#7: # %polly.loop_exit11
# return 0; release the spill area and restore callee-saved registers.
xorl %eax, %eax
addq $56, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.Ltmp28:
.size main, .Ltmp28-main
.cfi_endproc
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
.comm B,9437184,16
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
.section ".note.GNU-stack","",@progbits

View File

@@ -0,0 +1,390 @@
.file "matmul.polly.interchanged+tiled.ll"
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
# init_array (x86-64, AT&T syntax, AVX scalar instructions)
# Fills A and B with the same values:
#   A[i][j] = B[i][j] = (float)((1 + (i*j) % 1024) * 0.5)   for i, j in [0, 1536)
# Takes no arguments and returns nothing.
# Register roles: r8 = i, rcx = j, xmm0 = the double constant at .LCPI0_0,
#                 xmm1 = value being converted/stored, rsi = byte offset of row i.
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
# i = 0; load the multiplier once, outside both loops.
xorl %r8d, %r8d
vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
.LBB0_1: # %polly.loop_preheader3
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
# j = 0 at the start of every row.
xorl %ecx, %ecx
.align 16, 0x90
.LBB0_2: # %polly.loop_header2
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
# edx = i * j (32-bit product).
movl %ecx, %edx
imull %r8d, %edx
# Signed remainder by 1024 without a divide: add a bias of 1023 when the
# product is negative, clear the low ten bits, negate; the result in esi is
# minus the multiple of 1024 nearest to the product in the direction of zero.
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
# rax = i * 2048; multiplied by three below to give the row stride of 6144 bytes.
movq %r8, %rax
shlq $11, %rax
# edi = 1 + (i*j) % 1024
leal 1(%rdx,%rsi), %edi
# rsi = i * 6144 = byte offset of row i (1536 floats per row).
leaq (%rax,%rax,2), %rsi
# rdx = j + 1; the comparison result is consumed by the jne at the bottom of
# the loop (only vector instructions and a mov execute in between).
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
# Convert to double, scale by xmm0, narrow to float.
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
# Store the same float into A[i][j] and B[i][j].
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB0_2
# BB#3: # %polly.loop_exit4
# in Loop: Header=BB0_1 Depth=1
# Next row; stop after 1536 rows.
incq %r8
cmpq $1536, %r8 # imm = 0x600
jne .LBB0_1
# BB#4: # %polly.loop_exit
popq %rbp
ret
.Ltmp5:
.size init_array, .Ltmp5-init_array
.cfi_endproc
# print_array (x86-64, AT&T syntax)
# Prints every element of C to stdout using the format string at .L.str,
# emitting a newline (character 10) after every 80th element of a row and
# once more after each complete row. Takes no arguments, returns nothing.
# Register roles (all callee-saved, so they survive the library calls):
#   r14 = row counter, r15 = address of the current row of C,
#   rbx = column counter, r12 = address of the current element.
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp9:
.cfi_def_cfa_offset 16
.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp11:
.cfi_def_cfa_register %rbp
# Save the callee-saved registers used as loop state below.
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
.Ltmp12:
.cfi_offset %rbx, -48
.Ltmp13:
.cfi_offset %r12, -40
.Ltmp14:
.cfi_offset %r14, -32
.Ltmp15:
.cfi_offset %r15, -24
# row = 0; r15 = address of C.
# NOTE(review): the symbol address is loaded as a 32-bit absolute immediate,
# which presumably requires a non-PIC link -- confirm against the build flags.
xorl %r14d, %r14d
movl $C, %r15d
.align 16, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
# rax carries the current stdout pointer into the inner loop.
movq stdout(%rip), %rax
movq %r15, %r12
xorl %ebx, %ebx
.align 16, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
# Load one float and widen it to double for the variadic call.
vmovss (%r12), %xmm0
vcvtss2sd %xmm0, %xmm0, %xmm0
# fprintf(stdout, .L.str, value); al = 1 because one vector register
# (xmm0) carries a variadic argument.
movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
# eax = column % 80, computed with a multiply-and-shift instead of a divide:
# (column * 0x66666667) >> 37, plus the sign bit, gives column / 80.
movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
sarq $37, %rcx
addl %edx, %ecx
imull $80, %ecx, %ecx
subl %ecx, %eax
# Only the 80th, 160th, ... element of a row is followed by a newline.
cmpl $79, %eax
jne .LBB1_4
# BB#3: # %if.then
# in Loop: Header=BB1_2 Depth=2
# fputc(10, stdout)
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
# Advance to the next float; stdout is re-read from memory after the calls.
addq $4, %r12
incq %rbx
movq stdout(%rip), %rax
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# BB#5: # %for.end
# in Loop: Header=BB1_1 Depth=1
# End of row: fputc(10, stdout), then step r15 by one row (6144 bytes).
movl $10, %edi
movq %rax, %rsi
callq fputc
addq $6144, %r15 # imm = 0x1800
incq %r14
cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
# BB#6: # %for.end12
# Restore callee-saved registers in reverse push order.
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
.Ltmp16:
.size print_array, .Ltmp16-print_array
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
.quad 4602678819172646912 # double 0.5
.text
# main (x86-64, AT&T syntax, AVX scalar instructions) -- interchanged and tiled variant
# Phase one: an inlined copy of the init_array loops fills A and B.
# Phase two: C is cleared with memset, then updated in place as
#            C[i][j] += A[i][k] * B[k][j]
#            by a six-deep loop nest: three tile loops stepping by 64 (i, j, k)
#            around three element loops (i, k, j), one float per iteration.
# Returns 0 in eax.
# Stack slots (relative to rbp):
#   -56  i-tile base                      -72  i-tile last index (base | 63)
#   -48  (i-tile last index) - 1          -88  address of C row at the i-tile base
#   -64  address of the first C element of the current (i tile, j tile)
#   -80  address of the first B element (row 0) of the current j tile
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp20:
.cfi_def_cfa_offset 16
.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp22:
.cfi_def_cfa_register %rbp
# Five callee-saved pushes (40 bytes) plus 56 bytes of spill space put rsp
# 96 bytes below rbp.
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $56, %rsp
.Ltmp23:
.cfi_offset %rbx, -56
.Ltmp24:
.cfi_offset %r12, -48
.Ltmp25:
.cfi_offset %r13, -40
.Ltmp26:
.cfi_offset %r14, -32
.Ltmp27:
.cfi_offset %r15, -24
# --- Phase one: inlined initialisation (rbx = i, rcx = j, xmm0 = constant at .LCPI2_0) ---
xorl %ebx, %ebx
vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
# edx = i * j
movl %ecx, %edx
imull %ebx, %edx
# esi = -(i*j rounded toward zero to a multiple of 1024); used for the remainder.
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %rbx, %rax
shlq $11, %rax
# edi = 1 + (i*j) % 1024; rsi = i * 6144 (row byte offset); rdx = j + 1.
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
# Flags from this compare are consumed by the jne at the loop bottom.
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB2_2
# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
incq %rbx
cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
# BB#4: # %polly.loop_preheader3.preheader
# --- Phase two ---
# rbx = address of C (callee-saved, so it survives the call), then
# memset(C, 0, 9437184): the interchanged loops accumulate into C in memory,
# so it has to start out as zero.
movl $C, %ebx
movl $C, %edi
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset
# rax = i-tile base = 0.
xorl %eax, %eax
.align 16, 0x90
.LBB2_5: # %polly.loop_preheader17
# =>This Loop Header: Depth=1
# Child Loop BB2_15 Depth 2
# Child Loop BB2_8 Depth 3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
# i-tile loop header: record the tile base, the C row pointer, the last index
# of the tile (base | 63) and that index minus one (bound for the
# compare-before-increment at .LBB2_12).
movq %rax, -56(%rbp) # 8-byte Spill
movq %rbx, -88(%rbp) # 8-byte Spill
movq %rax, %rcx
orq $63, %rcx
movq %rcx, -72(%rbp) # 8-byte Spill
leaq -1(%rcx), %rcx
movq %rcx, -48(%rbp) # 8-byte Spill
# j-tile state: r15 = j base - 1 (inner counter start), rcx = B pointer,
# -64(%rbp) = C pointer, r12 = j base = 0.
movq $-1, %r15
movl $B, %ecx
movq %rbx, -64(%rbp) # 8-byte Spill
xorl %r12d, %r12d
.align 16, 0x90
.LBB2_15: # %polly.loop_preheader24
# Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_8 Depth 3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
# j-tile loop header: r13 = last j of the tile (base | 63); rbx is reused as
# r13 - 1, the bound of the innermost loop. r9 = k-tile base = 0 and
# rdx = B pointer for the first k tile.
movq %rcx, -80(%rbp) # 8-byte Spill
movq %r12, %r13
orq $63, %r13
leaq -1(%r13), %rbx
xorl %r9d, %r9d
movq %rcx, %rdx
.align 16, 0x90
.LBB2_8: # %polly.loop_header23
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# => This Loop Header: Depth=3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
# k-tile loop header. rax holds the i-tile base here; skip the tile body if
# the i range is empty.
cmpq -72(%rbp), %rax # 8-byte Folded Reload
jg .LBB2_13
# BB#9: # %polly.loop_header30.preheader
# in Loop: Header=BB2_8 Depth=3
# rax is now reused: last k of the tile (base | 63); skip if the k range is empty.
movq %r9, %rax
orq $63, %rax
cmpq %rax, %r9
jg .LBB2_13
# BB#10: # in Loop: Header=BB2_8 Depth=3
# rax = last k - 1 (bound for the compare-before-increment in BB#16);
# r10 = C pointer for the first row of the tile; r11 = i.
decq %rax
movq -64(%rbp), %r10 # 8-byte Reload
movq -56(%rbp), %r11 # 8-byte Reload
.align 16, 0x90
.LBB2_11: # %polly.loop_header37.preheader
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# => This Loop Header: Depth=4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
# Per-row (i) loop header: restart the B pointer (r14) and k (rcx) at the
# start of the k tile; skip the row if the j range is empty. The two movq
# instructions sit between cmpq and jg and leave the flags untouched.
cmpq %r13, %r12
movq %rdx, %r14
movq %r9, %rcx
jg .LBB2_12
.align 16, 0x90
.LBB2_17: # %polly.loop_header46.preheader
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# Parent Loop BB2_11 Depth=4
# => This Loop Header: Depth=5
# Child Loop BB2_18 Depth 6
# Per-k loop header: rsi = i * 6144, xmm0 = A[i][k]. rdi walks the C row,
# r8 the B row, and rsi is then reused as the j counter starting at r15.
leaq (%r11,%r11,2), %rsi
shlq $11, %rsi
vmovss A(%rsi,%rcx,4), %xmm0
movq %r10, %rdi
movq %r14, %r8
movq %r15, %rsi
.LBB2_18: # %polly.loop_header46
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# Parent Loop BB2_11 Depth=4
# Parent Loop BB2_17 Depth=5
# => This Inner Loop Header: Depth=6
# One float per iteration: C[i][j] += A[i][k] * B[k][j].
vmulss (%r8), %xmm0, %xmm1
vaddss (%rdi), %xmm1, %xmm1
vmovss %xmm1, (%rdi)
addq $4, %rdi
addq $4, %r8
incq %rsi
cmpq %rbx, %rsi
jle .LBB2_18
# BB#16: # %polly.loop_exit48
# in Loop: Header=BB2_17 Depth=5
# Next row of B; the compare uses the old k, the leaq increments it without
# disturbing the flags.
addq $6144, %r14 # imm = 0x1800
cmpq %rax, %rcx
leaq 1(%rcx), %rcx
jle .LBB2_17
.align 16, 0x90
.LBB2_12: # %polly.loop_exit39
# in Loop: Header=BB2_11 Depth=4
# Next row of C; same compare-then-increment pattern for i.
addq $6144, %r10 # imm = 0x1800
cmpq -48(%rbp), %r11 # 8-byte Folded Reload
leaq 1(%r11), %r11
jle .LBB2_11
.align 16, 0x90
.LBB2_13: # %polly.loop_exit32
# in Loop: Header=BB2_8 Depth=3
# Next k tile: move the B pointer down 64 rows (393216 bytes) and continue
# while the old k base is below 1472. rax is restored to the i-tile base.
addq $393216, %rdx # imm = 0x60000
cmpq $1472, %r9 # imm = 0x5C0
leaq 64(%r9), %r9
movq -56(%rbp), %rax # 8-byte Reload
jl .LBB2_8
# BB#14: # %polly.loop_exit25
# in Loop: Header=BB2_15 Depth=2
# Next j tile: shift the C and B pointers right by 64 floats (256 bytes),
# bump the j counters by 64, and continue while the old base is below 1472.
addq $256, -64(%rbp) # 8-byte Folded Spill
# imm = 0x100
movq -80(%rbp), %rcx # 8-byte Reload
addq $256, %rcx # imm = 0x100
addq $64, %r15
cmpq $1472, %r12 # imm = 0x5C0
leaq 64(%r12), %r12
jl .LBB2_15
# BB#6: # %polly.loop_exit18
# in Loop: Header=BB2_5 Depth=1
# Next i tile: move the C row pointer down 64 rows and continue while the
# old base is below 1472.
movq -88(%rbp), %rbx # 8-byte Reload
addq $393216, %rbx # imm = 0x60000
cmpq $1472, %rax # imm = 0x5C0
leaq 64(%rax), %rax
jl .LBB2_5
# BB#7: # %polly.loop_exit11
# return 0; release the spill area and restore callee-saved registers.
xorl %eax, %eax
addq $56, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.Ltmp28:
.size main, .Ltmp28-main
.cfi_endproc
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
.comm B,9437184,16
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
.section ".note.GNU-stack","",@progbits

View File

@@ -0,0 +1,286 @@
.file "matmul.polly.interchanged.ll"
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
# init_array (x86-64, AT&T syntax, AVX scalar instructions)
# Fills A and B with the same values:
#   A[i][j] = B[i][j] = (float)((1 + (i*j) % 1024) * 0.5)   for i, j in [0, 1536)
# Takes no arguments and returns nothing.
# Register roles: r8 = i, rcx = j, xmm0 = the double constant at .LCPI0_0,
#                 xmm1 = value being converted/stored, rsi = byte offset of row i.
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
# i = 0; load the multiplier once, outside both loops.
xorl %r8d, %r8d
vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
.LBB0_1: # %polly.loop_preheader3
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
# j = 0 at the start of every row.
xorl %ecx, %ecx
.align 16, 0x90
.LBB0_2: # %polly.loop_header2
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
# edx = i * j (32-bit product).
movl %ecx, %edx
imull %r8d, %edx
# Signed remainder by 1024 without a divide: add a bias of 1023 when the
# product is negative, clear the low ten bits, negate; the result in esi is
# minus the multiple of 1024 nearest to the product in the direction of zero.
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
# rax = i * 2048; multiplied by three below to give the row stride of 6144 bytes.
movq %r8, %rax
shlq $11, %rax
# edi = 1 + (i*j) % 1024
leal 1(%rdx,%rsi), %edi
# rsi = i * 6144 = byte offset of row i (1536 floats per row).
leaq (%rax,%rax,2), %rsi
# rdx = j + 1; the comparison result is consumed by the jne at the bottom of
# the loop (only vector instructions and a mov execute in between).
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
# Convert to double, scale by xmm0, narrow to float.
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
# Store the same float into A[i][j] and B[i][j].
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB0_2
# BB#3: # %polly.loop_exit4
# in Loop: Header=BB0_1 Depth=1
# Next row; stop after 1536 rows.
incq %r8
cmpq $1536, %r8 # imm = 0x600
jne .LBB0_1
# BB#4: # %polly.loop_exit
popq %rbp
ret
.Ltmp5:
.size init_array, .Ltmp5-init_array
.cfi_endproc
# print_array (x86-64, AT&T syntax)
# Prints every element of C to stdout using the format string at .L.str,
# emitting a newline (character 10) after every 80th element of a row and
# once more after each complete row. Takes no arguments, returns nothing.
# Register roles (all callee-saved, so they survive the library calls):
#   r14 = row counter, r15 = address of the current row of C,
#   rbx = column counter, r12 = address of the current element.
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp9:
.cfi_def_cfa_offset 16
.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp11:
.cfi_def_cfa_register %rbp
# Save the callee-saved registers used as loop state below.
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
.Ltmp12:
.cfi_offset %rbx, -48
.Ltmp13:
.cfi_offset %r12, -40
.Ltmp14:
.cfi_offset %r14, -32
.Ltmp15:
.cfi_offset %r15, -24
# row = 0; r15 = address of C.
# NOTE(review): the symbol address is loaded as a 32-bit absolute immediate,
# which presumably requires a non-PIC link -- confirm against the build flags.
xorl %r14d, %r14d
movl $C, %r15d
.align 16, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
# rax carries the current stdout pointer into the inner loop.
movq stdout(%rip), %rax
movq %r15, %r12
xorl %ebx, %ebx
.align 16, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
# Load one float and widen it to double for the variadic call.
vmovss (%r12), %xmm0
vcvtss2sd %xmm0, %xmm0, %xmm0
# fprintf(stdout, .L.str, value); al = 1 because one vector register
# (xmm0) carries a variadic argument.
movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
# eax = column % 80, computed with a multiply-and-shift instead of a divide:
# (column * 0x66666667) >> 37, plus the sign bit, gives column / 80.
movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
sarq $37, %rcx
addl %edx, %ecx
imull $80, %ecx, %ecx
subl %ecx, %eax
# Only the 80th, 160th, ... element of a row is followed by a newline.
cmpl $79, %eax
jne .LBB1_4
# BB#3: # %if.then
# in Loop: Header=BB1_2 Depth=2
# fputc(10, stdout)
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
# Advance to the next float; stdout is re-read from memory after the calls.
addq $4, %r12
incq %rbx
movq stdout(%rip), %rax
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# BB#5: # %for.end
# in Loop: Header=BB1_1 Depth=1
# End of row: fputc(10, stdout), then step r15 by one row (6144 bytes).
movl $10, %edi
movq %rax, %rsi
callq fputc
addq $6144, %r15 # imm = 0x1800
incq %r14
cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
# BB#6: # %for.end12
# Restore callee-saved registers in reverse push order.
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
.Ltmp16:
.size print_array, .Ltmp16-print_array
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
.quad 4602678819172646912 # double 0.5
.text
# main (x86-64, AT&T syntax, AVX scalar instructions) -- loop-interchanged variant
# Phase one: an inlined copy of the init_array loops fills A and B.
# Phase two: C is cleared with memset, then updated in place in i / k / j
#            order: for each (i, k), A[i][k] is loaded once and the innermost
#            loop walks row k of B and row i of C contiguously,
#            C[i][j] += A[i][k] * B[k][j].
# Returns 0 in eax.
# Register roles in phase two: rax = i, rdx = k, rsi = remaining j count,
#   r14 = address of row i of C, rbx = address of row k of B.
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp20:
.cfi_def_cfa_offset 16
.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp22:
.cfi_def_cfa_register %rbp
# r14 and rbx are callee-saved and are used as loop state below.
pushq %r14
pushq %rbx
.Ltmp23:
.cfi_offset %rbx, -32
.Ltmp24:
.cfi_offset %r14, -24
# --- Phase one: inlined initialisation (rbx = i, rcx = j, xmm0 = constant at .LCPI2_0) ---
xorl %ebx, %ebx
vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
# edx = i * j
movl %ecx, %edx
imull %ebx, %edx
# esi = -(i*j rounded toward zero to a multiple of 1024); used for the remainder.
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %rbx, %rax
shlq $11, %rax
# edi = 1 + (i*j) % 1024; rsi = i * 6144 (row byte offset); rdx = j + 1.
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
# Flags from this compare are consumed by the jne at the loop bottom.
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB2_2
# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
incq %rbx
cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
# BB#4: # %polly.loop_preheader3.preheader
# --- Phase two ---
# r14 = address of C (kept in a callee-saved register across the call), then
# memset(C, 0, 9437184): the interchanged loops accumulate into C in memory,
# so it has to start out as zero.
movl $C, %r14d
movl $C, %edi
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset
# i = 0
xorl %eax, %eax
.align 16, 0x90
.LBB2_5: # %polly.loop_preheader17
# =>This Loop Header: Depth=1
# Child Loop BB2_10 Depth 2
# Child Loop BB2_8 Depth 3
# For each row i: rbx restarts at row 0 of B, k = 0.
movl $B, %ebx
xorl %edx, %edx
.align 16, 0x90
.LBB2_10: # %polly.loop_preheader24
# Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_8 Depth 3
# rcx = i * 6144, xmm0 = A[i][k]; then set up the j loop: a countdown of
# 1536 in rsi, rdi = row i of C, rcx (reused) = row k of B.
leaq (%rax,%rax,2), %rcx
shlq $11, %rcx
vmovss A(%rcx,%rdx,4), %xmm0
movl $1536, %esi # imm = 0x600
movq %r14, %rdi
movq %rbx, %rcx
.align 16, 0x90
.LBB2_8: # %polly.loop_header23
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_10 Depth=2
# => This Inner Loop Header: Depth=3
# C[i][j] += A[i][k] * B[k][j]; both pointers advance by one float.
vmulss (%rcx), %xmm0, %xmm1
vaddss (%rdi), %xmm1, %xmm1
vmovss %xmm1, (%rdi)
addq $4, %rdi
addq $4, %rcx
decq %rsi
jne .LBB2_8
# BB#9: # %polly.loop_exit25
# in Loop: Header=BB2_10 Depth=2
# Next k: step the B pointer down one row (6144 bytes).
addq $6144, %rbx # imm = 0x1800
incq %rdx
cmpq $1536, %rdx # imm = 0x600
jne .LBB2_10
# BB#6: # %polly.loop_exit18
# in Loop: Header=BB2_5 Depth=1
# Next i: step the C pointer down one row.
addq $6144, %r14 # imm = 0x1800
incq %rax
cmpq $1536, %rax # imm = 0x600
jne .LBB2_5
# BB#7: # %polly.loop_exit11
# return 0; restore callee-saved registers in reverse push order.
xorl %eax, %eax
popq %rbx
popq %r14
popq %rbp
ret
.Ltmp25:
.size main, .Ltmp25-main
.cfi_endproc
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
.comm B,9437184,16
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
.section ".note.GNU-stack","",@progbits

View File

@@ -0,0 +1,171 @@
; ModuleID = 'matmul.s'
source_filename = "matmul.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external global %struct._IO_FILE*, align 8
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
; init_array (canonicalized form): fills @A and @B with the same pattern.
; For every row index %indvars.iv5 and column index %indvars.iv in [0, 1536)
; it stores float(((row * col) srem 1024) + 1) * 0.5 into both
; A[row][col] and B[row][col]. Takes no arguments and returns nothing.
; Function Attrs: nounwind uwtable
define void @init_array() #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.cond1.preheader
; Outer loop header: %indvars.iv5 is the row index, starting at 0.
for.cond1.preheader: ; preds = %entry.split, %for.inc17
%indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]
br label %for.body3
; Inner loop: %indvars.iv is the column index, starting at 0 for each row.
for.body3: ; preds = %for.cond1.preheader, %for.body3
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
; Value for A: 64-bit product truncated to i32, signed remainder by 1024,
; plus one, converted to double, halved, then narrowed to float.
%0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5
%1 = trunc i64 %0 to i32
%rem = srem i32 %1, 1024
%add = add nsw i32 %rem, 1
%conv = sitofp i32 %add to double
%div = fmul double %conv, 5.000000e-01
%conv4 = fptrunc double %div to float
%arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv
store float %conv4, float* %arrayidx6, align 4
; Value for B: the identical computation is repeated rather than reusing
; %conv4, then stored at the same [row][col] position of @B.
%2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5
%3 = trunc i64 %2 to i32
%rem8 = srem i32 %3, 1024
%add9 = add nsw i32 %rem8, 1
%conv10 = sitofp i32 %add9 to double
%div11 = fmul double %conv10, 5.000000e-01
%conv12 = fptrunc double %div11 to float
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv
store float %conv12, float* %arrayidx16, align 4
; Advance the column; stay in the inner loop until it reaches 1536.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1536
br i1 %exitcond, label %for.body3, label %for.inc17
; Outer latch: advance the row; leave once it reaches 1536.
for.inc17: ; preds = %for.body3
%indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
%exitcond7 = icmp ne i64 %indvars.iv.next6, 1536
br i1 %exitcond7, label %for.cond1.preheader, label %for.end19
for.end19: ; preds = %for.inc17
ret void
}
; print_array (canonicalized form): writes every element of @C to the stream
; loaded from @stdout. Each element is widened to double and printed with the
; "%lf " format; a newline (character 10) is written via fputc after every
; column whose index is 79 modulo 80, and once more after each row.
; Takes no arguments and returns nothing.
; Function Attrs: nounwind uwtable
define void @print_array() #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.cond1.preheader
; Outer loop header: %indvars.iv6 is the row index. @stdout is loaded here
; once per row to seed the stream pointer used by the inner loop.
for.cond1.preheader: ; preds = %entry.split, %for.end
%indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]
%0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
br label %for.body3
; Inner loop: %indvars.iv is the column index; %1 is the stream pointer,
; taken from the preheader on entry and from the reload in %for.inc afterwards.
for.body3: ; preds = %for.cond1.preheader, %for.inc
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
%1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]
%arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv
%2 = load float, float* %arrayidx5, align 4
%conv = fpext float %2 to double
%call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2
; Line-wrap test: column index truncated to i32, remainder by 80 compared to 79.
%3 = trunc i64 %indvars.iv to i32
%rem = srem i32 %3, 80
%cmp6 = icmp eq i32 %rem, 79
br i1 %cmp6, label %if.then, label %for.inc
; Wrap point reached: emit a newline character on a freshly loaded @stdout.
if.then: ; preds = %for.body3
%4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)
br label %for.inc
; Inner latch: advance the column and reload @stdout for the next iteration
; (and for the end-of-row newline).
for.inc: ; preds = %for.body3, %if.then
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%exitcond = icmp ne i64 %indvars.iv.next, 1536
br i1 %exitcond, label %for.body3, label %for.end
; End of row: write the trailing newline, then advance the row index.
for.end: ; preds = %for.inc
%.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]
%fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)
%indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1
%exitcond8 = icmp ne i64 %indvars.iv.next7, 1536
br i1 %exitcond8, label %for.cond1.preheader, label %for.end12
for.end12: ; preds = %for.end
ret void
}
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
; main (canonicalized form): calls init_array, then computes C = A * B with
; three nested loops over [0, 1536) and returns 0.
; Induction variables: %indvars.iv7 = row of C/A, %indvars.iv4 = column of C/B,
; %indvars.iv = the reduction index (column of A, row of B).
; Function Attrs: nounwind uwtable
define i32 @main() #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
tail call void @init_array()
br label %for.cond1.preheader
; Outermost loop header (row index).
for.cond1.preheader: ; preds = %entry.split, %for.inc28
%indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]
br label %for.body3
; Middle loop (column index): zero C[row][col] before accumulating into it.
for.body3: ; preds = %for.cond1.preheader, %for.inc25
%indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5, %for.inc25 ]
%arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
store float 0.000000e+00, float* %arrayidx5, align 4
br label %for.body8
; Innermost loop: C[row][col] += A[row][k] * B[k][col].
; The running sum is loaded from and stored back to @C on every iteration;
; it is not kept in an SSA value across iterations.
for.body8: ; preds = %for.body3, %for.body8
%indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]
%arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
%0 = load float, float* %arrayidx12, align 4
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv
%1 = load float, float* %arrayidx16, align 4
%arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4
%2 = load float, float* %arrayidx20, align 4
%mul = fmul float %1, %2
%add = fadd float %0, %mul
%arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
store float %add, float* %arrayidx24, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1536
br i1 %exitcond, label %for.body8, label %for.inc25
; Middle latch: next column.
for.inc25: ; preds = %for.body8
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
%exitcond6 = icmp ne i64 %indvars.iv.next5, 1536
br i1 %exitcond6, label %for.body3, label %for.inc28
; Outer latch: next row.
for.inc28: ; preds = %for.inc25
%indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
%exitcond9 = icmp ne i64 %indvars.iv.next8, 1536
br i1 %exitcond9, label %for.cond1.preheader, label %for.end30
for.end30: ; preds = %for.inc28
ret i32 0
}
; Function Attrs: nounwind
declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #2
; Function Attrs: nounwind
declare i32 @fputc(i32, %struct._IO_FILE* nocapture) #2
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.ident = !{!0}
!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}

View File

@@ -0,0 +1,269 @@
; ModuleID = 'matmul.c'
source_filename = "matmul.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external global %struct._IO_FILE*, align 8
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
; init_array (unoptimized form): fills @A and @B with the same pattern.
; The loop counters %i and %j live in stack slots and are reloaded before
; every use. For i, j in [0, 1536) it stores
; float((1 + (i * j) srem 1024) / 2.0) into A[i][j] and B[i][j].
; Takes no arguments and returns nothing.
; Function Attrs: nounwind uwtable
define void @init_array() #0 {
entry:
%i = alloca i32, align 4
%j = alloca i32, align 4
store i32 0, i32* %i, align 4
br label %for.cond
; Outer loop test: continue while i < 1536.
for.cond: ; preds = %for.inc17, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 1536
br i1 %cmp, label %for.body, label %for.end19
; Outer body: reset j to 0 for this row.
for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
br label %for.cond1
; Inner loop test: continue while j < 1536.
for.cond1: ; preds = %for.inc, %for.body
%1 = load i32, i32* %j, align 4
%cmp2 = icmp slt i32 %1, 1536
br i1 %cmp2, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
; Compute the value for A[i][j] in double precision, then narrow to float.
%2 = load i32, i32* %i, align 4
%3 = load i32, i32* %j, align 4
%mul = mul nsw i32 %2, %3
%rem = srem i32 %mul, 1024
%add = add nsw i32 1, %rem
%conv = sitofp i32 %add to double
%div = fdiv double %conv, 2.000000e+00
%conv4 = fptrunc double %div to float
; Address A[i][j]: sign-extend both indices, select the row, then the element.
%4 = load i32, i32* %j, align 4
%idxprom = sext i32 %4 to i64
%5 = load i32, i32* %i, align 4
%idxprom5 = sext i32 %5 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom5
%arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
store float %conv4, float* %arrayidx6, align 4
; Same computation repeated from scratch for B[i][j].
%6 = load i32, i32* %i, align 4
%7 = load i32, i32* %j, align 4
%mul7 = mul nsw i32 %6, %7
%rem8 = srem i32 %mul7, 1024
%add9 = add nsw i32 1, %rem8
%conv10 = sitofp i32 %add9 to double
%div11 = fdiv double %conv10, 2.000000e+00
%conv12 = fptrunc double %div11 to float
%8 = load i32, i32* %j, align 4
%idxprom13 = sext i32 %8 to i64
%9 = load i32, i32* %i, align 4
%idxprom14 = sext i32 %9 to i64
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom14
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
store float %conv12, float* %arrayidx16, align 4
br label %for.inc
; Inner latch: j = j + 1.
for.inc: ; preds = %for.body3
%10 = load i32, i32* %j, align 4
%inc = add nsw i32 %10, 1
store i32 %inc, i32* %j, align 4
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc17
; Outer latch: i = i + 1.
for.inc17: ; preds = %for.end
%11 = load i32, i32* %i, align 4
%inc18 = add nsw i32 %11, 1
store i32 %inc18, i32* %i, align 4
br label %for.cond
for.end19: ; preds = %for.cond
ret void
}
; print_array (unoptimized form): prints every element of @C to the stream
; loaded from @stdout using the "%lf " format (@.str). The newline string
; @.str.1 is printed after each column j with j srem 80 == 79 and again after
; each row. Loop counters %i and %j live in stack slots.
; Takes no arguments and returns nothing.
; Function Attrs: nounwind uwtable
define void @print_array() #0 {
entry:
%i = alloca i32, align 4
%j = alloca i32, align 4
store i32 0, i32* %i, align 4
br label %for.cond
; Outer loop test: continue while i < 1536.
for.cond: ; preds = %for.inc10, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 1536
br i1 %cmp, label %for.body, label %for.end12
for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
br label %for.cond1
; Inner loop test: continue while j < 1536.
for.cond1: ; preds = %for.inc, %for.body
%1 = load i32, i32* %j, align 4
%cmp2 = icmp slt i32 %1, 1536
br i1 %cmp2, label %for.body3, label %for.end
; Inner body: load C[i][j], widen it to double and print it.
for.body3: ; preds = %for.cond1
%2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%3 = load i32, i32* %j, align 4
%idxprom = sext i32 %3 to i64
%4 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %4 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
%5 = load float, float* %arrayidx5, align 4
%conv = fpext float %5 to double
%call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv)
; Line-wrap test on the column index.
%6 = load i32, i32* %j, align 4
%rem = srem i32 %6, 80
%cmp6 = icmp eq i32 %rem, 79
br i1 %cmp6, label %if.then, label %if.end
; Wrap point reached: print the newline string.
if.then: ; preds = %for.body3
%7 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
br label %if.end
if.end: ; preds = %if.then, %for.body3
br label %for.inc
; Inner latch: j = j + 1.
for.inc: ; preds = %if.end
%8 = load i32, i32* %j, align 4
%inc = add nsw i32 %8, 1
store i32 %inc, i32* %j, align 4
br label %for.cond1
; End of row: print the trailing newline string.
for.end: ; preds = %for.cond1
%9 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
br label %for.inc10
; Outer latch: i = i + 1.
for.inc10: ; preds = %for.end
%10 = load i32, i32* %i, align 4
%inc11 = add nsw i32 %10, 1
store i32 %inc11, i32* %i, align 4
br label %for.cond
for.end12: ; preds = %for.cond
ret void
}
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
; main (unoptimized form): calls init_array, then computes C = A * B with
; three nested loops (i, j, k each in [0, 1536)) and returns 0.
; All loop counters live in stack slots and are reloaded before every use.
; %t_start and %t_end are allocated but never read or written in this function.
; Function Attrs: nounwind uwtable
define i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%i = alloca i32, align 4
%j = alloca i32, align 4
%k = alloca i32, align 4
%t_start = alloca double, align 8
%t_end = alloca double, align 8
store i32 0, i32* %retval, align 4
call void @init_array()
store i32 0, i32* %i, align 4
br label %for.cond
; Loop over i: continue while i < 1536.
for.cond: ; preds = %for.inc28, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 1536
br i1 %cmp, label %for.body, label %for.end30
for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
br label %for.cond1
; Loop over j: continue while j < 1536.
for.cond1: ; preds = %for.inc25, %for.body
%1 = load i32, i32* %j, align 4
%cmp2 = icmp slt i32 %1, 1536
br i1 %cmp2, label %for.body3, label %for.end27
; Body of the j loop: C[i][j] = 0, then start the k loop at 0.
for.body3: ; preds = %for.cond1
%2 = load i32, i32* %j, align 4
%idxprom = sext i32 %2 to i64
%3 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %3 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
store float 0.000000e+00, float* %arrayidx5, align 4
store i32 0, i32* %k, align 4
br label %for.cond6
; Loop over k: continue while k < 1536.
for.cond6: ; preds = %for.inc, %for.body3
%4 = load i32, i32* %k, align 4
%cmp7 = icmp slt i32 %4, 1536
br i1 %cmp7, label %for.body8, label %for.end
; Body of the k loop: C[i][j] = C[i][j] + A[i][k] * B[k][j].
for.body8: ; preds = %for.cond6
; %7 = current C[i][j]
%5 = load i32, i32* %j, align 4
%idxprom9 = sext i32 %5 to i64
%6 = load i32, i32* %i, align 4
%idxprom10 = sext i32 %6 to i64
%arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom10
%arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx11, i64 0, i64 %idxprom9
%7 = load float, float* %arrayidx12, align 4
; %10 = A[i][k]
%8 = load i32, i32* %k, align 4
%idxprom13 = sext i32 %8 to i64
%9 = load i32, i32* %i, align 4
%idxprom14 = sext i32 %9 to i64
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom14
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
%10 = load float, float* %arrayidx16, align 4
; %13 = B[k][j]
%11 = load i32, i32* %j, align 4
%idxprom17 = sext i32 %11 to i64
%12 = load i32, i32* %k, align 4
%idxprom18 = sext i32 %12 to i64
%arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom18
%arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx19, i64 0, i64 %idxprom17
%13 = load float, float* %arrayidx20, align 4
; Multiply, add to the running sum, and write the sum back to C[i][j].
%mul = fmul float %10, %13
%add = fadd float %7, %mul
%14 = load i32, i32* %j, align 4
%idxprom21 = sext i32 %14 to i64
%15 = load i32, i32* %i, align 4
%idxprom22 = sext i32 %15 to i64
%arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom22
%arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx23, i64 0, i64 %idxprom21
store float %add, float* %arrayidx24, align 4
br label %for.inc
; k = k + 1.
for.inc: ; preds = %for.body8
%16 = load i32, i32* %k, align 4
%inc = add nsw i32 %16, 1
store i32 %inc, i32* %k, align 4
br label %for.cond6
for.end: ; preds = %for.cond6
br label %for.inc25
; j = j + 1.
for.inc25: ; preds = %for.end
%17 = load i32, i32* %j, align 4
%inc26 = add nsw i32 %17, 1
store i32 %inc26, i32* %j, align 4
br label %for.cond1
for.end27: ; preds = %for.cond1
br label %for.inc28
; i = i + 1.
for.inc28: ; preds = %for.end27
%18 = load i32, i32* %i, align 4
%inc29 = add nsw i32 %18, 1
store i32 %inc29, i32* %i, align 4
br label %for.cond
for.end30: ; preds = %for.cond
ret i32 0
}
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}

View File

@@ -0,0 +1,95 @@
#!/bin/sh -a
#
# End-to-end walkthrough of the Polly matmul example. Starting from matmul.c
# it emits LLVM-IR, canonicalizes it for Polly, prints and exports the detected
# SCoPs, generates code for each imported jscop variant, builds one executable
# per variant and finally times all of them.
#
# The jscop variants with the postfixes "interchanged", "interchanged+tiled"
# and "interchanged+tiled+vector" are only imported here, never written, so
# they are expected to exist already.
#
# Every continued line ends in " \" (with a space) and every continuation is
# indented, so no two arguments can fuse when the backslash-newline is removed.

echo "--> 1. Create LLVM-IR from C"
clang -S -emit-llvm matmul.c -o matmul.s

echo "--> 2. Prepare the LLVM-IR for Polly"
opt -S -polly-canonicalize matmul.s > matmul.preopt.ll

echo "--> 3. Show the SCoPs detected by Polly"
opt -basicaa -polly-ast -analyze -q matmul.preopt.ll \
    -polly-process-unprofitable

echo "--> 4.1 Highlight the detected SCoPs in the CFGs of the program"
# We only create .dot files, as -view-scops directly calls graphviz,
# which would require user interaction to continue the script.
# opt -basicaa -view-scops -disable-output matmul.preopt.ll
opt -basicaa -dot-scops -disable-output matmul.preopt.ll

# The backslash-newline below sits inside the quoted string, so the second
# line must stay at column 0 to keep the printed message unchanged.
echo "--> 4.2 Highlight the detected SCoPs in the CFGs of the program (print \
no instructions)"
# We only create .dot files, as -view-scops-only directly calls
# graphviz, which would require user interaction to continue the script.
# opt -basicaa -view-scops-only -disable-output matmul.preopt.ll
opt -basicaa -dot-scops-only -disable-output matmul.preopt.ll

echo "--> 4.3 Create .png files from the .dot files"
# Use a glob rather than parsing `ls` output so file names are never
# word-split; the -e test skips the literal pattern when nothing matches.
for i in *.dot; do
    [ -e "$i" ] || continue
    dot -Tpng "$i" > "$i.png"
done

echo "--> 5. View the polyhedral representation of the SCoPs"
opt -basicaa -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable

echo "--> 6. Show the dependences for the SCoPs"
opt -basicaa -polly-dependences -analyze matmul.preopt.ll \
    -polly-process-unprofitable

echo "--> 7. Export jscop files"
opt -basicaa -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable

echo "--> 8. Import the updated jscop files and print the new SCoPs. (optional)"
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
    -polly-process-unprofitable
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
    -polly-import-jscop-postfix=interchanged -polly-process-unprofitable
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
    -polly-import-jscop-postfix=interchanged+tiled -polly-process-unprofitable
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
    -polly-import-jscop-postfix=interchanged+tiled+vector \
    -polly-process-unprofitable

echo "--> 9. Codegenerate the SCoPs"
opt -basicaa -polly-import-jscop -polly-import-jscop-postfix=interchanged \
    -polly-codegen -polly-process-unprofitable \
    matmul.preopt.ll | opt -O3 > matmul.polly.interchanged.ll
opt -basicaa -polly-import-jscop \
    -polly-import-jscop-postfix=interchanged+tiled -polly-codegen \
    matmul.preopt.ll -polly-process-unprofitable \
    | opt -O3 > matmul.polly.interchanged+tiled.ll
opt -basicaa -polly-import-jscop -polly-process-unprofitable \
    -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \
    matmul.preopt.ll -polly-vectorizer=polly \
    | opt -O3 > matmul.polly.interchanged+tiled+vector.ll
opt -basicaa -polly-import-jscop -polly-process-unprofitable \
    -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \
    matmul.preopt.ll -polly-vectorizer=polly -polly-parallel \
    | opt -O3 > matmul.polly.interchanged+tiled+vector+openmp.ll
# Baseline without Polly code generation.
opt matmul.preopt.ll | opt -O3 > matmul.normalopt.ll

echo "--> 10. Create the executables"
llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s \
    && gcc matmul.polly.interchanged.s \
    -o matmul.polly.interchanged.exe
llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s \
    && gcc matmul.polly.interchanged+tiled.s \
    -o matmul.polly.interchanged+tiled.exe
llc matmul.polly.interchanged+tiled+vector.ll \
    -o matmul.polly.interchanged+tiled+vector.s \
    && gcc matmul.polly.interchanged+tiled+vector.s \
    -o matmul.polly.interchanged+tiled+vector.exe
# -lgomp goes after the input file: the linker resolves symbols in command-line
# order, so a library named before the object that needs it may be dropped.
llc matmul.polly.interchanged+tiled+vector+openmp.ll \
    -o matmul.polly.interchanged+tiled+vector+openmp.s \
    && gcc matmul.polly.interchanged+tiled+vector+openmp.s -lgomp \
    -o matmul.polly.interchanged+tiled+vector+openmp.exe
llc matmul.normalopt.ll -o matmul.normalopt.s && gcc matmul.normalopt.s \
    -o matmul.normalopt.exe

echo "--> 11. Compare the runtime of the executables"
echo "time ./matmul.normalopt.exe"
time -f "%E real, %U user, %S sys" ./matmul.normalopt.exe
echo "time ./matmul.polly.interchanged.exe"
time -f "%E real, %U user, %S sys" ./matmul.polly.interchanged.exe
echo "time ./matmul.polly.interchanged+tiled.exe"
time -f "%E real, %U user, %S sys" ./matmul.polly.interchanged+tiled.exe
echo "time ./matmul.polly.interchanged+tiled+vector.exe"
time -f "%E real, %U user, %S sys" ./matmul.polly.interchanged+tiled+vector.exe
echo "time ./matmul.polly.interchanged+tiled+vector+openmp.exe"
time -f "%E real, %U user, %S sys" ./matmul.polly.interchanged+tiled+vector+openmp.exe

View File

@@ -0,0 +1,39 @@
// Control-flow graph of init_array with a region tree drawn on top.
// Each record node carries the IR text of one basic block; edges marked
// constraint=false are the loop back-edges (for.body3 to itself and for.inc17
// to for.cond1.preheader). The nested cluster subgraphs group the blocks; the
// middle cluster carries the label "Region can not profitably be optimized!"
// (presumably the reason this region was not accepted as a SCoP; confirm
// against the tool that emitted the graph).
digraph "Scop Graph for 'init_array' function" {
label="Scop Graph for 'init_array' function";
Node0x5b5b5a0 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x5b5b5a0 -> Node0x5b5de30;
Node0x5b5de30 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
Node0x5b5de30 -> Node0x5b5de50;
Node0x5b5de50 [shape=record,label="{for.cond1.preheader: \l %indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]\l br label %for.body3\l}"];
Node0x5b5de50 -> Node0x5b5b570;
Node0x5b5b570 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.body3 ]\l %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %1 = trunc i64 %0 to i32\l %rem = srem i32 %1, 1024\l %add = add nsw i32 %rem, 1\l %conv = sitofp i32 %add to double\l %div = fmul double %conv, 5.000000e-01\l %conv4 = fptrunc double %div to float\l %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv4, float* %arrayidx6, align 4\l %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %3 = trunc i64 %2 to i32\l %rem8 = srem i32 %3, 1024\l %add9 = add nsw i32 %rem8, 1\l %conv10 = sitofp i32 %add9 to double\l %div11 = fmul double %conv10, 5.000000e-01\l %conv12 = fptrunc double %div11 to float\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv12, float* %arrayidx16, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.inc17\l}"];
Node0x5b5b570 -> Node0x5b5b570[constraint=false];
Node0x5b5b570 -> Node0x5b5df30;
Node0x5b5df30 [shape=record,label="{for.inc17: \l %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1\l %exitcond7 = icmp ne i64 %indvars.iv.next6, 1536\l br i1 %exitcond7, label %for.cond1.preheader, label %for.end19\l}"];
Node0x5b5df30 -> Node0x5b5de50[constraint=false];
Node0x5b5df30 -> Node0x5b5df90;
Node0x5b5df90 [shape=record,label="{for.end19: \l ret void\l}"];
colorscheme = "paired12"
subgraph cluster_0x5b4bdd0 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5b4bf50 {
label = "Region can not profitably be optimized!";
style = solid;
color = 6
subgraph cluster_0x5b4c0d0 {
label = "";
style = solid;
color = 5
Node0x5b5b570;
}
Node0x5b5de50;
Node0x5b5df30;
}
Node0x5b5b5a0;
Node0x5b5de30;
Node0x5b5df90;
}
}

View File

@@ -0,0 +1 @@
48a9f38946a958de9f91f915b8ec112c1ba5d2c3

View File

@@ -0,0 +1,50 @@
// Control-flow graph of main with a region tree drawn on top.
// Each record node carries the IR text of one basic block; edges marked
// constraint=false are the three loop back-edges. The cluster subgraphs nest
// four deep around the innermost block for.body8. The cluster holding
// for.cond1.preheader and for.inc28 is the only one with style=filled and an
// empty label (presumably the region detected as a SCoP; confirm against the
// tool that emitted the graph).
digraph "Scop Graph for 'main' function" {
label="Scop Graph for 'main' function";
Node0x5b5c850 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x5b5c850 -> Node0x5b5a440;
Node0x5b5a440 [shape=record,label="{entry.split: \l tail call void @init_array()\l br label %for.cond1.preheader\l}"];
Node0x5b5a440 -> Node0x5b38cd0;
Node0x5b38cd0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]\l br label %for.body3\l}"];
Node0x5b38cd0 -> Node0x5b4bd30;
Node0x5b4bd30 [shape=record,label="{for.body3: \l %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5,\l... %for.inc25 ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.body8\l}"];
Node0x5b4bd30 -> Node0x5b38c50;
Node0x5b38c50 [shape=record,label="{for.body8: \l %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]\l %arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l %0 = load float, float* %arrayidx12, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv\l %1 = load float, float* %arrayidx16, align 4\l %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4\l %2 = load float, float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l %arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float %add, float* %arrayidx24, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body8, label %for.inc25\l}"];
Node0x5b38c50 -> Node0x5b38c50[constraint=false];
Node0x5b38c50 -> Node0x5b5a290;
Node0x5b5a290 [shape=record,label="{for.inc25: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.body3, label %for.inc28\l}"];
Node0x5b5a290 -> Node0x5b4bd30[constraint=false];
Node0x5b5a290 -> Node0x5b5a340;
Node0x5b5a340 [shape=record,label="{for.inc28: \l %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1\l %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536\l br i1 %exitcond9, label %for.cond1.preheader, label %for.end30\l}"];
Node0x5b5a340 -> Node0x5b38cd0[constraint=false];
Node0x5b5a340 -> Node0x5b5a3a0;
Node0x5b5a3a0 [shape=record,label="{for.end30: \l ret i32 0\l}"];
colorscheme = "paired12"
subgraph cluster_0x5b5c970 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5b5c5a0 {
label = "";
style = filled;
color = 3 subgraph cluster_0x5b5c9f0 {
label = "";
style = solid;
color = 5
subgraph cluster_0x5b5c110 {
label = "";
style = solid;
color = 7
Node0x5b38c50;
}
Node0x5b4bd30;
Node0x5b5a290;
}
Node0x5b38cd0;
Node0x5b5a340;
}
Node0x5b5c850;
Node0x5b5a440;
Node0x5b5a3a0;
}
}

View File

@@ -0,0 +1 @@
4e73701a08d7549f57feb667e558eeb18fcc47b8

View File

@@ -0,0 +1,51 @@
// Control-flow graph of print_array with a region tree drawn on top.
// Each record node carries the IR text of one basic block; edges marked
// constraint=false are the two loop back-edges. Two of the nested clusters are
// labelled with the fprintf call instruction and the innermost one with
// "Region can not profitably be optimized!" (presumably the reasons these
// regions were not accepted as SCoPs; confirm against the tool that emitted
// the graph).
digraph "Scop Graph for 'print_array' function" {
label="Scop Graph for 'print_array' function";
Node0x5b5ee00 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x5b5ee00 -> Node0x5b5ee50;
Node0x5b5ee50 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
Node0x5b5ee50 -> Node0x5b5ee70;
Node0x5b5ee70 [shape=record,label="{for.cond1.preheader: \l %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]\l %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l br label %for.body3\l}"];
Node0x5b5ee70 -> Node0x5b5ee20;
Node0x5b5ee20 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.inc ]\l %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv\l %2 = load float, float* %arrayidx5, align 4\l %conv = fpext float %2 to double\l %call = tail call i32 (%struct._IO_FILE*, i8*, ...)\l... @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x\l... i8]* @.str, i64 0, i64 0), double %conv) #2\l %3 = trunc i64 %indvars.iv to i32\l %rem = srem i32 %3, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %for.inc\l}"];
Node0x5b5ee20 -> Node0x5b60d10;
Node0x5b5ee20 -> Node0x5b60d70;
Node0x5b60d10 [shape=record,label="{if.then: \l %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)\l br label %for.inc\l}"];
Node0x5b60d10 -> Node0x5b60d70;
Node0x5b60d70 [shape=record,label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
Node0x5b60d70 -> Node0x5b5ee20[constraint=false];
Node0x5b60d70 -> Node0x5b60e10;
Node0x5b60e10 [shape=record,label="{for.end: \l %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]\l %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)\l %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1\l %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536\l br i1 %exitcond8, label %for.cond1.preheader, label %for.end12\l}"];
Node0x5b60e10 -> Node0x5b5ee70[constraint=false];
Node0x5b60e10 -> Node0x5b60e70;
Node0x5b60e70 [shape=record,label="{for.end12: \l ret void\l}"];
colorscheme = "paired12"
subgraph cluster_0x5b349a0 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5b5c2c0 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 6
subgraph cluster_0x5b5c240 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 5
subgraph cluster_0x5b34a20 {
label = "Region can not profitably be optimized!";
style = solid;
color = 7
Node0x5b5ee20;
Node0x5b60d10;
}
Node0x5b60d70;
}
Node0x5b5ee70;
Node0x5b60e10;
}
Node0x5b5ee00;
Node0x5b5ee50;
Node0x5b60e70;
}
}

View File

@@ -0,0 +1 @@
e3b973b131ab57d119b10fbb0588298f1657027c

Some files were not shown because too many files have changed in this diff Show More