| .text |
| .file "matmul.c" |
| .section .rodata.cst8,"aM",@progbits,8 |
| .p2align 3 # -- Begin function init_array |
| .LCPI0_0: |
| .quad 4602678819172646912 # double 0.5 |
| .text |
| .globl init_array |
| .p2align 4, 0x90 |
| .type init_array,@function |
| init_array: # @init_array |
| .cfi_startproc |
| # %bb.0: # %entry |
| pushq %rbp |
| .cfi_def_cfa_offset 16 |
| .cfi_offset %rbp, -16 |
| movq %rsp, %rbp |
| .cfi_def_cfa_register %rbp |
| leaq B(%rip), %rax |
| leaq A(%rip), %rcx |
| xorl %r8d, %r8d |
| movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero |
| xorl %r9d, %r9d |
| .p2align 4, 0x90 |
| .LBB0_1: # %polly.loop_header |
| # =>This Loop Header: Depth=1 |
| # Child Loop BB0_2 Depth 2 |
| movl $1, %edi |
| xorl %edx, %edx |
| .p2align 4, 0x90 |
| .LBB0_2: # %polly.loop_header1 |
| # Parent Loop BB0_1 Depth=1 |
| # => This Inner Loop Header: Depth=2 |
| movl %edx, %esi |
| andl $1022, %esi # imm = 0x3FE |
| orl $1, %esi |
| xorps %xmm1, %xmm1 |
| cvtsi2sdl %esi, %xmm1 |
| mulsd %xmm0, %xmm1 |
| cvtsd2ss %xmm1, %xmm1 |
| movss %xmm1, -4(%rcx,%rdi,4) |
| movss %xmm1, -4(%rax,%rdi,4) |
| leal (%r9,%rdx), %esi |
| andl $1023, %esi # imm = 0x3FF |
| addl $1, %esi |
| xorps %xmm1, %xmm1 |
| cvtsi2sdl %esi, %xmm1 |
| mulsd %xmm0, %xmm1 |
| cvtsd2ss %xmm1, %xmm1 |
| movss %xmm1, (%rcx,%rdi,4) |
| movss %xmm1, (%rax,%rdi,4) |
| addq $2, %rdi |
| addl %r8d, %edx |
| cmpq $1537, %rdi # imm = 0x601 |
| jne .LBB0_2 |
| # %bb.3: # %polly.loop_exit3 |
| # in Loop: Header=BB0_1 Depth=1 |
| addq $1, %r9 |
| addq $6144, %rax # imm = 0x1800 |
| addq $6144, %rcx # imm = 0x1800 |
| addl $2, %r8d |
| cmpq $1536, %r9 # imm = 0x600 |
| jne .LBB0_1 |
| # %bb.4: # %polly.exiting |
| popq %rbp |
| .cfi_def_cfa %rsp, 8 |
| retq |
| .Lfunc_end0: |
| .size init_array, .Lfunc_end0-init_array |
| .cfi_endproc |
| # -- End function |
| .globl print_array # -- Begin function print_array |
| .p2align 4, 0x90 |
| .type print_array,@function |
| print_array: # @print_array |
| .cfi_startproc |
| # %bb.0: # %entry |
| pushq %rbp |
| .cfi_def_cfa_offset 16 |
| .cfi_offset %rbp, -16 |
| movq %rsp, %rbp |
| .cfi_def_cfa_register %rbp |
| pushq %r15 |
| pushq %r14 |
| pushq %r13 |
| pushq %r12 |
| pushq %rbx |
| pushq %rax |
| .cfi_offset %rbx, -56 |
| .cfi_offset %r12, -48 |
| .cfi_offset %r13, -40 |
| .cfi_offset %r14, -32 |
| .cfi_offset %r15, -24 |
| leaq C(%rip), %r13 |
| xorl %eax, %eax |
| movl $3435973837, %r12d # imm = 0xCCCCCCCD |
| leaq .L.str(%rip), %r14 |
| .p2align 4, 0x90 |
| .LBB1_1: # %for.cond1.preheader |
| # =>This Loop Header: Depth=1 |
| # Child Loop BB1_2 Depth 2 |
| movq %rax, -48(%rbp) # 8-byte Spill |
| movq stdout(%rip), %rsi |
| xorl %ebx, %ebx |
| .p2align 4, 0x90 |
| .LBB1_2: # %for.body3 |
| # Parent Loop BB1_1 Depth=1 |
| # => This Inner Loop Header: Depth=2 |
| movl %ebx, %eax |
| imulq %r12, %rax |
| shrq $38, %rax |
| leal (%rax,%rax,4), %r15d |
| shll $4, %r15d |
| addl $79, %r15d |
| movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero |
| cvtss2sd %xmm0, %xmm0 |
| movb $1, %al |
| movq %rsi, %rdi |
| movq %r14, %rsi |
| callq fprintf |
| cmpl %ebx, %r15d |
| jne .LBB1_4 |
| # %bb.3: # %if.then |
| # in Loop: Header=BB1_2 Depth=2 |
| movq stdout(%rip), %rsi |
| movl $10, %edi |
| callq fputc@PLT |
| .LBB1_4: # %for.inc |
| # in Loop: Header=BB1_2 Depth=2 |
| addq $1, %rbx |
| movq stdout(%rip), %rsi |
| cmpq $1536, %rbx # imm = 0x600 |
| jne .LBB1_2 |
| # %bb.5: # %for.end |
| # in Loop: Header=BB1_1 Depth=1 |
| movl $10, %edi |
| callq fputc@PLT |
| movq -48(%rbp), %rax # 8-byte Reload |
| addq $1, %rax |
| addq $6144, %r13 # imm = 0x1800 |
| cmpq $1536, %rax # imm = 0x600 |
| jne .LBB1_1 |
| # %bb.6: # %for.end12 |
| addq $8, %rsp |
| popq %rbx |
| popq %r12 |
| popq %r13 |
| popq %r14 |
| popq %r15 |
| popq %rbp |
| .cfi_def_cfa %rsp, 8 |
| retq |
| .Lfunc_end1: |
| .size print_array, .Lfunc_end1-print_array |
| .cfi_endproc |
| # -- End function |
| .globl main # -- Begin function main |
| .p2align 4, 0x90 |
| .type main,@function |
| main: # @main |
| .cfi_startproc |
| # %bb.0: # %entry |
| pushq %rbp |
| .cfi_def_cfa_offset 16 |
| .cfi_offset %rbp, -16 |
| movq %rsp, %rbp |
| .cfi_def_cfa_register %rbp |
| pushq %r15 |
| pushq %r14 |
| pushq %r13 |
| pushq %r12 |
| pushq %rbx |
| subq $344, %rsp # imm = 0x158 |
| .cfi_offset %rbx, -56 |
| .cfi_offset %r12, -48 |
| .cfi_offset %r13, -40 |
| .cfi_offset %r14, -32 |
| .cfi_offset %r15, -24 |
| callq init_array |
| leaq C(%rip), %rdi |
| xorl %eax, %eax |
| movq %rax, -48(%rbp) # 8-byte Spill |
| xorl %esi, %esi |
| movl $9437184, %edx # imm = 0x900000 |
| callq memset@PLT |
| movl $64, %eax |
| movq %rax, -64(%rbp) # 8-byte Spill |
| leaq A(%rip), %rax |
| movq %rax, -56(%rbp) # 8-byte Spill |
| .p2align 4, 0x90 |
| .LBB2_1: # %polly.loop_header8 |
| # =>This Loop Header: Depth=1 |
| # Child Loop BB2_2 Depth 2 |
| # Child Loop BB2_3 Depth 3 |
| # Child Loop BB2_4 Depth 4 |
| # Child Loop BB2_5 Depth 5 |
| leaq B+240(%rip), %rax |
| xorl %edi, %edi |
| .p2align 4, 0x90 |
| .LBB2_2: # %polly.loop_header14 |
| # Parent Loop BB2_1 Depth=1 |
| # => This Loop Header: Depth=2 |
| # Child Loop BB2_3 Depth 3 |
| # Child Loop BB2_4 Depth 4 |
| # Child Loop BB2_5 Depth 5 |
| movq %rdi, %rcx |
| orq $4, %rcx |
| movq %rcx, -80(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $8, %rcx |
| movq %rcx, -264(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $12, %rcx |
| movq %rcx, -256(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $16, %rcx |
| movq %rcx, -248(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $20, %rcx |
| movq %rcx, -240(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $24, %rcx |
| movq %rcx, -232(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $28, %rcx |
| movq %rcx, -224(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $32, %rcx |
| movq %rcx, -216(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $36, %rcx |
| movq %rcx, -208(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $40, %rcx |
| movq %rcx, -200(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $44, %rcx |
| movq %rcx, -192(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $48, %rcx |
| movq %rcx, -184(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $52, %rcx |
| movq %rcx, -176(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $56, %rcx |
| movq %rcx, -168(%rbp) # 8-byte Spill |
| movq %rdi, %rcx |
| orq $60, %rcx |
| movq %rcx, -160(%rbp) # 8-byte Spill |
| movq -56(%rbp), %rdx # 8-byte Reload |
| movq %rax, -136(%rbp) # 8-byte Spill |
| movq %rax, -72(%rbp) # 8-byte Spill |
| xorl %eax, %eax |
| movq %rdi, -272(%rbp) # 8-byte Spill |
| .p2align 4, 0x90 |
| .LBB2_3: # %polly.loop_header20 |
| # Parent Loop BB2_1 Depth=1 |
| # Parent Loop BB2_2 Depth=2 |
| # => This Loop Header: Depth=3 |
| # Child Loop BB2_4 Depth 4 |
| # Child Loop BB2_5 Depth 5 |
| movq %rax, -144(%rbp) # 8-byte Spill |
| movq %rdx, -152(%rbp) # 8-byte Spill |
| movq -48(%rbp), %rax # 8-byte Reload |
| .p2align 4, 0x90 |
| .LBB2_4: # %polly.loop_header26 |
| # Parent Loop BB2_1 Depth=1 |
| # Parent Loop BB2_2 Depth=2 |
| # Parent Loop BB2_3 Depth=3 |
| # => This Loop Header: Depth=4 |
| # Child Loop BB2_5 Depth 5 |
| movq %rax, -376(%rbp) # 8-byte Spill |
| leaq (%rax,%rax,2), %rax |
| shlq $11, %rax |
| leaq C(%rip), %rsi |
| addq %rsi, %rax |
| leaq (%rax,%rdi,4), %rcx |
| movq %rcx, -368(%rbp) # 8-byte Spill |
| movq -80(%rbp), %rcx # 8-byte Reload |
| leaq (%rax,%rcx,4), %rcx |
| movq %rcx, -360(%rbp) # 8-byte Spill |
| movq -264(%rbp), %rbx # 8-byte Reload |
| leaq (%rax,%rbx,4), %rcx |
| movq %rcx, -352(%rbp) # 8-byte Spill |
| movq -256(%rbp), %r8 # 8-byte Reload |
| movq %rdi, %rsi |
| leaq (%rax,%r8,4), %rdi |
| movq %rdi, -344(%rbp) # 8-byte Spill |
| movq -248(%rbp), %rdi # 8-byte Reload |
| leaq (%rax,%rdi,4), %rcx |
| movq %rcx, -336(%rbp) # 8-byte Spill |
| movq -240(%rbp), %r9 # 8-byte Reload |
| leaq (%rax,%r9,4), %rcx |
| movq %rcx, -328(%rbp) # 8-byte Spill |
| movq -232(%rbp), %r10 # 8-byte Reload |
| leaq (%rax,%r10,4), %rcx |
| movq %rcx, -320(%rbp) # 8-byte Spill |
| movq -224(%rbp), %r14 # 8-byte Reload |
| leaq (%rax,%r14,4), %rcx |
| movq %rcx, -312(%rbp) # 8-byte Spill |
| movq -216(%rbp), %r15 # 8-byte Reload |
| leaq (%rax,%r15,4), %rcx |
| movq %rcx, -304(%rbp) # 8-byte Spill |
| movq -208(%rbp), %r12 # 8-byte Reload |
| leaq (%rax,%r12,4), %rcx |
| movq %rcx, -296(%rbp) # 8-byte Spill |
| movq -200(%rbp), %r13 # 8-byte Reload |
| leaq (%rax,%r13,4), %rcx |
| movq %rcx, -288(%rbp) # 8-byte Spill |
| movq -192(%rbp), %r11 # 8-byte Reload |
| leaq (%rax,%r11,4), %rcx |
| movq %rcx, -280(%rbp) # 8-byte Spill |
| movaps (%rax,%rsi,4), %xmm15 |
| movq -80(%rbp), %rcx # 8-byte Reload |
| movaps (%rax,%rcx,4), %xmm14 |
| movaps (%rax,%rbx,4), %xmm13 |
| movaps (%rax,%r8,4), %xmm12 |
| movaps (%rax,%rdi,4), %xmm11 |
| movaps (%rax,%r9,4), %xmm10 |
| movaps (%rax,%r10,4), %xmm9 |
| movaps (%rax,%r14,4), %xmm8 |
| movaps (%rax,%r15,4), %xmm7 |
| movaps (%rax,%r12,4), %xmm6 |
| movaps (%rax,%r13,4), %xmm5 |
| movaps (%rax,%r11,4), %xmm4 |
| movq -184(%rbp), %rcx # 8-byte Reload |
| movaps (%rax,%rcx,4), %xmm3 |
| movq -176(%rbp), %rsi # 8-byte Reload |
| movaps (%rax,%rsi,4), %xmm0 |
| movaps %xmm0, -96(%rbp) # 16-byte Spill |
| movq -168(%rbp), %rbx # 8-byte Reload |
| movaps (%rax,%rbx,4), %xmm0 |
| movaps %xmm0, -112(%rbp) # 16-byte Spill |
| movq -160(%rbp), %rdi # 8-byte Reload |
| movaps (%rax,%rdi,4), %xmm0 |
| movaps %xmm0, -128(%rbp) # 16-byte Spill |
| leaq (%rax,%rcx,4), %r8 |
| leaq (%rax,%rsi,4), %rcx |
| leaq (%rax,%rbx,4), %rsi |
| leaq (%rax,%rdi,4), %rax |
| movq -72(%rbp), %r9 # 8-byte Reload |
| movl $0, %r10d |
| .p2align 4, 0x90 |
| .LBB2_5: # %vector.ph |
| # Parent Loop BB2_1 Depth=1 |
| # Parent Loop BB2_2 Depth=2 |
| # Parent Loop BB2_3 Depth=3 |
| # Parent Loop BB2_4 Depth=4 |
| # => This Inner Loop Header: Depth=5 |
| movss (%rdx,%r10,4), %xmm0 # xmm0 = mem[0],zero,zero,zero |
| shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0] |
| movaps -240(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm15 |
| movaps -224(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm14 |
| movaps -208(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm13 |
| movaps -192(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm12 |
| movaps -176(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm11 |
| movaps -160(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm10 |
| movaps -144(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm9 |
| movaps -128(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm8 |
| movaps -112(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm7 |
| movaps -96(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm6 |
| movaps -80(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm5 |
| movaps -64(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm4 |
| movaps -48(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| addps %xmm1, %xmm3 |
| movaps -32(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| movaps -96(%rbp), %xmm2 # 16-byte Reload |
| addps %xmm1, %xmm2 |
| movaps %xmm2, -96(%rbp) # 16-byte Spill |
| movaps -16(%r9), %xmm1 |
| mulps %xmm0, %xmm1 |
| movaps -112(%rbp), %xmm2 # 16-byte Reload |
| addps %xmm1, %xmm2 |
| movaps %xmm2, -112(%rbp) # 16-byte Spill |
| mulps (%r9), %xmm0 |
| movaps -128(%rbp), %xmm1 # 16-byte Reload |
| addps %xmm0, %xmm1 |
| movaps %xmm1, -128(%rbp) # 16-byte Spill |
| addq $1, %r10 |
| addq $6144, %r9 # imm = 0x1800 |
| cmpq $64, %r10 |
| jne .LBB2_5 |
| # %bb.6: # %polly.loop_exit34 |
| # in Loop: Header=BB2_4 Depth=4 |
| movq -368(%rbp), %rdi # 8-byte Reload |
| movaps %xmm15, (%rdi) |
| movq -360(%rbp), %rdi # 8-byte Reload |
| movaps %xmm14, (%rdi) |
| movq -352(%rbp), %rdi # 8-byte Reload |
| movaps %xmm13, (%rdi) |
| movq -344(%rbp), %rdi # 8-byte Reload |
| movaps %xmm12, (%rdi) |
| movq -336(%rbp), %rdi # 8-byte Reload |
| movaps %xmm11, (%rdi) |
| movq -328(%rbp), %rdi # 8-byte Reload |
| movaps %xmm10, (%rdi) |
| movq -320(%rbp), %rdi # 8-byte Reload |
| movaps %xmm9, (%rdi) |
| movq -312(%rbp), %rdi # 8-byte Reload |
| movaps %xmm8, (%rdi) |
| movq -304(%rbp), %rdi # 8-byte Reload |
| movaps %xmm7, (%rdi) |
| movq -296(%rbp), %rdi # 8-byte Reload |
| movaps %xmm6, (%rdi) |
| movq -288(%rbp), %rdi # 8-byte Reload |
| movaps %xmm5, (%rdi) |
| movq -280(%rbp), %rdi # 8-byte Reload |
| movaps %xmm4, (%rdi) |
| movaps %xmm3, (%r8) |
| movaps -96(%rbp), %xmm0 # 16-byte Reload |
| movaps %xmm0, (%rcx) |
| movaps -112(%rbp), %xmm0 # 16-byte Reload |
| movaps %xmm0, (%rsi) |
| movaps -128(%rbp), %xmm0 # 16-byte Reload |
| movaps %xmm0, (%rax) |
| movq -376(%rbp), %rax # 8-byte Reload |
| addq $1, %rax |
| addq $6144, %rdx # imm = 0x1800 |
| cmpq -64(%rbp), %rax # 8-byte Folded Reload |
| movq -272(%rbp), %rdi # 8-byte Reload |
| jne .LBB2_4 |
| # %bb.7: # %polly.loop_exit28 |
| # in Loop: Header=BB2_3 Depth=3 |
| movq -144(%rbp), %rax # 8-byte Reload |
| addq $64, %rax |
| addq $393216, -72(%rbp) # 8-byte Folded Spill |
| # imm = 0x60000 |
| movq -152(%rbp), %rdx # 8-byte Reload |
| addq $256, %rdx # imm = 0x100 |
| cmpq $1536, %rax # imm = 0x600 |
| jb .LBB2_3 |
| # %bb.8: # %polly.loop_exit22 |
| # in Loop: Header=BB2_2 Depth=2 |
| addq $64, %rdi |
| movq -136(%rbp), %rax # 8-byte Reload |
| addq $256, %rax # imm = 0x100 |
| cmpq $1536, %rdi # imm = 0x600 |
| jb .LBB2_2 |
| # %bb.9: # %polly.loop_exit16 |
| # in Loop: Header=BB2_1 Depth=1 |
| movq -48(%rbp), %rax # 8-byte Reload |
| movq %rax, %rcx |
| addq $64, %rcx |
| addq $64, -64(%rbp) # 8-byte Folded Spill |
| addq $393216, -56(%rbp) # 8-byte Folded Spill |
| # imm = 0x60000 |
| movq %rcx, %rax |
| movq %rcx, -48(%rbp) # 8-byte Spill |
| cmpq $1536, %rcx # imm = 0x600 |
| jb .LBB2_1 |
| # %bb.10: # %polly.exiting |
| xorl %eax, %eax |
| addq $344, %rsp # imm = 0x158 |
| popq %rbx |
| popq %r12 |
| popq %r13 |
| popq %r14 |
| popq %r15 |
| popq %rbp |
| .cfi_def_cfa %rsp, 8 |
| retq |
| .Lfunc_end2: |
| .size main, .Lfunc_end2-main |
| .cfi_endproc |
| # -- End function |
| .type A,@object # @A |
| .comm A,9437184,16 |
| .type B,@object # @B |
| .comm B,9437184,16 |
| .type .L.str,@object # @.str |
| .section .rodata.str1.1,"aMS",@progbits,1 |
| .L.str: |
| .asciz "%lf " |
| .size .L.str, 5 |
| |
| .type C,@object # @C |
| .comm C,9437184,16 |
| |
| .ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)" |
| .section ".note.GNU-stack","",@progbits |