| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s |
| ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR |
| ; |
| ; Test some fp16 vector operations, which must be scalarized. With fewer than |
| ; 8 elements there should only be operations emitted for the used elements. |
| |
| ; fun0: element-wise fadd of two <8 x half> vectors. %LHS is loaded from %Src, |
| ; %RHS from the vector slot right after it (getelementptr index 1), and the sum |
| ; is stored to %Dst. Both check prefixes below expect all 8 lanes to be handled |
| ; one at a time: two __extendhfsf2 calls, one aebr and one __truncsfhf2 per lane. |
| %Ty0 = type <8 x half> |
| define void @fun0(ptr %Src, ptr %Dst) { |
| ; CHECK-LABEL: fun0: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: stmg %r13, %r15, 104(%r15) |
| ; CHECK-NEXT: .cfi_offset %r13, -56 |
| ; CHECK-NEXT: .cfi_offset %r14, -48 |
| ; CHECK-NEXT: .cfi_offset %r15, -40 |
| ; CHECK-NEXT: aghi %r15, -288 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 448 |
| ; CHECK-NEXT: std %f8, 280(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f9, 272(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f10, 264(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f11, 256(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f12, 248(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f13, 240(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f14, 232(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f15, 224(%r15) # 8-byte Spill |
| ; CHECK-NEXT: .cfi_offset %f8, -168 |
| ; CHECK-NEXT: .cfi_offset %f9, -176 |
| ; CHECK-NEXT: .cfi_offset %f10, -184 |
| ; CHECK-NEXT: .cfi_offset %f11, -192 |
| ; CHECK-NEXT: .cfi_offset %f12, -200 |
| ; CHECK-NEXT: .cfi_offset %f13, -208 |
| ; CHECK-NEXT: .cfi_offset %f14, -216 |
| ; CHECK-NEXT: .cfi_offset %f15, -224 |
| ; CHECK-NEXT: lgh %r0, 14(%r2) |
| ; CHECK-NEXT: lgr %r13, %r3 |
| ; CHECK-NEXT: lgh %r1, 12(%r2) |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: stg %r0, 216(%r15) # 8-byte Spill |
| ; CHECK-NEXT: lgh %r0, 10(%r2) |
| ; CHECK-NEXT: sllg %r1, %r1, 48 |
| ; CHECK-NEXT: stg %r1, 208(%r15) # 8-byte Spill |
| ; CHECK-NEXT: lgh %r1, 8(%r2) |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: stg %r0, 200(%r15) # 8-byte Spill |
| ; CHECK-NEXT: lgh %r0, 6(%r2) |
| ; CHECK-NEXT: sllg %r1, %r1, 48 |
| ; CHECK-NEXT: stg %r1, 192(%r15) # 8-byte Spill |
| ; CHECK-NEXT: lgh %r1, 4(%r2) |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: stg %r0, 176(%r15) # 8-byte Spill |
| ; CHECK-NEXT: lgh %r0, 2(%r2) |
| ; CHECK-NEXT: sllg %r1, %r1, 48 |
| ; CHECK-NEXT: stg %r1, 160(%r15) # 8-byte Spill |
| ; CHECK-NEXT: lgh %r1, 0(%r2) |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: ldgr %f8, %r0 |
| ; CHECK-NEXT: lgh %r0, 30(%r2) |
| ; CHECK-NEXT: sllg %r1, %r1, 48 |
| ; CHECK-NEXT: ldgr %f13, %r1 |
| ; CHECK-NEXT: lgh %r1, 28(%r2) |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: stg %r0, 184(%r15) # 8-byte Spill |
| ; CHECK-NEXT: lgh %r0, 26(%r2) |
| ; CHECK-NEXT: sllg %r1, %r1, 48 |
| ; CHECK-NEXT: stg %r1, 168(%r15) # 8-byte Spill |
| ; CHECK-NEXT: lgh %r1, 24(%r2) |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: lgh %r3, 22(%r2) |
| ; CHECK-NEXT: ldgr %f10, %r0 |
| ; CHECK-NEXT: sllg %r0, %r1, 48 |
| ; CHECK-NEXT: ldgr %f11, %r0 |
| ; CHECK-NEXT: sllg %r0, %r3, 48 |
| ; CHECK-NEXT: lgh %r1, 20(%r2) |
| ; CHECK-NEXT: ldgr %f12, %r0 |
| ; CHECK-NEXT: lgh %r0, 18(%r2) |
| ; CHECK-NEXT: lgh %r2, 16(%r2) |
| ; CHECK-NEXT: sllg %r1, %r1, 48 |
| ; CHECK-NEXT: ldgr %f14, %r1 |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: sllg %r1, %r2, 48 |
| ; CHECK-NEXT: ldgr %f0, %r1 |
| ; CHECK-NEXT: ldgr %f15, %r0 |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f9, %f0 |
| ; CHECK-NEXT: ler %f0, %f13 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: aebr %f0, %f9 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f13, %f0 |
| ; CHECK-NEXT: ler %f0, %f15 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f9, %f0 |
| ; CHECK-NEXT: ler %f0, %f8 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: aebr %f0, %f9 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f8, %f0 |
| ; CHECK-NEXT: ler %f0, %f14 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f9, %f0 |
| ; CHECK-NEXT: ld %f0, 160(%r15) # 8-byte Reload |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: aebr %f0, %f9 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f9, %f0 |
| ; CHECK-NEXT: ler %f0, %f12 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f12, %f0 |
| ; CHECK-NEXT: ld %f0, 176(%r15) # 8-byte Reload |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: aebr %f0, %f12 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f14, %f0 |
| ; CHECK-NEXT: ler %f0, %f11 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f11, %f0 |
| ; CHECK-NEXT: ld %f0, 192(%r15) # 8-byte Reload |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: aebr %f0, %f11 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f11, %f0 |
| ; CHECK-NEXT: ler %f0, %f10 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f10, %f0 |
| ; CHECK-NEXT: ld %f0, 200(%r15) # 8-byte Reload |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: aebr %f0, %f10 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f10, %f0 |
| ; CHECK-NEXT: ld %f0, 168(%r15) # 8-byte Reload |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f12, %f0 |
| ; CHECK-NEXT: ld %f0, 208(%r15) # 8-byte Reload |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: aebr %f0, %f12 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f12, %f0 |
| ; CHECK-NEXT: ld %f0, 184(%r15) # 8-byte Reload |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f15, %f0 |
| ; CHECK-NEXT: ld %f0, 216(%r15) # 8-byte Reload |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: aebr %f0, %f15 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d |
| ; CHECK-NEXT: lgdr %r0, %f0 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 14(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f12 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 12(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f10 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 10(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f11 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 8(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f14 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 6(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f9 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 4(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f8 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 2(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f13 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 0(%r13) |
| ; CHECK-NEXT: ld %f8, 280(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f9, 272(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f10, 264(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f11, 256(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f12, 248(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f13, 240(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f14, 232(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f15, 224(%r15) # 8-byte Reload |
| ; CHECK-NEXT: lmg %r13, %r15, 392(%r15) |
| ; CHECK-NEXT: br %r14 |
| ; |
| ; VECTOR-LABEL: fun0: |
| ; VECTOR: # %bb.0: |
| ; VECTOR-NEXT: stmg %r13, %r15, 104(%r15) |
| ; VECTOR-NEXT: .cfi_offset %r13, -56 |
| ; VECTOR-NEXT: .cfi_offset %r14, -48 |
| ; VECTOR-NEXT: .cfi_offset %r15, -40 |
| ; VECTOR-NEXT: aghi %r15, -248 |
| ; VECTOR-NEXT: .cfi_def_cfa_offset 408 |
| ; VECTOR-NEXT: std %f8, 240(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: .cfi_offset %f8, -168 |
| ; VECTOR-NEXT: vl %v0, 16(%r2), 3 |
| ; VECTOR-NEXT: mvc 160(16,%r15), 0(%r2) # 16-byte Folded Spill |
| ; VECTOR-NEXT: lgr %r13, %r3 |
| ; VECTOR-NEXT: vst %v0, 176(%r15), 3 # 16-byte Spill |
| ; VECTOR-NEXT: vreph %v0, %v0, 7 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f8, %f0 |
| ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 7 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: aebr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 |
| ; VECTOR-NEXT: vst %v0, 208(%r15), 3 # 16-byte Spill |
| ; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 6 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f8, %f0 |
| ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 6 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: aebr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: vl %v1, 208(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 |
| ; VECTOR-NEXT: vmrhh %v0, %v0, %v1 |
| ; VECTOR-NEXT: vst %v0, 208(%r15), 3 # 16-byte Spill |
| ; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 5 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f8, %f0 |
| ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 5 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: aebr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 |
| ; VECTOR-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill |
| ; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 4 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f8, %f0 |
| ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 4 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: aebr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 |
| ; VECTOR-NEXT: vmrhh %v0, %v0, %v1 |
| ; VECTOR-NEXT: vl %v1, 208(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vmrhf %v0, %v0, %v1 |
| ; VECTOR-NEXT: vst %v0, 208(%r15), 3 # 16-byte Spill |
| ; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 3 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f8, %f0 |
| ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 3 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: aebr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 |
| ; VECTOR-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill |
| ; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 2 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f8, %f0 |
| ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 2 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: aebr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 |
| ; VECTOR-NEXT: vmrhh %v0, %v0, %v1 |
| ; VECTOR-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill |
| ; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f8, %f0 |
| ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: aebr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 |
| ; VECTOR-NEXT: vst %v0, 224(%r15), 3 # 16-byte Spill |
| ; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 1 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f8, %f0 |
| ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vreph %v0, %v0, 1 |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: aebr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: vl %v1, 224(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 |
| ; VECTOR-NEXT: vmrhh %v0, %v1, %v0 |
| ; VECTOR-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: vmrhf %v0, %v0, %v1 |
| ; VECTOR-NEXT: vl %v1, 208(%r15), 3 # 16-byte Reload |
| ; VECTOR-NEXT: ld %f8, 240(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: vmrhg %v0, %v0, %v1 |
| ; VECTOR-NEXT: vst %v0, 0(%r13), 3 |
| ; VECTOR-NEXT: lmg %r13, %r15, 352(%r15) |
| ; VECTOR-NEXT: br %r14 |
| ; First operand: the <8 x half> vector stored at %Src itself. |
| %LHS = load %Ty0, ptr %Src |
| ; Index 1 of %Ty0 addresses the next whole vector; the expected code above |
| ; reads it starting at byte offset 16 from %Src. |
| %S2 = getelementptr %Ty0, ptr %Src, i32 1 |
| %RHS = load %Ty0, ptr %S2 |
| %Res = fadd %Ty0 %LHS, %RHS |
| store %Ty0 %Res, ptr %Dst |
| ret void |
| } |
| |
| ; fun1: element-wise fsub (%LHS - %RHS) of two <4 x half> vectors laid out back |
| ; to back at %Src, with the result stored to %Dst. Only 4 lanes exist here, and |
| ; both check prefixes below expect exactly 4 sebr operations (each with two |
| ; __extendhfsf2 calls and one __truncsfhf2 call) and 4 halfword stores. |
| %Ty1 = type <4 x half> |
| define void @fun1(ptr %Src, ptr %Dst) { |
| ; CHECK-LABEL: fun1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: stmg %r13, %r15, 104(%r15) |
| ; CHECK-NEXT: .cfi_offset %r13, -56 |
| ; CHECK-NEXT: .cfi_offset %r14, -48 |
| ; CHECK-NEXT: .cfi_offset %r15, -40 |
| ; CHECK-NEXT: aghi %r15, -224 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 384 |
| ; CHECK-NEXT: std %f8, 216(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f9, 208(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f10, 200(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f11, 192(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f12, 184(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f13, 176(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f14, 168(%r15) # 8-byte Spill |
| ; CHECK-NEXT: std %f15, 160(%r15) # 8-byte Spill |
| ; CHECK-NEXT: .cfi_offset %f8, -168 |
| ; CHECK-NEXT: .cfi_offset %f9, -176 |
| ; CHECK-NEXT: .cfi_offset %f10, -184 |
| ; CHECK-NEXT: .cfi_offset %f11, -192 |
| ; CHECK-NEXT: .cfi_offset %f12, -200 |
| ; CHECK-NEXT: .cfi_offset %f13, -208 |
| ; CHECK-NEXT: .cfi_offset %f14, -216 |
| ; CHECK-NEXT: .cfi_offset %f15, -224 |
| ; CHECK-NEXT: lgh %r0, 6(%r2) |
| ; CHECK-NEXT: lgr %r13, %r3 |
| ; CHECK-NEXT: lgh %r1, 4(%r2) |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: ldgr %f8, %r0 |
| ; CHECK-NEXT: lgh %r0, 2(%r2) |
| ; CHECK-NEXT: sllg %r1, %r1, 48 |
| ; CHECK-NEXT: ldgr %f9, %r1 |
| ; CHECK-NEXT: lgh %r1, 0(%r2) |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: lgh %r3, 14(%r2) |
| ; CHECK-NEXT: ldgr %f12, %r0 |
| ; CHECK-NEXT: sllg %r0, %r1, 48 |
| ; CHECK-NEXT: ldgr %f10, %r0 |
| ; CHECK-NEXT: sllg %r0, %r3, 48 |
| ; CHECK-NEXT: lgh %r1, 12(%r2) |
| ; CHECK-NEXT: ldgr %f11, %r0 |
| ; CHECK-NEXT: lgh %r0, 10(%r2) |
| ; CHECK-NEXT: lgh %r2, 8(%r2) |
| ; CHECK-NEXT: sllg %r1, %r1, 48 |
| ; CHECK-NEXT: ldgr %f13, %r1 |
| ; CHECK-NEXT: sllg %r0, %r0, 48 |
| ; CHECK-NEXT: sllg %r1, %r2, 48 |
| ; CHECK-NEXT: ldgr %f0, %r1 |
| ; CHECK-NEXT: ldgr %f14, %r0 |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f15, %f0 |
| ; CHECK-NEXT: ler %f0, %f10 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: sebr %f0, %f15 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f10, %f0 |
| ; CHECK-NEXT: ler %f0, %f14 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f14, %f0 |
| ; CHECK-NEXT: ler %f0, %f12 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: sebr %f0, %f14 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f12, %f0 |
| ; CHECK-NEXT: ler %f0, %f13 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f13, %f0 |
| ; CHECK-NEXT: ler %f0, %f9 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: sebr %f0, %f13 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: ler %f9, %f0 |
| ; CHECK-NEXT: ler %f0, %f11 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: ler %f11, %f0 |
| ; CHECK-NEXT: ler %f0, %f8 |
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; CHECK-NEXT: sebr %f0, %f11 |
| ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d |
| ; CHECK-NEXT: lgdr %r0, %f0 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 6(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f9 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 4(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f12 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 2(%r13) |
| ; CHECK-NEXT: lgdr %r0, %f10 |
| ; CHECK-NEXT: srlg %r0, %r0, 48 |
| ; CHECK-NEXT: sth %r0, 0(%r13) |
| ; CHECK-NEXT: ld %f8, 216(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f9, 208(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f10, 200(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f11, 192(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f12, 184(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f13, 176(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f14, 168(%r15) # 8-byte Reload |
| ; CHECK-NEXT: ld %f15, 160(%r15) # 8-byte Reload |
| ; CHECK-NEXT: lmg %r13, %r15, 328(%r15) |
| ; CHECK-NEXT: br %r14 |
| ; |
| ; VECTOR-LABEL: fun1: |
| ; VECTOR: # %bb.0: |
| ; VECTOR-NEXT: stmg %r13, %r15, 104(%r15) |
| ; VECTOR-NEXT: .cfi_offset %r13, -56 |
| ; VECTOR-NEXT: .cfi_offset %r14, -48 |
| ; VECTOR-NEXT: .cfi_offset %r15, -40 |
| ; VECTOR-NEXT: aghi %r15, -224 |
| ; VECTOR-NEXT: .cfi_def_cfa_offset 384 |
| ; VECTOR-NEXT: std %f8, 216(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: std %f9, 208(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: std %f10, 200(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: std %f11, 192(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: std %f12, 184(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: std %f13, 176(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: std %f14, 168(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: std %f15, 160(%r15) # 8-byte Spill |
| ; VECTOR-NEXT: .cfi_offset %f8, -168 |
| ; VECTOR-NEXT: .cfi_offset %f9, -176 |
| ; VECTOR-NEXT: .cfi_offset %f10, -184 |
| ; VECTOR-NEXT: .cfi_offset %f11, -192 |
| ; VECTOR-NEXT: .cfi_offset %f12, -200 |
| ; VECTOR-NEXT: .cfi_offset %f13, -208 |
| ; VECTOR-NEXT: .cfi_offset %f14, -216 |
| ; VECTOR-NEXT: .cfi_offset %f15, -224 |
| ; VECTOR-NEXT: vlreph %v0, 8(%r2) |
| ; VECTOR-NEXT: vlreph %v8, 6(%r2) |
| ; VECTOR-NEXT: vlreph %v9, 4(%r2) |
| ; VECTOR-NEXT: vlreph %v10, 2(%r2) |
| ; VECTOR-NEXT: lgr %r13, %r3 |
| ; VECTOR-NEXT: vlreph %v11, 0(%r2) |
| ; VECTOR-NEXT: vlreph %v12, 14(%r2) |
| ; VECTOR-NEXT: vlreph %v13, 12(%r2) |
| ; VECTOR-NEXT: vlreph %v14, 10(%r2) |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f15, %f0 |
| ; VECTOR-NEXT: ldr %f0, %f11 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: sebr %f0, %f15 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: ldr %f11, %f0 |
| ; VECTOR-NEXT: ldr %f0, %f14 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f14, %f0 |
| ; VECTOR-NEXT: ldr %f0, %f10 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: sebr %f0, %f14 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: ldr %f10, %f0 |
| ; VECTOR-NEXT: ldr %f0, %f13 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f13, %f0 |
| ; VECTOR-NEXT: ldr %f0, %f9 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: sebr %f0, %f13 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: ldr %f9, %f0 |
| ; VECTOR-NEXT: ldr %f0, %f12 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: ldr %f12, %f0 |
| ; VECTOR-NEXT: ldr %f0, %f8 |
| ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT |
| ; VECTOR-NEXT: sebr %f0, %f12 |
| ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT |
| ; VECTOR-NEXT: vsteh %v9, 4(%r13), 0 |
| ; VECTOR-NEXT: vsteh %v10, 2(%r13), 0 |
| ; VECTOR-NEXT: vsteh %v11, 0(%r13), 0 |
| ; VECTOR-NEXT: ld %f8, 216(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: ld %f9, 208(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: ld %f10, 200(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: ld %f11, 192(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: ld %f12, 184(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: ld %f13, 176(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: ld %f14, 168(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: ld %f15, 160(%r15) # 8-byte Reload |
| ; VECTOR-NEXT: vsteh %v0, 6(%r13), 0 |
| ; VECTOR-NEXT: lmg %r13, %r15, 328(%r15) |
| ; VECTOR-NEXT: br %r14 |
| ; First operand (minuend): the <4 x half> vector stored at %Src itself. |
| %LHS = load %Ty1, ptr %Src |
| ; Index 1 of %Ty1 addresses the next whole vector; the expected code above |
| ; reads its lanes from byte offsets 8..14 of %Src. |
| %S2 = getelementptr %Ty1, ptr %Src, i32 1 |
| %RHS = load %Ty1, ptr %S2 |
| %Res = fsub %Ty1 %LHS, %RHS |
| store %Ty1 %Res, ptr %Dst |
| ret void |
| } |