| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b | FileCheck %s --check-prefixes=CHECK,V |
| ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH |
| |
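; Test merging of adjacent scalar and vector loads and stores on rv64,
; in particular when a call separates the loads from the stores and the
; loaded values must survive the call in callee-saved registers.
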
declare void @g()
declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
| |
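; The two copies that do not cross the call are merged into vle64/vse64
; pairs; the i64 pair that does cross the call is instead kept in the
; callee-saved registers s3/s4, avoiding a vector spill.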
| define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) { |
| ; CHECK-LABEL: f: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -48 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 48 |
| ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s4, 0(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: .cfi_offset s2, -32 |
| ; CHECK-NEXT: .cfi_offset s3, -40 |
| ; CHECK-NEXT: .cfi_offset s4, -48 |
| ; CHECK-NEXT: mv s0, a5 |
| ; CHECK-NEXT: mv s1, a4 |
| ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma |
| ; CHECK-NEXT: vle64.v v8, (a0) |
| ; CHECK-NEXT: vse64.v v8, (a1) |
| ; CHECK-NEXT: ld s3, 0(a2) |
| ; CHECK-NEXT: ld s4, 8(a2) |
| ; CHECK-NEXT: mv s2, a3 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sd s3, 0(s2) |
| ; CHECK-NEXT: sd s4, 8(s2) |
| ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma |
| ; CHECK-NEXT: vle64.v v8, (s1) |
| ; CHECK-NEXT: vse64.v v8, (s0) |
| ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s4, 0(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: .cfi_restore s2 |
| ; CHECK-NEXT: .cfi_restore s3 |
| ; CHECK-NEXT: .cfi_restore s4 |
| ; CHECK-NEXT: addi sp, sp, 48 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %z0 = load i64, ptr %m |
| %m.1 = getelementptr i64, ptr %m, i64 1 |
| %z1 = load i64, ptr %m.1 |
| store i64 %z0, ptr %n |
| %n.1 = getelementptr i64, ptr %n, i64 1 |
| store i64 %z1, ptr %n.1 |
| |
| %x0 = load i64, ptr %p |
| %p.1 = getelementptr i64, ptr %p, i64 1 |
| %x1 = load i64, ptr %p.1 |
| call void @g() |
| store i64 %x0, ptr %q |
| %q.1 = getelementptr i64, ptr %q, i64 1 |
| store i64 %x1, ptr %q.1 |
| |
| %y0 = load i64, ptr %r |
| %r.1 = getelementptr i64, ptr %r, i64 1 |
| %y1 = load i64, ptr %r.1 |
| store i64 %y0, ptr %s |
| %s.1 = getelementptr i64, ptr %s, i64 1 |
| store i64 %y1, ptr %s.1 |
| |
| ret void |
| } |
| |
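; The constrained fptoui does not access the memory being copied, so it
; does not block merging the loads and stores around it into a single
; vle64/vse64 pair.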
define void @f1(ptr %p, ptr %q, double %t) strictfp {
| ; CHECK-LABEL: f1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma |
| ; CHECK-NEXT: vle64.v v8, (a0) |
| ; CHECK-NEXT: fcvt.wu.d a0, fa0, rtz |
| ; CHECK-NEXT: vse64.v v8, (a1) |
| ; CHECK-NEXT: ret |
| %x0 = load i64, ptr %p |
| %p.1 = getelementptr i64, ptr %p, i64 1 |
| %x1 = load i64, ptr %p.1 |
%t1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %t, metadata !"fpexcept.strict") strictfp
| store i64 %x0, ptr %q |
| %q.1 = getelementptr i64, ptr %q, i64 1 |
| store i64 %x1, ptr %q.1 |
| ret void |
| } |
| |
; Merging scalars is profitable: it reduces register pressure within a
; single register class.
| define void @i8_i16(ptr %p, ptr %q) { |
| ; CHECK-LABEL: i8_i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: lh s1, 0(a0) |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sh s1, 0(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 1 |
| %x0 = load i8, ptr %p0, align 2 |
| %x1 = load i8, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 1 |
| store i8 %x0, ptr %q0, align 2 |
| store i8 %x1, ptr %q1 |
| ret void |
| } |
| |
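; The bytes are stored in swapped order, so the pair cannot be merged
; into a single i16 copy; unlike the i32 rotate case below, no
; rotate-based merge is formed and the bytes travel in separate
; callee-saved registers.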
| define void @i8_i16_rotate(ptr %p, ptr %q) { |
| ; CHECK-LABEL: i8_i16_rotate: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: .cfi_offset s2, -32 |
| ; CHECK-NEXT: lbu s1, 0(a0) |
| ; CHECK-NEXT: lbu s2, 1(a0) |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sb s2, 0(s0) |
| ; CHECK-NEXT: sb s1, 1(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: .cfi_restore s2 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 1 |
| %x0 = load i8, ptr %p0, align 2 |
| %x1 = load i8, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 1 |
| store i8 %x1, ptr %q0, align 2 |
| store i8 %x0, ptr %q1 |
| ret void |
| } |
| |
; We could reorder the first (readnone) call and the second load here to
; enable merging, but we don't currently do so.
| define void @i8_i16_resched_readnone_ld(ptr %p, ptr %q) { |
| ; CHECK-LABEL: i8_i16_resched_readnone_ld: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: .cfi_offset s2, -32 |
| ; CHECK-NEXT: mv s0, a0 |
| ; CHECK-NEXT: lbu s2, 0(a0) |
| ; CHECK-NEXT: mv s1, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: lbu s0, 1(s0) |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sb s2, 0(s1) |
| ; CHECK-NEXT: sb s0, 1(s1) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: .cfi_restore s2 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 1 |
| %x0 = load i8, ptr %p0, align 2 |
| call void @g() readnone |
| %x1 = load i8, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 1 |
| store i8 %x0, ptr %q0, align 2 |
| store i8 %x1, ptr %q1 |
| ret void |
| } |
| |
; We could reorder the second (readnone) call and the first store here
; to enable merging, but we don't currently do so.
| define void @i8_i16_resched_readnone_st(ptr %p, ptr %q) { |
| ; CHECK-LABEL: i8_i16_resched_readnone_st: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: .cfi_offset s2, -32 |
| ; CHECK-NEXT: lbu s1, 0(a0) |
| ; CHECK-NEXT: lbu s2, 1(a0) |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sb s1, 0(s0) |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sb s2, 1(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: .cfi_restore s2 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 1 |
| %x0 = load i8, ptr %p0, align 2 |
| %x1 = load i8, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| store i8 %x0, ptr %q0, align 2 |
| call void @g() readnone |
| %q1 = getelementptr i8, ptr %q, i64 1 |
| store i8 %x1, ptr %q1 |
| ret void |
| } |
| |
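; Two contiguous i32 accesses with 8-byte alignment are merged into a
; single i64 ld/sd, needing only one callee-saved register.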
| define void @i32_i64(ptr %p, ptr %q) { |
| ; CHECK-LABEL: i32_i64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: ld s1, 0(a0) |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sd s1, 0(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 4 |
| %x0 = load i32, ptr %p0, align 8 |
| %x1 = load i32, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 4 |
| store i32 %x0, ptr %q0, align 8 |
| store i32 %x1, ptr %q1 |
| ret void |
| } |
| |
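; Even with the stores swapped, the i32 pair is merged: a single i64
; load feeds a rori by 32 (Zbb, implied by +b) and the rotated value is
; carried across the call in one callee-saved register.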
| define void @i32_i64_rotate(ptr %p, ptr %q) { |
| ; CHECK-LABEL: i32_i64_rotate: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: ld a0, 0(a0) |
| ; CHECK-NEXT: rori s1, a0, 32 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sd s1, 0(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 4 |
| %x0 = load i32, ptr %p0, align 8 |
| %x1 = load i32, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 4 |
| store i32 %x1, ptr %q0, align 8 |
| store i32 %x0, ptr %q1 |
| ret void |
| } |
| |
; Merging vectors is profitable: it reduces register pressure within a
; single register class.
| define void @v2i8_v4i8(ptr %p, ptr %q) { |
| ; CHECK-LABEL: v2i8_v4i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: csrr a2, vlenb |
| ; CHECK-NEXT: sub sp, sp, a2 |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb |
| ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma |
| ; CHECK-NEXT: vle8.v v8, (a0) |
| ; CHECK-NEXT: addi a0, sp, 16 |
| ; CHECK-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: addi a0, sp, 16 |
| ; CHECK-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload |
| ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma |
| ; CHECK-NEXT: vse8.v v8, (s0) |
| ; CHECK-NEXT: csrr a0, vlenb |
| ; CHECK-NEXT: add sp, sp, a0 |
| ; CHECK-NEXT: .cfi_def_cfa sp, 32 |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 2 |
| %x0 = load <2 x i8>, ptr %p0, align 2 |
| %x1 = load <2 x i8>, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 2 |
| store <2 x i8> %x0, ptr %q0, align 2 |
| store <2 x i8> %x1, ptr %q1 |
| ret void |
| } |
| |
; Merging two 16 x i8 into one 32 x i8 (on zvl128b) requires the same
; number of registers to be spilled, but can be done with fewer
; instructions.
| define void @v16i8_v32i8(ptr %p, ptr %q) { |
| ; CHECK-LABEL: v16i8_v32i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -64 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 64 |
| ; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: csrr a2, vlenb |
| ; CHECK-NEXT: slli a2, a2, 1 |
| ; CHECK-NEXT: sub sp, sp, a2 |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb |
| ; CHECK-NEXT: li s1, 32 |
| ; CHECK-NEXT: vsetvli zero, s1, e8, m2, ta, ma |
| ; CHECK-NEXT: vle8.v v8, (a0) |
| ; CHECK-NEXT: addi a0, sp, 32 |
| ; CHECK-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: addi a0, sp, 32 |
| ; CHECK-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload |
| ; CHECK-NEXT: vsetvli zero, s1, e8, m2, ta, ma |
| ; CHECK-NEXT: vse8.v v8, (s0) |
| ; CHECK-NEXT: csrr a0, vlenb |
| ; CHECK-NEXT: sh1add sp, a0, sp |
| ; CHECK-NEXT: .cfi_def_cfa sp, 64 |
| ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: addi sp, sp, 64 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 16 |
| %x0 = load <16 x i8>, ptr %p0, align 2 |
| %x1 = load <16 x i8>, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 16 |
| store <16 x i8> %x0, ptr %q0, align 2 |
| store <16 x i8> %x1, ptr %q1 |
| ret void |
| } |
| |
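; Two contiguous halves with 4-byte alignment are merged into a single
; i32 lw/sw and carried in one callee-saved GPR, without touching the
; FP register file.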
| define void @two_half(ptr %p, ptr %q) { |
| ; CHECK-LABEL: two_half: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: lw s1, 0(a0) |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sw s1, 0(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 2 |
| %x0 = load half, ptr %p0, align 4 |
| %x1 = load half, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 2 |
| store half %x0, ptr %q0, align 4 |
| store half %x1, ptr %q1 |
| ret void |
| } |
| |
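; Without the 4-byte alignment the halves are not merged. Without zvfh
; the halves are loaded as integers (lh/sh) into callee-saved GPRs; with
; zvfh they are kept in FP callee-saved registers via flh/fsh.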
| define void @two_half_unaligned(ptr %p, ptr %q) { |
| ; V-LABEL: two_half_unaligned: |
| ; V: # %bb.0: |
| ; V-NEXT: addi sp, sp, -32 |
| ; V-NEXT: .cfi_def_cfa_offset 32 |
| ; V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill |
| ; V-NEXT: .cfi_offset ra, -8 |
| ; V-NEXT: .cfi_offset s0, -16 |
| ; V-NEXT: .cfi_offset s1, -24 |
| ; V-NEXT: .cfi_offset s2, -32 |
| ; V-NEXT: lh s1, 0(a0) |
| ; V-NEXT: lh s2, 2(a0) |
| ; V-NEXT: mv s0, a1 |
| ; V-NEXT: call g |
| ; V-NEXT: sh s1, 0(s0) |
| ; V-NEXT: sh s2, 2(s0) |
| ; V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload |
| ; V-NEXT: .cfi_restore ra |
| ; V-NEXT: .cfi_restore s0 |
| ; V-NEXT: .cfi_restore s1 |
| ; V-NEXT: .cfi_restore s2 |
| ; V-NEXT: addi sp, sp, 32 |
| ; V-NEXT: .cfi_def_cfa_offset 0 |
| ; V-NEXT: ret |
| ; |
| ; ZVFH-LABEL: two_half_unaligned: |
| ; ZVFH: # %bb.0: |
| ; ZVFH-NEXT: addi sp, sp, -32 |
| ; ZVFH-NEXT: .cfi_def_cfa_offset 32 |
| ; ZVFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; ZVFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; ZVFH-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill |
| ; ZVFH-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill |
| ; ZVFH-NEXT: .cfi_offset ra, -8 |
| ; ZVFH-NEXT: .cfi_offset s0, -16 |
| ; ZVFH-NEXT: .cfi_offset fs0, -24 |
| ; ZVFH-NEXT: .cfi_offset fs1, -32 |
| ; ZVFH-NEXT: flh fs0, 0(a0) |
| ; ZVFH-NEXT: flh fs1, 2(a0) |
| ; ZVFH-NEXT: mv s0, a1 |
| ; ZVFH-NEXT: call g |
| ; ZVFH-NEXT: fsh fs0, 0(s0) |
| ; ZVFH-NEXT: fsh fs1, 2(s0) |
| ; ZVFH-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; ZVFH-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; ZVFH-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload |
| ; ZVFH-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload |
| ; ZVFH-NEXT: .cfi_restore ra |
| ; ZVFH-NEXT: .cfi_restore s0 |
| ; ZVFH-NEXT: .cfi_restore fs0 |
| ; ZVFH-NEXT: .cfi_restore fs1 |
| ; ZVFH-NEXT: addi sp, sp, 32 |
| ; ZVFH-NEXT: .cfi_def_cfa_offset 0 |
| ; ZVFH-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 2 |
| %x0 = load half, ptr %p0 |
| %x1 = load half, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 2 |
| store half %x0, ptr %q0 |
| store half %x1, ptr %q1 |
| ret void |
| } |
| |
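; Two contiguous floats with 8-byte alignment are merged into a single
; i64 ld/sd.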
| define void @two_float(ptr %p, ptr %q) { |
| ; CHECK-LABEL: two_float: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: ld s1, 0(a0) |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sd s1, 0(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 4 |
| %x0 = load float, ptr %p0, align 8 |
| %x1 = load float, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 4 |
| store float %x0, ptr %q0, align 8 |
| store float %x1, ptr %q1 |
| ret void |
| } |
| |
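; Without the 8-byte alignment the floats are not merged and live across
; the call in the FP callee-saved registers fs0/fs1.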
| define void @two_float_unaligned(ptr %p, ptr %q) { |
| ; CHECK-LABEL: two_float_unaligned: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset fs0, -24 |
| ; CHECK-NEXT: .cfi_offset fs1, -32 |
| ; CHECK-NEXT: flw fs0, 0(a0) |
| ; CHECK-NEXT: flw fs1, 4(a0) |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: fsw fs0, 0(s0) |
| ; CHECK-NEXT: fsw fs1, 4(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore fs0 |
| ; CHECK-NEXT: .cfi_restore fs1 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 4 |
| %x0 = load float, ptr %p0 |
| %x1 = load float, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 4 |
| store float %x0, ptr %q0 |
| store float %x1, ptr %q1 |
| ret void |
| } |
| |
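; As in i32_i64_rotate, the swapped float pair is merged into a single
; i64 load plus a rori by 32.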
| define void @two_float_rotate(ptr %p, ptr %q) { |
| ; CHECK-LABEL: two_float_rotate: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset s1, -24 |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: ld a0, 0(a0) |
| ; CHECK-NEXT: rori s1, a0, 32 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: sd s1, 0(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore s1 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 4 |
| %x0 = load float, ptr %p0, align 8 |
| %x1 = load float, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 4 |
| store float %x1, ptr %q0, align 8 |
| store float %x0, ptr %q1 |
| ret void |
| } |
| |
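; The two doubles are not merged into a wider access; each is kept in an
; FP callee-saved register across the call.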
| define void @two_double(ptr %p, ptr %q) { |
| ; CHECK-LABEL: two_double: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: addi sp, sp, -32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset ra, -8 |
| ; CHECK-NEXT: .cfi_offset s0, -16 |
| ; CHECK-NEXT: .cfi_offset fs0, -24 |
| ; CHECK-NEXT: .cfi_offset fs1, -32 |
| ; CHECK-NEXT: fld fs0, 0(a0) |
| ; CHECK-NEXT: fld fs1, 8(a0) |
| ; CHECK-NEXT: mv s0, a1 |
| ; CHECK-NEXT: call g |
| ; CHECK-NEXT: fsd fs0, 0(s0) |
| ; CHECK-NEXT: fsd fs1, 8(s0) |
| ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_restore ra |
| ; CHECK-NEXT: .cfi_restore s0 |
| ; CHECK-NEXT: .cfi_restore fs0 |
| ; CHECK-NEXT: .cfi_restore fs1 |
| ; CHECK-NEXT: addi sp, sp, 32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| %p0 = getelementptr i8, ptr %p, i64 0 |
| %p1 = getelementptr i8, ptr %p, i64 8 |
| %x0 = load double, ptr %p0 |
| %x1 = load double, ptr %p1 |
| call void @g() |
| %q0 = getelementptr i8, ptr %q, i64 0 |
| %q1 = getelementptr i8, ptr %q, i64 8 |
| store double %x0, ptr %q0 |
| store double %x1, ptr %q1 |
| ret void |
| } |