; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s

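; Check that when a function has both scalable-vector locals and a VLA, the
; scalable objects are addressed relative to the frame pointer s0 (at offsets
; scaled by vlenb), while the VLA is allocated by moving sp and accessed
; through the pointer the alloca returns.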
define void @rvv_vla(i64 %n, i64 %i) nounwind {
; CHECK-LABEL: rvv_vla:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 32
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    addi a0, a0, 15
; CHECK-NEXT:    andi a0, a0, -16
; CHECK-NEXT:    sub a0, sp, a0
; CHECK-NEXT:    mv sp, a0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    sub a2, s0, a2
; CHECK-NEXT:    addi a2, a2, -32
; CHECK-NEXT:    vl1re64.v v8, (a2)
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    sub a2, s0, a2
; CHECK-NEXT:    addi a2, a2, -32
; CHECK-NEXT:    vl2re64.v v8, (a2)
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    lw zero, 0(a0)
; CHECK-NEXT:    addi sp, s0, -32
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
  %vla.addr = alloca i32, i64 %n

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, ptr %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, ptr %v2.addr

  %p = getelementptr i32, ptr %vla.addr, i64 %i
  %s = load volatile i32, ptr %p
  ret void
}

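; Check that an overaligned (align 64) local forces stack realignment
; (andi sp, sp, -64); with no VLA present, both the scalable objects and the
; overaligned slot are then addressed off the realigned sp.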
define void @rvv_overaligned() nounwind {
; CHECK-LABEL: rvv_overaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -128
; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 128
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a1, a0, 1
; CHECK-NEXT:    add a0, a1, a0
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 112
; CHECK-NEXT:    vl1re64.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 112
; CHECK-NEXT:    vl2re64.v v8, (a0)
; CHECK-NEXT:    lw zero, 64(sp)
; CHECK-NEXT:    addi sp, s0, -128
; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 128
; CHECK-NEXT:    ret
  %overaligned = alloca i32, align 64

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, ptr %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, ptr %v2.addr

  %s = load volatile i32, ptr %overaligned, align 64
  ret void
}

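; Check that when realignment and a VLA are both needed, a base pointer
; (s1, captured from sp right after realignment) is used to address the
; scalable and overaligned objects, since sp moves again for the VLA.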
define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind {
; CHECK-LABEL: rvv_vla_and_overaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -144
; CHECK-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 144
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    mv s1, sp
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    addi a0, a0, 15
; CHECK-NEXT:    andi a0, a0, -16
; CHECK-NEXT:    sub a0, sp, a0
; CHECK-NEXT:    mv sp, a0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a3, a2, 1
; CHECK-NEXT:    add a2, a3, a2
; CHECK-NEXT:    add a2, s1, a2
; CHECK-NEXT:    addi a2, a2, 112
; CHECK-NEXT:    vl1re64.v v8, (a2)
; CHECK-NEXT:    addi a2, s1, 112
; CHECK-NEXT:    vl2re64.v v8, (a2)
; CHECK-NEXT:    lw zero, 64(s1)
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    lw zero, 0(a0)
; CHECK-NEXT:    addi sp, s0, -144
; CHECK-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 144
; CHECK-NEXT:    ret
  %overaligned = alloca i32, align 64
  %vla.addr = alloca i32, i64 %n

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, ptr %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, ptr %v2.addr

  %s1 = load volatile i32, ptr %overaligned, align 64
  %p = getelementptr i32, ptr %vla.addr, i64 %i
  %s2 = load volatile i32, ptr %p
  ret void
}