; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck %s

; This showcases a miscompile that was fixed in #83107:
; - The memset will be type-legalized to a 512-bit store + 2 x 128-bit stores.
; - The load and store of q alias the upper 128-bit store into p.
; - The aliasing 128-bit store will sit on the chain between the scalar load
;   and the scalar store:
;
;   t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ...
;   t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ...
;
;   t44: i64,ch = load<(load (s32) from %ir.q), sext from i32> t0, ...
;   t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ...
;   t46: ch = store<(store (s32) into %ir.q), trunc to i32> t50, ...
;
; Previously, the scalar load/store pair that preserves the bytes of q across
; the memset was incorrectly combined away, leaving q zeroed by the memset:
;
;   t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ...
;   t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ...
;
;   // MISSING
;   t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ...
;   // MISSING
;
; - We need to compile with an exact VLEN so that we select an ISD::STORE node,
;   which triggers the combine.
; - The miscompile doesn't happen if we use separate GEPs for the load and the
;   store, as the combine needs the stores to share the same MachinePointerInfo
;   (see the commented sketch below).
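;
; A rough sketch (hypothetical, commented out, not compiled as part of this
; test) of the separate-GEP variant that does not trigger the combine, since
; the accesses of q then go through distinct pointer values and the stores no
; longer share the same MachinePointerInfo:
;
;   define void @aliasing_separate_geps(ptr %p) {
;     %q.load = getelementptr inbounds i8, ptr %p, i64 84
;     %tmp = load i32, ptr %q.load
;     tail call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 96, i1 false)
;     %q.store = getelementptr inbounds i8, ptr %p, i64 84
;     store i32 %tmp, ptr %q.store
;     ret void
;   }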
define void @aliasing(ptr %p) {
; CHECK-LABEL: aliasing:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 84(a0)
; CHECK-NEXT:    addi a2, a0, 80
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vs1r.v v8, (a2)
; CHECK-NEXT:    addi a2, a0, 64
; CHECK-NEXT:    vs1r.v v8, (a2)
; CHECK-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vs4r.v v8, (a0)
; CHECK-NEXT:    sw a1, 84(a0)
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i64 84
  %tmp = load i32, ptr %q
  tail call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 96, i1 false)
  store i32 %tmp, ptr %q
  ret void
}