| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mattr=+sve -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s |
| target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" |
| target triple = "aarch64-unknown-linux-gnu" |
| |
; Splat of a single lane taken from a 512-bit fixed-length vector.
;
; Loads <16 x i32> from %arg1 and from %arg2, builds a <4 x i32> whose four
; lanes all use shuffle index 14, doubles the %arg1 vector in place, and
; returns the splat. Index 14 is below 16, so every result lane comes from the
; %arg1 load; the %arg2 load supplies no lanes.
;
; The llc invocation at the top of the file sets a 256-bit minimum SVE vector
; length, and the expected output below handles the 512-bit value as two
; 8 x i32 halves (ptrue ... vl8, with the upper half addressed at element
; offset 8). Lane 14 is lane 6 of the upper half: the expected `ext ... #16`
; moves it down by 16 bytes (four i32 lanes) to lane 2, which
; `dup v0.4s, v0.s[2]` then broadcasts into the 128-bit return register.
;
; NOTE(review): presumably a regression test for this lowering path; the
; motivating bug is not recorded in this file -- confirm against history.
| define <4 x i32> @test(<16 x i32>* %arg1, <16 x i32>* %arg2) { |
| ; CHECK-LABEL: test: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov x8, #8 |
| ; CHECK-NEXT: ptrue p0.s, vl8 |
| ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] |
| ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0] |
| ; CHECK-NEXT: mov z0.d, z1.d |
| ; CHECK-NEXT: add z2.s, p0/m, z2.s, z2.s |
| ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #16 |
| ; CHECK-NEXT: add z1.s, p0/m, z1.s, z1.s |
| ; CHECK-NEXT: dup v0.4s, v0.s[2] |
| ; CHECK-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] |
| ; CHECK-NEXT: st1w { z2.s }, p0, [x0] |
| ; CHECK-NEXT: ret |
| entry: |
; Both 16 x i32 (64-byte) vectors are loaded with a declared 256-byte
; alignment.
| %0 = load <16 x i32>, <16 x i32>* %arg1, align 256 |
| %1 = load <16 x i32>, <16 x i32>* %arg2, align 256 |
; Mask indices 0-15 name lanes of %0 and 16-31 name lanes of %1, so an
; all-14 mask broadcasts lane 14 of %0 and never reads a lane of %1.
| %shvec = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 14, i32 14, i32 14, i32 14> |
; Double the first vector and write it back over the original at %arg1; this
; keeps the full 512-bit value live alongside the lane extraction.
| %2 = add <16 x i32> %0, %0 |
| store <16 x i32> %2, <16 x i32>* %arg1, align 256 |
| ret <4 x i32> %shvec |
| } |