| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTREAMING |
| ; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT |
| ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT |
| |
| target triple = "aarch64-unknown-linux-gnu" |
| |
| ; Extracts lane 3 of a <vscale x 4 x i32> and stores the i32 to %a. |
| ; Every llc invocation is expected to emit an indexed `mov z0.s, z0.s[3]` |
| ; followed by `str s0, [x0]`. |
| define void @test_str_lane_s32(ptr %a, <vscale x 4 x i32> %b) { |
| ; CHECK-LABEL: test_str_lane_s32: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.s, z0.s[3] |
| ; CHECK-NEXT: str s0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x i32> %b, i32 3 |
| store i32 %0, ptr %a, align 4 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 4 x i32> and stores the i32 to %a. |
| ; The expected output is a single `str s0, [x0]` with no preceding move. |
| define void @test_str_lane0_s32(ptr %a, <vscale x 4 x i32> %b) { |
| ; CHECK-LABEL: test_str_lane0_s32: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str s0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x i32> %b, i32 0 |
| store i32 %0, ptr %a, align 4 |
| ret void |
| } |
| |
| ; Extracts lane 1 of a <vscale x 2 x i64> and stores the i64 to %a. |
| ; Every llc invocation is expected to emit an indexed `mov z0.d, z0.d[1]` |
| ; followed by `str d0, [x0]`. |
| define void @test_str_lane_s64(ptr %a, <vscale x 2 x i64> %b) { |
| ; CHECK-LABEL: test_str_lane_s64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.d, z0.d[1] |
| ; CHECK-NEXT: str d0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 2 x i64> %b, i32 1 |
| store i64 %0, ptr %a, align 8 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 2 x i64> and stores the i64 to %a. |
| ; The expected output is a single `str d0, [x0]` with no preceding move. |
| define void @test_str_lane0_s64(ptr %a, <vscale x 2 x i64> %b) { |
| ; CHECK-LABEL: test_str_lane0_s64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str d0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 2 x i64> %b, i32 0 |
| store i64 %0, ptr %a, align 8 |
| ret void |
| } |
| |
| ; Floating-point variant: extracts lane 3 of a <vscale x 4 x float> and |
| ; stores the float to %a. Expected output is `mov z0.s, z0.s[3]` followed |
| ; by `str s0, [x0]`. |
| define void @test_str_lane_f32(ptr %a, <vscale x 4 x float> %b) { |
| ; CHECK-LABEL: test_str_lane_f32: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.s, z0.s[3] |
| ; CHECK-NEXT: str s0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x float> %b, i32 3 |
| store float %0, ptr %a, align 4 |
| ret void |
| } |
| |
| ; Floating-point variant: extracts lane 0 of a <vscale x 4 x float> and |
| ; stores the float to %a. Expected output is a single `str s0, [x0]`. |
| define void @test_str_lane0_f32(ptr %a, <vscale x 4 x float> %b) { |
| ; CHECK-LABEL: test_str_lane0_f32: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str s0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x float> %b, i32 0 |
| store float %0, ptr %a, align 4 |
| ret void |
| } |
| |
| ; Floating-point variant: extracts lane 1 of a <vscale x 2 x double> and |
| ; stores the double to %a. Expected output is `mov z0.d, z0.d[1]` followed |
| ; by `str d0, [x0]`. |
| define void @test_str_lane_f64(ptr %a, <vscale x 2 x double> %b) { |
| ; CHECK-LABEL: test_str_lane_f64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.d, z0.d[1] |
| ; CHECK-NEXT: str d0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 2 x double> %b, i32 1 |
| store double %0, ptr %a, align 8 |
| ret void |
| } |
| |
| ; Floating-point variant: extracts lane 0 of a <vscale x 2 x double> and |
| ; stores the double to %a. Expected output is a single `str d0, [x0]`. |
| define void @test_str_lane0_f64(ptr %a, <vscale x 2 x double> %b) { |
| ; CHECK-LABEL: test_str_lane0_f64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str d0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 2 x double> %b, i32 0 |
| store double %0, ptr %a, align 8 |
| ret void |
| } |
| |
| ; Extracts lane 7 of a <vscale x 16 x i8> and stores the i8 to %a. |
| ; The expected output differs per configuration: the plain +sve run expects |
| ; `umov w8, v0.b[7]`, while the -force-streaming and |
| ; -force-streaming-compatible runs expect `mov z0.b, z0.b[7]` plus |
| ; `fmov w8, s0`. Both variants finish with `strb w8, [x0]`. |
| define void @test_str_lane_s8(ptr %a, <vscale x 16 x i8> %b) { |
| ; CHECK-NONSTREAMING-LABEL: test_str_lane_s8: |
| ; CHECK-NONSTREAMING: // %bb.0: // %entry |
| ; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7] |
| ; CHECK-NONSTREAMING-NEXT: strb w8, [x0] |
| ; CHECK-NONSTREAMING-NEXT: ret |
| ; |
| ; STREAMING-COMPAT-LABEL: test_str_lane_s8: |
| ; STREAMING-COMPAT: // %bb.0: // %entry |
| ; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7] |
| ; STREAMING-COMPAT-NEXT: fmov w8, s0 |
| ; STREAMING-COMPAT-NEXT: strb w8, [x0] |
| ; STREAMING-COMPAT-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 16 x i8> %b, i32 7 |
| store i8 %0, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 16 x i8> and stores the i8 to %a. |
| ; All llc invocations share one expectation: `fmov w8, s0` followed by |
| ; `strb w8, [x0]`. |
| define void @test_str_lane0_s8(ptr %a, <vscale x 16 x i8> %b) { |
| ; CHECK-LABEL: test_str_lane0_s8: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: strb w8, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 16 x i8> %b, i32 0 |
| store i8 %0, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Extracts lane 3 of a <vscale x 8 x i16> and stores the i16 to %a. |
| ; Expected output is an indexed `mov z0.h, z0.h[3]` followed by |
| ; `str h0, [x0]`. |
| define void @test_str_lane_s16(ptr %a, <vscale x 8 x i16> %b) { |
| ; CHECK-LABEL: test_str_lane_s16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.h, z0.h[3] |
| ; CHECK-NEXT: str h0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 8 x i16> %b, i32 3 |
| store i16 %0, ptr %a, align 2 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 8 x i16> and stores the i16 to %a. |
| ; Expected output is a single `str h0, [x0]` with no preceding move. |
| define void @test_str_lane0_s16(ptr %a, <vscale x 8 x i16> %b) { |
| ; CHECK-LABEL: test_str_lane0_s16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str h0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 8 x i16> %b, i32 0 |
| store i16 %0, ptr %a, align 2 |
| ret void |
| } |
| |
| ; Calls the llvm.aarch64.sve.uaddv.nxv4i32 intrinsic (i64 result), truncates |
| ; the result to i32 and stores it to %ptr. Expected output is |
| ; `uaddv d0, p0, z0.s` followed by `str s0, [x0]`. |
| define void @test_str_reduction_i32_to_i32(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) { |
| ; CHECK-LABEL: test_str_reduction_i32_to_i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uaddv d0, p0, z0.s |
| ; CHECK-NEXT: str s0, [x0] |
| ; CHECK-NEXT: ret |
| |
| %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) |
| %trunc = trunc i64 %reduce to i32 |
| store i32 %trunc, ptr %ptr, align 4 |
| ret void |
| } |
| |
| ; Calls the llvm.aarch64.sve.uaddv.nxv4i32 intrinsic and stores its full i64 |
| ; result to %ptr without truncation. Expected output is |
| ; `uaddv d0, p0, z0.s` followed by `str d0, [x0]`. |
| define void @test_str_reduction_i32_to_i64(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) { |
| ; CHECK-LABEL: test_str_reduction_i32_to_i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uaddv d0, p0, z0.s |
| ; CHECK-NEXT: str d0, [x0] |
| ; CHECK-NEXT: ret |
| |
| %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) |
| store i64 %reduce, ptr %ptr, align 8 |
| ret void |
| } |
| |
| ; Calls the llvm.aarch64.sve.uaddv.nxv4i32 intrinsic (i64 result), truncates |
| ; the result to i16 and stores it to %ptr. Expected output is |
| ; `uaddv d0, p0, z0.s` followed by `str h0, [x0]`. |
| define void @test_str_reduction_i32_to_i16(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) { |
| ; CHECK-LABEL: test_str_reduction_i32_to_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uaddv d0, p0, z0.s |
| ; CHECK-NEXT: str h0, [x0] |
| ; CHECK-NEXT: ret |
| |
| %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) |
| %trunc = trunc i64 %reduce to i16 |
| store i16 %trunc, ptr %ptr, align 2 |
| ret void |
| } |
| |
| ; Same reduction-then-truncate-to-i32 pattern, but the store address is |
| ; %ptr minus 8 i32 elements (a -32 byte offset). Expected store is the |
| ; unscaled form `stur s0, [x0, #-32]`. |
| define void @test_str_reduction_i32_to_i32_negative_offset(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) { |
| ; CHECK-LABEL: test_str_reduction_i32_to_i32_negative_offset: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uaddv d0, p0, z0.s |
| ; CHECK-NEXT: stur s0, [x0, #-32] |
| ; CHECK-NEXT: ret |
| |
| %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) |
| %trunc = trunc i64 %reduce to i32 |
| %out_ptr = getelementptr inbounds i32, ptr %ptr, i64 -8 |
| store i32 %trunc, ptr %out_ptr, align 4 |
| ret void |
| } |
| |
| ; Stores the untruncated i64 reduction result at %ptr minus 8 i64 elements |
| ; (a -64 byte offset). Expected store is `stur d0, [x0, #-64]`. |
| define void @test_str_reduction_i32_to_i64_negative_offset(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) { |
| ; CHECK-LABEL: test_str_reduction_i32_to_i64_negative_offset: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uaddv d0, p0, z0.s |
| ; CHECK-NEXT: stur d0, [x0, #-64] |
| ; CHECK-NEXT: ret |
| |
| %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) |
| %out_ptr = getelementptr inbounds i64, ptr %ptr, i64 -8 |
| store i64 %reduce, ptr %out_ptr, align 8 |
| ret void |
| } |
| |
| ; Same reduction-then-truncate-to-i16 pattern, but the store address is |
| ; %ptr minus 8 i16 elements (a -16 byte offset). Expected store is |
| ; `stur h0, [x0, #-16]`. |
| define void @test_str_reduction_i32_to_i16_negative_offset(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) { |
| ; CHECK-LABEL: test_str_reduction_i32_to_i16_negative_offset: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uaddv d0, p0, z0.s |
| ; CHECK-NEXT: stur h0, [x0, #-16] |
| ; CHECK-NEXT: ret |
| |
| %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) |
| %trunc = trunc i64 %reduce to i16 |
| %out_ptr = getelementptr inbounds i16, ptr %ptr, i64 -8 |
| store i16 %trunc, ptr %out_ptr, align 2 |
| ret void |
| } |
| |
| ; Extracts lane 3 of a <vscale x 4 x i32> and stores it at %a minus 8 i32 |
| ; elements (a -32 byte offset). Expected output is `mov z0.s, z0.s[3]` |
| ; followed by `stur s0, [x0, #-32]`. |
| define void @test_str_lane_s32_negative_offset(ptr %a, <vscale x 4 x i32> %b) { |
| ; CHECK-LABEL: test_str_lane_s32_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.s, z0.s[3] |
| ; CHECK-NEXT: stur s0, [x0, #-32] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x i32> %b, i32 3 |
| %out_ptr = getelementptr inbounds i32, ptr %a, i64 -8 |
| store i32 %0, ptr %out_ptr, align 4 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 4 x i32> and stores it at %a minus 8 i32 |
| ; elements (a -32 byte offset). Expected output is a single |
| ; `stur s0, [x0, #-32]`. |
| define void @test_str_lane0_s32_negative_offset(ptr %a, <vscale x 4 x i32> %b) { |
| ; CHECK-LABEL: test_str_lane0_s32_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stur s0, [x0, #-32] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x i32> %b, i32 0 |
| %out_ptr = getelementptr inbounds i32, ptr %a, i64 -8 |
| store i32 %0, ptr %out_ptr, align 4 |
| ret void |
| } |
| |
| ; Extracts lane 1 of a <vscale x 2 x i64> and stores it at %a minus 8 i64 |
| ; elements (a -64 byte offset). Expected output is `mov z0.d, z0.d[1]` |
| ; followed by `stur d0, [x0, #-64]`. |
| define void @test_str_lane_s64_negative_offset(ptr %a, <vscale x 2 x i64> %b) { |
| ; CHECK-LABEL: test_str_lane_s64_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.d, z0.d[1] |
| ; CHECK-NEXT: stur d0, [x0, #-64] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 2 x i64> %b, i32 1 |
| %out_ptr = getelementptr inbounds i64, ptr %a, i64 -8 |
| store i64 %0, ptr %out_ptr, align 8 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 2 x i64> and stores it at %a minus 8 i64 |
| ; elements (a -64 byte offset). Expected output is a single |
| ; `stur d0, [x0, #-64]`. |
| define void @test_str_lane0_s64_negative_offset(ptr %a, <vscale x 2 x i64> %b) { |
| ; CHECK-LABEL: test_str_lane0_s64_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stur d0, [x0, #-64] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 2 x i64> %b, i32 0 |
| %out_ptr = getelementptr inbounds i64, ptr %a, i64 -8 |
| store i64 %0, ptr %out_ptr, align 8 |
| ret void |
| } |
| |
| ; Extracts lane 7 of a <vscale x 16 x i8> and stores it at %a minus 8 bytes. |
| ; The expected output differs per configuration: the plain +sve run expects |
| ; `umov w8, v0.b[7]`, while the -force-streaming and |
| ; -force-streaming-compatible runs expect `mov z0.b, z0.b[7]` plus |
| ; `fmov w8, s0`. Both variants finish with `sturb w8, [x0, #-8]`. |
| define void @test_str_lane_s8_negative_offset(ptr %a, <vscale x 16 x i8> %b) { |
| ; CHECK-NONSTREAMING-LABEL: test_str_lane_s8_negative_offset: |
| ; CHECK-NONSTREAMING: // %bb.0: // %entry |
| ; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7] |
| ; CHECK-NONSTREAMING-NEXT: sturb w8, [x0, #-8] |
| ; CHECK-NONSTREAMING-NEXT: ret |
| ; |
| ; STREAMING-COMPAT-LABEL: test_str_lane_s8_negative_offset: |
| ; STREAMING-COMPAT: // %bb.0: // %entry |
| ; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7] |
| ; STREAMING-COMPAT-NEXT: fmov w8, s0 |
| ; STREAMING-COMPAT-NEXT: sturb w8, [x0, #-8] |
| ; STREAMING-COMPAT-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 16 x i8> %b, i32 7 |
| %out_ptr = getelementptr inbounds i8, ptr %a, i64 -8 |
| store i8 %0, ptr %out_ptr, align 1 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 16 x i8> and stores it at %a minus 8 bytes. |
| ; All llc invocations share one expectation: `fmov w8, s0` followed by |
| ; `sturb w8, [x0, #-8]`. |
| define void @test_str_lane0_s8_negative_offset(ptr %a, <vscale x 16 x i8> %b) { |
| ; CHECK-LABEL: test_str_lane0_s8_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: sturb w8, [x0, #-8] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 16 x i8> %b, i32 0 |
| %out_ptr = getelementptr inbounds i8, ptr %a, i64 -8 |
| store i8 %0, ptr %out_ptr, align 1 |
| ret void |
| } |
| |
| ; Extracts lane 3 of a <vscale x 8 x i16> and stores it at %a minus 8 i16 |
| ; elements (a -16 byte offset). Expected output is `mov z0.h, z0.h[3]` |
| ; followed by `stur h0, [x0, #-16]`. |
| define void @test_str_lane_s16_negative_offset(ptr %a, <vscale x 8 x i16> %b) { |
| ; CHECK-LABEL: test_str_lane_s16_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.h, z0.h[3] |
| ; CHECK-NEXT: stur h0, [x0, #-16] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 8 x i16> %b, i32 3 |
| %out_ptr = getelementptr inbounds i16, ptr %a, i64 -8 |
| store i16 %0, ptr %out_ptr, align 2 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 8 x i16> and stores it at %a minus 8 i16 |
| ; elements (a -16 byte offset). Expected output is a single |
| ; `stur h0, [x0, #-16]`. |
| define void @test_str_lane0_s16_negative_offset(ptr %a, <vscale x 8 x i16> %b) { |
| ; CHECK-LABEL: test_str_lane0_s16_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stur h0, [x0, #-16] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 8 x i16> %b, i32 0 |
| %out_ptr = getelementptr inbounds i16, ptr %a, i64 -8 |
| store i16 %0, ptr %out_ptr, align 2 |
| ret void |
| } |
| |
| ; Extracts lane 3 of a <vscale x 4 x i32>, truncates the i32 to i16 and |
| ; stores it to %a. Expected output is `mov z0.s, z0.s[3]` followed by the |
| ; halfword store `str h0, [x0]`. |
| define void @test_str_trunc_lane_s32_to_s16(ptr %a, <vscale x 4 x i32> %b) { |
| ; CHECK-LABEL: test_str_trunc_lane_s32_to_s16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.s, z0.s[3] |
| ; CHECK-NEXT: str h0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x i32> %b, i32 3 |
| %trunc = trunc i32 %0 to i16 |
| store i16 %trunc, ptr %a, align 2 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 4 x i32>, truncates the i32 to i16 and |
| ; stores it to %a. Expected output is a single `str h0, [x0]`. |
| define void @test_str_trunc_lane0_s32_to_s16(ptr %a, <vscale x 4 x i32> %b) { |
| ; CHECK-LABEL: test_str_trunc_lane0_s32_to_s16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str h0, [x0] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x i32> %b, i32 0 |
| %trunc = trunc i32 %0 to i16 |
| store i16 %trunc, ptr %a, align 2 |
| ret void |
| } |
| |
| ; Extracts lane 3 of a <vscale x 4 x i32>, truncates it to i16 and stores it |
| ; at %a minus 8 i16 elements (a -16 byte offset). Expected output is |
| ; `mov z0.s, z0.s[3]` followed by `stur h0, [x0, #-16]`. |
| define void @test_str_trunc_lane_s32_to_s16_negative_offset(ptr %a, <vscale x 4 x i32> %b) { |
| ; CHECK-LABEL: test_str_trunc_lane_s32_to_s16_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov z0.s, z0.s[3] |
| ; CHECK-NEXT: stur h0, [x0, #-16] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x i32> %b, i32 3 |
| %trunc = trunc i32 %0 to i16 |
| %out_ptr = getelementptr inbounds i16, ptr %a, i64 -8 |
| store i16 %trunc, ptr %out_ptr, align 2 |
| ret void |
| } |
| |
| ; Extracts lane 0 of a <vscale x 4 x i32>, truncates it to i16 and stores it |
| ; at %a minus 8 i16 elements (a -16 byte offset). Expected output is a |
| ; single `stur h0, [x0, #-16]`. |
| define void @test_str_trunc_lane0_s32_to_s16_negative_offset(ptr %a, <vscale x 4 x i32> %b) { |
| ; CHECK-LABEL: test_str_trunc_lane0_s32_to_s16_negative_offset: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stur h0, [x0, #-16] |
| ; CHECK-NEXT: ret |
| |
| entry: |
| %0 = extractelement <vscale x 4 x i32> %b, i32 0 |
| %trunc = trunc i32 %0 to i16 |
| %out_ptr = getelementptr inbounds i16, ptr %a, i64 -8 |
| store i16 %trunc, ptr %out_ptr, align 2 |
| ret void |
| } |