; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTREAMING
; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
target triple = "aarch64-unknown-linux-gnu"
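
; The tests below check that a store of a single extracted vector lane (or of
; an SVE reduction result) is emitted directly from the FP/SIMD register,
; avoiding a round trip through a general-purpose register where possible.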
define void @test_str_lane_s32(ptr %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_str_lane_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x i32> %b, i32 3
store i32 %0, ptr %a, align 4
ret void
}
define void @test_str_lane0_s32(ptr %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_str_lane0_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x i32> %b, i32 0
store i32 %0, ptr %a, align 4
ret void
}
define void @test_str_lane_s64(ptr %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_str_lane_s64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 2 x i64> %b, i32 1
store i64 %0, ptr %a, align 8
ret void
}
define void @test_str_lane0_s64(ptr %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_str_lane0_s64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 2 x i64> %b, i32 0
store i64 %0, ptr %a, align 8
ret void
}
define void @test_str_lane_f32(ptr %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: test_str_lane_f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x float> %b, i32 3
store float %0, ptr %a, align 4
ret void
}
define void @test_str_lane0_f32(ptr %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: test_str_lane0_f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x float> %b, i32 0
store float %0, ptr %a, align 4
ret void
}
define void @test_str_lane_f64(ptr %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: test_str_lane_f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 2 x double> %b, i32 1
store double %0, ptr %a, align 8
ret void
}
define void @test_str_lane0_f64(ptr %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: test_str_lane0_f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 2 x double> %b, i32 0
store double %0, ptr %a, align 8
ret void
}
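
; Byte-sized lanes are stored via a GPR: the non-streaming path extracts the
; lane with NEON umov, while the streaming-compatible path (where NEON is
; unavailable) moves the lane to element 0 and transfers it with fmov before
; the strb.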
define void @test_str_lane_s8(ptr %a, <vscale x 16 x i8> %b) {
; CHECK-NONSTREAMING-LABEL: test_str_lane_s8:
; CHECK-NONSTREAMING: // %bb.0: // %entry
; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7]
; CHECK-NONSTREAMING-NEXT: strb w8, [x0]
; CHECK-NONSTREAMING-NEXT: ret
;
; STREAMING-COMPAT-LABEL: test_str_lane_s8:
; STREAMING-COMPAT: // %bb.0: // %entry
; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
; STREAMING-COMPAT-NEXT: fmov w8, s0
; STREAMING-COMPAT-NEXT: strb w8, [x0]
; STREAMING-COMPAT-NEXT: ret
entry:
%0 = extractelement <vscale x 16 x i8> %b, i32 7
store i8 %0, ptr %a, align 1
ret void
}
define void @test_str_lane0_s8(ptr %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_str_lane0_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: strb w8, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 16 x i8> %b, i32 0
store i8 %0, ptr %a, align 1
ret void
}
define void @test_str_lane_s16(ptr %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: test_str_lane_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.h, z0.h[3]
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 8 x i16> %b, i32 3
store i16 %0, ptr %a, align 2
ret void
}
define void @test_str_lane0_s16(ptr %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: test_str_lane0_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 8 x i16> %b, i32 0
store i16 %0, ptr %a, align 2
ret void
}
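
; The i64 result of the SVE uaddv reduction is produced in d0, so the store
; (including truncating stores to i32/i16) can be done straight from the low
; bits of the FP register.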
define void @test_str_reduction_i32_to_i32(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
; CHECK-LABEL: test_str_reduction_i32_to_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
%trunc = trunc i64 %reduce to i32
store i32 %trunc, ptr %ptr, align 4
ret void
}
define void @test_str_reduction_i32_to_i64(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
; CHECK-LABEL: test_str_reduction_i32_to_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
store i64 %reduce, ptr %ptr, align 8
ret void
}
define void @test_str_reduction_i32_to_i16(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
; CHECK-LABEL: test_str_reduction_i32_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
%trunc = trunc i64 %reduce to i16
store i16 %trunc, ptr %ptr, align 2
ret void
}
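
; The *_negative_offset variants check that a negative pointer offset is
; folded into the unscaled stur form of the store.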
define void @test_str_reduction_i32_to_i32_negative_offset(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
; CHECK-LABEL: test_str_reduction_i32_to_i32_negative_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: stur s0, [x0, #-32]
; CHECK-NEXT: ret
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
%trunc = trunc i64 %reduce to i32
%out_ptr = getelementptr inbounds i32, ptr %ptr, i64 -8
store i32 %trunc, ptr %out_ptr, align 4
ret void
}
define void @test_str_reduction_i32_to_i64_negative_offset(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
; CHECK-LABEL: test_str_reduction_i32_to_i64_negative_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: stur d0, [x0, #-64]
; CHECK-NEXT: ret
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
%out_ptr = getelementptr inbounds i64, ptr %ptr, i64 -8
store i64 %reduce, ptr %out_ptr, align 8
ret void
}
define void @test_str_reduction_i32_to_i16_negative_offset(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
; CHECK-LABEL: test_str_reduction_i32_to_i16_negative_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: stur h0, [x0, #-16]
; CHECK-NEXT: ret
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
%trunc = trunc i64 %reduce to i16
%out_ptr = getelementptr inbounds i16, ptr %ptr, i64 -8
store i16 %trunc, ptr %out_ptr, align 2
ret void
}
define void @test_str_lane_s32_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_str_lane_s32_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: stur s0, [x0, #-32]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x i32> %b, i32 3
%out_ptr = getelementptr inbounds i32, ptr %a, i64 -8
store i32 %0, ptr %out_ptr, align 4
ret void
}
define void @test_str_lane0_s32_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_str_lane0_s32_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stur s0, [x0, #-32]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x i32> %b, i32 0
%out_ptr = getelementptr inbounds i32, ptr %a, i64 -8
store i32 %0, ptr %out_ptr, align 4
ret void
}
define void @test_str_lane_s64_negative_offset(ptr %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_str_lane_s64_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: stur d0, [x0, #-64]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 2 x i64> %b, i32 1
%out_ptr = getelementptr inbounds i64, ptr %a, i64 -8
store i64 %0, ptr %out_ptr, align 8
ret void
}
define void @test_str_lane0_s64_negative_offset(ptr %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_str_lane0_s64_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stur d0, [x0, #-64]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 2 x i64> %b, i32 0
%out_ptr = getelementptr inbounds i64, ptr %a, i64 -8
store i64 %0, ptr %out_ptr, align 8
ret void
}
define void @test_str_lane_s8_negative_offset(ptr %a, <vscale x 16 x i8> %b) {
; CHECK-NONSTREAMING-LABEL: test_str_lane_s8_negative_offset:
; CHECK-NONSTREAMING: // %bb.0: // %entry
; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7]
; CHECK-NONSTREAMING-NEXT: sturb w8, [x0, #-8]
; CHECK-NONSTREAMING-NEXT: ret
;
; STREAMING-COMPAT-LABEL: test_str_lane_s8_negative_offset:
; STREAMING-COMPAT: // %bb.0: // %entry
; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
; STREAMING-COMPAT-NEXT: fmov w8, s0
; STREAMING-COMPAT-NEXT: sturb w8, [x0, #-8]
; STREAMING-COMPAT-NEXT: ret
entry:
%0 = extractelement <vscale x 16 x i8> %b, i32 7
%out_ptr = getelementptr inbounds i8, ptr %a, i64 -8
store i8 %0, ptr %out_ptr, align 1
ret void
}
define void @test_str_lane0_s8_negative_offset(ptr %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_str_lane0_s8_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sturb w8, [x0, #-8]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 16 x i8> %b, i32 0
%out_ptr = getelementptr inbounds i8, ptr %a, i64 -8
store i8 %0, ptr %out_ptr, align 1
ret void
}
define void @test_str_lane_s16_negative_offset(ptr %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: test_str_lane_s16_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.h, z0.h[3]
; CHECK-NEXT: stur h0, [x0, #-16]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 8 x i16> %b, i32 3
%out_ptr = getelementptr inbounds i16, ptr %a, i64 -8
store i16 %0, ptr %out_ptr, align 2
ret void
}
define void @test_str_lane0_s16_negative_offset(ptr %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: test_str_lane0_s16_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stur h0, [x0, #-16]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 8 x i16> %b, i32 0
%out_ptr = getelementptr inbounds i16, ptr %a, i64 -8
store i16 %0, ptr %out_ptr, align 2
ret void
}
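
; Truncating stores of an extracted lane should also use the narrower FP
; store directly (str/stur h0), with no separate truncation instruction.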
define void @test_str_trunc_lane_s32_to_s16(ptr %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_str_trunc_lane_s32_to_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x i32> %b, i32 3
%trunc = trunc i32 %0 to i16
store i16 %trunc, ptr %a, align 2
ret void
}
define void @test_str_trunc_lane0_s32_to_s16(ptr %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_str_trunc_lane0_s32_to_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x i32> %b, i32 0
%trunc = trunc i32 %0 to i16
store i16 %trunc, ptr %a, align 2
ret void
}
define void @test_str_trunc_lane_s32_to_s16_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_str_trunc_lane_s32_to_s16_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: stur h0, [x0, #-16]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x i32> %b, i32 3
%trunc = trunc i32 %0 to i16
%out_ptr = getelementptr inbounds i16, ptr %a, i64 -8
store i16 %trunc, ptr %out_ptr, align 2
ret void
}
define void @test_str_trunc_lane0_s32_to_s16_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_str_trunc_lane0_s32_to_s16_negative_offset:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stur h0, [x0, #-16]
; CHECK-NEXT: ret
entry:
%0 = extractelement <vscale x 4 x i32> %b, i32 0
%trunc = trunc i32 %0 to i16
%out_ptr = getelementptr inbounds i16, ptr %a, i64 -8
store i16 %trunc, ptr %out_ptr, align 2
ret void
}