blob: 0d58fc59c2c319669b4057e476ab540964c267fa [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+neon < %s | FileCheck %s
; Inserting a truncated (i64 to i32) element from the bottom 128 bits of any vector type into a NEON vector should use INS (element) of the
; truncated size to avoid pointless GPR trips.
; Lane 0 of a <1 x i64> is truncated to i32 and written to lane 0 of a 64-bit
; <2 x i32> vector. Expected lowering: a single lane-to-lane INS,
; "mov v0.s[0], v1.s[0]", with no GPR round trip.
define <2 x i32> @test_s_trunc_d_lane0(<2 x i32> %a, <1 x i64> %b) {
; CHECK-LABEL: test_s_trunc_d_lane0:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v0.s[0], v1.s[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %elt64 = extractelement <1 x i64> %b, i32 0
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <2 x i32> %a, i32 %elt32, i64 0
  ret <2 x i32> %res
}
; Lane 1 of a 128-bit <2 x i64> is truncated to i32 and written to lane 0 of a
; 64-bit <2 x i32> vector. The expected INS reads the source as 32-bit lane 2
; ("mov v0.s[0], v1.s[2]").
define <2 x i32> @test_s_trunc_d_qlane1(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_s_trunc_d_qlane1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v0.s[0], v1.s[2]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %elt64 = extractelement <2 x i64> %b, i32 1
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <2 x i32> %a, i32 %elt32, i64 0
  ret <2 x i32> %res
}
; Lane 0 of a <1 x i64> is truncated to i32 and written to lane 0 of a 128-bit
; <4 x i32> vector. Expected lowering: "mov v0.s[0], v1.s[0]".
define <4 x i32> @test_qs_trunc_d_lane0(<4 x i32> %a, <1 x i64> %b) {
; CHECK-LABEL: test_qs_trunc_d_lane0:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v0.s[0], v1.s[0]
; CHECK-NEXT: ret
  %elt64 = extractelement <1 x i64> %b, i32 0
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <4 x i32> %a, i32 %elt32, i64 0
  ret <4 x i32> %res
}
; Lane 1 of a 128-bit <2 x i64> is truncated to i32 and written to lane 3 of a
; 128-bit <4 x i32> vector. Expected lowering: "mov v0.s[3], v1.s[2]".
define <4 x i32> @test_qs_trunc_d_qlane1(<4 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_qs_trunc_d_qlane1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v0.s[3], v1.s[2]
; CHECK-NEXT: ret
  %elt64 = extractelement <2 x i64> %b, i32 1
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <4 x i32> %a, i32 %elt32, i64 3
  ret <4 x i32> %res
}
; ---- From the bottom 128b of an SVE vector
; Lane 0 of a scalable <vscale x 2 x i64> is truncated to i32 and written to
; lane 0 of a 64-bit <2 x i32> vector. Expected lowering: a NEON lane-to-lane
; INS, "mov v0.s[0], v1.s[0]".
define <2 x i32> @test_s_trunc_dsve_lane0(<2 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_s_trunc_dsve_lane0:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v0.s[0], v1.s[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %elt64 = extractelement <vscale x 2 x i64> %b, i32 0
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <2 x i32> %a, i32 %elt32, i64 0
  ret <2 x i32> %res
}
; Lane 1 of a scalable <vscale x 2 x i64> is truncated to i32 and written to
; lane 1 of a 64-bit <2 x i32> vector. The expected INS reads the source as
; 32-bit lane 2 ("mov v0.s[1], v1.s[2]").
define <2 x i32> @test_s_trunc_dsve_lane1(<2 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_s_trunc_dsve_lane1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v0.s[1], v1.s[2]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %elt64 = extractelement <vscale x 2 x i64> %b, i32 1
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <2 x i32> %a, i32 %elt32, i64 1
  ret <2 x i32> %res
}
; (negative test) The extracted element (i64 lane 2 of the scalable vector) is
; not within the V-register, so a direct INS (element) is not expected. The
; expected sequence is an SVE indexed mov of z1.s[4], an fmov into w8, and an
; insert into lane 1 from that GPR.
define <2 x i32> @test_s_trunc_dsve_lane2(<2 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_s_trunc_dsve_lane2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, z1.s[4]
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %elt64 = extractelement <vscale x 2 x i64> %b, i32 2
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <2 x i32> %a, i32 %elt32, i64 1
  ret <2 x i32> %res
}
; Lane 0 of a scalable <vscale x 2 x i64> is truncated to i32 and written to
; lane 0 of a 128-bit <4 x i32> vector. Expected lowering:
; "mov v0.s[0], v1.s[0]".
define <4 x i32> @test_qs_trunc_dsve_lane0(<4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_qs_trunc_dsve_lane0:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v0.s[0], v1.s[0]
; CHECK-NEXT: ret
  %elt64 = extractelement <vscale x 2 x i64> %b, i32 0
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <4 x i32> %a, i32 %elt32, i64 0
  ret <4 x i32> %res
}
; Lane 1 of a scalable <vscale x 2 x i64> is truncated to i32 and written to
; lane 3 of a 128-bit <4 x i32> vector. Expected lowering:
; "mov v0.s[3], v1.s[2]".
define <4 x i32> @test_qs_trunc_dsve_lane1(<4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_qs_trunc_dsve_lane1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v0.s[3], v1.s[2]
; CHECK-NEXT: ret
  %elt64 = extractelement <vscale x 2 x i64> %b, i32 1
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <4 x i32> %a, i32 %elt32, i64 3
  ret <4 x i32> %res
}
; (negative test) The extracted element (i64 lane 2 of the scalable vector) is
; not within the V-register, so a direct INS (element) is not expected. The
; expected sequence is an SVE indexed mov of z1.s[4], an fmov into w8, and an
; insert into lane 3 from that GPR.
define <4 x i32> @test_qs_trunc_dsve_lane2(<4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_qs_trunc_dsve_lane2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, z1.s[4]
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
  %elt64 = extractelement <vscale x 2 x i64> %b, i32 2
  %elt32 = trunc i64 %elt64 to i32
  %res = insertelement <4 x i32> %a, i32 %elt32, i64 3
  ret <4 x i32> %res
}