| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s |
| ; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s |
| |
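| ; Most of these tests create a vector tuple, insert z5 into one of the elements, |
| ; and finally extract that element from the wide vector to return it. Those |
| ; checks ensure that z5 is the value that is returned. The remaining tests |
| ; check that elements which are not set stay unchanged, and that an element |
| ; can be extracted from a tuple passed into the function. |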
| |
| ; |
| ; Insert into two element tuples |
| ; |
| |
| ; tuple: { tuple2.res0, tuple2.res1 } |
| ; insert z5: { z5 , tuple2.res1 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple2_nxv8i32_elt0: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a two-element tuple from %z0 and %z1, overwrite element 0 with %z5, |
| ; then read element 0 back. The expected output is a single move of z5 into z0. |
| %pair = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) |
| %pair.set = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %pair, i32 0, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %pair.set, i32 0) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; tuple: { tuple2.res0, tuple2.res1 } |
| ; insert z5: { tuple2.res0, z5 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple2_nxv8i32_elt1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a two-element tuple from %z0 and %z1, overwrite element 1 with %z5, |
| ; then read element 1 back. The expected output is a single move of z5 into z0. |
| %pair = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) |
| %pair.set = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %pair, i32 1, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %pair.set, i32 1) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; This test checks that the elements _not_ being set aren't changed. |
| |
| ; tuple: { tuple2.res0, tuple2.res1 } |
| ; insert z5: { tuple2.res0, z5 } |
| ; extract z0: ^^ |
| define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ret |
| ; Overwrite element 1 of the { %z0, %z1 } tuple with %z5 but return element 0. |
| ; The expected output is a bare ret with no move instruction. |
| %pair = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) |
| %pair.set = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %pair, i32 1, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %pair.set, i32 0) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; Test extract of tuple passed into function |
| define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) { |
| ; CHECK-LABEL: get_tuple2_nxv8i32_elt1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z1.d |
| ; CHECK-NEXT: ret |
| ; Return element 1 of the two-element tuple received as the only argument. |
| ; The expected output is a single move of z1 into z0. |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; |
| ; Insert into three element tuples |
| ; |
| |
| ; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } |
| ; insert z5: { z5 , tuple3.res1, tuple3.res2 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple3_nxv12i32_elt0: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a three-element tuple from %z0, %z1 and %z2, overwrite element 0 with |
| ; %z5, then read element 0 back. The expected output is a single move of z5 |
| ; into z0. |
| %triple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) |
| %triple.set = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %triple, i32 0, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %triple.set, i32 0) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } |
| ; insert z5: { tuple3.res0, z5 , tuple3.res2 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple3_nxv12i32_elt1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a three-element tuple from %z0, %z1 and %z2, overwrite element 1 with |
| ; %z5, then read element 1 back. The expected output is a single move of z5 |
| ; into z0. |
| %triple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) |
| %triple.set = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %triple, i32 1, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %triple.set, i32 1) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } |
| ; insert z5: { tuple3.res0, tuple3.res1, z5 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple3_nxv12i32_elt2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a three-element tuple from %z0, %z1 and %z2, overwrite element 2 with |
| ; %z5, then read element 2 back. The expected output is a single move of z5 |
| ; into z0. |
| %triple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) |
| %triple.set = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %triple, i32 2, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %triple.set, i32 2) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; This test checks that the elements _not_ being set aren't changed. |
| |
| ; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } |
| ; insert z5: { tuple3.res0, z5 , tuple3.res2 } |
| ; extract z2: ^^ |
| define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z2.d |
| ; CHECK-NEXT: ret |
| ; Overwrite element 1 of the { %z0, %z1, %z2 } tuple with %z5 but return |
| ; element 2. The expected output is a single move of z2 into z0. |
| %triple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) |
| %triple.set = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %triple, i32 1, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %triple.set, i32 2) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; Test extract of tuple passed into function |
| define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) { |
| ; CHECK-LABEL: get_tuple3_nxv12i32_elt2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z3.d |
| ; CHECK-NEXT: ret |
| ; Return element 2 of the three-element tuple passed as the second argument; |
| ; %z0 is not used. The expected output is a single move of z3 into z0. |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; |
| ; Insert into four element tuples |
| ; |
| |
| ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } |
| ; insert z5: { z5 , tuple4.res1, tuple4.res2, tuple4.res3 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple4_nxv16i32_elt0: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a four-element tuple from %z0..%z3, overwrite element 0 with %z5, |
| ; then read element 0 back. The expected output is a single move of z5 into z0. |
| %quad = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) |
| %quad.set = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %quad, i32 0, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %quad.set, i32 0) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } |
| ; insert z5: { tuple4.res0, z5 , tuple4.res2, tuple4.res3 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple4_nxv16i32_elt1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a four-element tuple from %z0..%z3, overwrite element 1 with %z5, |
| ; then read element 1 back. The expected output is a single move of z5 into z0. |
| %quad = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) |
| %quad.set = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %quad, i32 1, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %quad.set, i32 1) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } |
| ; insert z5: { tuple4.res0, tuple4.res1, z5 , tuple4.res3 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple4_nxv16i32_elt2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a four-element tuple from %z0..%z3, overwrite element 2 with %z5, |
| ; then read element 2 back. The expected output is a single move of z5 into z0. |
| %quad = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) |
| %quad.set = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %quad, i32 2, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %quad.set, i32 2) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } |
| ; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 } |
| ; extract z5: ^^ |
| define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple4_nxv16i32_elt3: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z5.d |
| ; CHECK-NEXT: ret |
| ; Build a four-element tuple from %z0..%z3, overwrite element 3 with %z5, |
| ; then read element 3 back. The expected output is a single move of z5 into z0. |
| %quad = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) |
| %quad.set = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %quad, i32 3, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %quad.set, i32 3) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; This test checks that the elements _not_ being set aren't changed. |
| |
| ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } |
| ; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 } |
| ; extract z2: ^^ |
| define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) { |
| ; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z2.d |
| ; CHECK-NEXT: ret |
| ; Overwrite element 3 of the { %z0, %z1, %z2, %z3 } tuple with %z5 but return |
| ; element 2. The expected output is a single move of z2 into z0. |
| %quad = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) |
| %quad.set = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %quad, i32 3, <vscale x 4 x i32> %z5) |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %quad.set, i32 2) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; Test extract of tuple passed into function |
| define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) { |
| ; CHECK-LABEL: get_tuple4_nxv16i32_elt3: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z0.d, z3.d |
| ; CHECK-NEXT: ret |
| ; Return element 3 of the four-element tuple received as the only argument. |
| ; The expected output is a single move of z3 into z0. |
| %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3) |
| ret <vscale x 4 x i32> %elt |
| } |
| |
| ; Two-element tuple intrinsics: create a <vscale x 8 x i32> from two |
| ; <vscale x 4 x i32> parts, set one part, and get one part. The i32 operand |
| ; of set/get is the element index used by the tests above. |
| declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) |
| declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>) |
| declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32) |
| |
| ; Three-element tuple intrinsics: the same create/set/get trio operating on |
| ; <vscale x 12 x i32>. |
| declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) |
| declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>) |
| declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32) |
| |
| ; Four-element tuple intrinsics: the same create/set/get trio operating on |
| ; <vscale x 16 x i32>. |
| declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) |
| declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>) |
| declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32) |