| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s |
| |
| target triple = "aarch64-linux" |
| |
| ; MOVAZ (tile to vector, multi) |
| |
| |
| ;; |
| ; X2 - Horiz |
| ;; |
| |
| ; Calls readz.horiz.x2 (nxv16i8) on tile 0 twice: at %slice and at %slice + 14. |
| ; The expected output carries the +14 as the immediate slice offset 14:15. |
| ; %tile is unused, so the slice index arrives in w1. Returns the second result. |
| define {<vscale x 16 x i8>, <vscale x 16 x i8>} @test_readz_hor_z8_i8_x2(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z8_i8_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz { z0.b, z1.b }, za0h.b[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.b, z1.b }, za0h.b[w12, 14:15] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 14 |
| %res2 = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 %slice.max) |
| ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %res2 |
| } |
| ; Calls readz.horiz.x2 (nxv8i16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 6; the expected output carries the +6 as the immediate offset 6:7. |
| ; Returns the second result. |
| define {<vscale x 8 x i16>, <vscale x 8 x i16>} @test_readz_hor_z16_i16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_i16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za0h.h[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za1h.h[w12, 6:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 6 |
| %res2 = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %res2 |
| } |
| |
| ; Calls readz.horiz.x2 (nxv4i32) on tile 0 at %slice, then on tile 3 at |
| ; %slice + 2; the expected output carries the +2 as the immediate offset 2:3. |
| ; Returns the second result. |
| define {<vscale x 4 x i32>, <vscale x 4 x i32>} @test_readz_hor_z32_i32_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z32_i32_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.s, z1.s }, za0h.s[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.s, z1.s }, za3h.s[w12, 2:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 2 |
| %res2 = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 3, i32 %slice.max) |
| ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res2 |
| } |
| |
| ; Calls readz.horiz.x2 (nxv2i64) on tile 0 and on tile 7, both at %slice with |
| ; no added offset. Returns the FIRST result (%res); in the expected output the |
| ; second read therefore lands in z2/z3 instead of overwriting z0/z1. |
| define {<vscale x 2 x i64>, <vscale x 2 x i64>} @test_readz_hor_z64_i64_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z64_i64_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za0h.d[w12, 0:1] |
| ; CHECK-NEXT: movaz { z2.d, z3.d }, za7h.d[w12, 0:1] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 %slice) |
| %res2 = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 7, i32 %slice) |
| ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %res |
| } |
| |
| ; Calls readz.horiz.x2 (nxv8bf16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 6; the expected output carries the +6 as the immediate offset 6:7. |
| ; Returns the second result. |
| define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @test_readz_hor_z16_bf16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_bf16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za0h.h[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za1h.h[w12, 6:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 6 |
| %res2 = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %res2 |
| } |
| |
| ; Calls readz.horiz.x2 (nxv8f16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 6; the expected output carries the +6 as the immediate offset 6:7. |
| ; Returns the second result. |
| define {<vscale x 8 x half>, <vscale x 8 x half>} @test_readz_hor_z16_f16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_f16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za0h.h[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za1h.h[w12, 6:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 6 |
| %res2 = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x half>, <vscale x 8 x half>} %res2 |
| } |
| |
| ; Calls readz.horiz.x2 (nxv4f32) on tile 0 at %slice, then on tile 3 at |
| ; %slice + 2; the expected output carries the +2 as the immediate offset 2:3. |
| ; Returns the second result. |
| define {<vscale x 4 x float>, <vscale x 4 x float>} @test_readz_hor_z32_f32_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z32_f32_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.s, z1.s }, za0h.s[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.s, z1.s }, za3h.s[w12, 2:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 2 |
| %res2 = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 %slice.max) |
| ret {<vscale x 4 x float>, <vscale x 4 x float>} %res2 |
| } |
| |
| ; Calls readz.horiz.x2 (nxv2f64) on tile 0 and on tile 7, both at %slice with |
| ; no added offset. Returns the FIRST result (%res); in the expected output the |
| ; second read therefore lands in z2/z3 instead of overwriting z0/z1. |
| define {<vscale x 2 x double>, <vscale x 2 x double>} @test_readz_hor_z64_f64_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z64_f64_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za0h.d[w12, 0:1] |
| ; CHECK-NEXT: movaz { z2.d, z3.d }, za7h.d[w12, 0:1] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 0, i32 %slice) |
| %res2 = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 %slice) |
| ret {<vscale x 2 x double>, <vscale x 2 x double>} %res |
| } |
| |
| ;; |
| ; X2 - Vert |
| ;; |
| |
| ; Calls readz.vert.x2 (nxv16i8) on tile 0 twice: at %slice and at %slice + 14. |
| ; The expected output carries the +14 as the immediate slice offset 14:15. |
| ; %tile is unused, so the slice index arrives in w1. Returns the second result. |
| define {<vscale x 16 x i8>, <vscale x 16 x i8>} @test_readz_ver_z8_i8_x2(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z8_i8_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz { z0.b, z1.b }, za0v.b[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.b, z1.b }, za0v.b[w12, 14:15] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 14 |
| %res2 = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 %slice.max) |
| ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %res2 |
| } |
| ; Calls readz.vert.x2 (nxv8i16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 6; the expected output carries the +6 as the immediate offset 6:7. |
| ; Returns the second result. |
| define {<vscale x 8 x i16>, <vscale x 8 x i16>} @test_readz_ver_z16_i16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_i16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za0v.h[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za1v.h[w12, 6:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 6 |
| %res2 = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %res2 |
| } |
| |
| ; Calls readz.vert.x2 (nxv4i32) on tile 0 at %slice, then on tile 3 at |
| ; %slice + 2; the expected output carries the +2 as the immediate offset 2:3. |
| ; Returns the second result. |
| define {<vscale x 4 x i32>, <vscale x 4 x i32>} @test_readz_ver_z32_i32_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z32_i32_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.s, z1.s }, za0v.s[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.s, z1.s }, za3v.s[w12, 2:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 2 |
| %res2 = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 3, i32 %slice.max) |
| ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res2 |
| } |
| |
| ; Calls readz.vert.x2 (nxv2i64) on tile 0 and on tile 7, both at %slice with |
| ; no added offset. Returns the FIRST result (%res); in the expected output the |
| ; second read therefore lands in z2/z3 instead of overwriting z0/z1. |
| define {<vscale x 2 x i64>, <vscale x 2 x i64>} @test_readz_ver_z64_i64_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z64_i64_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za0v.d[w12, 0:1] |
| ; CHECK-NEXT: movaz { z2.d, z3.d }, za7v.d[w12, 0:1] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 0, i32 %slice) |
| %res2 = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 7, i32 %slice) |
| ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %res |
| } |
| |
| ; Calls readz.vert.x2 (nxv8bf16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 6; the expected output carries the +6 as the immediate offset 6:7. |
| ; Returns the second result. |
| define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @test_readz_ver_z16_bf16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_bf16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za0v.h[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za1v.h[w12, 6:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 6 |
| %res2 = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %res2 |
| } |
| |
| ; Calls readz.vert.x2 (nxv8f16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 6; the expected output carries the +6 as the immediate offset 6:7. |
| ; Returns the second result. |
| define {<vscale x 8 x half>, <vscale x 8 x half>} @test_readz_ver_z16_f16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_f16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za0v.h[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.h, z1.h }, za1v.h[w12, 6:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 6 |
| %res2 = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x half>, <vscale x 8 x half>} %res2 |
| } |
| |
| ; Calls readz.vert.x2 (nxv4f32) on tile 0 at %slice, then on tile 3 at |
| ; %slice + 2; the expected output carries the +2 as the immediate offset 2:3. |
| ; Returns the second result. |
| define {<vscale x 4 x float>, <vscale x 4 x float>} @test_readz_ver_z32_f32_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z32_f32_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.s, z1.s }, za0v.s[w12, 0:1] |
| ; CHECK-NEXT: movaz { z0.s, z1.s }, za3v.s[w12, 2:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 2 |
| %res2 = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32 3, i32 %slice.max) |
| ret {<vscale x 4 x float>, <vscale x 4 x float>} %res2 |
| } |
| |
| ; Calls readz.vert.x2 (nxv2f64) on tile 0 and on tile 7, both at %slice with |
| ; no added offset. Returns the FIRST result (%res); in the expected output the |
| ; second read therefore lands in z2/z3 instead of overwriting z0/z1. |
| define {<vscale x 2 x double>, <vscale x 2 x double>} @test_readz_ver_z64_f64_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z64_f64_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za0v.d[w12, 0:1] |
| ; CHECK-NEXT: movaz { z2.d, z3.d }, za7v.d[w12, 0:1] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32 0, i32 %slice) |
| %res2 = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32 7, i32 %slice) |
| ret {<vscale x 2 x double>, <vscale x 2 x double>} %res |
| } |
| |
| ;; |
| ; X4 - Horiz |
| ;; |
| |
| ; Calls readz.horiz.x4 (nxv16i8) on tile 0 twice: at %slice and at %slice + 12. |
| ; The expected output carries the +12 as the immediate slice offset 12:15. |
| ; %tile is unused, so the slice index arrives in w1. Returns the second result. |
| define {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} @test_readz_hor_z8_i8_x4(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z8_i8_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz { z0.b - z3.b }, za0h.b[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.b - z3.b }, za0h.b[w12, 12:15] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 12 |
| %res2 = call {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 %slice.max) |
| ret {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} %res2 |
| } |
| ; Calls readz.horiz.x4 (nxv8i16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 4; the expected output carries the +4 as the immediate offset 4:7. |
| ; Returns the second result. |
| define {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} @test_readz_hor_z16_i16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_i16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za0h.h[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za1h.h[w12, 4:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 4 |
| %res2 = call {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} %res2 |
| } |
| |
| ; Calls readz.horiz.x4 (nxv4i32) on tile 0 and on tile 3, both at %slice with |
| ; no added offset (immediate 0:3 in the expected output). Returns the second |
| ; result. |
| define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @test_readz_hor_z32_i32_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z32_i32_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.s - z3.s }, za0h.s[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.s - z3.s }, za3h.s[w12, 0:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 0, i32 %slice) |
| %res2 = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 3, i32 %slice) |
| ret {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} %res2 |
| } |
| |
| ; Calls readz.horiz.x4 (nxv2i64) on tile 0 and on tile 7, both at %slice with |
| ; no added offset. Returns the FIRST result (%res); in the expected output the |
| ; second read therefore lands in z4-z7 instead of overwriting z0-z3. |
| define {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} @test_readz_hor_z64_i64_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z64_i64_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za0h.d[w12, 0:3] |
| ; CHECK-NEXT: movaz { z4.d - z7.d }, za7h.d[w12, 0:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 0, i32 %slice) |
| %res2 = call {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 7, i32 %slice) |
| ret {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} %res |
| } |
| |
| ; Calls readz.horiz.x4 (nxv8bf16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 4; the expected output carries the +4 as the immediate offset 4:7. |
| ; Returns the second result. |
| define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @test_readz_hor_z16_bf16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_bf16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za0h.h[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za1h.h[w12, 4:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 4 |
| %res2 = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %res2 |
| } |
| |
| ; Calls readz.horiz.x4 (nxv8f16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 4; the expected output carries the +4 as the immediate offset 4:7. |
| ; Returns the second result. |
| define {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @test_readz_hor_z16_f16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_f16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za0h.h[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za1h.h[w12, 4:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 4 |
| %res2 = call {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} %res2 |
| } |
| |
| ; Calls readz.horiz.x4 (nxv4f32) on tile 0 and on tile 3, both at %slice with |
| ; no added offset (immediate 0:3 in the expected output). Returns the second |
| ; result. |
| define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @test_readz_hor_z32_f32_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z32_f32_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.s - z3.s }, za0h.s[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.s - z3.s }, za3h.s[w12, 0:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32 0, i32 %slice) |
| %res2 = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32 3, i32 %slice) |
| ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res2 |
| } |
| |
| ; Calls readz.horiz.x4 (nxv2f64) on tile 0 and on tile 7, both at %slice with |
| ; no added offset. Returns the FIRST result (%res); in the expected output the |
| ; second read therefore lands in z4-z7 instead of overwriting z0-z3. |
| define {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} @test_readz_hor_z64_f64_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z64_f64_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za0h.d[w12, 0:3] |
| ; CHECK-NEXT: movaz { z4.d - z7.d }, za7h.d[w12, 0:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.horiz.x4.nxv2f64(i32 0, i32 %slice) |
| %res2 = call {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.horiz.x4.nxv2f64(i32 7, i32 %slice) |
| ret {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} %res |
| } |
| |
| ;; |
| ; X4 - Vert |
| ;; |
| |
| ; Calls readz.vert.x4 (nxv16i8) on tile 0 twice: at %slice and at %slice + 12. |
| ; The expected output carries the +12 as the immediate slice offset 12:15. |
| ; %tile is unused, so the slice index arrives in w1. Returns the second result. |
| define {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} @test_readz_ver_z8_i8_x4(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z8_i8_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz { z0.b - z3.b }, za0v.b[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.b - z3.b }, za0v.b[w12, 12:15] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 12 |
| %res2 = call {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 %slice.max) |
| ret {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} %res2 |
| } |
| ; Calls readz.vert.x4 (nxv8i16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 4; the expected output carries the +4 as the immediate offset 4:7. |
| ; Returns the second result. |
| define {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} @test_readz_ver_z16_i16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_i16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za0v.h[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za1v.h[w12, 4:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 4 |
| %res2 = call {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} %res2 |
| } |
| |
| ; Calls readz.vert.x4 (nxv4i32) on tile 0 and on tile 3, both at %slice with |
| ; no added offset (immediate 0:3 in the expected output). Returns the second |
| ; result. |
| define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @test_readz_ver_z32_i32_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z32_i32_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.s - z3.s }, za0v.s[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.s - z3.s }, za3v.s[w12, 0:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 0, i32 %slice) |
| %res2 = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 3, i32 %slice) |
| ret {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} %res2 |
| } |
| |
| ; Calls readz.vert.x4 (nxv2i64) on tile 0 and on tile 7, both at %slice with |
| ; no added offset. Returns the FIRST result (%res); in the expected output the |
| ; second read therefore lands in z4-z7 instead of overwriting z0-z3. |
| define {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} @test_readz_ver_z64_i64_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z64_i64_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za0v.d[w12, 0:3] |
| ; CHECK-NEXT: movaz { z4.d - z7.d }, za7v.d[w12, 0:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 %slice) |
| %res2 = call {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 7, i32 %slice) |
| ret {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} %res |
| } |
| |
| ; Calls readz.vert.x4 (nxv8bf16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 4; the expected output carries the +4 as the immediate offset 4:7. |
| ; Returns the second result. |
| define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @test_readz_ver_z16_bf16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_bf16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za0v.h[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za1v.h[w12, 4:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 4 |
| %res2 = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %res2 |
| } |
| |
| ; Calls readz.vert.x4 (nxv8f16) on tile 0 at %slice, then on tile 1 at |
| ; %slice + 4; the expected output carries the +4 as the immediate offset 4:7. |
| ; Returns the second result. |
| define {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @test_readz_ver_z16_f16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_f16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za0v.h[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.h - z3.h }, za1v.h[w12, 4:7] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 4 |
| %res2 = call {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32 1, i32 %slice.max) |
| ret {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} %res2 |
| } |
| |
| ; Calls readz.vert.x4 (nxv4f32) on tile 0 and on tile 3, both at %slice with |
| ; no added offset (immediate 0:3 in the expected output). Returns the second |
| ; result. |
| define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @test_readz_ver_z32_f32_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z32_f32_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.s - z3.s }, za0v.s[w12, 0:3] |
| ; CHECK-NEXT: movaz { z0.s - z3.s }, za3v.s[w12, 0:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 0, i32 %slice) |
| %res2 = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 %slice) |
| ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res2 |
| } |
| |
| ; Calls readz.vert.x4 (nxv2f64) on tile 0 and on tile 7, both at %slice with |
| ; no added offset. Returns the FIRST result (%res); in the expected output the |
| ; second read therefore lands in z4-z7 instead of overwriting z0-z3. |
| define {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} @test_readz_ver_z64_f64_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z64_f64_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za0v.d[w12, 0:3] |
| ; CHECK-NEXT: movaz { z4.d - z7.d }, za7v.d[w12, 0:3] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 0, i32 %slice) |
| %res2 = call {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 %slice) |
| ret {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} %res |
| } |
| |
| |
| ; Declarations for the multi-vector readz intrinsics called by the x2/x4 tests |
| ; in this file. Each name must match its call site exactly: |
| ; llvm.aarch64.sme.readz.{horiz,vert}.{x2,x4}.<vector type>. The nxv16i8 entries |
| ; previously carried a stray ".za8" component and so did not declare the |
| ; intrinsics that the i8 tests actually call. |
| declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32, i32) |
| declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32, i32) |
| declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32, i32) |
| declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32, i32) |
| declare {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32, i32) |
| declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32, i32) |
| declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32, i32) |
| declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32, i32) |
| |
| declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32, i32) |
| declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32, i32) |
| declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32, i32) |
| declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32, i32) |
| declare {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32, i32) |
| declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32, i32) |
| declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32, i32) |
| declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32, i32) |
| |
| declare {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32, i32) |
| declare {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32, i32) |
| declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32, i32) |
| declare {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32, i32) |
| declare {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32, i32) |
| declare {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32, i32) |
| declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32, i32) |
| declare {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.horiz.x4.nxv2f64(i32, i32) |
| |
| declare {<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32, i32) |
| declare {<vscale x 8 x i16>, <vscale x 8 x i16>,<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32, i32) |
| declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32, i32) |
| declare {<vscale x 2 x i64>, <vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32, i32) |
| declare {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32, i32) |
| declare {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32, i32) |
| declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32, i32) |
| declare {<vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32, i32) |
| |
| ; MOVAZ (tile to vector, single) |
| |
| ;; |
| ; Horiz |
| ;; |
| ; Calls the single-vector readz.horiz (nxv16i8) on tile 0 twice: at %slice and |
| ; at %slice + 14; the expected output carries the +14 as the immediate offset. |
| ; %tile is unused, so the slice index arrives in w1. Returns the second result. |
| define <vscale x 16 x i8> @test_readz_hor_z8_i8(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z8_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.b, za0h.b[w12, 0] |
| ; CHECK-NEXT: movaz z0.b, za0h.b[w12, 14] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 16 x i8> @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 14 |
| %res2 = call <vscale x 16 x i8> @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 %slice.max) |
| ret <vscale x 16 x i8> %res2 |
| } |
| |
| ; Calls the single-vector readz.horiz (nxv8i16) on tile 0 at %slice, then on |
| ; tile 1 at %slice + 7; the expected output carries the +7 as the immediate |
| ; offset. %tile is unused (slice index in w1). Returns the second result. |
| define <vscale x 8 x i16> @test_readz_hor_z16_i16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.h, za0h.h[w12, 0] |
| ; CHECK-NEXT: movaz z0.h, za1h.h[w12, 7] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x i16> @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call <vscale x 8 x i16> @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 1, i32 %slice.max) |
| ret <vscale x 8 x i16> %res2 |
| } |
| |
| ; Calls the single-vector readz.horiz (nxv4i32) on tile 0 at %slice, then on |
| ; tile 3 at %slice + 3; the expected output carries the +3 as the immediate |
| ; offset. %tile is unused (slice index in w1). Returns the second result. |
| define <vscale x 4 x i32> @test_readz_hor_z32_i32(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z32_i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.s, za0h.s[w12, 0] |
| ; CHECK-NEXT: movaz z0.s, za3h.s[w12, 3] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x i32> @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 3 |
| %res2 = call <vscale x 4 x i32> @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 3, i32 %slice.max) |
| ret <vscale x 4 x i32> %res2 |
| } |
| |
| ; Calls the single-vector readz.horiz (nxv2i64) on tile 0 at %slice, then on |
| ; tile 7 at %slice + 1 (immediate offset 1 in the expected output). Returns the |
| ; FIRST result (%res), so the second read lands in z1. %tile is unused. |
| define <vscale x 2 x i64> @test_readz_hor_z64_i64(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z64_i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.d, za0h.d[w12, 0] |
| ; CHECK-NEXT: movaz z1.d, za7h.d[w12, 1] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x i64> @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 1 |
| %res2 = call <vscale x 2 x i64> @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 7, i32 %slice.max) |
| ret <vscale x 2 x i64> %res |
| } |
| |
| ; Calls the single-vector readz.horiz (nxv8bf16) on tile 0 at %slice, then on |
| ; tile 1 at %slice + 7; the expected output carries the +7 as the immediate |
| ; offset. %tile is unused (slice index in w1). Returns the second result. |
| define <vscale x 8 x bfloat> @test_readz_hor_z16_bf16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.h, za0h.h[w12, 0] |
| ; CHECK-NEXT: movaz z0.h, za1h.h[w12, 7] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32 1, i32 %slice.max) |
| ret <vscale x 8 x bfloat> %res2 |
| } |
| |
| ; Calls the single-vector readz.horiz (nxv8f16) on tile 0 at %slice, then on |
| ; tile 1 at %slice + 7; the expected output carries the +7 as the immediate |
| ; offset. %tile is unused (slice index in w1). Returns the second result. |
| define <vscale x 8 x half> @test_readz_hor_z16_f16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z16_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.h, za0h.h[w12, 0] |
| ; CHECK-NEXT: movaz z0.h, za1h.h[w12, 7] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x half> @llvm.aarch64.sme.readz.horiz.nxv8f16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call <vscale x 8 x half> @llvm.aarch64.sme.readz.horiz.nxv8f16(i32 1, i32 %slice.max) |
| ret <vscale x 8 x half> %res2 |
| } |
| |
| ; Calls the single-vector readz.horiz (nxv4f32) on tile 0 at %slice, then on |
| ; tile 3 at %slice + 3; the expected output carries the +3 as the immediate |
| ; offset. %tile is unused (slice index in w1). Returns the second result. |
| define <vscale x 4 x float> @test_readz_hor_z32_f32(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z32_f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.s, za0h.s[w12, 0] |
| ; CHECK-NEXT: movaz z0.s, za3h.s[w12, 3] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x float> @llvm.aarch64.sme.readz.horiz.nxv4f32(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 3 |
| %res2 = call <vscale x 4 x float> @llvm.aarch64.sme.readz.horiz.nxv4f32(i32 3, i32 %slice.max) |
| ret <vscale x 4 x float> %res2 |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.horiz.nxv2f64: tile 0 at %slice, then tile 7 at %slice + 1. |
| ; The assertions expect the added constant to appear as the movaz slice immediate. %tile is unused. |
| ; The first read (%res) is returned, and the assertions expect the second movaz to write z1 rather than z0. |
| define <vscale x 2 x double> @test_readz_hor_z64_f64(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z64_f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.d, za0h.d[w12, 0] |
| ; CHECK-NEXT: movaz z1.d, za7h.d[w12, 1] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x double> @llvm.aarch64.sme.readz.horiz.nxv2f64(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 1 |
| %res2 = call <vscale x 2 x double> @llvm.aarch64.sme.readz.horiz.nxv2f64(i32 7, i32 %slice.max) |
| ret <vscale x 2 x double> %res |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.q.horiz.nxv16i8 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 16 x i8> @test_readz_hor_z128_i8(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z128_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 0, i32 %slice) |
| %res2 = call <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 15, i32 %slice) |
| ret <vscale x 16 x i8> %res2 |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.q.horiz.nxv8i16 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x i16> @test_readz_hor_z128_i16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z128_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 0, i32 %slice) |
| %res2 = call <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 15, i32 %slice) |
| ret <vscale x 8 x i16> %res2 |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.q.horiz.nxv4i32 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 4 x i32> @test_readz_hor_z128_i32(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z128_i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 0, i32 %slice) |
| %res2 = call <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 15, i32 %slice) |
| ret <vscale x 4 x i32> %res2 |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.q.horiz.nxv2i64 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The first read (%res) is returned, and the assertions expect the second movaz to write z1 rather than z0. |
| define <vscale x 2 x i64> @test_readz_hor_z128_i64(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z128_i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] |
| ; CHECK-NEXT: movaz z1.q, za15h.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x i64> @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32 0, i32 %slice) |
| %res2 = call <vscale x 2 x i64> @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32 15, i32 %slice) |
| ret <vscale x 2 x i64> %res |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.q.horiz.nxv8bf16 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x bfloat> @test_readz_hor_z128_bf16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z128_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32 0, i32 %slice) |
| %res2 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32 15, i32 %slice) |
| ret <vscale x 8 x bfloat> %res2 |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.q.horiz.nxv8f16 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x half> @test_readz_hor_z128_f16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z128_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x half> @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32 0, i32 %slice) |
| %res2 = call <vscale x 8 x half> @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32 15, i32 %slice) |
| ret <vscale x 8 x half> %res2 |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.q.horiz.nxv4f32 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 4 x float> @test_readz_hor_z128_f32(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z128_f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x float> @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32 0, i32 %slice) |
| %res2 = call <vscale x 4 x float> @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32 15, i32 %slice) |
| ret <vscale x 4 x float> %res2 |
| } |
| |
| ; Reads horizontal slices via llvm.aarch64.sme.readz.q.horiz.nxv2f64 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The first read (%res) is returned, and the assertions expect the second movaz to write z1 rather than z0. |
| define <vscale x 2 x double> @test_readz_hor_z128_f64(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_hor_z128_f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] |
| ; CHECK-NEXT: movaz z1.q, za15h.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x double> @llvm.aarch64.sme.readz.q.horiz.nxv2f64(i32 0, i32 %slice) |
| %res2 = call <vscale x 2 x double> @llvm.aarch64.sme.readz.q.horiz.nxv2f64(i32 15, i32 %slice) |
| ret <vscale x 2 x double> %res |
| } |
| |
| ;; |
| ; Vert |
| ;; |
| ; Reads vertical byte slices of tile 0 via llvm.aarch64.sme.readz.vert.nxv16i8 at %slice and at %slice + 15. |
| ; 15 is the largest slice offset the byte form of movaz accepts (0-15), so the second read checks that the |
| ; upper bound is still folded into the immediate, as the h/s/d tests do with 7/3/1. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 16 x i8> @test_readz_ver_z8_i8(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z8_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.b, za0v.b[w12, 0] |
| ; CHECK-NEXT: movaz z0.b, za0v.b[w12, 15] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 16 x i8> @llvm.aarch64.sme.readz.vert.nxv16i8(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 15 |
| %res2 = call <vscale x 16 x i8> @llvm.aarch64.sme.readz.vert.nxv16i8(i32 0, i32 %slice.max) |
| ret <vscale x 16 x i8> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.vert.nxv8i16: tile 0 at %slice, then tile 1 at %slice + 7. |
| ; The assertions expect the added constant to appear as the movaz slice immediate. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x i16> @test_readz_ver_z16_i16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.h, za0v.h[w12, 0] |
| ; CHECK-NEXT: movaz z0.h, za1v.h[w12, 7] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x i16> @llvm.aarch64.sme.readz.vert.nxv8i16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call <vscale x 8 x i16> @llvm.aarch64.sme.readz.vert.nxv8i16(i32 1, i32 %slice.max) |
| ret <vscale x 8 x i16> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.vert.nxv4i32: tile 0 at %slice, then tile 3 at %slice + 3. |
| ; The assertions expect the added constant to appear as the movaz slice immediate. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 4 x i32> @test_readz_ver_z32_i32(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z32_i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.s, za0v.s[w12, 0] |
| ; CHECK-NEXT: movaz z0.s, za3v.s[w12, 3] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x i32> @llvm.aarch64.sme.readz.vert.nxv4i32(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 3 |
| %res2 = call <vscale x 4 x i32> @llvm.aarch64.sme.readz.vert.nxv4i32(i32 3, i32 %slice.max) |
| ret <vscale x 4 x i32> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.vert.nxv2i64: tile 0 at %slice, then tile 7 at %slice + 1. |
| ; The assertions expect the added constant to appear as the movaz slice immediate. %tile is unused. |
| ; The first read (%res) is returned, and the assertions expect the second movaz to write z1 rather than z0. |
| define <vscale x 2 x i64> @test_readz_ver_z64_i64(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z64_i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.d, za0v.d[w12, 0] |
| ; CHECK-NEXT: movaz z1.d, za7v.d[w12, 1] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x i64> @llvm.aarch64.sme.readz.vert.nxv2i64(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 1 |
| %res2 = call <vscale x 2 x i64> @llvm.aarch64.sme.readz.vert.nxv2i64(i32 7, i32 %slice.max) |
| ret <vscale x 2 x i64> %res |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.vert.nxv8bf16: tile 0 at %slice, then tile 1 at %slice + 7. |
| ; The assertions expect the added constant to appear as the movaz slice immediate. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x bfloat> @test_readz_ver_z16_bf16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.h, za0v.h[w12, 0] |
| ; CHECK-NEXT: movaz z0.h, za1v.h[w12, 7] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.vert.nxv8bf16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.vert.nxv8bf16(i32 1, i32 %slice.max) |
| ret <vscale x 8 x bfloat> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.vert.nxv8f16: tile 0 at %slice, then tile 1 at %slice + 7. |
| ; The assertions expect the added constant to appear as the movaz slice immediate. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x half> @test_readz_ver_z16_f16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z16_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.h, za0v.h[w12, 0] |
| ; CHECK-NEXT: movaz z0.h, za1v.h[w12, 7] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x half> @llvm.aarch64.sme.readz.vert.nxv8f16(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call <vscale x 8 x half> @llvm.aarch64.sme.readz.vert.nxv8f16(i32 1, i32 %slice.max) |
| ret <vscale x 8 x half> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.vert.nxv4f32: tile 0 at %slice, then tile 3 at %slice + 3. |
| ; The assertions expect the added constant to appear as the movaz slice immediate. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 4 x float> @test_readz_ver_z32_f32(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z32_f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.s, za0v.s[w12, 0] |
| ; CHECK-NEXT: movaz z0.s, za3v.s[w12, 3] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x float> @llvm.aarch64.sme.readz.vert.nxv4f32(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 3 |
| %res2 = call <vscale x 4 x float> @llvm.aarch64.sme.readz.vert.nxv4f32(i32 3, i32 %slice.max) |
| ret <vscale x 4 x float> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.vert.nxv2f64: tile 0 at %slice, then tile 7 at %slice + 1. |
| ; The assertions expect the added constant to appear as the movaz slice immediate. %tile is unused. |
| ; The first read (%res) is returned, and the assertions expect the second movaz to write z1 rather than z0. |
| define <vscale x 2 x double> @test_readz_ver_z64_f64(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z64_f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.d, za0v.d[w12, 0] |
| ; CHECK-NEXT: movaz z1.d, za7v.d[w12, 1] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x double> @llvm.aarch64.sme.readz.vert.nxv2f64(i32 0, i32 %slice) |
| %slice.max = add i32 %slice, 1 |
| %res2 = call <vscale x 2 x double> @llvm.aarch64.sme.readz.vert.nxv2f64(i32 7, i32 %slice.max) |
| ret <vscale x 2 x double> %res |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.q.vert.nxv16i8 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 16 x i8> @test_readz_ver_z128_i8(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z128_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.vert.nxv16i8(i32 0, i32 %slice) |
| %res2 = call <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.vert.nxv16i8(i32 15, i32 %slice) |
| ret <vscale x 16 x i8> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.q.vert.nxv8i16 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x i16> @test_readz_ver_z128_i16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z128_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.vert.nxv8i16(i32 0, i32 %slice) |
| %res2 = call <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.vert.nxv8i16(i32 15, i32 %slice) |
| ret <vscale x 8 x i16> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.q.vert.nxv4i32 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 4 x i32> @test_readz_ver_z128_i32(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z128_i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.vert.nxv4i32(i32 0, i32 %slice) |
| %res2 = call <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.vert.nxv4i32(i32 15, i32 %slice) |
| ret <vscale x 4 x i32> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.q.vert.nxv2i64 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The first read (%res) is returned, and the assertions expect the second movaz to write z1 rather than z0. |
| define <vscale x 2 x i64> @test_readz_ver_z128_i64(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z128_i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] |
| ; CHECK-NEXT: movaz z1.q, za15v.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x i64> @llvm.aarch64.sme.readz.q.vert.nxv2i64(i32 0, i32 %slice) |
| %res2 = call <vscale x 2 x i64> @llvm.aarch64.sme.readz.q.vert.nxv2i64(i32 15, i32 %slice) |
| ret <vscale x 2 x i64> %res |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.q.vert.nxv8bf16 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x bfloat> @test_readz_ver_z128_bf16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z128_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.q.vert.nxv8bf16(i32 0, i32 %slice) |
| %res2 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.q.vert.nxv8bf16(i32 15, i32 %slice) |
| ret <vscale x 8 x bfloat> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.q.vert.nxv8f16 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 8 x half> @test_readz_ver_z128_f16(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z128_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x half> @llvm.aarch64.sme.readz.q.vert.nxv8f16(i32 0, i32 %slice) |
| %res2 = call <vscale x 8 x half> @llvm.aarch64.sme.readz.q.vert.nxv8f16(i32 15, i32 %slice) |
| ret <vscale x 8 x half> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.q.vert.nxv4f32 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The second read (%res2) is returned. |
| define <vscale x 4 x float> @test_readz_ver_z128_f32(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z128_f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] |
| ; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x float> @llvm.aarch64.sme.readz.q.vert.nxv4f32(i32 0, i32 %slice) |
| %res2 = call <vscale x 4 x float> @llvm.aarch64.sme.readz.q.vert.nxv4f32(i32 15, i32 %slice) |
| ret <vscale x 4 x float> %res2 |
| } |
| |
| ; Reads vertical slices via llvm.aarch64.sme.readz.q.vert.nxv2f64 from tile 0 and tile 15, both at %slice. |
| ; The assertions expect two .q movaz instructions with slice immediate 0. %tile is unused. |
| ; The first read (%res) is returned, and the assertions expect the second movaz to write z1 rather than z0. |
| define <vscale x 2 x double> @test_readz_ver_z128_f64(i32 %tile, i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_ver_z128_f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w12, w1 |
| ; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] |
| ; CHECK-NEXT: movaz z1.q, za15v.q[w12, 0] |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x double> @llvm.aarch64.sme.readz.q.vert.nxv2f64(i32 0, i32 %slice) |
| %res2 = call <vscale x 2 x double> @llvm.aarch64.sme.readz.q.vert.nxv2f64(i32 15, i32 %slice) |
| ret <vscale x 2 x double> %res |
| } |
| |
| ; Declarations of the single-vector horizontal readz intrinsics called by the tests above: |
| ; first the readz.horiz variants, then the readz.q.horiz variants, one per element type. |
| ; Each takes two i32 operands; the tests pass a constant tile number first and the slice index second. |
| declare <vscale x 16 x i8> @llvm.aarch64.sme.readz.horiz.nxv16i8(i32, i32) |
| declare <vscale x 8 x i16> @llvm.aarch64.sme.readz.horiz.nxv8i16(i32, i32) |
| declare <vscale x 4 x i32> @llvm.aarch64.sme.readz.horiz.nxv4i32(i32, i32) |
| declare <vscale x 2 x i64> @llvm.aarch64.sme.readz.horiz.nxv2i64(i32, i32) |
| declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32, i32) |
| declare <vscale x 8 x half> @llvm.aarch64.sme.readz.horiz.nxv8f16(i32, i32) |
| declare <vscale x 4 x float> @llvm.aarch64.sme.readz.horiz.nxv4f32(i32, i32) |
| declare <vscale x 2 x double> @llvm.aarch64.sme.readz.horiz.nxv2f64(i32, i32) |
| declare <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32, i32) |
| declare <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32, i32) |
| declare <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32, i32) |
| declare <vscale x 2 x i64> @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32, i32) |
| declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32, i32) |
| declare <vscale x 8 x half> @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32, i32) |
| declare <vscale x 4 x float> @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32, i32) |
| declare <vscale x 2 x double> @llvm.aarch64.sme.readz.q.horiz.nxv2f64(i32, i32) |
| |
| |
| ; Declarations of the single-vector vertical readz intrinsics called by the tests above: |
| ; first the readz.vert variants, then the readz.q.vert variants, one per element type. |
| ; Each takes two i32 operands; the tests pass a constant tile number first and the slice index second. |
| declare <vscale x 16 x i8> @llvm.aarch64.sme.readz.vert.nxv16i8(i32, i32) |
| declare <vscale x 8 x i16> @llvm.aarch64.sme.readz.vert.nxv8i16(i32, i32) |
| declare <vscale x 4 x i32> @llvm.aarch64.sme.readz.vert.nxv4i32(i32, i32) |
| declare <vscale x 2 x i64> @llvm.aarch64.sme.readz.vert.nxv2i64(i32, i32) |
| declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.vert.nxv8bf16(i32, i32) |
| declare <vscale x 8 x half> @llvm.aarch64.sme.readz.vert.nxv8f16(i32, i32) |
| declare <vscale x 4 x float> @llvm.aarch64.sme.readz.vert.nxv4f32(i32, i32) |
| declare <vscale x 2 x double> @llvm.aarch64.sme.readz.vert.nxv2f64(i32, i32) |
| declare <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.vert.nxv16i8(i32, i32) |
| declare <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.vert.nxv8i16(i32, i32) |
| declare <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.vert.nxv4i32(i32, i32) |
| declare <vscale x 2 x i64> @llvm.aarch64.sme.readz.q.vert.nxv2i64(i32, i32) |
| declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.q.vert.nxv8bf16(i32, i32) |
| declare <vscale x 8 x half> @llvm.aarch64.sme.readz.q.vert.nxv8f16(i32, i32) |
| declare <vscale x 4 x float> @llvm.aarch64.sme.readz.q.vert.nxv4f32(i32, i32) |
| declare <vscale x 2 x double> @llvm.aarch64.sme.readz.q.vert.nxv2f64(i32, i32) |
| |
| ;MOVAZ (array to vector, Multi) |
| |
| |
| ;; |
| ; X2 |
| ;; |
| |
| ; Reads a two-vector group via llvm.aarch64.sme.readz.x2.nxv16i8 at %slice and again at %slice + 7. |
| ; The assertions expect vgx2 movaz on za.d with .d destination registers and slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 16 x i8>, <vscale x 16 x i8>} @test_readz_z8_i8_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z8_i8_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 0, vgx2] |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 7, vgx2] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.x2.nxv16i8(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.x2.nxv16i8(i32 %slice.max) |
| ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %res2 |
| } |
| |
| ; Reads a two-vector group via llvm.aarch64.sme.readz.x2.nxv8i16 at %slice and again at %slice + 7. |
| ; The assertions expect vgx2 movaz on za.d with .d destination registers and slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 8 x i16>, <vscale x 8 x i16>} @test_readz_z16_i16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z16_i16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 0, vgx2] |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 7, vgx2] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.x2.nxv8i16(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.x2.nxv8i16(i32 %slice.max) |
| ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %res2 |
| } |
| |
| ; Reads a two-vector group via llvm.aarch64.sme.readz.x2.nxv4i32 at %slice and again at %slice + 7. |
| ; The assertions expect vgx2 movaz on za.d with .d destination registers and slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 4 x i32>, <vscale x 4 x i32>} @test_readz_z32_i32_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z32_i32_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 0, vgx2] |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 7, vgx2] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.x2.nxv4i32(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.x2.nxv4i32(i32 %slice.max) |
| ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res2 |
| } |
| |
| ; Reads a two-vector group via llvm.aarch64.sme.readz.x2.nxv2i64 at %slice and again at %slice + 7. |
| ; The assertions expect vgx2 movaz on za.d with .d destination registers and slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 2 x i64>, <vscale x 2 x i64>} @test_readz_z64_i64_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z64_i64_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 0, vgx2] |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 7, vgx2] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.x2.nxv2i64(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.x2.nxv2i64(i32 %slice.max) |
| ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %res2 |
| } |
| |
| ; Reads a two-vector group via llvm.aarch64.sme.readz.x2.nxv8bf16 at %slice and again at %slice + 7. |
| ; The assertions expect vgx2 movaz on za.d with .d destination registers and slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @test_readz_z16_bf16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z16_bf16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 0, vgx2] |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 7, vgx2] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 %slice.max) |
| ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %res2 |
| } |
| |
| ; Reads a two-vector group via llvm.aarch64.sme.readz.x2.nxv8f16 at %slice and again at %slice + 7. |
| ; The assertions expect vgx2 movaz on za.d with .d destination registers and slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 8 x half>, <vscale x 8 x half>} @test_readz_z16_f16_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z16_f16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 0, vgx2] |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 7, vgx2] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.x2.nxv8f16(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.x2.nxv8f16(i32 %slice.max) |
| ret {<vscale x 8 x half>, <vscale x 8 x half>} %res2 |
| } |
| |
| ; Reads a two-vector group via llvm.aarch64.sme.readz.x2.nxv4f32 at %slice and again at %slice + 7. |
| ; The assertions expect vgx2 movaz on za.d with .d destination registers and slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 4 x float>, <vscale x 4 x float>} @test_readz_z32_f32_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z32_f32_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 0, vgx2] |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 7, vgx2] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.x2.nxv4f32(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.x2.nxv4f32(i32 %slice.max) |
| ret {<vscale x 4 x float>, <vscale x 4 x float>} %res2 |
| } |
| |
| ; Reads a two-vector group via llvm.aarch64.sme.readz.x2.nxv2f64 at %slice and again at %slice + 7. |
| ; The assertions expect vgx2 movaz on za.d with .d destination registers and slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 2 x double>, <vscale x 2 x double>} @test_readz_z64_f64_x2(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z64_f64_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 0, vgx2] |
| ; CHECK-NEXT: movaz { z0.d, z1.d }, za.d[w8, 7, vgx2] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.x2.nxv2f64(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.x2.nxv2f64(i32 %slice.max) |
| ret {<vscale x 2 x double>, <vscale x 2 x double>} %res2 |
| } |
| |
| ;; |
| ; X4 |
| ;; |
| |
| ; Reads a four-vector group via llvm.aarch64.sme.readz.x4.nxv16i8 at %slice and again at %slice + 7. |
| ; The assertions expect vgx4 movaz on za.d into z0-z3 (.d) with slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @test_readz_z8_i8_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z8_i8_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 0, vgx4] |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 7, vgx4] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.x4.nxv16i8(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.readz.x4.nxv16i8(i32 %slice.max) |
| ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %res2 |
| } |
| |
| ; Reads a four-vector group via llvm.aarch64.sme.readz.x4.nxv8i16 at %slice and again at %slice + 7. |
| ; The assertions expect vgx4 movaz on za.d into z0-z3 (.d) with slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @test_readz_z16_i16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z16_i16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 0, vgx4] |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 7, vgx4] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.x4.nxv8i16(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.aarch64.sme.readz.x4.nxv8i16(i32 %slice.max) |
| ret {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} %res2 |
| } |
| |
| ; Reads a four-vector group via llvm.aarch64.sme.readz.x4.nxv4i32 at %slice and again at %slice + 7. |
| ; The assertions expect vgx4 movaz on za.d into z0-z3 (.d) with slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @test_readz_z32_i32_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z32_i32_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 0, vgx4] |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 7, vgx4] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.x4.nxv4i32(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sme.readz.x4.nxv4i32(i32 %slice.max) |
| ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %res2 |
| } |
| |
| ; Reads a four-vector group via llvm.aarch64.sme.readz.x4.nxv2i64 at %slice and again at %slice + 7. |
| ; The assertions expect vgx4 movaz on za.d into z0-z3 (.d) with slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @test_readz_z64_i64_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z64_i64_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 0, vgx4] |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 7, vgx4] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.x4.nxv2i64(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.aarch64.sme.readz.x4.nxv2i64(i32 %slice.max) |
| ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %res2 |
| } |
| |
| ; Reads a four-vector group via llvm.aarch64.sme.readz.x4.nxv8bf16 at %slice and again at %slice + 7. |
| ; The assertions expect vgx4 movaz on za.d into z0-z3 (.d) with slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @test_readz_z16_bf16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z16_bf16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 0, vgx4] |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 7, vgx4] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 %slice.max) |
| ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %res2 |
| } |
| |
| ; Reads a four-vector group via llvm.aarch64.sme.readz.x4.nxv8f16 at %slice and again at %slice + 7. |
| ; The assertions expect vgx4 movaz on za.d into z0-z3 (.d) with slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @test_readz_z16_f16_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z16_f16_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 0, vgx4] |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 7, vgx4] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.x4.nxv8f16(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.aarch64.sme.readz.x4.nxv8f16(i32 %slice.max) |
| ret {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} %res2 |
| } |
| |
| ; Reads a four-vector group via llvm.aarch64.sme.readz.x4.nxv4f32 at %slice and again at %slice + 7. |
| ; The assertions expect vgx4 movaz on za.d into z0-z3 (.d) with slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @test_readz_z32_f32_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z32_f32_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 0, vgx4] |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 7, vgx4] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.x4.nxv4f32(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.readz.x4.nxv4f32(i32 %slice.max) |
| ret {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} %res2 |
| } |
| |
| ; Reads a four-vector group via llvm.aarch64.sme.readz.x4.nxv2f64 at %slice and again at %slice + 7. |
| ; The assertions expect vgx4 movaz on za.d into z0-z3 (.d) with slice immediates 0 and 7. |
| ; The second read (%res2) is returned. |
| define {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @test_readz_z64_f64_x4(i32 %slice) #0 { |
| ; CHECK-LABEL: test_readz_z64_f64_x4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov w8, w0 |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 0, vgx4] |
| ; CHECK-NEXT: movaz { z0.d - z3.d }, za.d[w8, 7, vgx4] |
| ; CHECK-NEXT: ret |
| %res = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.x4.nxv2f64(i32 %slice) |
| %slice.max = add i32 %slice, 7 |
| %res2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sme.readz.x4.nxv2f64(i32 %slice.max) |
| ret {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} %res2 |
| } |
| |
| ; Attribute group #0, referenced by every test function in this file section: enables the sme2p1 target feature. |
| attributes #0 = { "target-features"="+sme2p1" } |