| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK |
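| ; The _x tests below check that 'select(vctp(n), op(x, y), x)' patterns fold into a |
| ; single predicated MVE instruction (vpst + <op>t), with the inactive lanes keeping |
| ; their value from the destination register. |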
| |
| define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: add_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = add <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: add_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = add <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: add_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = add <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: sub_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = sub <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: sub_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = sub <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: sub_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = sub <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: mul_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = mul <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: mul_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = mul <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: mul_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = mul <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: and_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vandt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = and <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: and_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vandt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = and <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: and_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vandt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = and <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: or_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vorrt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = or <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: or_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vorrt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = or <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: or_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vorrt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = or <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: xor_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: veort q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = xor <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: xor_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: veort q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = xor <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: xor_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: veort q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = xor <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: shl_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = shl <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: shl_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = shl <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: shl_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = shl <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
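| ; Right shifts by a vector amount are lowered to a predicated vshl with a negated |
| ; shift amount (vneg followed by vshlt). |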
| define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: ashr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s32 q1, q1 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.s32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = ashr <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: ashr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s16 q1, q1 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.s16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = ashr <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: ashr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s8 q1, q1 |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.s8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = ashr <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: lshr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s32 q1, q1 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = lshr <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: lshr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s16 q1, q1 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = lshr <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: lshr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s8 q1, q1 |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = lshr <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
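| ; and/or with a complemented (xor -1) operand fold into the predicated vbic/vorn forms. |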
| define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: andnot_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vbict q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %a = and <4 x i32> %x, %y1 |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: andnot_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vbict q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| %a = and <8 x i16> %x, %y1 |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: andnot_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vbict q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %a = and <16 x i8> %x, %y1 |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: ornot_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vornt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %a = or <4 x i32> %x, %y1 |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: ornot_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vornt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| %a = or <8 x i16> %x, %y1 |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: ornot_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vornt q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %a = or <16 x i8> %x, %y1 |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
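| ; Floating-point add, sub and mul fold into predicated vaddt/vsubt/vmult.f32/.f16. |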
| define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fadd_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.f32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = fadd <4 x float> %x, %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fadd_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.f16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = fadd <8 x half> %x, %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fsub_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.f32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = fsub <4 x float> %x, %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fsub_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.f16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = fsub <8 x half> %x, %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fmul_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.f32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = fmul <4 x float> %x, %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fmul_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.f16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = fmul <8 x half> %x, %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
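| ; There is no MVE vector divide, so fdiv is expanded to scalar vdiv instructions and |
| ; only the final select becomes a predicated vmovt. |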
| define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fdiv_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vdiv.f32 s7, s3, s7 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vdiv.f32 s6, s2, s6 |
| ; CHECK-NEXT: vdiv.f32 s5, s1, s5 |
| ; CHECK-NEXT: vdiv.f32 s4, s0, s4 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmovt q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = fdiv <4 x float> %x, %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fdiv_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmovx.f16 s8, s4 |
| ; CHECK-NEXT: vmovx.f16 s10, s0 |
| ; CHECK-NEXT: vdiv.f16 s8, s10, s8 |
| ; CHECK-NEXT: vdiv.f16 s4, s0, s4 |
| ; CHECK-NEXT: vins.f16 s4, s8 |
| ; CHECK-NEXT: vmovx.f16 s8, s5 |
| ; CHECK-NEXT: vmovx.f16 s10, s1 |
| ; CHECK-NEXT: vdiv.f16 s5, s1, s5 |
| ; CHECK-NEXT: vdiv.f16 s8, s10, s8 |
| ; CHECK-NEXT: vmovx.f16 s10, s2 |
| ; CHECK-NEXT: vins.f16 s5, s8 |
| ; CHECK-NEXT: vmovx.f16 s8, s6 |
| ; CHECK-NEXT: vdiv.f16 s8, s10, s8 |
| ; CHECK-NEXT: vdiv.f16 s6, s2, s6 |
| ; CHECK-NEXT: vins.f16 s6, s8 |
| ; CHECK-NEXT: vmovx.f16 s8, s7 |
| ; CHECK-NEXT: vmovx.f16 s10, s3 |
| ; CHECK-NEXT: vdiv.f16 s7, s3, s7 |
| ; CHECK-NEXT: vdiv.f16 s8, s10, s8 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vins.f16 s7, s8 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmovt q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = fdiv <8 x half> %x, %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
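| ; Both the llvm.fma intrinsic and fast-math fmul+fadd chains fold into predicated vfma. |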
| define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) { |
| ; CHECK-LABEL: fmai_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vfmat.f32 q0, q1, q2 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x) |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) { |
| ; CHECK-LABEL: fmai_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vfmat.f16 q0, q1, q2 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x) |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fma_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) { |
| ; CHECK-LABEL: fma_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vfmat.f32 q0, q1, q2 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %m = fmul fast <4 x float> %y, %z |
| %a = fadd fast <4 x float> %m, %x |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fma_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) { |
| ; CHECK-LABEL: fma_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vfmat.f16 q0, q1, q2 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %m = fmul fast <8 x half> %y, %z |
| %a = fadd fast <8 x half> %m, %x |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
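| ; icmp+select min/max patterns fold into predicated vmin/vmax. |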
| define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_slt_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmint.s32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a1 = icmp slt <4 x i32> %x, %y |
| %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_slt_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmint.s16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a1 = icmp slt <8 x i16> %x, %y |
| %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_slt_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmint.s8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a1 = icmp slt <16 x i8> %x, %y |
| %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_sgt_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmaxt.s32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a1 = icmp sgt <4 x i32> %x, %y |
| %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_sgt_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmaxt.s16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a1 = icmp sgt <8 x i16> %x, %y |
| %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_sgt_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmaxt.s8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a1 = icmp sgt <16 x i8> %x, %y |
| %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_ult_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmint.u32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a1 = icmp ult <4 x i32> %x, %y |
| %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_ult_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmint.u16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a1 = icmp ult <8 x i16> %x, %y |
| %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_ult_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmint.u8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a1 = icmp ult <16 x i8> %x, %y |
| %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_ugt_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmaxt.u32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a1 = icmp ugt <4 x i32> %x, %y |
| %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_ugt_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmaxt.u16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a1 = icmp ugt <8 x i16> %x, %y |
| %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_ugt_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmaxt.u8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a1 = icmp ugt <16 x i8> %x, %y |
| %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
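| ; Fast-math fcmp+select patterns fold into predicated vminnm/vmaxnm. |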
| define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fcmp_fast_olt_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vminnmt.f32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a1 = fcmp fast olt <4 x float> %x, %y |
| %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fcmp_fast_olt_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vminnmt.f16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a1 = fcmp fast olt <8 x half> %x, %y |
| %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fcmp_fast_ogt_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmaxnmt.f32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a1 = fcmp fast ogt <4 x float> %x, %y |
| %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fcmp_fast_ogt_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmaxnmt.f16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a1 = fcmp fast ogt <8 x half> %x, %y |
| %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
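| ; Saturating add/sub intrinsics fold into predicated vqadd/vqsub. |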
| define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: sadd_sat_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.s32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: sadd_sat_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.s16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: sadd_sat_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.s8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: uadd_sat_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.u32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: uadd_sat_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.u16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: uadd_sat_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.u8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: ssub_sat_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.s32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: ssub_sat_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.s16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: ssub_sat_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.s8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: usub_sat_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.u32 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: usub_sat_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.u16 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: usub_sat_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.u8 q0, q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
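| ; The _qr variants splat a scalar operand and should use the vector-by-scalar forms, |
| ; e.g. vaddt.i32 q0, q0, r0. |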
| define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { |
| ; CHECK-LABEL: addqr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i32 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer |
| %a = add <4 x i32> %x, %ys |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { |
| ; CHECK-LABEL: addqr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i16 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer |
| %a = add <8 x i16> %x, %ys |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { |
| ; CHECK-LABEL: addqr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i8 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %i = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer |
| %a = add <16 x i8> %x, %ys |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { |
| ; CHECK-LABEL: subqr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i32 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer |
| %a = sub <4 x i32> %x, %ys |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { |
| ; CHECK-LABEL: subqr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i16 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer |
| %a = sub <8 x i16> %x, %ys |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { |
| ; CHECK-LABEL: subqr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i8 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %i = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer |
| %a = sub <16 x i8> %x, %ys |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { |
| ; CHECK-LABEL: mulqr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i32 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer |
| %a = mul <4 x i32> %x, %ys |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { |
| ; CHECK-LABEL: mulqr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i16 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer |
| %a = mul <8 x i16> %x, %ys |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { |
| ; CHECK-LABEL: mulqr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i8 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %i = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer |
| %a = mul <16 x i8> %x, %ys |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) { |
| ; CHECK-LABEL: faddqr_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmov r1, s4 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.f32 q0, q0, r1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x float> undef, float %y, i32 0 |
| %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer |
| %a = fadd <4 x float> %x, %ys |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) { |
| ; CHECK-LABEL: faddqr_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmov.f16 r1, s4 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.f16 q0, q0, r1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x half> undef, half %y, i32 0 |
| %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer |
| %a = fadd <8 x half> %x, %ys |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) { |
| ; CHECK-LABEL: fsubqr_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmov r1, s4 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.f32 q0, q0, r1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x float> undef, float %y, i32 0 |
| %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer |
| %a = fsub <4 x float> %x, %ys |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x half> %x, half %y, i32 %n) { |
| ; CHECK-LABEL: fsubqr_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmov.f16 r1, s4 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.f16 q0, q0, r1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x half> undef, half %y, i32 0 |
| %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer |
| %a = fsub <8 x half> %x, %ys |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> %x, float %y, i32 %n) { |
| ; CHECK-LABEL: fmulqr_v4f32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmov r1, s4 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.f32 q0, q0, r1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x float> undef, float %y, i32 0 |
| %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer |
| %a = fmul <4 x float> %x, %ys |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) { |
| ; CHECK-LABEL: fmulqr_v8f16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmov.f16 r1, s4 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.f16 q0, q0, r1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x half> undef, half %y, i32 0 |
| %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer |
| %a = fmul <8 x half> %x, %ys |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { |
| ; CHECK-LABEL: sadd_satqr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.s32 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer |
| %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { |
| ; CHECK-LABEL: sadd_satqr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.s16 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer |
| %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { |
| ; CHECK-LABEL: sadd_satqr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.s8 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %i = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer |
| %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { |
| ; CHECK-LABEL: uadd_satqr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.u32 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer |
| %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { |
| ; CHECK-LABEL: uadd_satqr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.u16 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer |
| %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { |
| ; CHECK-LABEL: uadd_satqr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqaddt.u8 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %i = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer |
| %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { |
| ; CHECK-LABEL: ssub_satqr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.s32 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer |
| %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { |
| ; CHECK-LABEL: ssub_satqr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.s16 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer |
| %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { |
| ; CHECK-LABEL: ssub_satqr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.s8 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %i = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer |
| %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { |
| ; CHECK-LABEL: usub_satqr_v4i32_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.u32 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %i = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer |
| %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { |
| ; CHECK-LABEL: usub_satqr_v8i16_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.u16 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %i = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer |
| %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { |
| ; CHECK-LABEL: usub_satqr_v16i8_x: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vqsubt.u8 q0, q0, r0 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %i = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer |
| %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x |
| ret <16 x i8> %b |
| } |
| |
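| ; The _y variants select %y for the inactive lanes, so the predicated op writes into |
| ; q1 and an extra vmov moves the result back into q0. |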
| define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: add_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i32 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = add <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: add_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i16 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = add <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: add_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.i8 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = add <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: sub_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i32 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = sub <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: sub_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i16 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = sub <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: sub_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.i8 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = sub <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: mul_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i32 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = mul <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: mul_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i16 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = mul <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: mul_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.i8 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = mul <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: and_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vandt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = and <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: and_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vandt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = and <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: and_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vandt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = and <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: or_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vorrt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = or <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: or_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vorrt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = or <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: or_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vorrt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = or <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: xor_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: veort q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = xor <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: xor_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: veort q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = xor <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: xor_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: veort q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = xor <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: shl_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u32 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = shl <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: shl_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u16 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = shl <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: shl_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u8 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = shl <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
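| ; MVE has no vector-by-vector right-shift instruction, so the ashr and |
| ; lshr tests below are expected to negate the shift amount and use a |
| ; predicated vshlt with signed/unsigned element types respectively. |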
| define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: ashr_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s32 q2, q1 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.s32 q1, q0, q2 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = ashr <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: ashr_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s16 q2, q1 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.s16 q1, q0, q2 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = ashr <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: ashr_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s8 q2, q1 |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.s8 q1, q0, q2 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = ashr <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: lshr_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s32 q2, q1 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u32 q1, q0, q2 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = lshr <4 x i32> %x, %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: lshr_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s16 q2, q1 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u16 q1, q0, q2 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = lshr <8 x i16> %x, %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: lshr_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vneg.s8 q2, q1 |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vshlt.u8 q1, q0, q2 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %a = lshr <16 x i8> %x, %y |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
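| ; The and-not and or-not patterns (x op ~y) are expected to fold into the |
| ; predicated vbict and vornt instructions. |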
| define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: andnot_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vbict q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %a = and <4 x i32> %x, %y1 |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: andnot_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vbict q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| %a = and <8 x i16> %x, %y1 |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: andnot_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vbict q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %a = and <16 x i8> %x, %y1 |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: ornot_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vornt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %a = or <4 x i32> %x, %y1 |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: ornot_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vornt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| %a = or <8 x i16> %x, %y1 |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: ornot_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.8 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vornt q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) |
| %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %a = or <16 x i8> %x, %y1 |
| %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y |
| ret <16 x i8> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fadd_v4f32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.f32 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = fadd <4 x float> %x, %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fadd_v8f16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vaddt.f16 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = fadd <8 x half> %x, %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fsub_v4f32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.f32 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = fsub <4 x float> %x, %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fsub_v8f16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vsubt.f16 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = fsub <8 x half> %x, %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fmul_v4f32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.f32 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = fmul <4 x float> %x, %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fmul_v8f16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmult.f16 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = fmul <8 x half> %x, %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y |
| ret <8 x half> %b |
| } |
| |
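| ; There is no MVE vector divide, so fdiv is scalarised into per-lane vdiv |
| ; instructions and the active lanes are then merged with a predicated vmovt. |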
| define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { |
| ; CHECK-LABEL: fdiv_v4f32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vdiv.f32 s3, s3, s7 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vdiv.f32 s2, s2, s6 |
| ; CHECK-NEXT: vdiv.f32 s1, s1, s5 |
| ; CHECK-NEXT: vdiv.f32 s0, s0, s4 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmovt q1, q0 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = fdiv <4 x float> %x, %y |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { |
| ; CHECK-LABEL: fdiv_v8f16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmovx.f16 s10, s0 |
| ; CHECK-NEXT: vmovx.f16 s8, s4 |
| ; CHECK-NEXT: vdiv.f16 s8, s10, s8 |
| ; CHECK-NEXT: vdiv.f16 s0, s0, s4 |
| ; CHECK-NEXT: vins.f16 s0, s8 |
| ; CHECK-NEXT: vmovx.f16 s10, s1 |
| ; CHECK-NEXT: vmovx.f16 s8, s5 |
| ; CHECK-NEXT: vdiv.f16 s1, s1, s5 |
| ; CHECK-NEXT: vdiv.f16 s8, s10, s8 |
| ; CHECK-NEXT: vmovx.f16 s10, s2 |
| ; CHECK-NEXT: vins.f16 s1, s8 |
| ; CHECK-NEXT: vmovx.f16 s8, s6 |
| ; CHECK-NEXT: vdiv.f16 s8, s10, s8 |
| ; CHECK-NEXT: vdiv.f16 s2, s2, s6 |
| ; CHECK-NEXT: vins.f16 s2, s8 |
| ; CHECK-NEXT: vmovx.f16 s10, s3 |
| ; CHECK-NEXT: vmovx.f16 s8, s7 |
| ; CHECK-NEXT: vdiv.f16 s3, s3, s7 |
| ; CHECK-NEXT: vdiv.f16 s8, s10, s8 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vins.f16 s3, s8 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmovt q1, q0 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = fdiv <8 x half> %x, %y |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y |
| ret <8 x half> %b |
| } |
| |
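| ; For the fma tests the inactive lanes come from %y, which is not the |
| ; accumulator, so the multiply-add cannot simply be folded into a |
| ; predicated vfmat; the full vfma result is merged into q1 with vmovt. |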
| define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) { |
| ; CHECK-LABEL: fmai_v4f32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vfma.f32 q0, q1, q2 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmovt q1, q0 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x) |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) { |
| ; CHECK-LABEL: fmai_v8f16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vfma.f16 q0, q1, q2 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmovt q1, q0 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x) |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y |
| ret <8 x half> %b |
| } |
| |
| define arm_aapcs_vfpcc <4 x float> @fma_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) { |
| ; CHECK-LABEL: fma_v4f32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vfma.f32 q0, q1, q2 |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmovt q1, q0 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %m = fmul fast <4 x float> %y, %z |
| %a = fadd fast <4 x float> %m, %x |
| %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y |
| ret <4 x float> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x half> @fma_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) { |
| ; CHECK-LABEL: fma_v8f16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vfma.f16 q0, q1, q2 |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmovt q1, q0 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %m = fmul fast <8 x half> %y, %z |
| %a = fadd fast <8 x half> %m, %x |
| %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y |
| ret <8 x half> %b |
| } |
| |
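| ; The icmp slt + select (min) idiom is expected to fold into a single |
| ; predicated vmint instruction. |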
| define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_slt_v4i32_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.32 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmint.s32 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) |
| %a1 = icmp slt <4 x i32> %x, %y |
| %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y |
| %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y |
| ret <4 x i32> %b |
| } |
| |
| define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_slt_v8i16_y: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vctp.16 r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vmint.s16 q1, q0, q1 |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) |
| %a1 = icmp slt <8 x i16> %x, %y |
| %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y |
| %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y |
| ret <8 x i16> %b |
| } |
| |
| define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { |
| ; CHECK-LABEL: icmp_slt_v16i8_y: |
| ; CHECK: @ %bb.0: @ %entry |
|