| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s |
| |
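; Saturating add of lane 0: check that the scalar SQADD/UQADD forms are
; selected on the FP/SIMD registers, with a single fmov to move the result
; to a GPR.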
| define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { |
| ; CHECK-LABEL: qadds: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqadd s0, s0, s1 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <4 x i32> %b, i32 0 |
| %vecext1 = extractelement <4 x i32> %c, i32 0 |
| %vqadd.i = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind |
| ret i32 %vqadd.i |
| } |
| |
| define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { |
| ; CHECK-LABEL: qaddd: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqadd d0, d0, d1 |
| ; CHECK-NEXT: fmov x0, d0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vecext1 = extractelement <2 x i64> %c, i32 0 |
| %vqadd.i = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind |
| ret i64 %vqadd.i |
| } |
| |
| define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { |
| ; CHECK-LABEL: uqadds: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uqadd s0, s0, s1 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <4 x i32> %b, i32 0 |
| %vecext1 = extractelement <4 x i32> %c, i32 0 |
| %vqadd.i = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind |
| ret i32 %vqadd.i |
| } |
| |
| define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { |
| ; CHECK-LABEL: uqaddd: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uqadd d0, d0, d1 |
| ; CHECK-NEXT: fmov x0, d0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vecext1 = extractelement <2 x i64> %c, i32 0 |
| %vqadd.i = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind |
| ret i64 %vqadd.i |
| } |
| |
| declare i64 @llvm.aarch64.neon.uqadd.i64(i64, i64) nounwind readnone |
| declare i32 @llvm.aarch64.neon.uqadd.i32(i32, i32) nounwind readnone |
| declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) nounwind readnone |
| declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) nounwind readnone |
| |
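; Saturating subtract of lane 0: the scalar SQSUB/UQSUB forms should be used.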
| define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { |
| ; CHECK-LABEL: qsubs: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqsub s0, s0, s1 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <4 x i32> %b, i32 0 |
| %vecext1 = extractelement <4 x i32> %c, i32 0 |
| %vqsub.i = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind |
| ret i32 %vqsub.i |
| } |
| |
| define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { |
| ; CHECK-LABEL: qsubd: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqsub d0, d0, d1 |
| ; CHECK-NEXT: fmov x0, d0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vecext1 = extractelement <2 x i64> %c, i32 0 |
| %vqsub.i = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind |
| ret i64 %vqsub.i |
| } |
| |
| define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { |
| ; CHECK-LABEL: uqsubs: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uqsub s0, s0, s1 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <4 x i32> %b, i32 0 |
| %vecext1 = extractelement <4 x i32> %c, i32 0 |
| %vqsub.i = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind |
| ret i32 %vqsub.i |
| } |
| |
| define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { |
| ; CHECK-LABEL: uqsubd: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uqsub d0, d0, d1 |
| ; CHECK-NEXT: fmov x0, d0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vecext1 = extractelement <2 x i64> %c, i32 0 |
| %vqsub.i = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind |
| ret i64 %vqsub.i |
| } |
| |
| declare i64 @llvm.aarch64.neon.uqsub.i64(i64, i64) nounwind readnone |
| declare i32 @llvm.aarch64.neon.uqsub.i32(i32, i32) nounwind readnone |
| declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) nounwind readnone |
| declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) nounwind readnone |
| |
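; Saturating absolute value and negate of lane 0: scalar SQABS/SQNEG.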
| define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone { |
| ; CHECK-LABEL: qabss: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqabs s0, s0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <4 x i32> %b, i32 0 |
| %vqabs.i = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %vecext) nounwind |
| ret i32 %vqabs.i |
| } |
| |
| define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { |
| ; CHECK-LABEL: qabsd: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqabs d0, d0 |
| ; CHECK-NEXT: fmov x0, d0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vqabs.i = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %vecext) nounwind |
| ret i64 %vqabs.i |
| } |
| |
| define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone { |
| ; CHECK-LABEL: qnegs: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqneg s0, s0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <4 x i32> %b, i32 0 |
| %vqneg.i = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %vecext) nounwind |
| ret i32 %vqneg.i |
| } |
| |
| define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { |
| ; CHECK-LABEL: qnegd: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqneg d0, d0 |
| ; CHECK-NEXT: fmov x0, d0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vqneg.i = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %vecext) nounwind |
| ret i64 %vqneg.i |
| } |
| |
| declare i64 @llvm.aarch64.neon.sqneg.i64(i64) nounwind readnone |
| declare i32 @llvm.aarch64.neon.sqneg.i32(i32) nounwind readnone |
| declare i64 @llvm.aarch64.neon.sqabs.i64(i64) nounwind readnone |
| declare i32 @llvm.aarch64.neon.sqabs.i32(i32) nounwind readnone |
| |
| |
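; Saturating narrow of lane 0: scalar SQXTUN/SQXTN/UQXTN from a D register
; to an S register.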
| define i32 @vqmovund(<2 x i64> %b) nounwind readnone { |
| ; CHECK-LABEL: vqmovund: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqxtun s0, d0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vqmovun.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind |
| ret i32 %vqmovun.i |
| } |
| |
| define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone { |
| ; CHECK-LABEL: vqmovnd_s: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sqxtn s0, d0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind |
| ret i32 %vqmovn.i |
| } |
| |
| define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone { |
| ; CHECK-LABEL: vqmovnd_u: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uqxtn s0, d0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %vecext = extractelement <2 x i64> %b, i32 0 |
| %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind |
| ret i32 %vqmovn.i |
| } |
| |
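; Narrowing a lane other than 0 should still use the scalar uqxtn; the lane
; is first moved down to element 0.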
| define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) { |
| ; CHECK-LABEL: uqxtn_ext: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov v0.d[0], v3.d[1] |
| ; CHECK-NEXT: uqxtn s0, d0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| entry: |
| %e1 = extractelement <2 x i64> %e, i64 1 |
| %r = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %e1) |
| ret i32 %r |
| } |
| |
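; The narrowed scalar result should be inserted directly into the destination
; vector lane without moving it back to a GPR.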
| define <4 x i32> @sqxtn_ins(<4 x i32> noundef %a, i64 %c) { |
| ; CHECK-LABEL: sqxtn_ins: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmov d1, x0 |
| ; CHECK-NEXT: sqxtn s1, d1 |
| ; CHECK-NEXT: mov v0.s[3], v1.s[0] |
| ; CHECK-NEXT: ret |
| entry: |
| %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %c) |
| %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3 |
| ret <4 x i32> %vecins |
| } |
| |
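; Combined extract of lane 1, saturating narrow, and insert into lane 3.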
| define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) { |
| ; CHECK-LABEL: sqxtun_insext: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov v1.d[0], v1.d[1] |
| ; CHECK-NEXT: sqxtun s1, d1 |
| ; CHECK-NEXT: mov v0.s[3], v1.s[0] |
| ; CHECK-NEXT: ret |
| entry: |
| %c = extractelement <2 x i64> %e, i64 1 |
| %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %c) |
| %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3 |
| ret <4 x i32> %vecins |
| } |
| |
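; The saddlv result should be fed straight into the scalar sqxtn and the
; narrowed value inserted into the destination vector.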
| define <4 x i32> @saddluse(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) { |
| ; CHECK-LABEL: saddluse: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: saddlv d1, v1.4s |
| ; CHECK-NEXT: sqxtn s1, d1 |
| ; CHECK-NEXT: mov v0.s[1], v1.s[0] |
| ; CHECK-NEXT: ret |
| entry: |
| %vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %b) |
| %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vaddlvq_s32.i) |
| %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 1 |
| ret <4 x i32> %vecins |
| } |
| |
| declare i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone |
| declare i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone |
| declare i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone |
| declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>) |