| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub |
| ; RUN: llc -mattr=+sve < %s | FileCheck %s |
| ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s |
| |
| target triple = "aarch64-unknown-linux-gnu" |
| |
| ; |
| ; OEQ |
| ; |
| |
| ; fcmp oeq on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmeq on .s lanes governed by ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_oeq_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_oeq_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp oeq <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp oeq on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmeq on .s lanes governed by ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_oeq_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_oeq_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp oeq <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp oeq on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmeq on each half, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_oeq_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_oeq_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmeq p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp oeq <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; OGT |
| ; |
| |
| ; fcmp ogt on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmgt (z0, z1) on .s lanes under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_ogt_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ogt_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp ogt <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp ogt on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmgt (z0, z1) on .s lanes under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_ogt_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ogt_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp ogt <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp ogt on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmgt on each half, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_ogt_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ogt_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp ogt <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; OGE |
| ; |
| |
| ; fcmp oge on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmge (z0, z1) on .s lanes under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_oge_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_oge_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp oge <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp oge on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmge (z0, z1) on .s lanes under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_oge_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_oge_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp oge <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp oge on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmge on each half, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_oge_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_oge_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp oge <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; OLT |
| ; |
| |
| ; fcmp olt on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmgt with the operands swapped (z1, z0) on |
| ; .s lanes under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_olt_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_olt_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: ret |
| %res = fcmp olt <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp olt on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmgt with the operands swapped (z1, z0) on |
| ; .s lanes under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_olt_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_olt_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: ret |
| %res = fcmp olt <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp olt on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmgt on each half with the %b half as the first source |
| ; operand, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_olt_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_olt_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z0.h |
| ; CHECK-NEXT: uunpkhi z3.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp olt <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; OLE |
| ; |
| |
| ; fcmp ole on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmge with the operands swapped (z1, z0) on |
| ; .s lanes under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_ole_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ole_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: ret |
| %res = fcmp ole <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp ole on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmge with the operands swapped (z1, z0) on |
| ; .s lanes under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_ole_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ole_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: ret |
| %res = fcmp ole <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp ole on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmge on each half with the %b half as the first source |
| ; operand, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_ole_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ole_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z0.h |
| ; CHECK-NEXT: uunpkhi z3.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp ole <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; ONE |
| ; |
| |
| ; fcmp one on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmgt in both operand orders under ptrue p0.d, |
| ; and merge the two predicate results with sel. |
| define <vscale x 2 x i1> @fcmp_one_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_one_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp one <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp one on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmgt in both operand orders under ptrue p0.s, |
| ; and merge the two predicate results with sel. |
| define <vscale x 4 x i1> @fcmp_one_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_one_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp one <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp one on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, and issue fcmgt in both operand orders for each half. The pair of |
| ; results for the high half is merged with a predicated mov, the pair for the |
| ; low half with sel, and the two merged predicates are joined with uzp1. |
| define <vscale x 8 x i1> @fcmp_one_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_one_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z0.h |
| ; CHECK-NEXT: uunpkhi z3.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmgt p2.s, p0/z, z2.s, z3.s |
| ; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: mov p1.b, p2/m, p2.b |
| ; CHECK-NEXT: sel p0.b, p0, p0.b, p3.b |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp one <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; ORD |
| ; |
| |
| ; fcmp ord on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmuo under ptrue p0.d, and invert the result |
| ; with a predicated not. |
| define <vscale x 2 x i1> @fcmp_ord_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ord_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ord <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp ord on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmuo under ptrue p0.s, and invert the result |
| ; with a predicated not. |
| define <vscale x 4 x i1> @fcmp_ord_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ord_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ord <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp ord on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmuo on each half, invert each result with a predicated not, |
| ; and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_ord_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ord_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmuo p2.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p1.b, p0/z, p1.b |
| ; CHECK-NEXT: not p0.b, p0/z, p2.b |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp ord <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; UEQ |
| ; |
| |
| ; fcmp ueq on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmuo and fcmeq under ptrue p0.d, and merge the |
| ; two predicate results with sel. |
| define <vscale x 2 x i1> @fcmp_ueq_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ueq_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ueq <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp ueq on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmuo and fcmeq under ptrue p0.s, and merge the |
| ; two predicate results with sel. |
| define <vscale x 4 x i1> @fcmp_ueq_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ueq_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ueq <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp ueq on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, and issue fcmuo plus fcmeq for each half. The pair of results for |
| ; the high half is merged with a predicated mov, the pair for the low half with |
| ; sel, and the two merged predicates are joined with uzp1. |
| define <vscale x 8 x i1> @fcmp_ueq_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ueq_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmeq p2.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmuo p3.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: mov p1.b, p2/m, p2.b |
| ; CHECK-NEXT: sel p0.b, p0, p0.b, p3.b |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp ueq <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; UGT |
| ; |
| |
| ; fcmp ugt on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmge with the operands swapped (z1, z0) under |
| ; ptrue p0.d, and invert the result with a predicated not. |
| define <vscale x 2 x i1> @fcmp_ugt_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ugt_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ugt <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp ugt on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmge with the operands swapped (z1, z0) under |
| ; ptrue p0.s, and invert the result with a predicated not. |
| define <vscale x 4 x i1> @fcmp_ugt_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ugt_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ugt <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp ugt on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmge on each half with the %b half as the first source |
| ; operand, invert each result with a predicated not, and join with uzp1. |
| define <vscale x 8 x i1> @fcmp_ugt_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ugt_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z0.h |
| ; CHECK-NEXT: uunpkhi z3.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: not p1.b, p0/z, p1.b |
| ; CHECK-NEXT: not p0.b, p0/z, p2.b |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp ugt <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; UGE |
| ; |
| |
| ; fcmp uge on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmgt with the operands swapped (z1, z0) under |
| ; ptrue p0.d, and invert the result with a predicated not. |
| define <vscale x 2 x i1> @fcmp_uge_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_uge_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp uge <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp uge on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmgt with the operands swapped (z1, z0) under |
| ; ptrue p0.s, and invert the result with a predicated not. |
| define <vscale x 4 x i1> @fcmp_uge_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_uge_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp uge <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp uge on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmgt on each half with the %b half as the first source |
| ; operand, invert each result with a predicated not, and join with uzp1. |
| define <vscale x 8 x i1> @fcmp_uge_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_uge_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z0.h |
| ; CHECK-NEXT: uunpkhi z3.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: not p1.b, p0/z, p1.b |
| ; CHECK-NEXT: not p0.b, p0/z, p2.b |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp uge <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; ULT |
| ; |
| |
| ; fcmp ult on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmge (z0, z1) under ptrue p0.d, and invert the |
| ; result with a predicated not. |
| define <vscale x 2 x i1> @fcmp_ult_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ult_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ult <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp ult on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmge (z0, z1) under ptrue p0.s, and invert the |
| ; result with a predicated not. |
| define <vscale x 4 x i1> @fcmp_ult_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ult_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ult <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp ult on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmge on each half, invert each result with a predicated not, |
| ; and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_ult_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ult_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p1.b, p0/z, p1.b |
| ; CHECK-NEXT: not p0.b, p0/z, p2.b |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp ult <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; ULE |
| ; |
| |
| ; fcmp ule on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmgt (z0, z1) under ptrue p0.d, and invert the |
| ; result with a predicated not. |
| define <vscale x 2 x i1> @fcmp_ule_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ule_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ule <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp ule on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, issue fcmgt (z0, z1) under ptrue p0.s, and invert the |
| ; result with a predicated not. |
| define <vscale x 4 x i1> @fcmp_ule_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ule_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p0.b, p0/z, p1.b |
| ; CHECK-NEXT: ret |
| %res = fcmp ule <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp ule on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmgt on each half, invert each result with a predicated not, |
| ; and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_ule_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ule_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: not p1.b, p0/z, p1.b |
| ; CHECK-NEXT: not p0.b, p0/z, p2.b |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp ule <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; UNE |
| ; |
| |
| ; fcmp une on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmne on .s lanes governed by ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_une_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_une_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp une <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp une on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmne on .s lanes governed by ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_une_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_une_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp une <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp une on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmne on each half, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_une_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_une_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmne p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp une <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; UNO |
| ; |
| |
| ; fcmp uno on <vscale x 2 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmuo on .s lanes governed by ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_uno_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_uno_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp uno <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp uno on <vscale x 4 x bfloat>. Expected lowering: shift both operands left |
| ; by 16 in 32-bit lanes, then one fcmuo on .s lanes governed by ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_uno_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_uno_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp uno <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp uno on <vscale x 8 x bfloat>. Expected lowering: unpack the high and low |
| ; halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every half |
| ; left by 16, run fcmuo on each half, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_uno_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_uno_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp uno <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; EQ (fcmp fast oeq) |
| ; |
| |
| ; fcmp fast oeq on <vscale x 2 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmeq on .s lanes under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_eq_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_eq_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast oeq <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp fast oeq on <vscale x 4 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmeq on .s lanes under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_eq_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_eq_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast oeq <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp fast oeq on <vscale x 8 x bfloat>. Expected lowering: unpack the high and |
| ; low halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every |
| ; half left by 16, run fcmeq on each half, and join the predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_eq_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_eq_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmeq p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp fast oeq <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; GT (fcmp fast ogt) |
| ; |
| |
| ; fcmp fast ogt on <vscale x 2 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmgt (z0, z1) under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_gt_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_gt_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast ogt <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp fast ogt on <vscale x 4 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmgt (z0, z1) under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_gt_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_gt_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast ogt <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp fast ogt on <vscale x 8 x bfloat>. Expected lowering: unpack the high and |
| ; low halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every |
| ; half left by 16, run fcmgt on each half, and join the predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_gt_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_gt_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp fast ogt <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; GE (fcmp fast oge) |
| ; |
| |
| ; fcmp fast oge on <vscale x 2 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmge (z0, z1) under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_ge_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ge_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast oge <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp fast oge on <vscale x 4 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmge (z0, z1) under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_ge_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ge_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast oge <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp fast oge on <vscale x 8 x bfloat>. Expected lowering: unpack the high and |
| ; low halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every |
| ; half left by 16, run fcmge on each half, and join the predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_ge_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ge_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp fast oge <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; LT (fcmp fast olt) |
| ; |
| |
| ; fcmp fast olt on <vscale x 2 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmgt with the operands swapped (z1, z0) |
| ; under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_lt_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_lt_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast olt <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp fast olt on <vscale x 4 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmgt with the operands swapped (z1, z0) |
| ; under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_lt_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_lt_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast olt <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp fast olt on <vscale x 8 x bfloat>. Expected lowering: unpack the high and |
| ; low halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every |
| ; half left by 16, run fcmgt on each half with the %b half as the first source |
| ; operand, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_lt_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_lt_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z0.h |
| ; CHECK-NEXT: uunpkhi z3.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp fast olt <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; LE (fcmp fast ole) |
| ; |
| |
| ; fcmp fast ole on <vscale x 2 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmge with the operands swapped (z1, z0) |
| ; under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_le_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_le_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast ole <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp fast ole on <vscale x 4 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then one fcmge with the operands swapped (z1, z0) |
| ; under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_le_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_le_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast ole <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp fast ole on <vscale x 8 x bfloat>. Expected lowering: unpack the high and |
| ; low halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every |
| ; half left by 16, run fcmge on each half with the %b half as the first source |
| ; operand, and join the two predicates with uzp1. |
| define <vscale x 8 x i1> @fcmp_le_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_le_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z0.h |
| ; CHECK-NEXT: uunpkhi z3.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp fast ole <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |
| |
| ; |
| ; NE (fcmp fast one) |
| ; |
| |
| ; fcmp fast one on <vscale x 2 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then a single fcmne on .s lanes under ptrue p0.d. |
| define <vscale x 2 x i1> @fcmp_ne_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ne_nxv2bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast one <vscale x 2 x bfloat> %a, %b |
| ret <vscale x 2 x i1> %res |
| } |
| |
| ; fcmp fast one on <vscale x 4 x bfloat>. Expected lowering: shift both operands |
| ; left by 16 in 32-bit lanes, then a single fcmne on .s lanes under ptrue p0.s. |
| define <vscale x 4 x i1> @fcmp_ne_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ne_nxv4bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: ret |
| %res = fcmp fast one <vscale x 4 x bfloat> %a, %b |
| ret <vscale x 4 x i1> %res |
| } |
| |
| ; fcmp fast one on <vscale x 8 x bfloat>. Expected lowering: unpack the high and |
| ; low halves of each operand into 32-bit lanes (uunpkhi/uunpklo), shift every |
| ; half left by 16, run a single fcmne on each half, and join the two predicates |
| ; with uzp1. |
| define <vscale x 8 x i1> @fcmp_ne_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| ; CHECK-LABEL: fcmp_ne_nxv8bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uunpkhi z2.s, z1.h |
| ; CHECK-NEXT: uunpkhi z3.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: lsl z2.s, z2.s, #16 |
| ; CHECK-NEXT: lsl z3.s, z3.s, #16 |
| ; CHECK-NEXT: lsl z1.s, z1.s, #16 |
| ; CHECK-NEXT: lsl z0.s, z0.s, #16 |
| ; CHECK-NEXT: fcmne p1.s, p0/z, z3.s, z2.s |
| ; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h |
| ; CHECK-NEXT: ret |
| %res = fcmp fast one <vscale x 8 x bfloat> %a, %b |
| ret <vscale x 8 x i1> %res |
| } |