| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc --mtriple=aarch64 < %s | FileCheck %s |
| |
| ; Test late AArch64 folding of ordered FCMP+CSEL patterns to fmaxnm/fminnm. |
| ; Pattern: clamp of a product -- max(mul, 0.0) then min(result, 1.0), both written as nsz ordered compare + select. |
| ; The inner fcmp/fcsel (ogt against 0.0) folds to fmaxnm; the outer one (olt against 1.0) does NOT fold as its operands |
| ; cannot be proven as non-sNaN, so it stays as fcmp + fcsel with condition mi. |
| define float @max_after_fmul(float %a, float %b) { |
| ; CHECK-LABEL: max_after_fmul: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi d2, #0000000000000000 |
| ; CHECK-NEXT: fmul s0, s0, s1 |
| ; CHECK-NEXT: fmov s1, #1.00000000 |
| ; CHECK-NEXT: fmaxnm s0, s0, s2 |
| ; CHECK-NEXT: fcmp s0, s1 |
| ; CHECK-NEXT: fcsel s0, s0, s1, mi |
| ; CHECK-NEXT: ret |
| entry: |
| %mul = fmul float %a, %b |
| %cmp1 = fcmp nsz ogt float %mul, 0.000000e+00 |
| %max = select i1 %cmp1, float %mul, float 0.000000e+00 |
| %cmp2 = fcmp nsz olt float %max, 1.000000e+00 |
| %min = select i1 %cmp2, float %max, float 1.000000e+00 |
| ret float %min |
| } |
| |
| ; fadd input: max(a + b, 0.0) written as an nsz ordered-greater-than compare + select; folds to a single fmaxnm. |
| define float @max_after_fadd(float %a, float %b) { |
| ; CHECK-LABEL: max_after_fadd: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi d2, #0000000000000000 |
| ; CHECK-NEXT: fadd s0, s0, s1 |
| ; CHECK-NEXT: fmaxnm s0, s0, s2 |
| ; CHECK-NEXT: ret |
| entry: |
| %add = fadd float %a, %b |
| %cmp = fcmp nsz ogt float %add, 0.000000e+00 |
| %max = select i1 %cmp, float %add, float 0.000000e+00 |
| ret float %max |
| } |
| |
| ; Double precision: the same max(mul, 0.0) compare + select pattern on double; folds to fmaxnm on d registers. |
| define double @max_after_fmul_f64(double %a, double %b) { |
| ; CHECK-LABEL: max_after_fmul_f64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi d2, #0000000000000000 |
| ; CHECK-NEXT: fmul d0, d0, d1 |
| ; CHECK-NEXT: fmaxnm d0, d0, d2 |
| ; CHECK-NEXT: ret |
| entry: |
| %mul = fmul double %a, %b |
| %cmp = fcmp nsz ogt double %mul, 0.000000e+00 |
| %max = select i1 %cmp, double %mul, double 0.000000e+00 |
| ret double %max |
| } |
| |
| ; Function argument as the compared value: isKnownNeverSNaN returns false for it, so the fold is skipped and fcmp + fcsel gt remain. |
| define float @max_arg(float %x) { |
| ; CHECK-LABEL: max_arg: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi d1, #0000000000000000 |
| ; CHECK-NEXT: fcmp s0, #0.0 |
| ; CHECK-NEXT: fcsel s0, s0, s1, gt |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp nsz ogt float %x, 0.000000e+00 |
| %max = select i1 %cmp, float %x, float 0.000000e+00 |
| ret float %max |
| } |
| |
| ; fmin with a function argument: isKnownNeverSNaN returns false for it, so the fold is skipped and fcmp + fcsel mi remain. |
| define float @min_arg(float %x) { |
| ; CHECK-LABEL: min_arg: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmov s1, #1.00000000 |
| ; CHECK-NEXT: fcmp s0, s1 |
| ; CHECK-NEXT: fcsel s0, s0, s1, mi |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp nsz olt float %x, 1.000000e+00 |
| %min = select i1 %cmp, float %x, float 1.000000e+00 |
| ret float %min |
| } |
| |
| ; fmin with arithmetic input: min(a * b, 1.0) written as an nsz ordered-less-than compare + select; folds to fminnm. |
| define float @min_after_fmul(float %a, float %b) { |
| ; CHECK-LABEL: min_after_fmul: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmul s0, s0, s1 |
| ; CHECK-NEXT: fmov s1, #1.00000000 |
| ; CHECK-NEXT: fminnm s0, s0, s1 |
| ; CHECK-NEXT: ret |
| entry: |
| %mul = fmul float %a, %b |
| %cmp = fcmp nsz olt float %mul, 1.000000e+00 |
| %min = select i1 %cmp, float %mul, float 1.000000e+00 |
| ret float %min |
| } |
| |
| ; Negative: no nsz flag - cannot transform (signed zero mismatch). NOTE(review): the input is a bare argument, and max_arg shows that case is already rejected even with nsz (identical expected output), so this does not isolate the nsz requirement - consider an fmul/fadd input. |
| define float @no_nsz(float %x) { |
| ; CHECK-LABEL: no_nsz: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi d1, #0000000000000000 |
| ; CHECK-NEXT: fcmp s0, #0.0 |
| ; CHECK-NEXT: fcsel s0, s0, s1, gt |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp ogt float %x, 0.000000e+00 |
| %max = select i1 %cmp, float %x, float 0.000000e+00 |
| ret float %max |
| } |
| |
| ; Negative: unordered comparison (ugt) - fold is NOT correct, skip; fcmp + fcsel hi remain. NOTE(review): the input is a bare argument, which max_arg shows already blocks the fold for an ordered compare, so this does not isolate the ordered-predicate requirement - consider an fmul/fadd input. |
| define float @unordered_cmp(float %x) { |
| ; CHECK-LABEL: unordered_cmp: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi d1, #0000000000000000 |
| ; CHECK-NEXT: fcmp s0, #0.0 |
| ; CHECK-NEXT: fcsel s0, s0, s1, hi |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp nsz ugt float %x, 0.000000e+00 |
| %max = select i1 %cmp, float %x, float 0.000000e+00 |
| ret float %max |
| } |
| |
| ; nsz on the setcc only (not the select) - should still work: the fcmp carries nsz, the select has no fast-math flags, and the pair still folds to fmaxnm. NOTE(review): every folding test in this file also has nsz only on the fcmp. |
| define float @nsz_on_setcc_only(float %a, float %b) { |
| ; CHECK-LABEL: nsz_on_setcc_only: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi d2, #0000000000000000 |
| ; CHECK-NEXT: fmul s0, s0, s1 |
| ; CHECK-NEXT: fmaxnm s0, s0, s2 |
| ; CHECK-NEXT: ret |
| entry: |
| %mul = fmul float %a, %b |
| %cmp = fcmp nsz ogt float %mul, 0.000000e+00 |
| %max = select i1 %cmp, float %mul, float 0.000000e+00 |
| ret float %max |
| } |
| |
| ; GE (>=) with arithmetic input: nsz oge against 0.0 folds to fmaxnm (same semantics as GT with nsz: the two differ only when mul equals zero, where only the sign of the zero result could differ). |
| define float @max_oge(float %a, float %b) { |
| ; CHECK-LABEL: max_oge: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi d2, #0000000000000000 |
| ; CHECK-NEXT: fmul s0, s0, s1 |
| ; CHECK-NEXT: fmaxnm s0, s0, s2 |
| ; CHECK-NEXT: ret |
| entry: |
| %mul = fmul float %a, %b |
| %cmp = fcmp nsz oge float %mul, 0.000000e+00 |
| %max = select i1 %cmp, float %mul, float 0.000000e+00 |
| ret float %max |
| } |
| |
| ; LE (<=) with arithmetic input: nsz ole against 1.0 folds to fminnm (same semantics as the olt form, which is emitted with condition code MI, with nsz). |
| define float @min_ole(float %a, float %b) { |
| ; CHECK-LABEL: min_ole: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmul s0, s0, s1 |
| ; CHECK-NEXT: fmov s1, #1.00000000 |
| ; CHECK-NEXT: fminnm s0, s0, s1 |
| ; CHECK-NEXT: ret |
| entry: |
| %mul = fmul float %a, %b |
| %cmp = fcmp nsz ole float %mul, 1.000000e+00 |
| %min = select i1 %cmp, float %mul, float 1.000000e+00 |
| ret float %min |
| } |