; blob: 5e21c25215887d7283b9a364781debb5a7566013 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=aarch64 < %s | FileCheck %s
; Test late AArch64 folding of ordered FCMP+CSEL patterns to fmaxnm/fminnm.
; Clamp pattern: max(a * b, 0.0) followed by min(result, 1.0).
; The inner fcmp/select pair folds to fmaxnm. The outer pair does NOT fold,
; because its operands cannot be proven to be non-sNaN, so it remains an
; fcmp + fcsel sequence.
define float @max_after_fmul(float %a, float %b) {
; CHECK-LABEL: max_after_fmul:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s0, s1, mi
; CHECK-NEXT: ret
entry:
  %prod = fmul float %a, %b
  ; Lower clamp: ordered greater-than against 0.0, nsz on the compare.
  %above.zero = fcmp nsz ogt float %prod, 0.000000e+00
  %lo.clamped = select i1 %above.zero, float %prod, float 0.000000e+00
  ; Upper clamp: ordered less-than against 1.0, nsz on the compare.
  %below.one = fcmp nsz olt float %lo.clamped, 1.000000e+00
  %clamped = select i1 %below.one, float %lo.clamped, float 1.000000e+00
  ret float %clamped
}
; max(a + b, 0.0): same max-with-zero pattern, but the compared value is
; produced by an fadd. The assertions below expect a single fmaxnm.
define float @max_after_fadd(float %a, float %b) {
; CHECK-LABEL: max_after_fadd:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: ret
entry:
  %sum = fadd float %a, %b
  %above.zero = fcmp nsz ogt float %sum, 0.000000e+00
  %result = select i1 %above.zero, float %sum, float 0.000000e+00
  ret float %result
}
; Double-precision variant of max(a * b, 0.0); the assertions below expect
; the d-register form of fmaxnm.
define double @max_after_fmul_f64(double %a, double %b) {
; CHECK-LABEL: max_after_fmul_f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fmaxnm d0, d0, d2
; CHECK-NEXT: ret
entry:
  %prod = fmul double %a, %b
  %above.zero = fcmp nsz ogt double %prod, 0.000000e+00
  %result = select i1 %above.zero, double %prod, double 0.000000e+00
  ret double %result
}
; max(x, 0.0) where x is a bare function argument: isKnownNeverSNaN returns
; false for it, so the fold is skipped and fcmp + fcsel is kept.
define float @max_arg(float %x) {
; CHECK-LABEL: max_arg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s0, s1, gt
; CHECK-NEXT: ret
entry:
  %above.zero = fcmp nsz ogt float %x, 0.000000e+00
  %result = select i1 %above.zero, float %x, float 0.000000e+00
  ret float %result
}
; min(x, 1.0) where x is a bare function argument: isKnownNeverSNaN returns
; false for it, so the fold is skipped and fcmp + fcsel is kept.
define float @min_arg(float %x) {
; CHECK-LABEL: min_arg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s0, s1, mi
; CHECK-NEXT: ret
entry:
  %below.one = fcmp nsz olt float %x, 1.000000e+00
  %result = select i1 %below.one, float %x, float 1.000000e+00
  ret float %result
}
; min(a * b, 1.0): the compared value comes from an fmul, and the assertions
; below expect the fcmp/select pair to become a single fminnm.
define float @min_after_fmul(float %a, float %b) {
; CHECK-LABEL: min_after_fmul:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fminnm s0, s0, s1
; CHECK-NEXT: ret
entry:
  %prod = fmul float %a, %b
  %below.one = fcmp nsz olt float %prod, 1.000000e+00
  %result = select i1 %below.one, float %prod, float 1.000000e+00
  ret float %result
}
; Negative: no nsz flag on the fcmp - cannot transform (signed zero mismatch).
; The expected output keeps the fcmp + fcsel sequence.
; NOTE(review): %x is a plain function argument. According to the max_arg
; test in this file, such a value already fails the isKnownNeverSNaN check,
; so the fold would presumably be skipped here even if nsz were present.
; This test therefore may not isolate the nsz requirement; consider feeding
; the compare from an arithmetic result (e.g. an fmul) and regenerating the
; assertions. TODO confirm against the combine's implementation.
define float @no_nsz(float %x) {
; CHECK-LABEL: no_nsz:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s0, s1, gt
; CHECK-NEXT: ret
entry:
%cmp = fcmp ogt float %x, 0.000000e+00
%max = select i1 %cmp, float %x, float 0.000000e+00
ret float %max
}
; Negative: unordered comparison (ugt) - fold is NOT correct, skip.
; The expected output keeps the fcmp + fcsel sequence, using the "hi"
; condition.
; NOTE(review): %x is a plain function argument. According to the max_arg
; test in this file, such a value already fails the isKnownNeverSNaN check,
; so the fold would presumably be skipped here even with an ordered
; predicate. This test therefore may not isolate the unordered-predicate
; rejection; consider feeding the compare from an arithmetic result (e.g. an
; fmul) and regenerating the assertions. TODO confirm against the combine's
; implementation.
define float @unordered_cmp(float %x) {
; CHECK-LABEL: unordered_cmp:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s0, s1, hi
; CHECK-NEXT: ret
entry:
%cmp = fcmp nsz ugt float %x, 0.000000e+00
%max = select i1 %cmp, float %x, float 0.000000e+00
ret float %max
}
; nsz is present on the fcmp (setcc) only; the select carries no fast-math
; flags. The fold to fmaxnm should still happen.
define float @nsz_on_setcc_only(float %a, float %b) {
; CHECK-LABEL: nsz_on_setcc_only:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: ret
entry:
  %prod = fmul float %a, %b
  %above.zero = fcmp nsz ogt float %prod, 0.000000e+00
  %result = select i1 %above.zero, float %prod, float 0.000000e+00
  ret float %result
}
; GE (>=) with arithmetic input: max(a * b, 0.0) written with an ordered
; greater-or-equal compare. Folds to fmaxnm (same semantics as GT with nsz).
define float @max_oge(float %a, float %b) {
; CHECK-LABEL: max_oge:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: ret
entry:
  %prod = fmul float %a, %b
  %at.least.zero = fcmp nsz oge float %prod, 0.000000e+00
  %result = select i1 %at.least.zero, float %prod, float 0.000000e+00
  ret float %result
}
; LE (<=) with arithmetic input: min(a * b, 1.0) written with an ordered
; less-or-equal compare. Folds to fminnm (same semantics as LT (olt) with
; nsz).
define float @min_ole(float %a, float %b) {
; CHECK-LABEL: min_ole:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fminnm s0, s0, s1
; CHECK-NEXT: ret
entry:
  %prod = fmul float %a, %b
  %at.most.one = fcmp nsz ole float %prod, 1.000000e+00
  %result = select i1 %at.most.one, float %prod, float 1.000000e+00
  ret float %result
}