; blob: adf5b05bd5305b7469a5545235c56d4d01bff593 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; NOTE: Test cases for FCMP-FCSEL and CMP/CMN-CSEL code layout optimization
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-apple-darwin -mcpu=apple-m4 -aarch64-code-layout-opt-enable=fcmp-fcsel,cmp-csel | FileCheck %s
; Default for -mcpu=apple-m4 enables both fcmp-fcsel and cmp-csel; expect identical output.
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-apple-darwin -mcpu=apple-m4 | FileCheck %s
; Test coverage for optimizeForCodeLayout function:
; * Basic FCMP-FCSEL instruction pair detection and function alignment (single/double precision)
; * Multiple FCMP-FCSEL pairs in same function (also tests different predicates)
; * Mixed single and double precision in same function
; * FCMP with immediate operand (#0.0) is excluded from optimization
; * Instruction pairs with function calls
; * Negative tests (no false positives)
; * Basic CMP-CSEL and CMN-CSEL instruction pair detection and function alignment
; * CMP/CMN with immediate <=15 qualifies; immediate >15 is excluded
; * Single-precision baseline: one ordered-equal FCMP feeding one FCSEL.
;   The assertions expect the function to be aligned with .p2align 6 and a
;   .p2align 6, , 4 directive ahead of the block holding the pair.
; CHECK: .globl _test_basic_fcmp_fcsel_single
; CHECK-NEXT: .p2align 6
define float @test_basic_fcmp_fcsel_single(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: test_basic_fcmp_fcsel_single:
; CHECK: .p2align 6, , 4
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, eq
; CHECK-NEXT: ret
entry:
  ; Compare %a with %b, then choose between %c and %d on the result.
  %is_eq = fcmp oeq float %a, %b
  %picked = select i1 %is_eq, float %c, float %d
  ret float %picked
}
; * Double-precision baseline: same shape as the single-precision test, but
;   on double operands, so the expected pair uses the d-registers.
; CHECK: .globl _test_basic_fcmp_fcsel_double
; CHECK-NEXT: .p2align 6
define double @test_basic_fcmp_fcsel_double(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: test_basic_fcmp_fcsel_double:
; CHECK: .p2align 6, , 4
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: fcmp d0, d1
; CHECK-NEXT: fcsel d0, d2, d3, eq
; CHECK-NEXT: ret
entry:
  ; Compare %a with %b, then choose between %c and %d on the result.
  %is_eq = fcmp oeq double %a, %b
  %picked = select i1 %is_eq, double %c, double %d
  ret double %picked
}
; * Two chained FCMP-FCSEL pairs in one function, using different predicates
;   (oeq, then ogt). The assertions expect a .p2align 6, , 4 directive in
;   front of each pair, with the second pair in its own block (%bb.1).
; CHECK: .globl _test_multiple_patterns
; CHECK-NEXT: .p2align 6
define float @test_multiple_patterns(float %a, float %b, float %c, float %d, float %e, float %f) {
; CHECK-LABEL: test_multiple_patterns:
; CHECK: .p2align 6, , 4
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, eq
; CHECK-NEXT: .p2align 6, , 4
; CHECK-NEXT: ; %bb.1: ; %entry
; CHECK-NEXT: fcmp s0, s4
; CHECK-NEXT: fcsel s0, s0, s5, gt
; CHECK-NEXT: ret
entry:
  ; First pair: equality compare selecting between %c and %d.
  %first_eq = fcmp oeq float %a, %b
  %first_pick = select i1 %first_eq, float %c, float %d
  ; Second pair: consumes the first result, so the two pairs are dependent.
  %second_gt = fcmp ogt float %first_pick, %e
  %second_pick = select i1 %second_gt, float %first_pick, float %f
  ret float %second_pick
}
; * Compare against the constant zero - not eligible for the optimization.
;   The compare is emitted as FCMP #0.0, i.e. the ri-form opcode, which is not
;   in the detection list; the function alignment is expected to stay at
;   .p2align 2.
; CHECK: .globl _test_fcmp_immediate
; CHECK-NEXT: .p2align 2
define float @test_fcmp_immediate(float %a, float %b) {
; CHECK-LABEL: test_fcmp_immediate:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s0, s1, eq
; CHECK-NEXT: ret
entry:
  ; The constant right-hand side is what produces the immediate compare form.
  %is_zero = fcmp oeq float %a, 0.0
  %picked = select i1 %is_zero, float %a, float %b
  ret float %picked
}
; * One single-precision pair and one double-precision pair in the same
;   function. Both pairs are expected to get their own .p2align 6, , 4
;   directive; the results are combined afterwards with fcvt + fadd.
; CHECK: .globl _test_mixed_precision
; CHECK-NEXT: .p2align 6
define float @test_mixed_precision(float %a, float %b, double %c, double %d) {
; CHECK-LABEL: test_mixed_precision:
; CHECK: .p2align 6, , 4
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s0, s1, gt
; CHECK-NEXT: .p2align 6, , 4
; CHECK-NEXT: ; %bb.1: ; %entry
; CHECK-NEXT: fcmp d2, d3
; CHECK-NEXT: fcsel d1, d2, d3, mi
; CHECK-NEXT: fcvt s1, d1
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  ; Single-precision pair on %a / %b.
  %f32_gt = fcmp ogt float %a, %b
  %f32_pick = select i1 %f32_gt, float %a, float %b
  ; Double-precision pair on %c / %d.
  %f64_lt = fcmp olt double %c, %d
  %f64_pick = select i1 %f64_lt, double %c, double %d
  ; Narrow the double result and add both selections so each one is live.
  %narrowed = fptrunc double %f64_pick to float
  %sum = fadd float %f32_pick, %narrowed
  ret float %sum
}
; * FCMP-FCSEL pair in a function that also makes a call. The expected output
;   has the frame setup (stp + CFI directives) first, then the pair in its
;   own block (%bb.1) preceded by .p2align 6, , 4, followed by the call.
; CHECK: .globl _test_with_function_calls
; CHECK-NEXT: .p2align 6
declare float @external_func(float)
define float @test_with_function_calls(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: test_with_function_calls:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: .p2align 6, , 4
; CHECK-NEXT: ; %bb.1: ; %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, gt
; CHECK-NEXT: bl _external_func
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
  %is_gt = fcmp ogt float %a, %b
  %picked = select i1 %is_gt, float %c, float %d
  ; The selected value is the call argument; the call result is returned.
  %call_result = call float @external_func(float %picked)
  ret float %call_result
}
; * Negative test: an FCMP whose result is materialized with CSET rather than
;   consumed by an FCSEL. The function alignment is expected to stay at
;   .p2align 2.
; CHECK: .globl _test_fcmp_without_fcsel
; CHECK-NEXT: .p2align 2
define i32 @test_fcmp_without_fcsel(float %a, float %b) {
; CHECK-LABEL: test_fcmp_without_fcsel:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
entry:
  ; Widening the i1 compare result avoids any select in the IR.
  %is_gt = fcmp ogt float %a, %b
  %as_int = zext i1 %is_gt to i32
  ret i32 %as_int
}
; * Negative test: an FCSEL whose condition comes from an i1 argument, so the
;   flags are set by TST instead of FCMP. The function alignment is expected
;   to stay at .p2align 2.
; CHECK: .globl _test_fcsel_without_fcmp
; CHECK-NEXT: .p2align 2
define float @test_fcsel_without_fcmp(i1 %cond, float %a, float %b) {
; CHECK-LABEL: test_fcsel_without_fcmp:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: fcsel s0, s0, s1, ne
; CHECK-NEXT: ret
entry:
  ; No floating-point compare in the IR; the select uses %cond directly.
  %picked = select i1 %cond, float %a, float %b
  ret float %picked
}
;------------------------------------------------------------------------------
; CMP/CMN-CSEL tests (cmp-csel flag of -aarch64-code-layout-opt-enable)
;------------------------------------------------------------------------------
; * Integer baseline: register-register CMP feeding one CSEL. The assertions
;   expect .p2align 6 function alignment and a .p2align 6, , 4 directive
;   ahead of the block holding the pair.
; CHECK: .globl _test_basic_cmp_csel
; CHECK-NEXT: .p2align 6
define i32 @test_basic_cmp_csel(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: test_basic_cmp_csel:
; CHECK: .p2align 6, , 4
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: csel w0, w2, w3, eq
; CHECK-NEXT: ret
entry:
  ; Compare %a with %b, then choose between %c and %d on the result.
  %is_eq = icmp eq i32 %a, %b
  %picked = select i1 %is_eq, i32 %c, i32 %d
  ret i32 %picked
}
; * CMP against a small immediate (7, within the <=15 limit) feeding a CSEL.
;   This form qualifies for the optimization, so .p2align 6 is expected.
; CHECK: .globl _test_cmp_small_imm_csel
; CHECK-NEXT: .p2align 6
define i32 @test_cmp_small_imm_csel(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_cmp_small_imm_csel:
; CHECK: .p2align 6, , 4
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: cmp w0, #7
; CHECK-NEXT: csel w0, w1, w2, eq
; CHECK-NEXT: ret
entry:
  ; The constant 7 becomes the compare immediate in the expected output.
  %is_seven = icmp eq i32 %a, 7
  %picked = select i1 %is_seven, i32 %b, i32 %c
  ret i32 %picked
}
; * CMP against a large immediate (100, above the 15 limit) feeding a CSEL.
;   This form is excluded from the optimization; the function alignment is
;   expected to stay at .p2align 2.
; CHECK: .globl _test_cmp_large_imm_csel
; CHECK-NEXT: .p2align 2
define i32 @test_cmp_large_imm_csel(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_cmp_large_imm_csel:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: cmp w0, #100
; CHECK-NEXT: csel w0, w1, w2, eq
; CHECK-NEXT: ret
entry:
  ; Same shape as the small-immediate test; only the constant differs.
  %is_hundred = icmp eq i32 %a, 100
  %picked = select i1 %is_hundred, i32 %b, i32 %c
  ret i32 %picked
}
; * Register-register CMN (ADDSWrr with WZR destination) feeding one CSEL.
;   The IR tests a sum against zero, which is expected to be emitted as CMN.
; CHECK: .globl _test_basic_cmn_csel
; CHECK-NEXT: .p2align 6
define i32 @test_basic_cmn_csel(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: test_basic_cmn_csel:
; CHECK: .p2align 6, , 4
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: cmn w0, w1
; CHECK-NEXT: csel w0, w2, w3, eq
; CHECK-NEXT: ret
entry:
  ; add + compare-with-zero is the source pattern for the expected CMN.
  %total = add i32 %a, %b
  %is_zero = icmp eq i32 %total, 0
  %picked = select i1 %is_zero, i32 %c, i32 %d
  ret i32 %picked
}
; * CMN with a small immediate (ADDSWri imm=7) feeding one CSEL. This form
;   qualifies for the optimization, so .p2align 6 is expected.
; CHECK: .globl _test_cmn_small_imm_csel
; CHECK-NEXT: .p2align 6
define i32 @test_cmn_small_imm_csel(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_cmn_small_imm_csel:
; CHECK: .p2align 6, , 4
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: cmn w0, #7
; CHECK-NEXT: csel w0, w1, w2, eq
; CHECK-NEXT: ret
entry:
  ; Equality with -7 is expected to be emitted as "cmn w0, #7".
  %is_neg_seven = icmp eq i32 %a, -7
  %picked = select i1 %is_neg_seven, i32 %b, i32 %c
  ret i32 %picked
}
; * Negative test: a CMP whose result is materialized with CSET rather than
;   consumed by a CSEL. The function alignment is expected to stay at
;   .p2align 2.
; CHECK: .globl _test_cmp_without_csel
; CHECK-NEXT: .p2align 2
define i32 @test_cmp_without_csel(i32 %a, i32 %b) {
; CHECK-LABEL: test_cmp_without_csel:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
  ; Widening the i1 compare result avoids any select in the IR.
  %is_eq = icmp eq i32 %a, %b
  %as_int = zext i1 %is_eq to i32
  ret i32 %as_int
}