| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; NOTE: Test cases for FCMP-FCSEL and CMP/CMN-CSEL code layout optimization |
| ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-apple-darwin -mcpu=apple-m4 -aarch64-code-layout-opt-enable=fcmp-fcsel,cmp-csel | FileCheck %s |
| ; Default for -mcpu=apple-m4 enables both fcmp-fcsel and cmp-csel; expect identical output. |
| ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-apple-darwin -mcpu=apple-m4 | FileCheck %s |
| |
| ; Test coverage for optimizeForCodeLayout function: |
| ; * Basic FCMP-FCSEL instruction pair detection and function alignment (single/double precision) |
| ; * Multiple FCMP-FCSEL pairs in same function (also tests different predicates) |
| ; * Mixed single and double precision in same function |
| ; * FCMP with immediate operand (#0.0) is excluded from optimization |
| ; * Instruction pairs with function calls |
| ; * Negative tests (no false positives) |
| ; * Basic CMP-CSEL and CMN-CSEL instruction pair detection and function alignment |
| ; * CMP/CMN with immediate <=15 qualifies; immediate >15 is excluded |
| |
| ; * Basic single-precision FCMP-FCSEL instruction pair |
| ; A float `fcmp oeq` of %a and %b drives a `select` between %c and %d. |
| ; The assertions expect: |
| ;  - the function symbol to be aligned with `.p2align 6` (2^6 = 64 bytes); |
| ;  - a `.p2align 6, , 4` directive (64-byte alignment, at most 4 bytes of |
| ;    padding) immediately ahead of %bb.0; |
| ;  - the register-form `fcmp` and the `fcsel` to be emitted back to back. |
| ; CHECK: .globl _test_basic_fcmp_fcsel_single |
| ; CHECK-NEXT: .p2align 6 |
| define float @test_basic_fcmp_fcsel_single(float %a, float %b, float %c, float %d) { |
| ; CHECK-LABEL: test_basic_fcmp_fcsel_single: |
| ; CHECK: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.0: ; %entry |
| ; CHECK-NEXT: fcmp s0, s1 |
| ; CHECK-NEXT: fcsel s0, s2, s3, eq |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp oeq float %a, %b |
| %sel = select i1 %cmp, float %c, float %d |
| ret float %sel |
| } |
| |
| ; * Basic double-precision FCMP-FCSEL instruction pair |
| ; Same shape as the single-precision case, but on `double` operands, so the |
| ; expected pair uses the d-registers (`fcmp d0, d1` / `fcsel d0, d2, d3, eq`). |
| ; The assertions expect `.p2align 6` on the function symbol and a |
| ; `.p2align 6, , 4` (64-byte alignment, at most 4 bytes of padding) |
| ; immediately ahead of %bb.0. |
| ; CHECK: .globl _test_basic_fcmp_fcsel_double |
| ; CHECK-NEXT: .p2align 6 |
| define double @test_basic_fcmp_fcsel_double(double %a, double %b, double %c, double %d) { |
| ; CHECK-LABEL: test_basic_fcmp_fcsel_double: |
| ; CHECK: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.0: ; %entry |
| ; CHECK-NEXT: fcmp d0, d1 |
| ; CHECK-NEXT: fcsel d0, d2, d3, eq |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp oeq double %a, %b |
| %sel = select i1 %cmp, double %c, double %d |
| ret double %sel |
| } |
| |
| ; * Multiple FCMP-FCSEL instruction pairs in same function |
| ; Two chained compare/select pairs with different predicates: `oeq` on |
| ; (%a, %b), then `ogt` on the first select's result and %e. |
| ; The assertions expect each pair to land in its own block (%bb.0 and %bb.1), |
| ; each preceded by its own `.p2align 6, , 4`, and the function symbol to be |
| ; aligned with `.p2align 6`. |
| ; CHECK: .globl _test_multiple_patterns |
| ; CHECK-NEXT: .p2align 6 |
| define float @test_multiple_patterns(float %a, float %b, float %c, float %d, float %e, float %f) { |
| ; CHECK-LABEL: test_multiple_patterns: |
| ; CHECK: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.0: ; %entry |
| ; CHECK-NEXT: fcmp s0, s1 |
| ; CHECK-NEXT: fcsel s0, s2, s3, eq |
| ; CHECK-NEXT: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.1: ; %entry |
| ; CHECK-NEXT: fcmp s0, s4 |
| ; CHECK-NEXT: fcsel s0, s0, s5, gt |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp1 = fcmp oeq float %a, %b |
| %sel1 = select i1 %cmp1, float %c, float %d |
| ; The second pair consumes %sel1 both as compare operand and as select arm. |
| %cmp2 = fcmp ogt float %sel1, %e |
| %sel2 = select i1 %cmp2, float %sel1, float %f |
| ret float %sel2 |
| } |
| |
| ; * FCMP with comparison to zero (immediate) - excluded from optimization |
| ; FCMP #0.0 uses the ri-form opcode which is not in the detection list |
| ; Negative case: the compare is against the constant 0.0, so the expected |
| ; instruction is `fcmp s0, #0.0`. The function symbol is expected to keep |
| ; `.p2align 2`, and no `.p2align 6, , 4` is listed in the body. |
| ; NOTE(review): the first body assertion is a plain (non -NEXT) match, so it |
| ; does not by itself reject an extra directive between the label and %bb.0; |
| ; the function-level `.p2align 2` is what this case actually pins. |
| ; CHECK: .globl _test_fcmp_immediate |
| ; CHECK-NEXT: .p2align 2 |
| define float @test_fcmp_immediate(float %a, float %b) { |
| ; CHECK-LABEL: test_fcmp_immediate: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: fcmp s0, #0.0 |
| ; CHECK-NEXT: fcsel s0, s0, s1, eq |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp oeq float %a, 0.0 |
| %sel = select i1 %cmp, float %a, float %b |
| ret float %sel |
| } |
| |
| ; * Mixed single and double precision in same function |
| ; One float pair (`ogt` on %a, %b) and one double pair (`olt` on %c, %d) in |
| ; the same function; the double result is truncated and added to the float |
| ; result so that both selects stay live. |
| ; The assertions expect the s-register pair in %bb.0 and the d-register pair |
| ; in %bb.1, each preceded by `.p2align 6, , 4`, with the `fcvt`/`fadd` tail |
| ; following the second pair, and `.p2align 6` on the function symbol. |
| ; CHECK: .globl _test_mixed_precision |
| ; CHECK-NEXT: .p2align 6 |
| define float @test_mixed_precision(float %a, float %b, double %c, double %d) { |
| ; CHECK-LABEL: test_mixed_precision: |
| ; CHECK: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.0: ; %entry |
| ; CHECK-NEXT: fcmp s0, s1 |
| ; CHECK-NEXT: fcsel s0, s0, s1, gt |
| ; CHECK-NEXT: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.1: ; %entry |
| ; CHECK-NEXT: fcmp d2, d3 |
| ; CHECK-NEXT: fcsel d1, d2, d3, mi |
| ; CHECK-NEXT: fcvt s1, d1 |
| ; CHECK-NEXT: fadd s0, s0, s1 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Single-precision pair. |
| %cmp_single = fcmp ogt float %a, %b |
| %sel_single = select i1 %cmp_single, float %a, float %b |
| ; Double-precision pair. |
| %cmp_double = fcmp olt double %c, %d |
| %sel_double = select i1 %cmp_double, double %c, double %d |
| ; Combine both results so neither select is dead. |
| %trunc = fptrunc double %sel_double to float |
| %final = fadd float %sel_single, %trunc |
| ret float %final |
| } |
| |
| ; * FCMP-FCSEL instruction pair with a function call present |
| ; The select result is passed to an external call, so the function is not a |
| ; leaf and has a prologue. The assertions expect the `stp` spill and its CFI |
| ; directives to stay in %bb.0, followed by `.p2align 6, , 4` and a separate |
| ; %bb.1 that starts with the fcmp/fcsel pair and then performs the `bl`. |
| ; The function symbol is expected to be aligned with `.p2align 6`. |
| ; CHECK: .globl _test_with_function_calls |
| ; CHECK-NEXT: .p2align 6 |
| ; External callee; only its declaration is needed for the call below. |
| declare float @external_func(float) |
| define float @test_with_function_calls(float %a, float %b, float %c, float %d) { |
| ; CHECK-LABEL: test_with_function_calls: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.1: ; %entry |
| ; CHECK-NEXT: fcmp s0, s1 |
| ; CHECK-NEXT: fcsel s0, s2, s3, gt |
| ; CHECK-NEXT: bl _external_func |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp ogt float %a, %b |
| %sel = select i1 %cmp, float %c, float %d |
| %result = call float @external_func(float %sel) |
| ret float %result |
| } |
| |
| ; * Verify no false positives - FCMP without FCSEL |
| ; Negative case: the compare result is zero-extended to i32 instead of |
| ; feeding a select, so the expected consumer of the flags is `cset`, not |
| ; `fcsel`. The function symbol is expected to keep `.p2align 2`, and no |
| ; `.p2align 6, , 4` is listed in the body. |
| ; CHECK: .globl _test_fcmp_without_fcsel |
| ; CHECK-NEXT: .p2align 2 |
| define i32 @test_fcmp_without_fcsel(float %a, float %b) { |
| ; CHECK-LABEL: test_fcmp_without_fcsel: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: fcmp s0, s1 |
| ; CHECK-NEXT: cset w0, gt |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = fcmp ogt float %a, %b |
| %result = zext i1 %cmp to i32 |
| ret i32 %result |
| } |
| |
| ; * Verify no false positives - FCSEL without preceding FCMP |
| ; Negative case: the select condition is an i1 argument rather than the |
| ; result of a floating-point compare, so the expected flag-setting |
| ; instruction is `tst w0, #0x1`, not `fcmp`. The function symbol is expected |
| ; to keep `.p2align 2`, and no `.p2align 6, , 4` is listed in the body. |
| ; CHECK: .globl _test_fcsel_without_fcmp |
| ; CHECK-NEXT: .p2align 2 |
| define float @test_fcsel_without_fcmp(i1 %cond, float %a, float %b) { |
| ; CHECK-LABEL: test_fcsel_without_fcmp: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: tst w0, #0x1 |
| ; CHECK-NEXT: fcsel s0, s0, s1, ne |
| ; CHECK-NEXT: ret |
| entry: |
| %result = select i1 %cond, float %a, float %b |
| ret float %result |
| } |
| |
| ;------------------------------------------------------------------------------ |
| ; CMP/CMN-CSEL tests (cmp-csel flag of -aarch64-code-layout-opt-enable) |
| ;------------------------------------------------------------------------------ |
| |
| ; * Basic CMP-CSEL instruction pair (integer register comparison) |
| ; An `icmp eq` of two i32 registers drives a `select` between %c and %d. |
| ; The assertions expect `cmp w0, w1` immediately followed by `csel`, a |
| ; `.p2align 6, , 4` (64-byte alignment, at most 4 bytes of padding) ahead of |
| ; %bb.0, and `.p2align 6` on the function symbol. |
| ; CHECK: .globl _test_basic_cmp_csel |
| ; CHECK-NEXT: .p2align 6 |
| define i32 @test_basic_cmp_csel(i32 %a, i32 %b, i32 %c, i32 %d) { |
| ; CHECK-LABEL: test_basic_cmp_csel: |
| ; CHECK: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.0: ; %entry |
| ; CHECK-NEXT: cmp w0, w1 |
| ; CHECK-NEXT: csel w0, w2, w3, eq |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = icmp eq i32 %a, %b |
| %sel = select i1 %cmp, i32 %c, i32 %d |
| ret i32 %sel |
| } |
| |
| ; * CMP-CSEL instruction pair with small immediate (<=15, qualifies for optimization) |
| ; The compare is against the constant 7, giving the immediate form |
| ; `cmp w0, #7`. The assertions expect it to be treated like the register |
| ; form: `.p2align 6` on the function symbol and `.p2align 6, , 4` ahead of |
| ; %bb.0, with `csel` directly after the compare. |
| ; CHECK: .globl _test_cmp_small_imm_csel |
| ; CHECK-NEXT: .p2align 6 |
| define i32 @test_cmp_small_imm_csel(i32 %a, i32 %b, i32 %c) { |
| ; CHECK-LABEL: test_cmp_small_imm_csel: |
| ; CHECK: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.0: ; %entry |
| ; CHECK-NEXT: cmp w0, #7 |
| ; CHECK-NEXT: csel w0, w1, w2, eq |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = icmp eq i32 %a, 7 |
| %sel = select i1 %cmp, i32 %b, i32 %c |
| ret i32 %sel |
| } |
| |
| ; * CMP-CSEL with immediate > 15 - excluded from optimization |
| ; Negative counterpart of the small-immediate case: the compare constant is |
| ; 100, giving `cmp w0, #100`. The same cmp/csel pair is still expected, but |
| ; the function symbol is expected to keep `.p2align 2` and no |
| ; `.p2align 6, , 4` is listed in the body. |
| ; CHECK: .globl _test_cmp_large_imm_csel |
| ; CHECK-NEXT: .p2align 2 |
| define i32 @test_cmp_large_imm_csel(i32 %a, i32 %b, i32 %c) { |
| ; CHECK-LABEL: test_cmp_large_imm_csel: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: cmp w0, #100 |
| ; CHECK-NEXT: csel w0, w1, w2, eq |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = icmp eq i32 %a, 100 |
| %sel = select i1 %cmp, i32 %b, i32 %c |
| ret i32 %sel |
| } |
| |
| ; * Basic CMN-CSEL instruction pair (ADDSWrr with WZR destination) |
| ; The IR tests whether %a + %b equals zero; the assertions expect the add and |
| ; the compare to be emitted as a single `cmn w0, w1` directly followed by |
| ; `csel`, with `.p2align 6` on the function symbol and `.p2align 6, , 4` |
| ; ahead of %bb.0. |
| ; CHECK: .globl _test_basic_cmn_csel |
| ; CHECK-NEXT: .p2align 6 |
| define i32 @test_basic_cmn_csel(i32 %a, i32 %b, i32 %c, i32 %d) { |
| ; CHECK-LABEL: test_basic_cmn_csel: |
| ; CHECK: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.0: ; %entry |
| ; CHECK-NEXT: cmn w0, w1 |
| ; CHECK-NEXT: csel w0, w2, w3, eq |
| ; CHECK-NEXT: ret |
| entry: |
| ; %sum is used only by the compare against zero below. |
| %sum = add i32 %a, %b |
| %cmp = icmp eq i32 %sum, 0 |
| %sel = select i1 %cmp, i32 %c, i32 %d |
| ret i32 %sel |
| } |
| |
| ; * CMN-CSEL instruction pair with small immediate (ADDSWri imm=7, qualifies) |
| ; The IR compares %a against -7; the assertions expect this to be emitted as |
| ; `cmn w0, #7` (compare-negative with the positive immediate) directly |
| ; followed by `csel`, with `.p2align 6` on the function symbol and |
| ; `.p2align 6, , 4` ahead of %bb.0. |
| ; CHECK: .globl _test_cmn_small_imm_csel |
| ; CHECK-NEXT: .p2align 6 |
| define i32 @test_cmn_small_imm_csel(i32 %a, i32 %b, i32 %c) { |
| ; CHECK-LABEL: test_cmn_small_imm_csel: |
| ; CHECK: .p2align 6, , 4 |
| ; CHECK-NEXT: ; %bb.0: ; %entry |
| ; CHECK-NEXT: cmn w0, #7 |
| ; CHECK-NEXT: csel w0, w1, w2, eq |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = icmp eq i32 %a, -7 |
| %sel = select i1 %cmp, i32 %b, i32 %c |
| ret i32 %sel |
| } |
| |
| ; * CMP without CSEL - no false positive |
| ; Negative case: the integer compare result is zero-extended to i32 instead |
| ; of feeding a select, so the expected consumer of the flags is `cset`, not |
| ; `csel`. The function symbol is expected to keep `.p2align 2`, and no |
| ; `.p2align 6, , 4` is listed in the body. |
| ; CHECK: .globl _test_cmp_without_csel |
| ; CHECK-NEXT: .p2align 2 |
| define i32 @test_cmp_without_csel(i32 %a, i32 %b) { |
| ; CHECK-LABEL: test_cmp_without_csel: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: cmp w0, w1 |
| ; CHECK-NEXT: cset w0, eq |
| ; CHECK-NEXT: ret |
| entry: |
| %cmp = icmp eq i32 %a, %b |
| %result = zext i1 %cmp to i32 |
| ret i32 %result |
| } |