blob: c90d00389430d00bfc0cf4a2de09ec53e677310b [file]
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Checking a loop in" --filter "Cost for VF" --filter "Final VPlan for" --filter "Selecting VF:" --version 6
; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -scalable-vectorization=always -debug-only=loop-vectorize -disable-output %s 2>&1 | FileCheck %s
; REQUIRES: asserts
target triple = "aarch64"
; Check that the cost of a fixed-VF is lower than that of comparable scalable VFs,
; but that a scalable VF is still chosen (due to the `-scalable-vectorization=always` flag)
; Test function: a count-down loop compiled for "target-cpu"="neoverse-n2".
; Both pointer arguments are noalias. Each iteration reloads one i16 from the
; unchanging address %src, stores it to %dst[%iv], and adds its sign-extended
; value to an i32 running sum, which is returned after the loop.
define i32 @cost_prefers_fixed_width_vf_but_force_scalable_vf(ptr noalias %dst, ptr noalias %src, i64 %n) "target-cpu"="neoverse-n2" {
; The expectations below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script rather than editing them by hand.
; They pin the reported cost for each fixed-width and scalable VF, the VF that
; is finally selected (vscale x 4), and the header of the final VPlan.
; CHECK-LABEL: 'cost_prefers_fixed_width_vf_but_force_scalable_vf'
; CHECK: Cost for VF 2: 11 (Estimated cost per lane: 5.5)
; CHECK: Cost for VF 4: 7 (Estimated cost per lane: 1.8)
; CHECK: Cost for VF 8: 9 (Estimated cost per lane: 1.1)
; CHECK: Cost for VF vscale x 1: Invalid (Estimated cost per lane: Invalid)
; CHECK: Cost for VF vscale x 2: 8 (Estimated cost per lane: 4.0)
; CHECK: Cost for VF vscale x 4: 7 (Estimated cost per lane: 1.8)
; CHECK: LV: Selecting VF: vscale x 4.
; CHECK: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={2}' {
;
entry:
br label %loop
loop:
; %iv starts at %n and is decremented by 1 per iteration (see %iv.next).
%iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ]
; %sum is the running total, starting at 0.
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
; The load address is %src itself, so it does not depend on %iv.
%uniform.load = load i16, ptr %src, align 2
%ext = sext i16 %uniform.load to i32
%sum.next = add i32 %sum, %ext
; The store is indexed by the decreasing %iv, so successive iterations write
; to successively lower i16 slots of %dst.
%dst.gep = getelementptr i16, ptr %dst, i64 %iv
store i16 %uniform.load, ptr %dst.gep, align 2
%iv.next = add i64 %iv, -1
; The exit test is unsigned and uses the pre-decrement %iv, so the body also
; executes for %iv == 0 before the loop exits.
%cmp = icmp ugt i64 %iv, 0
br i1 %cmp, label %loop, label %exit
exit:
ret i32 %sum.next
}
; Test that with '-scalable-vectorization=always', we still fall back to NEON
; if we can't vectorize with SVE (in this case, because SVE is unavailable)
; Test function: a count-up loop that sums consecutive i32 elements of %src.
; The only feature string on the function is "target-features"="+neon".
define i32 @no_sve_fallback_to_neon(ptr %src, i64 %n) "target-features"="+neon" {
; The expectations below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script rather than editing them by hand.
; They list costs for fixed-width VFs 2 and 4 only, expect VF 4 to be
; selected, and pin the header of the final VPlan.
; CHECK-LABEL: 'no_sve_fallback_to_neon'
; CHECK: Cost for VF 2: 4 (Estimated cost per lane: 2.0)
; CHECK: Cost for VF 4: 4 (Estimated cost per lane: 1.0)
; CHECK: LV: Selecting VF: 4.
; CHECK: VPlan 'Final VPlan for VF={2,4},UF={2}' {
;
entry:
br label %loop
loop:
; %iv starts at 0 and is incremented by 1 per iteration (see %iv.next).
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; %sum is the running total, starting at 0.
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
; Load the i32 element of %src at index %iv and add it to the running total.
%src.gep = getelementptr i32, ptr %src, i64 %iv
%load = load i32, ptr %src.gep, align 4
%sum.next = add i32 %sum, %load
%iv.next = add i64 %iv, 1
; The exit test is unsigned and uses the pre-increment %iv, so the body also
; executes for %iv == %n before the loop exits.
%cmp = icmp ult i64 %iv, %n
br i1 %cmp, label %loop, label %exit
exit:
ret i32 %sum.next
}