; blob: 4a8ebde88a0c499b4a3c6d8cf77aae8190e264bc
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -S %s | FileCheck %s --check-prefix=VF2
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s --check-prefix=VF4
; RUN: opt -passes=loop-vectorize -force-vector-width=8 -S %s | FileCheck %s --check-prefix=VF8
; Integer add-reduction over loads from %A whose index is wrapped with
; 'urem ..., 4': for iv = 0..127 the loop adds the i32 at A[iv % 4] into a
; running sum and returns the final sum.
; The autogenerated checks for the VF2, VF4 and VF8 prefixes all match the
; input scalar loop as-is (entry branches straight to the loop, no vector
; body), i.e. no vectorized loop is expected for any of the forced widths.
; NOTE(review): the reason the loop stays scalar is not visible in this file.
define i32 @clamped_load_reduction_bound4(ptr %A) {
; VF2-LABEL: define i32 @clamped_load_reduction_bound4(
; VF2-SAME: ptr [[A:%.*]]) {
; VF2-NEXT: [[ENTRY:.*]]:
; VF2-NEXT: br label %[[LOOP:.*]]
; VF2: [[LOOP]]:
; VF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ]
; VF2-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ]
; VF2-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 4
; VF2-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]]
; VF2-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4
; VF2-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
; VF2-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1
; VF2-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
; VF2-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
; VF2: [[EXIT]]:
; VF2-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ]
; VF2-NEXT: ret i32 [[R]]
;
; VF4-LABEL: define i32 @clamped_load_reduction_bound4(
; VF4-SAME: ptr [[A:%.*]]) {
; VF4-NEXT: [[ENTRY:.*]]:
; VF4-NEXT: br label %[[LOOP:.*]]
; VF4: [[LOOP]]:
; VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ]
; VF4-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ]
; VF4-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 4
; VF4-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]]
; VF4-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4
; VF4-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
; VF4-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1
; VF4-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
; VF4-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
; VF4: [[EXIT]]:
; VF4-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ]
; VF4-NEXT: ret i32 [[R]]
;
; VF8-LABEL: define i32 @clamped_load_reduction_bound4(
; VF8-SAME: ptr [[A:%.*]]) {
; VF8-NEXT: [[ENTRY:.*]]:
; VF8-NEXT: br label %[[LOOP:.*]]
; VF8: [[LOOP]]:
; VF8-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ]
; VF8-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ]
; VF8-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 4
; VF8-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]]
; VF8-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4
; VF8-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
; VF8-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1
; VF8-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
; VF8-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
; VF8: [[EXIT]]:
; VF8-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ]
; VF8-NEXT: ret i32 [[R]]
;
entry:
br label %loop
loop:
; %iv counts 0, 1, 2, ...; %sum carries the running total across iterations.
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
; Wrap the index so the accessed element is always one of A[0..3].
%clamped = urem i32 %iv, 4
%gep.A = getelementptr inbounds i32, ptr %A, i32 %clamped
; No explicit alignment here; the checks above show it printed as 'align 4'.
%lv = load i32, ptr %gep.A
%sum.next = add i32 %sum, %lv
%iv.next = add nuw nsw i32 %iv, 1
; Leave the loop once the incremented counter reaches 128 (128 iterations).
%cond = icmp eq i32 %iv.next, 128
br i1 %cond, label %exit, label %loop
exit:
; Single-incoming phi that carries the final sum out of the loop.
%r = phi i32 [ %sum.next, %loop ]
ret i32 %r
}
; Integer add-reduction over loads from %A whose index is wrapped with
; 'urem ..., 8': for iv = 0..127 the loop adds the i32 at A[iv % 8] into a
; running sum and returns the final sum.
; The autogenerated checks for the VF2, VF4 and VF8 prefixes all match the
; input scalar loop as-is (entry branches straight to the loop, no vector
; body), i.e. no vectorized loop is expected for any of the forced widths.
; NOTE(review): the reason the loop stays scalar is not visible in this file.
define i32 @clamped_load_reduction_bound8(ptr %A) {
; VF2-LABEL: define i32 @clamped_load_reduction_bound8(
; VF2-SAME: ptr [[A:%.*]]) {
; VF2-NEXT: [[ENTRY:.*]]:
; VF2-NEXT: br label %[[LOOP:.*]]
; VF2: [[LOOP]]:
; VF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ]
; VF2-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ]
; VF2-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 8
; VF2-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]]
; VF2-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4
; VF2-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
; VF2-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1
; VF2-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
; VF2-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
; VF2: [[EXIT]]:
; VF2-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ]
; VF2-NEXT: ret i32 [[R]]
;
; VF4-LABEL: define i32 @clamped_load_reduction_bound8(
; VF4-SAME: ptr [[A:%.*]]) {
; VF4-NEXT: [[ENTRY:.*]]:
; VF4-NEXT: br label %[[LOOP:.*]]
; VF4: [[LOOP]]:
; VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ]
; VF4-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ]
; VF4-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 8
; VF4-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]]
; VF4-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4
; VF4-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
; VF4-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1
; VF4-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
; VF4-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
; VF4: [[EXIT]]:
; VF4-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ]
; VF4-NEXT: ret i32 [[R]]
;
; VF8-LABEL: define i32 @clamped_load_reduction_bound8(
; VF8-SAME: ptr [[A:%.*]]) {
; VF8-NEXT: [[ENTRY:.*]]:
; VF8-NEXT: br label %[[LOOP:.*]]
; VF8: [[LOOP]]:
; VF8-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ]
; VF8-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ]
; VF8-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 8
; VF8-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]]
; VF8-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4
; VF8-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
; VF8-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1
; VF8-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
; VF8-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]]
; VF8: [[EXIT]]:
; VF8-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ]
; VF8-NEXT: ret i32 [[R]]
;
entry:
br label %loop
loop:
; %iv counts 0, 1, 2, ...; %sum carries the running total across iterations.
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
; Wrap the index so the accessed element is always one of A[0..7].
%clamped = urem i32 %iv, 8
%gep.A = getelementptr inbounds i32, ptr %A, i32 %clamped
; No explicit alignment here; the checks above show it printed as 'align 4'.
%lv = load i32, ptr %gep.A
%sum.next = add i32 %sum, %lv
%iv.next = add nuw nsw i32 %iv, 1
; Leave the loop once the incremented counter reaches 128 (128 iterations).
%cond = icmp eq i32 %iv.next, 128
br i1 %cond, label %exit, label %loop
exit:
; Single-incoming phi that carries the final sum out of the loop.
%r = phi i32 [ %sum.next, %loop ]
ret i32 %r
}