| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 |
| ; RUN: opt -passes=loop-vectorize -force-vector-width=2 -S %s | FileCheck %s --check-prefix=VF2 |
| ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s --check-prefix=VF4 |
| ; RUN: opt -passes=loop-vectorize -force-vector-width=8 -S %s | FileCheck %s --check-prefix=VF8 |
| |
| ; Sums 128 i32 loads from %A, where the element index is the induction |
| ; variable reduced modulo 4 (urem), and returns the total. |
| ; The assertions for all three check prefixes describe the same scalar loop as |
| ; the input, i.e. they record no vector instructions at forced widths 2, 4 and 8. |
| ; The input load has no explicit alignment; the assertions expect `align 4`. |
| ; Review note: the assertions capture the induction variable as INDEX although |
| ; the input value is named %iv. The patterns match any name, so this is harmless, |
| ; but it may mean the assertions were not regenerated after a rename (TODO confirm). |
| define i32 @clamped_load_reduction_bound4(ptr %A) { |
| ; VF2-LABEL: define i32 @clamped_load_reduction_bound4( |
| ; VF2-SAME: ptr [[A:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*]]: |
| ; VF2-NEXT: br label %[[LOOP:.*]] |
| ; VF2: [[LOOP]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] |
| ; VF2-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ] |
| ; VF2-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 4 |
| ; VF2-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]] |
| ; VF2-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4 |
| ; VF2-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]] |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1 |
| ; VF2-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 |
| ; VF2-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; VF2: [[EXIT]]: |
| ; VF2-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ] |
| ; VF2-NEXT: ret i32 [[R]] |
| ; |
| ; VF4-LABEL: define i32 @clamped_load_reduction_bound4( |
| ; VF4-SAME: ptr [[A:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*]]: |
| ; VF4-NEXT: br label %[[LOOP:.*]] |
| ; VF4: [[LOOP]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] |
| ; VF4-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ] |
| ; VF4-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 4 |
| ; VF4-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]] |
| ; VF4-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4 |
| ; VF4-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]] |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1 |
| ; VF4-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 |
| ; VF4-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; VF4: [[EXIT]]: |
| ; VF4-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ] |
| ; VF4-NEXT: ret i32 [[R]] |
| ; |
| ; VF8-LABEL: define i32 @clamped_load_reduction_bound4( |
| ; VF8-SAME: ptr [[A:%.*]]) { |
| ; VF8-NEXT: [[ENTRY:.*]]: |
| ; VF8-NEXT: br label %[[LOOP:.*]] |
| ; VF8: [[LOOP]]: |
| ; VF8-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] |
| ; VF8-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ] |
| ; VF8-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 4 |
| ; VF8-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]] |
| ; VF8-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4 |
| ; VF8-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]] |
| ; VF8-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1 |
| ; VF8-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 |
| ; VF8-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; VF8: [[EXIT]]: |
| ; VF8-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ] |
| ; VF8-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] |
| %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ] |
| ; Wrap the index into [0, 4) so only the first four i32 elements of %A are read. |
| %clamped = urem i32 %iv, 4 |
| %gep.A = getelementptr inbounds i32, ptr %A, i32 %clamped |
| %lv = load i32, ptr %gep.A |
| %sum.next = add i32 %sum, %lv |
| %iv.next = add nuw nsw i32 %iv, 1 |
| ; Leave the loop once %iv.next reaches 128, i.e. after 128 iterations. |
| %cond = icmp eq i32 %iv.next, 128 |
| br i1 %cond, label %exit, label %loop |
| |
| exit: |
| %r = phi i32 [ %sum.next, %loop ] |
| ret i32 %r |
| } |
| |
| ; Sums 128 i32 loads from %A, where the element index is the induction |
| ; variable reduced modulo 8 (urem), and returns the total. |
| ; The assertions for all three check prefixes describe the same scalar loop as |
| ; the input, i.e. they record no vector instructions at forced widths 2, 4 and 8. |
| ; The input load has no explicit alignment; the assertions expect `align 4`. |
| ; Review note: the assertions capture the induction variable as INDEX although |
| ; the input value is named %iv. The patterns match any name, so this is harmless, |
| ; but it may mean the assertions were not regenerated after a rename (TODO confirm). |
| define i32 @clamped_load_reduction_bound8(ptr %A) { |
| ; VF2-LABEL: define i32 @clamped_load_reduction_bound8( |
| ; VF2-SAME: ptr [[A:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*]]: |
| ; VF2-NEXT: br label %[[LOOP:.*]] |
| ; VF2: [[LOOP]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] |
| ; VF2-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ] |
| ; VF2-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 8 |
| ; VF2-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]] |
| ; VF2-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4 |
| ; VF2-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]] |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1 |
| ; VF2-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 |
| ; VF2-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; VF2: [[EXIT]]: |
| ; VF2-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ] |
| ; VF2-NEXT: ret i32 [[R]] |
| ; |
| ; VF4-LABEL: define i32 @clamped_load_reduction_bound8( |
| ; VF4-SAME: ptr [[A:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*]]: |
| ; VF4-NEXT: br label %[[LOOP:.*]] |
| ; VF4: [[LOOP]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] |
| ; VF4-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ] |
| ; VF4-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 8 |
| ; VF4-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]] |
| ; VF4-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4 |
| ; VF4-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]] |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1 |
| ; VF4-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 |
| ; VF4-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; VF4: [[EXIT]]: |
| ; VF4-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ] |
| ; VF4-NEXT: ret i32 [[R]] |
| ; |
| ; VF8-LABEL: define i32 @clamped_load_reduction_bound8( |
| ; VF8-SAME: ptr [[A:%.*]]) { |
| ; VF8-NEXT: [[ENTRY:.*]]: |
| ; VF8-NEXT: br label %[[LOOP:.*]] |
| ; VF8: [[LOOP]]: |
| ; VF8-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] |
| ; VF8-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ] |
| ; VF8-NEXT: [[CLAMPED:%.*]] = urem i32 [[INDEX]], 8 |
| ; VF8-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[CLAMPED]] |
| ; VF8-NEXT: [[LV:%.*]] = load i32, ptr [[GEP_A]], align 4 |
| ; VF8-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]] |
| ; VF8-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1 |
| ; VF8-NEXT: [[COND:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 |
| ; VF8-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; VF8: [[EXIT]]: |
| ; VF8-NEXT: [[R:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ] |
| ; VF8-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] |
| %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ] |
| ; Wrap the index into [0, 8) so only the first eight i32 elements of %A are read. |
| %clamped = urem i32 %iv, 8 |
| %gep.A = getelementptr inbounds i32, ptr %A, i32 %clamped |
| %lv = load i32, ptr %gep.A |
| %sum.next = add i32 %sum, %lv |
| %iv.next = add nuw nsw i32 %iv, 1 |
| ; Leave the loop once %iv.next reaches 128, i.e. after 128 iterations. |
| %cond = icmp eq i32 %iv.next, 128 |
| br i1 %cond, label %exit, label %loop |
| |
| exit: |
| %r = phi i32 [ %sum.next, %loop ] |
| ret i32 %r |
| } |