| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s |
| |
| ; Integer add reduction over a loop that exits when %iv.next reaches 1000. |
| ; Expected output: the loop body is replicated four times per iteration, three |
| ; extra accumulator phis start at 0 next to the original phi that starts at |
| ; %start, and the four partial sums are combined with adds in the exit block. |
| define i32 @test_add(ptr %src, i64 %n, i32 %start) { |
| ; CHECK-LABEL: define i32 @test_add( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_24:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]] |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_1]], [[L_1]] |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_NEXT_1]], [[L_2]] |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_3]], [[L_24]] |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA1:%.*]] = phi i32 [ [[RDX_NEXT_24]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_3]], [[RDX_NEXT]] |
| ; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = add i32 [[RDX_NEXT_24]], [[BIN_RDX1]] |
| ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr i32, ptr %src, i64 %iv |
| %l = load i32 , ptr %gep.src, align 1 |
| %rdx.next = add i32 %rdx, %l |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret i32 %rdx.next |
| } |
| |
| ; Same add reduction as @test_add, but the loop exits when %iv.next reaches |
| ; 1001, which is not a multiple of the unroll count 4. |
| ; Expected output: the exit test stays after the first copy of the body, the |
| ; remaining three copies live in a second block, and a single reduction phi is |
| ; chained serially through all four adds; no extra accumulator phis appear. |
| define i32 @test_add_tc_not_multiple_of_4(ptr %src, i64 %n, i32 %start) { |
| ; CHECK-LABEL: define i32 @test_add_tc_not_multiple_of_4( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP_1:.*]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP_1]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]] |
| ; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 1001 |
| ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_1]], label %[[EXIT:.*]] |
| ; CHECK: [[LOOP_1]]: |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]] |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]] |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_12:%.*]] = load i32, ptr [[GEP_SRC_12]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_12]] |
| ; CHECK-NEXT: br label %[[LOOP]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr i32, ptr %src, i64 %iv |
| %l = load i32 , ptr %gep.src, align 1 |
| %rdx.next = add i32 %rdx, %l |
| %ec = icmp ne i64 %iv.next, 1001 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret i32 %rdx.next |
| } |
| |
| ; Add reduction whose updated value %rdx.next is also stored back to %gep.src |
| ; inside the loop, i.e. the running sum has a user besides the reduction phi |
| ; and the return. |
| ; Expected output: four copies of the body with one reduction phi chained |
| ; serially through them; no extra accumulator phis appear. |
| define i32 @test_add_rdx_used_in_loop(ptr %src, i64 %n, i32 %start) { |
| ; CHECK-LABEL: define i32 @test_add_rdx_used_in_loop( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_24:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]] |
| ; CHECK-NEXT: store i32 [[RDX_NEXT]], ptr [[GEP_SRC]], align 4 |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]] |
| ; CHECK-NEXT: store i32 [[RDX_NEXT_1]], ptr [[GEP_SRC_1]], align 4 |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]] |
| ; CHECK-NEXT: store i32 [[RDX_NEXT_2]], ptr [[GEP_SRC_2]], align 4 |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_NEXT_2]], [[L_24]] |
| ; CHECK-NEXT: store i32 [[RDX_NEXT_24]], ptr [[GEP_SRC_24]], align 4 |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_24]], %[[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr i32, ptr %src, i64 %iv |
| %l = load i32 , ptr %gep.src, align 1 |
| %rdx.next = add i32 %rdx, %l |
| store i32 %rdx.next, ptr %gep.src |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret i32 %rdx.next |
| } |
| |
| ; Add reduction where the function returns the reduction phi %rdx itself |
| ; rather than the updated value %rdx.next. |
| ; Expected output: four copies of the body with one reduction phi chained |
| ; serially through them; the exit phi receives the result of the third add, |
| ; which is the phi's value in the final original iteration. |
| define i32 @test_add_phi_used_outside_loop(ptr %src, i64 %n, i32 %start) { |
| ; CHECK-LABEL: define i32 @test_add_phi_used_outside_loop( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]] |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]] |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]] |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_24]] |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_2]], %[[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[RDX_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr i32, ptr %src, i64 %iv |
| %l = load i32 , ptr %gep.src, align 1 |
| %rdx.next = add i32 %rdx, %l |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret i32 %rdx |
| } |
| |
| ; Two reductions share one loop: %rdx.1 accumulates with add and %rdx.2 with |
| ; mul, both starting at %start; their final values are summed in the exit block. |
| ; Expected output: the add reduction gets three extra accumulator phis that |
| ; start at 0 and is recombined with adds in the exit block, while the mul |
| ; reduction stays a single phi chained serially through the four copies. |
| define i32 @test_add_and_mul_reduction(ptr %src, i64 %n, i32 %start) { |
| ; CHECK-LABEL: define i32 @test_add_and_mul_reduction( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_1_NEXT_1:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_1_NEXT_2:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_1_NEXT_24:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_1_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_2:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_2_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[RDX_1_NEXT]] = add i32 [[RDX_1]], [[L]] |
| ; CHECK-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[L]] |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[RDX_1_NEXT_1]] = add i32 [[RDX_1_1]], [[L_1]] |
| ; CHECK-NEXT: [[RDX_2_2:%.*]] = mul i32 [[RDX_2_NEXT]], [[L_1]] |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[RDX_1_NEXT_2]] = add i32 [[RDX_1_2]], [[L_2]] |
| ; CHECK-NEXT: [[RDX_2_NEXT_2:%.*]] = mul i32 [[RDX_2_2]], [[L_2]] |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 |
| ; CHECK-NEXT: [[RDX_1_NEXT_24]] = add i32 [[RDX_1_3]], [[L_24]] |
| ; CHECK-NEXT: [[RDX_2_NEXT_3]] = mul i32 [[RDX_2_NEXT_2]], [[L_24]] |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_1_NEXT_LCSSA1:%.*]] = phi i32 [ [[RDX_1_NEXT_24]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[BIN_RDX5:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_1_NEXT_1]], [[RDX_1_NEXT]] |
| ; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_1_NEXT_2]], [[BIN_RDX]] |
| ; CHECK-NEXT: [[RDX_1_NEXT_LCSSA:%.*]] = add i32 [[RDX_1_NEXT_24]], [[BIN_RDX1]] |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[RDX_1_NEXT_LCSSA]], [[BIN_RDX5]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx.1 = phi i32 [ %start, %entry ], [ %rdx.1.next, %loop ] |
| %rdx.2 = phi i32 [ %start, %entry ], [ %rdx.2.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr i32, ptr %src, i64 %iv |
| %l = load i32 , ptr %gep.src, align 1 |
| %rdx.1.next = add i32 %rdx.1, %l |
| %rdx.2.next = mul i32 %rdx.2, %l |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| %res = add i32 %rdx.1.next, %rdx.2.next |
| ret i32 %res |
| } |
| |
| ; Floating-point add reduction whose fadd carries no fast-math flags. |
| ; Expected output: four copies of the body with one reduction phi chained |
| ; serially through the four fadds; no extra accumulator phis appear. |
| define float @test_fadd_no_fmfs(ptr %src, i64 %n, float %start) { |
| ; CHECK-LABEL: define float @test_fadd_no_fmfs( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], float [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[L]] |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = fadd float [[RDX_NEXT]], [[L_1]] |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = fadd float [[RDX_NEXT_1]], [[L_2]] |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = fadd float [[RDX_NEXT_2]], [[L_24]] |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ] |
| ; CHECK-NEXT: ret float [[RDX_NEXT_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi float [ %start, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr float, ptr %src, i64 %iv |
| %l = load float, ptr %gep.src, align 1 |
| %rdx.next = fadd float %rdx, %l |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret float %rdx.next |
| } |
| |
| ; Floating-point add reduction; the body is identical to @test_fadd_no_fmfs. |
| ; Expected output: four copies of the body with one reduction phi chained |
| ; serially through the four fadds; no extra accumulator phis appear. |
| ; NOTE(review): "ressaoc" in the function name is presumably a typo for |
| ; "reassoc", yet the fadd below carries no reassoc flag. Confirm whether the |
| ; flag was meant to be present; adding it requires regenerating the assertions. |
| define float @test_fadd_with_ressaoc(ptr %src, i64 %n, float %start) { |
| ; CHECK-LABEL: define float @test_fadd_with_ressaoc( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], float [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[L]] |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = fadd float [[RDX_NEXT]], [[L_1]] |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = fadd float [[RDX_NEXT_1]], [[L_2]] |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = fadd float [[RDX_NEXT_2]], [[L_24]] |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ] |
| ; CHECK-NEXT: ret float [[RDX_NEXT_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi float [ %start, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr float, ptr %src, i64 %iv |
| %l = load float, ptr %gep.src, align 1 |
| %rdx.next = fadd float %rdx, %l |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret float %rdx.next |
| } |
| ; Signed-minimum reduction built on the llvm.smin intrinsic, starting at the |
| ; constant 1000. |
| ; Expected output: four copies of the body with one reduction phi chained |
| ; serially through the four smin calls; no extra accumulator phis appear. |
| define i32 @test_smin(ptr %src, i64 %n) { |
| ; CHECK-LABEL: define i32 @test_smin( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[MIN:%.*]] = phi i32 [ 1000, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT:%.*]] = call i32 @llvm.smin.i32(i32 [[MIN]], i32 [[L]]) |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT]], i32 [[L_1]]) |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT_1]], i32 [[L_2]]) |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT_2]], i32 [[L_24]]) |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %min = phi i32 [ 1000, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr i32, ptr %src, i64 %iv |
| %l = load i32 , ptr %gep.src, align 1 |
| %rdx.next = call i32 @llvm.smin(i32 %min, i32 %l) |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret i32 %rdx.next |
| } |
| |
| ; Select-based reduction over bytes: %rdx.next keeps %any.of.rdx when the |
| ; loaded byte equals 0 and becomes the constant 0 otherwise. The input phis |
| ; list the backedge incoming value first, unlike the other tests in this file. |
| ; Expected output: four copies of the body with one reduction phi chained |
| ; serially through the four selects; no extra accumulator phis appear. |
| define i64 @test_any_of_reduction(ptr %src, i64 %n) { |
| ; CHECK-LABEL: define i64 @test_any_of_reduction( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[ANY_OF_RDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 |
| ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 |
| ; CHECK-NEXT: [[RDX_NEXT:%.*]] = select i1 [[C]], i64 [[ANY_OF_RDX]], i64 0 |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1 |
| ; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L_1]], 0 |
| ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = select i1 [[C_1]], i64 [[RDX_NEXT]], i64 0 |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_SRC_2]], align 1 |
| ; CHECK-NEXT: [[C_2:%.*]] = icmp eq i8 [[L_2]], 0 |
| ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = select i1 [[C_2]], i64 [[RDX_NEXT_1]], i64 0 |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_24:%.*]] = load i8, ptr [[GEP_SRC_24]], align 1 |
| ; CHECK-NEXT: [[C_24:%.*]] = icmp eq i8 [[L_24]], 0 |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = select i1 [[C_24]], i64 [[RDX_NEXT_2]], i64 0 |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i64 [ [[RDX_NEXT_3]], %[[LOOP]] ] |
| ; CHECK-NEXT: ret i64 [[RDX_NEXT_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] |
| %any.of.rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ] |
| %iv.next = add i64 %iv, 1 |
| %gep.src = getelementptr i8, ptr %src, i64 %iv |
| %l = load i8, ptr %gep.src, align 1 |
| %c = icmp eq i8 %l, 0 |
| %rdx.next = select i1 %c, i64 %any.of.rdx, i64 0 |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret i64 %rdx.next |
| } |
| |
| ; Add-nsw reduction seeded from a load of %sum in the entry block; every |
| ; iteration stores the running sum back to %sum. The loop leaves when |
| ; %iv.next equals 10000, with the exit on the true edge of the branch. |
| ; Expected output: four copies of the body, each followed by its store, with |
| ; one reduction phi chained serially; no extra accumulator phis appear. |
| define void @reduction_with_intermediate_store(ptr %src, ptr %sum) { |
| ; CHECK-LABEL: define void @reduction_with_intermediate_store( |
| ; CHECK-SAME: ptr [[SRC:%.*]], ptr [[SUM:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: [[SUM_PROMOTED:%.*]] = load i32, ptr [[SUM]], align 4 |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[SUM_PROMOTED]], %[[ENTRY]] ], [ [[RED_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 |
| ; CHECK-NEXT: [[RED_NEXT:%.*]] = add nsw i32 [[RED]], [[L]] |
| ; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[SUM]], align 4 |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4 |
| ; CHECK-NEXT: [[RED_NEXT_1:%.*]] = add nsw i32 [[RED_NEXT]], [[L_1]] |
| ; CHECK-NEXT: store i32 [[RED_NEXT_1]], ptr [[SUM]], align 4 |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 |
| ; CHECK-NEXT: [[RED_NEXT_2:%.*]] = add nsw i32 [[RED_NEXT_1]], [[L_2]] |
| ; CHECK-NEXT: store i32 [[RED_NEXT_2]], ptr [[SUM]], align 4 |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_SRC_3]], align 4 |
| ; CHECK-NEXT: [[RED_NEXT_3]] = add nsw i32 [[RED_NEXT_2]], [[L_3]] |
| ; CHECK-NEXT: store i32 [[RED_NEXT_3]], ptr [[SUM]], align 4 |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 10000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %sum.promoted = load i32, ptr %sum, align 4 |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %red = phi i32 [ %sum.promoted, %entry ], [ %red.next, %loop ] |
| %gep.src = getelementptr inbounds nuw i32, ptr %src, i64 %iv |
| %l = load i32, ptr %gep.src, align 4 |
| %red.next = add nsw i32 %red, %l |
| store i32 %red.next, ptr %sum, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 10000 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; External function with no body in this file; @test_add_with_call calls it |
| ; once per iteration to produce the value that is accumulated. |
| declare i32 @foo() |
| |
| ; Loop with a call cannot be handled by LoopVectorize, introducing additional |
| ; accumulators when unrolling increases throughput. |
| ; The accumulated value comes from a call to @foo in every iteration. |
| ; Expected output: four calls per iteration, each feeding its own accumulator |
| ; (three extra phis starting at 0 plus the original starting at %start), with |
| ; the partial sums combined by adds in the exit block. |
| define i32 @test_add_with_call(i64 %n, i32 %start) { |
| ; CHECK-LABEL: define i32 @test_add_with_call( |
| ; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[L:%.*]] = call i32 @foo() |
| ; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]] |
| ; CHECK-NEXT: [[L_1:%.*]] = call i32 @foo() |
| ; CHECK-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[L_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = call i32 @foo() |
| ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[L_2]] |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[L_3:%.*]] = call i32 @foo() |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[L_3]] |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]] |
| ; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] |
| ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]] |
| ; CHECK-NEXT: ret i32 [[BIN_RDX2]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %l = call i32 @foo() |
| %rdx.next = add i32 %rdx, %l |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret i32 %rdx.next |
| } |
| |
| ; Loop with backward dependence cannot be handled by LoopVectorize, introducing |
| ; additional accumulators when unrolling increases throughput. |
| ; Each iteration loads p[iv] for the sum and stores 0 to p[iv + 1], so the |
| ; next iteration's load reads the slot that was just written. |
| ; Expected output: four copies of the load/store pair in original order, each |
| ; load feeding its own accumulator (three extra phis starting at 0 plus the |
| ; original starting at %start), with the partial sums combined by adds in the |
| ; exit block. |
| define i32 @test_add_with_backward_dep(ptr %p, i64 %n, i32 %start) { |
| ; CHECK-LABEL: define i32 @test_add_with_backward_dep( |
| ; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]] |
| ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 |
| ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 4 |
| ; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]] |
| ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 |
| ; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]] |
| ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_11]], align 4 |
| ; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: store i32 0, ptr [[GEP_1_1]], align 4 |
| ; CHECK-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[L_1]] |
| ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 |
| ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]] |
| ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_2]], align 4 |
| ; CHECK-NEXT: [[GEP_1_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: store i32 0, ptr [[GEP_1_2]], align 4 |
| ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[L_2]] |
| ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]] |
| ; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_3]], align 4 |
| ; CHECK-NEXT: [[GEP_1_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_3]] |
| ; CHECK-NEXT: store i32 0, ptr [[GEP_1_3]], align 4 |
| ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[L_3]] |
| ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 |
| ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]] |
| ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] |
| ; CHECK-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]] |
| ; CHECK-NEXT: ret i32 [[BIN_RDX3]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] |
| %iv.next = add i64 %iv, 1 |
| %gep = getelementptr inbounds nuw i32, ptr %p, i64 %iv |
| %l = load i32, ptr %gep |
| %gep.1 = getelementptr inbounds nuw i32, ptr %p, i64 %iv.next |
| store i32 0, ptr %gep.1 |
| %rdx.next = add i32 %rdx, %l |
| %ec = icmp ne i64 %iv.next, 1000 |
| br i1 %ec, label %loop, label %exit |
| |
| exit: |
| ret i32 %rdx.next |
| } |