| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s |
| |
| ; One-dimensional loop with a load of `b` that can be hoisted out of the loop. |
| ; for (int i = 0; i < N; ++i) |
| ; if (!memcmp(a[i], b, 4)) |
| ; sum += 1; |
| ; The IR expresses the comparison as a 4-byte bcmp call. In the expected output `ldr w9, [x1]` (the load of `b`) precedes .LBB0_1, so only the a[i] loads remain in the loop. |
| define i64 @one_dimensional(ptr %a, ptr %b, i64 %N) { |
| ; CHECK-LABEL: one_dimensional: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr w9, [x1] |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB0_1: // %for.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr x10, [x0], #8 |
| ; CHECK-NEXT: ldr w10, [x10] |
| ; CHECK-NEXT: cmp w10, w9 |
| ; CHECK-NEXT: cinc x8, x8, eq |
| ; CHECK-NEXT: subs x2, x2, #1 |
| ; CHECK-NEXT: b.ne .LBB0_1 |
| ; CHECK-NEXT: // %bb.2: // %for.exit |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: ret |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body -- loop body: load a[i], bcmp 4 bytes against %b, add 1 to the sum when they are equal |
| %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] |
| %sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06 |
| %0 = load ptr, ptr %arrayidx, align 8 |
| %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4) |
| %tobool = icmp eq i32 %bcmp, 0 |
| %add = zext i1 %tobool to i64 |
| %spec.select = add i64 %sum.05, %add |
| %inc = add nuw i64 %i.06, 1 |
| %exitcond = icmp eq i64 %inc, %N |
| br i1 %exitcond, label %for.exit, label %for.body |
| |
| for.exit: ; preds = %for.body -- return the accumulated count |
| ret i64 %spec.select |
| } |
| |
| ; Same, but the loop nest is two-dimensional. The load of `b` is hoisted outside of both loops. |
| ; for (int i = 0; i < N; ++i) |
| ; for (int j = 0; j < M; ++j) |
| ; if (!memcmp(a[i][j], b, 4)) |
| ; sum += 1; |
| ; Expected output: `ldr w10, [x1]` is emitted in %bb.0, ahead of both .LBB1_1 and .LBB1_2. |
| define i64 @two_dimensional(ptr %a, ptr %b, i64 %N, i64 %M) { |
| ; CHECK-LABEL: two_dimensional: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr w10, [x1] |
| ; CHECK-NEXT: mov x9, xzr |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB1_1: // %for.cond1.preheader |
| ; CHECK-NEXT: // =>This Loop Header: Depth=1 |
| ; CHECK-NEXT: // Child Loop BB1_2 Depth 2 |
| ; CHECK-NEXT: ldr x11, [x0, x9, lsl #3] |
| ; CHECK-NEXT: mov x12, x3 |
| ; CHECK-NEXT: .LBB1_2: // %for.body4 |
| ; CHECK-NEXT: // Parent Loop BB1_1 Depth=1 |
| ; CHECK-NEXT: // => This Inner Loop Header: Depth=2 |
| ; CHECK-NEXT: ldr x13, [x11], #8 |
| ; CHECK-NEXT: ldr w13, [x13] |
| ; CHECK-NEXT: cmp w13, w10 |
| ; CHECK-NEXT: cinc x8, x8, eq |
| ; CHECK-NEXT: subs x12, x12, #1 |
| ; CHECK-NEXT: b.ne .LBB1_2 |
| ; CHECK-NEXT: // %bb.3: // %for.cond1.for.exit3_crit_edge |
| ; CHECK-NEXT: // in Loop: Header=BB1_1 Depth=1 |
| ; CHECK-NEXT: add x9, x9, #1 |
| ; CHECK-NEXT: cmp x9, x2 |
| ; CHECK-NEXT: b.ne .LBB1_1 |
| ; CHECK-NEXT: // %bb.4: // %for.exit |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: ret |
| entry: |
| br label %for.cond1.preheader |
| |
| for.cond1.preheader: ; preds = %entry, %for.cond1.for.exit3_crit_edge -- outer (i) loop header: loads the row pointer a[i] |
| %i.019 = phi i64 [ %inc7, %for.cond1.for.exit3_crit_edge ], [ 0, %entry ] |
| %sum.018 = phi i64 [ %spec.select, %for.cond1.for.exit3_crit_edge ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.019 |
| %0 = load ptr, ptr %arrayidx, align 8 |
| br label %for.body4 |
| |
| for.body4: ; preds = %for.cond1.preheader, %for.body4 -- inner (j) loop: 4-byte bcmp of a[i][j] against %b |
| %j.016 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ] |
| %sum.115 = phi i64 [ %sum.018, %for.cond1.preheader ], [ %spec.select, %for.body4 ] |
| %arrayidx5 = getelementptr inbounds ptr, ptr %0, i64 %j.016 |
| %1 = load ptr, ptr %arrayidx5, align 8 |
| %bcmp = tail call i32 @bcmp(ptr %1, ptr %b, i64 4) |
| %tobool = icmp eq i32 %bcmp, 0 |
| %add = zext i1 %tobool to i64 |
| %spec.select = add i64 %sum.115, %add |
| %inc = add nuw i64 %j.016, 1 |
| %exitcond = icmp eq i64 %inc, %M |
| br i1 %exitcond, label %for.cond1.for.exit3_crit_edge, label %for.body4 |
| |
| for.cond1.for.exit3_crit_edge: ; preds = %for.body4 -- outer loop latch: increments i and compares it with %N |
| %inc7 = add nuw i64 %i.019, 1 |
| %exitcond22 = icmp eq i64 %inc7, %N |
| br i1 %exitcond22, label %for.exit, label %for.cond1.preheader |
| |
| for.exit: ; preds = %for.cond1.for.exit3_crit_edge |
| ret i64 %spec.select |
| } |
| |
| ; Same, but the loop nest is three-dimensional. The load of `b` is hoisted outside of all three loops. |
| ; for (int i = 0; i < N; ++i) |
| ; for (int j = 0; j < M; ++j) |
| ; for (int k = 0; k < K; ++k) |
| ; if (!memcmp(a[i][j][k], b, 4)) |
| ; sum += 1; |
| ; Expected output: `ldr w10, [x1]` is emitted in %bb.0, ahead of .LBB2_1, .LBB2_2 and .LBB2_3. |
| define i64 @three_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) { |
| ; CHECK-LABEL: three_dimensional: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr w10, [x1] |
| ; CHECK-NEXT: mov x9, xzr |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB2_1: // %for.cond1.preheader |
| ; CHECK-NEXT: // =>This Loop Header: Depth=1 |
| ; CHECK-NEXT: // Child Loop BB2_2 Depth 2 |
| ; CHECK-NEXT: // Child Loop BB2_3 Depth 3 |
| ; CHECK-NEXT: ldr x11, [x0, x9, lsl #3] |
| ; CHECK-NEXT: mov x12, xzr |
| ; CHECK-NEXT: .LBB2_2: // %for.cond5.preheader |
| ; CHECK-NEXT: // Parent Loop BB2_1 Depth=1 |
| ; CHECK-NEXT: // => This Loop Header: Depth=2 |
| ; CHECK-NEXT: // Child Loop BB2_3 Depth 3 |
| ; CHECK-NEXT: ldr x13, [x11, x12, lsl #3] |
| ; CHECK-NEXT: mov x14, x4 |
| ; CHECK-NEXT: .LBB2_3: // %for.body8 |
| ; CHECK-NEXT: // Parent Loop BB2_1 Depth=1 |
| ; CHECK-NEXT: // Parent Loop BB2_2 Depth=2 |
| ; CHECK-NEXT: // => This Inner Loop Header: Depth=3 |
| ; CHECK-NEXT: ldr x15, [x13], #8 |
| ; CHECK-NEXT: ldr w15, [x15] |
| ; CHECK-NEXT: cmp w15, w10 |
| ; CHECK-NEXT: cinc x8, x8, eq |
| ; CHECK-NEXT: subs x14, x14, #1 |
| ; CHECK-NEXT: b.ne .LBB2_3 |
| ; CHECK-NEXT: // %bb.4: // %for.cond5.for.cond |
| ; CHECK-NEXT: // in Loop: Header=BB2_2 Depth=2 |
| ; CHECK-NEXT: add x12, x12, #1 |
| ; CHECK-NEXT: cmp x12, x3 |
| ; CHECK-NEXT: b.ne .LBB2_2 |
| ; CHECK-NEXT: // %bb.5: // %for.cond1.for.cond |
| ; CHECK-NEXT: // in Loop: Header=BB2_1 Depth=1 |
| ; CHECK-NEXT: add x9, x9, #1 |
| ; CHECK-NEXT: cmp x9, x2 |
| ; CHECK-NEXT: b.ne .LBB2_1 |
| ; CHECK-NEXT: // %bb.6: // %for.exit |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: ret |
| entry: |
| br label %for.cond1.preheader |
| |
| for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond -- outer (i) loop header: loads a[i] |
| %i.033 = phi i64 [ %inc15, %for.cond1.for.cond ], [ 0, %entry ] |
| %sum.032 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.033 |
| %0 = load ptr, ptr %arrayidx, align 8 |
| br label %for.cond5.preheader |
| |
| for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader -- middle (j) loop header: loads a[i][j] |
| %j.029 = phi i64 [ 0, %for.cond1.preheader ], [ %inc12, %for.cond5.for.cond ] |
| %sum.128 = phi i64 [ %sum.032, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ] |
| %arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.029 |
| %1 = load ptr, ptr %arrayidx9, align 8 |
| br label %for.body8 |
| |
| for.body8: ; preds = %for.body8, %for.cond5.preheader -- innermost (k) loop: 4-byte bcmp of a[i][j][k] against %b |
| %k.026 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ] |
| %sum.225 = phi i64 [ %sum.128, %for.cond5.preheader ], [ %spec.select, %for.body8 ] |
| %arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.026 |
| %2 = load ptr, ptr %arrayidx10, align 8 |
| %bcmp = tail call i32 @bcmp(ptr %2, ptr %b, i64 4) |
| %tobool = icmp eq i32 %bcmp, 0 |
| %add = zext i1 %tobool to i64 |
| %spec.select = add i64 %sum.225, %add |
| %inc = add nuw i64 %k.026, 1 |
| %exitcond = icmp eq i64 %inc, %K |
| br i1 %exitcond, label %for.cond5.for.cond, label %for.body8 |
| |
| for.cond5.for.cond: ; preds = %for.body8 -- middle loop latch: increments j and compares it with %M |
| %inc12 = add nuw i64 %j.029, 1 |
| %exitcond44 = icmp eq i64 %inc12, %M |
| br i1 %exitcond44, label %for.cond1.for.cond, label %for.cond5.preheader |
| |
| for.cond1.for.cond: ; preds = %for.cond5.for.cond -- outer loop latch: increments i and compares it with %N |
| %inc15 = add nuw i64 %i.033, 1 |
| %exitcond45 = icmp eq i64 %inc15, %N |
| br i1 %exitcond45, label %for.exit, label %for.cond1.preheader |
| |
| for.exit: ; preds = %for.cond1.for.cond |
| ret i64 %spec.select |
| } |
| |
| ; Three-dimensional loop, but the `b` operand is invariant only relative to the innermost loop. |
| ; Make sure that the load is hoisted out of the innermost loop only. |
| ; for (int i = 0; i < N; ++i) |
| ; for (int j = 0; j < M; ++j) |
| ; for (int k = 0; k < K; ++k) |
| ; if (!memcmp(a[i][j][k], b[j], 4)) |
| ; sum += 1; |
| ; Expected output: `ldr w13, [x13]` (the load through b[j]) sits in the .LBB3_2 block, i.e. inside the j loop but ahead of the innermost .LBB3_3 loop. |
| define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) { |
| ; CHECK-LABEL: three_dimensional_middle: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov x9, xzr |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB3_1: // %for.cond1.preheader |
| ; CHECK-NEXT: // =>This Loop Header: Depth=1 |
| ; CHECK-NEXT: // Child Loop BB3_2 Depth 2 |
| ; CHECK-NEXT: // Child Loop BB3_3 Depth 3 |
| ; CHECK-NEXT: ldr x10, [x0, x9, lsl #3] |
| ; CHECK-NEXT: mov x11, xzr |
| ; CHECK-NEXT: .LBB3_2: // %for.cond5.preheader |
| ; CHECK-NEXT: // Parent Loop BB3_1 Depth=1 |
| ; CHECK-NEXT: // => This Loop Header: Depth=2 |
| ; CHECK-NEXT: // Child Loop BB3_3 Depth 3 |
| ; CHECK-NEXT: ldr x13, [x1, x11, lsl #3] |
| ; CHECK-NEXT: ldr x12, [x10, x11, lsl #3] |
| ; CHECK-NEXT: mov x14, x4 |
| ; CHECK-NEXT: ldr w13, [x13] |
| ; CHECK-NEXT: .LBB3_3: // %for.body8 |
| ; CHECK-NEXT: // Parent Loop BB3_1 Depth=1 |
| ; CHECK-NEXT: // Parent Loop BB3_2 Depth=2 |
| ; CHECK-NEXT: // => This Inner Loop Header: Depth=3 |
| ; CHECK-NEXT: ldr x15, [x12], #8 |
| ; CHECK-NEXT: ldr w15, [x15] |
| ; CHECK-NEXT: cmp w15, w13 |
| ; CHECK-NEXT: cinc x8, x8, eq |
| ; CHECK-NEXT: subs x14, x14, #1 |
| ; CHECK-NEXT: b.ne .LBB3_3 |
| ; CHECK-NEXT: // %bb.4: // %for.cond5.for.cond |
| ; CHECK-NEXT: // in Loop: Header=BB3_2 Depth=2 |
| ; CHECK-NEXT: add x11, x11, #1 |
| ; CHECK-NEXT: cmp x11, x3 |
| ; CHECK-NEXT: b.ne .LBB3_2 |
| ; CHECK-NEXT: // %bb.5: // %for.cond1.for.cond |
| ; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1 |
| ; CHECK-NEXT: add x9, x9, #1 |
| ; CHECK-NEXT: cmp x9, x2 |
| ; CHECK-NEXT: b.ne .LBB3_1 |
| ; CHECK-NEXT: // %bb.6: // %for.exit |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: ret |
| entry: |
| br label %for.cond1.preheader |
| |
| for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond -- outer (i) loop header: loads a[i] |
| %i.035 = phi i64 [ %inc16, %for.cond1.for.cond ], [ 0, %entry ] |
| %sum.034 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.035 |
| %0 = load ptr, ptr %arrayidx, align 8 |
| br label %for.cond5.preheader |
| |
| for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader -- middle (j) loop header: loads both a[i][j] and b[j] |
| %j.031 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.cond5.for.cond ] |
| %sum.130 = phi i64 [ %sum.034, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ] |
| %arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.031 |
| %1 = load ptr, ptr %arrayidx9, align 8 |
| %arrayidx11 = getelementptr inbounds ptr, ptr %b, i64 %j.031 |
| %2 = load ptr, ptr %arrayidx11, align 8 |
| br label %for.body8 |
| |
| for.body8: ; preds = %for.body8, %for.cond5.preheader -- innermost (k) loop: 4-byte bcmp of a[i][j][k] against b[j] (%2) |
| %k.028 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ] |
| %sum.227 = phi i64 [ %sum.130, %for.cond5.preheader ], [ %spec.select, %for.body8 ] |
| %arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.028 |
| %3 = load ptr, ptr %arrayidx10, align 8 |
| %bcmp = tail call i32 @bcmp(ptr %3, ptr %2, i64 4) |
| %tobool = icmp eq i32 %bcmp, 0 |
| %add = zext i1 %tobool to i64 |
| %spec.select = add i64 %sum.227, %add |
| %inc = add nuw i64 %k.028, 1 |
| %exitcond = icmp eq i64 %inc, %K |
| br i1 %exitcond, label %for.cond5.for.cond, label %for.body8 |
| |
| for.cond5.for.cond: ; preds = %for.body8 -- middle loop latch: increments j and compares it with %M |
| %inc13 = add nuw i64 %j.031, 1 |
| %exitcond46 = icmp eq i64 %inc13, %M |
| br i1 %exitcond46, label %for.cond1.for.cond, label %for.cond5.preheader |
| |
| for.cond1.for.cond: ; preds = %for.cond5.for.cond -- outer loop latch: increments i and compares it with %N |
| %inc16 = add nuw i64 %i.035, 1 |
| %exitcond47 = icmp eq i64 %inc16, %N |
| br i1 %exitcond47, label %for.exit, label %for.cond1.preheader |
| |
| for.exit: ; preds = %for.cond1.for.cond |
| ret i64 %spec.select |
| } |
| |
| ; Make sure that a store inside the loop prevents hoisting of invariant loads. |
| ; for (int i = 0; i < N; ++i) |
| ; c[i] = memcmp(a[i], b, 4); |
| ; Expected output: `ldr w10, [x1]` (the load of `b`) remains inside .LBB4_1, which also contains the `strb` to c. |
| define void @one_dimensional_with_store(ptr %a, ptr %b, ptr %c, i32 %N) { |
| ; CHECK-LABEL: one_dimensional_with_store: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov w8, w3 |
| ; CHECK-NEXT: .LBB4_1: // %for.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr x9, [x0], #8 |
| ; CHECK-NEXT: ldr w10, [x1] |
| ; CHECK-NEXT: ldr w9, [x9] |
| ; CHECK-NEXT: rev w10, w10 |
| ; CHECK-NEXT: rev w9, w9 |
| ; CHECK-NEXT: cmp w9, w10 |
| ; CHECK-NEXT: cset w9, hi |
| ; CHECK-NEXT: csinv w9, w9, wzr, hs |
| ; CHECK-NEXT: subs x8, x8, #1 |
| ; CHECK-NEXT: strb w9, [x2], #1 |
| ; CHECK-NEXT: b.ne .LBB4_1 |
| ; CHECK-NEXT: // %bb.2: // %for.exit |
| ; CHECK-NEXT: ret |
| entry: |
| br label %for.body.preheader |
| |
| for.body.preheader: ; preds = %entry -- zero-extends the i32 trip count %N to i64 |
| %wide.trip.count = zext i32 %N to i64 |
| br label %for.body |
| |
| for.body: ; preds = %for.body.preheader, %for.body -- loop body: 4-byte memcmp of a[i] against %b, result truncated to i8 and stored to c[i] |
| %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] |
| %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv |
| %0 = load ptr, ptr %arrayidx, align 8 |
| %call = tail call i32 @memcmp(ptr %0, ptr %b, i64 4) |
| %conv = trunc i32 %call to i8 |
| %arrayidx2 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv |
| store i8 %conv, ptr %arrayidx2, align 1 |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| br i1 %exitcond.not, label %for.exit, label %for.body |
| |
| for.exit: ; preds = %for.body |
| ret void |
| } |
| |
| ; Make sure that a call inside the loop prevents hoisting of invariant loads. |
| ; Expected output: `ldr w9, [x19]` (x19 holds a copy of `b`, see `mov x19, x1`) remains inside .LBB5_1 alongside `bl func`. |
| define i32 @one_dimensional_with_call(ptr %a, ptr %b, i32 %N) { |
| ; CHECK-LABEL: one_dimensional_with_call: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 48 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w30, -48 |
| ; CHECK-NEXT: mov x19, x1 |
| ; CHECK-NEXT: mov x21, x0 |
| ; CHECK-NEXT: mov w20, wzr |
| ; CHECK-NEXT: mov w22, w2 |
| ; CHECK-NEXT: .LBB5_1: // %for.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr x8, [x21], #8 |
| ; CHECK-NEXT: ldr w9, [x19] |
| ; CHECK-NEXT: ldr w8, [x8] |
| ; CHECK-NEXT: cmp w8, w9 |
| ; CHECK-NEXT: cinc w20, w20, eq |
| ; CHECK-NEXT: bl func |
| ; CHECK-NEXT: subs x22, x22, #1 |
| ; CHECK-NEXT: b.ne .LBB5_1 |
| ; CHECK-NEXT: // %bb.2: // %for.exit |
| ; CHECK-NEXT: mov w0, w20 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| br label %for.body.preheader |
| |
| for.body.preheader: ; preds = %entry -- zero-extends the i32 trip count %N to i64 |
| %wide.trip.count = zext i32 %N to i64 |
| br label %for.body |
| |
| for.body: ; preds = %for.body.preheader, %for.body -- loop body: 4-byte bcmp of a[i] against %b, followed by a call to @func on every iteration |
| %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] |
| %sum.05 = phi i32 [ 0, %for.body.preheader ], [ %spec.select, %for.body ] |
| %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv |
| %0 = load ptr, ptr %arrayidx, align 8 |
| %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4) |
| %tobool.not = icmp eq i32 %bcmp, 0 |
| %add = zext i1 %tobool.not to i32 |
| %spec.select = add nuw nsw i32 %sum.05, %add |
| tail call void @func() |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| br i1 %exitcond.not, label %for.exit, label %for.body |
| |
| for.exit: ; preds = %for.body |
| ret i32 %spec.select |
| } |
| |
| ; One-dimensional loop with a memcmp size of six. |
| ; The test shows that several loads can be hoisted at the same time. |
| ; for (int i = 0; i < N; ++i) |
| ; if (!memcmp(a[i], b, 6)) |
| ; sum += 1; |
| ; Expected output: both `ldr w9, [x1]` and `ldrh w10, [x1, #4]` (the two loads of `b`) are emitted ahead of .LBB6_1. |
| define i64 @one_dimensional_two_loads(ptr %a, ptr %b, i64 %N) { |
| ; CHECK-LABEL: one_dimensional_two_loads: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr w9, [x1] |
| ; CHECK-NEXT: ldrh w10, [x1, #4] |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB6_1: // %for.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr x11, [x0], #8 |
| ; CHECK-NEXT: ldr w12, [x11] |
| ; CHECK-NEXT: ldrh w11, [x11, #4] |
| ; CHECK-NEXT: cmp w12, w9 |
| ; CHECK-NEXT: ccmp w11, w10, #0, eq |
| ; CHECK-NEXT: cinc x8, x8, eq |
| ; CHECK-NEXT: subs x2, x2, #1 |
| ; CHECK-NEXT: b.ne .LBB6_1 |
| ; CHECK-NEXT: // %bb.2: // %for.exit |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: ret |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body -- loop body: 6-byte bcmp of a[i] against %b, add 1 to the sum when they are equal |
| %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] |
| %sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06 |
| %0 = load ptr, ptr %arrayidx, align 8 |
| %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 6) |
| %tobool = icmp eq i32 %bcmp, 0 |
| %add = zext i1 %tobool to i64 |
| %spec.select = add i64 %sum.05, %add |
| %inc = add nuw i64 %i.06, 1 |
| %exitcond = icmp eq i64 %inc, %N |
| br i1 %exitcond, label %for.exit, label %for.body |
| |
| for.exit: ; preds = %for.body |
| ret i64 %spec.select |
| } |
| |
| ; See issue https://github.com/llvm/llvm-project/issues/72855 |
| ; |
| ; When hoisting an instruction out of the loop, ensure that loads are not eliminated as |
| ; common subexpressions. In this example pointer %c may alias pointer %b, |
| ; so when hoisting the `%y = load i64, ptr %b` instruction we can't replace it with |
| ; `%b.val = load i64, ptr %b`. |
| ; Expected output: a second `ldr x9, [x1]` is emitted after `str x8, [x2]` rather than reusing the first `ldr x8, [x1]`. |
| define i64 @hoisting_no_cse(ptr %a, ptr %b, ptr %c, i64 %N) { |
| ; CHECK-LABEL: hoisting_no_cse: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr x8, [x1] |
| ; CHECK-NEXT: add x8, x8, #1 |
| ; CHECK-NEXT: str x8, [x2] |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: ldr x9, [x1] |
| ; CHECK-NEXT: .LBB7_1: // %for.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr x10, [x0], #8 |
| ; CHECK-NEXT: ldr x10, [x10] |
| ; CHECK-NEXT: cmp x10, x9 |
| ; CHECK-NEXT: cinc x8, x8, eq |
| ; CHECK-NEXT: subs x3, x3, #1 |
| ; CHECK-NEXT: b.ne .LBB7_1 |
| ; CHECK-NEXT: // %bb.2: // %for.exit |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: ret |
| entry: |
| %b.val = load i64, ptr %b |
| %b.val.changed = add i64 %b.val, 1 |
| store i64 %b.val.changed, ptr %c |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body -- loop body: %y re-reads %b on every iteration; the store through %c happened earlier, in %entry |
| %idx = phi i64 [ %inc, %for.body ], [ 0, %entry ] |
| %sum = phi i64 [ %spec.select, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %idx |
| %0 = load ptr, ptr %arrayidx, align 8 |
| %x = load i64, ptr %0 |
| %y = load i64, ptr %b |
| %cmp = icmp eq i64 %x, %y |
| %add = zext i1 %cmp to i64 |
| %spec.select = add i64 %sum, %add |
| %inc = add nuw i64 %idx, 1 |
| %exitcond = icmp eq i64 %inc, %N |
| br i1 %exitcond, label %for.exit, label %for.body |
| |
| for.exit: ; preds = %for.body |
| ret i64 %spec.select |
| } |
| |
| @a = external local_unnamed_addr global i32, align 4 |
| |
| ; Make sure the load is not hoisted out of the loop across memory barriers. The loop re-reads @a between two singlethread acq_rel fences until it is non-zero; in the expected output `ldr w0, [x8]` stays between the two //MEMBARRIER markers inside .LBB8_1. |
| define i32 @load_between_memory_barriers() { |
| ; CHECK-LABEL: load_between_memory_barriers: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: adrp x8, :got:a |
| ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] |
| ; CHECK-NEXT: .LBB8_1: // %loop |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: //MEMBARRIER |
| ; CHECK-NEXT: ldr w0, [x8] |
| ; CHECK-NEXT: //MEMBARRIER |
| ; CHECK-NEXT: cbz w0, .LBB8_1 |
| ; CHECK-NEXT: // %bb.2: // %exit |
| ; CHECK-NEXT: ret |
| br label %loop |
| |
| loop: |
| fence syncscope("singlethread") acq_rel |
| %l = load i32, ptr @a, align 4 |
| fence syncscope("singlethread") acq_rel |
| %c = icmp eq i32 %l, 0 |
| br i1 %c, label %loop, label %exit |
| |
| exit: |
| ret i32 %l |
| } |
| |
| declare i32 @bcmp(ptr, ptr, i64) |
| declare i32 @memcmp(ptr, ptr, i64) |
| declare void @func() |