; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes="print<access-info>" %s 2>&1 | FileCheck %s

@a = dso_local local_unnamed_addr global [65536 x float] zeroinitializer, align 16

; Generated from the following C code:
; #define LEN 256 * 256
; float a[LEN];
;
; void different_strides() {
;   for (int i = 0; i < LEN - 1024 - 255; i++) {
; #pragma clang loop interleave(disable)
; #pragma clang loop unroll(disable)
;     for (int j = 0; j < 256; j++)
;       a[i + j + 1024] += a[j * 4 + i];
;   }
; }
; The load and store have different strides (16 and 4 bytes respectively), but the
; store is always a safe positive distance away from the load, thus the dependence
; is BackwardVectorizable.
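;
; As a rough sanity check on the reported bound (assuming, as a simple model of
; the analysis, that the minimum dependence distance is divided by the larger of
; the two strides): at iteration j the store writes a[i + j + 1024] while the
; load reads a[i + 4*j], so the distance is (1024 - 3*j) elements. Over
; j in [0, 255] this stays positive and bottoms out at 1024 - 3*255 = 259
; elements = 1036 bytes; 1036 bytes / 16 bytes = 64 iterations (rounding down),
; i.e. 64 x 32 bits = 2048 bits, matching the maximum safe vector width below.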
define void @different_strides_backward_vectorizable() {
; CHECK-LABEL: 'different_strides_backward_vectorizable'
; CHECK-NEXT:    inner.body:
; CHECK-NEXT:      Memory dependences are safe with a maximum safe vector width of 2048 bits
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        BackwardVectorizable:
; CHECK-NEXT:            %3 = load float, ptr %arrayidx, align 4 ->
; CHECK-NEXT:            store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %5 = load float, ptr %arrayidx8, align 4 ->
; CHECK-NEXT:            store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:    outer.header:
; CHECK-NEXT:      Report: loop is not the innermost loop
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  br label %outer.header

outer.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
  %0 = add nuw nsw i64 %i, 1024
  br label %inner.body

inner.body:
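  ; %arrayidx is &a[4*j + i] (the load, 16-byte stride in j) and %arrayidx8 is
  ; &a[i + j + 1024] (the forward load and the store, 4-byte stride in j).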
  %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
  %1 = shl nuw nsw i64 %j, 2
  %2 = add nuw nsw i64 %1, %i
  %arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
  %3 = load float, ptr %arrayidx, align 4
  %4 = add nuw nsw i64 %0, %j
  %arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
  %5 = load float, ptr %arrayidx8, align 4
  %add9 = fadd fast float %5, %3
  store float %add9, ptr %arrayidx8, align 4
  %j.next = add nuw nsw i64 %j, 1
  %exitcond.not = icmp eq i64 %j.next, 256
  br i1 %exitcond.not, label %outer.latch, label %inner.body

outer.latch:
  %i.next = add nuw nsw i64 %i, 1
  %outerexitcond.not = icmp eq i64 %i.next, 64257
  br i1 %outerexitcond.not, label %exit, label %outer.header

exit:
  ret void
}


; Generated from the following C code (with LEN = 256 and LEN2 = 256 * 256,
; matching the loop bounds in the IR below):
;
; void different_stride_and_not_vectorizable() {
;   for (int i = 0; i < LEN2; i++) {
;     for (int j = 0; j < LEN; j++) {
;       a[i + j + LEN] += a[i + 4 * j];
;     }
;   }
; }
; The load and store have different strides, but here the store is not always a
; safe distance away from the load, thus the loop is not safe to vectorize.
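;
; Concretely: at iteration j the store writes a[i + j + 256] while the load
; reads a[i + 4*j], so the distance is (256 - 3*j) elements. This is positive
; for j <= 85 (1 element at j = 85) but negative from j = 86 on (-2 elements at
; j = 86), so the direction of the dependence flips partway through the loop
; and it is reported as an unknown data dependence.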
define void @different_stride_and_not_vectorizable() {
; CHECK-LABEL: 'different_stride_and_not_vectorizable'
; CHECK-NEXT:    inner.body:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:      Unknown data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Unknown:
; CHECK-NEXT:            %3 = load float, ptr %arrayidx, align 4 ->
; CHECK-NEXT:            store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %5 = load float, ptr %arrayidx8, align 4 ->
; CHECK-NEXT:            store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:    outer.header:
; CHECK-NEXT:      Report: loop is not the innermost loop
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  br label %outer.header

outer.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
  %0 = add nuw nsw i64 %i, 256
  br label %inner.body

inner.body:
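  ; %arrayidx is &a[4*j + i] (the load, 16-byte stride in j) and %arrayidx8 is
  ; &a[i + j + 256] (the forward load and the store, 4-byte stride in j).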
  %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
  %1 = shl nuw nsw i64 %j, 2
  %2 = add nuw nsw i64 %1, %i
  %arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
  %3 = load float, ptr %arrayidx, align 4
  %4 = add nuw nsw i64 %0, %j
  %arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
  %5 = load float, ptr %arrayidx8, align 4
  %add9 = fadd fast float %5, %3
  store float %add9, ptr %arrayidx8, align 4
  %j.next = add nuw nsw i64 %j, 1
  %exitcond.not = icmp eq i64 %j.next, 256
  br i1 %exitcond.not, label %outer.latch, label %inner.body

outer.latch:
  %i.next = add nuw nsw i64 %i, 1
  %exitcond29.not = icmp eq i64 %i.next, 65536
  br i1 %exitcond29.not, label %exit, label %outer.header

exit:
  ret void
}