; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes="print<access-info>" %s 2>&1 | FileCheck %s

@a = dso_local local_unnamed_addr global [65536 x float] zeroinitializer, align 16

; Generated from the following C code:
; #define LEN 256 * 256
; float a[LEN];
;
; void different_strides() {
;   for (int i = 0; i < LEN - 1024 - 255; i++) {
; #pragma clang loop interleave(disable)
; #pragma clang loop unroll(disable)
;     for (int j = 0; j < 256; j++)
;       a[i + j + 1024] += a[j * 4 + i];
;   }
; }
; The load and store have different strides (16 and 4 bytes respectively), but the
; store is always a safe positive distance away from the load, thus the dependence
; is BackwardVectorizable.
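;
; As a rough sanity check on the reported bound (assuming, as a simple model of
; the analysis, that the minimum dependence distance is divided by the larger of
; the two strides): at iteration j the store writes a[i + j + 1024] while the
; load reads a[i + 4*j], so the distance is (1024 - 3*j) elements. Over
; j in [0, 255] this stays positive and bottoms out at 1024 - 3*255 = 259
; elements = 1036 bytes; 1036 bytes / 16 bytes = 64 iterations (rounding down),
; i.e. 64 x 32 bits = 2048 bits, matching the maximum safe vector width below.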
define void @different_strides_backward_vectorizable() {
; CHECK-LABEL: 'different_strides_backward_vectorizable'
; CHECK-NEXT:    inner.body:
; CHECK-NEXT:      Memory dependences are safe with a maximum safe vector width of 2048 bits
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        BackwardVectorizable:
; CHECK-NEXT:            %3 = load float, ptr %arrayidx, align 4 ->
; CHECK-NEXT:            store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %5 = load float, ptr %arrayidx8, align 4 ->
; CHECK-NEXT:            store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:    outer.header:
; CHECK-NEXT:      Report: loop is not the innermost loop
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  br label %outer.header

outer.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
  %0 = add nuw nsw i64 %i, 1024
  br label %inner.body

inner.body:
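  ; %arrayidx is &a[4*j + i] (the load, 16-byte stride in j) and %arrayidx8 is
  ; &a[i + j + 1024] (the forward load and the store, 4-byte stride in j).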
  %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
  %1 = shl nuw nsw i64 %j, 2
  %2 = add nuw nsw i64 %1, %i
  %arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
  %3 = load float, ptr %arrayidx, align 4
  %4 = add nuw nsw i64 %0, %j
  %arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
  %5 = load float, ptr %arrayidx8, align 4
  %add9 = fadd fast float %5, %3
  store float %add9, ptr %arrayidx8, align 4
  %j.next = add nuw nsw i64 %j, 1
  %exitcond.not = icmp eq i64 %j.next, 256
  br i1 %exitcond.not, label %outer.latch, label %inner.body

outer.latch:
  %i.next = add nuw nsw i64 %i, 1
  %outerexitcond.not = icmp eq i64 %i.next, 64257
  br i1 %outerexitcond.not, label %exit, label %outer.header

exit:
  ret void
}


; Generated from the following C code (with LEN = 256 and LEN2 = 256 * 256,
; matching the loop bounds in the IR below):
;
; void different_stride_and_not_vectorizable() {
;   for (int i = 0; i < LEN2; i++) {
;     for (int j = 0; j < LEN; j++) {
;       a[i + j + LEN] += a[i + 4 * j];
;     }
;   }
; }
; The load and store have different strides, but here the store is not always a
; safe distance away from the load, thus the loop is not safe to vectorize.
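;
; Concretely: at iteration j the store writes a[i + j + 256] while the load
; reads a[i + 4*j], so the distance is (256 - 3*j) elements. This is positive
; for j <= 85 (1 element at j = 85) but negative from j = 86 on (-2 elements at
; j = 86), so the direction of the dependence flips partway through the loop
; and it is reported as an unknown data dependence.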
define void @different_stride_and_not_vectorizable() {
; CHECK-LABEL: 'different_stride_and_not_vectorizable'
; CHECK-NEXT:    inner.body:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:      Unknown data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Unknown:
; CHECK-NEXT:            %3 = load float, ptr %arrayidx, align 4 ->
; CHECK-NEXT:            store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %5 = load float, ptr %arrayidx8, align 4 ->
; CHECK-NEXT:            store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:    outer.header:
; CHECK-NEXT:      Report: loop is not the innermost loop
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  br label %outer.header

outer.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
  %0 = add nuw nsw i64 %i, 256
  br label %inner.body

inner.body:
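  ; %arrayidx is &a[4*j + i] (the load, 16-byte stride in j) and %arrayidx8 is
  ; &a[i + j + 256] (the forward load and the store, 4-byte stride in j).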
  %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
  %1 = shl nuw nsw i64 %j, 2
  %2 = add nuw nsw i64 %1, %i
  %arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
  %3 = load float, ptr %arrayidx, align 4
  %4 = add nuw nsw i64 %0, %j
  %arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
  %5 = load float, ptr %arrayidx8, align 4
  %add9 = fadd fast float %5, %3
  store float %add9, ptr %arrayidx8, align 4
  %j.next = add nuw nsw i64 %j, 1
  %exitcond.not = icmp eq i64 %j.next, 256
  br i1 %exitcond.not, label %outer.latch, label %inner.body

outer.latch:
  %i.next = add nuw nsw i64 %i, 1
  %exitcond29.not = icmp eq i64 %i.next, 65536
  br i1 %exitcond29.not, label %exit, label %outer.header

exit:
  ret void
}