; test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll - llvm-project/llvm

 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1

 ; Data layout: little-endian (e), ELF name mangling (m:e), i64 aligned to
 ; 64 bits, f80 aligned to 128 bits, native integer widths of 8/16/32/64
 ; bits, and 128-bit natural stack alignment (S128).
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

 ; Given a loop with an induction variable which is being
 ; truncated/extended using casts that had been proven to
 ; be redundant under a runtime test, we want to make sure
 ; that these casts do not get vectorized/scalarized/widened.
 ; This is the case for inductions whose SCEV expression is
 ; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
 ; can be a result of the IR sequences we check below.
 ;
 ; See also pr30654.
 ;

 ; Case1: Check the following induction pattern:
 ;
 ;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
 ;  %sext = shl i32 %p.09, 24
 ;  %conv = ashr exact i32 %sext, 24
 ;  %add = add nsw i32 %conv, %step
 ;
 ; This is the case in the following code:
 ;
 ; void doit1(int n, int step) {
 ;   int i;
 ;   char p = 0;
 ;   for (i = 0; i < n; i++) {
 ;      a[i] = p;
 ;      p = p + step;
 ;   }
 ; }
 ;
 ; The "ExtTrunc" IR sequence here is:
 ;  "%sext = shl i32 %p.09, 24"
 ;  "%conv = ashr exact i32 %sext, 24"
 ; We check that it does not appear in the vector loop body, whether
 ; we vectorize or scalarize the induction.
 ; In the case of widened induction, this means that the induction phi
 ; is directly used, without shl/ashr on the way.

 ; VF8-LABEL: @doit1
 ; VF8: vector.body:
 ; VF8: %vec.ind = phi <8 x i32>
 ; VF8: store <8 x i32> %vec.ind
 ; VF8: middle.block:

 ; VF1-LABEL: @doit1
 ; VF1: vector.body:
 ; VF1-NOT: %{{.*}} = shl i32
 ; VF1: middle.block:

 ; @a: zero-initialized array of 250 i32 elements; the loops in doit1 and
 ; doit3 store into it.
 @a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16

 ; doit1(n, step): for i in [0, n), store the sign-extended low 8 bits of the
 ; running value %p.09 into @a[i], then advance the running value by %step.
 define void @doit1(i32 %n, i32 %step) {
 entry:
   ; Skip the loop entirely unless %n > 0.
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body.lr.ph, label %for.end

 for.body.lr.ph:
   ; Only reached with %n > 0, so the zero-extended %n is the trip count.
   %wide.trip.count = zext i32 %n to i64
   br label %for.body

 for.body:
   ; %indvars.iv is the i64 loop counter; %p.09 is the i32 recurrence that
   ; starts at 0 and is fed back through %add.
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
   ; shl by 24 followed by ashr by 24 keeps the low 8 bits of %p.09 and
   ; sign-extends them back to i32.
   %sext = shl i32 %p.09, 24
   %conv = ashr exact i32 %sext, 24
   %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %conv, i32* %arrayidx, align 4
   ; The next value is computed from the extended %conv, not from %p.09.
   %add = add nsw i32 %conv, %step
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.end.loopexit, label %for.body

 for.end.loopexit:
   br label %for.end

 for.end:
   ret void
 }


 ; Case2: Another variant of the above pattern is where the induction variable
 ; is used only for address computation (i.e. it is a GEP index) and therefore
 ; the induction is not vectorized but rather only the step is widened.
 ;
 ; This is the case in the following code, where the induction variable 'w_ix'
 ; is only used to access the array 'in':
 ;
 ; void doit2(int *in, int *out, size_t size, size_t step)
 ; {
 ;    int w_ix = 0;
 ;    for (size_t offset = 0; offset < size; ++offset)
 ;     {
 ;        int w = in[w_ix];
 ;        out[offset] = w;
 ;        w_ix += step;
 ;     }
 ; }
 ;
 ; The "ExtTrunc" IR sequence here is similar to the previous case:
 ;  "%sext = shl i64 %w_ix.011, 32"
 ;  "%idxprom = ashr exact i64 %sext, 32"
 ; We check that it does not appear in the vector loop body, whether
 ; we widen or scalarize the induction.
 ; In the case of widened induction, this means that the induction phi
 ; is directly used, without shl/ashr on the way.

 ; VF8-LABEL: @doit2
 ; VF8: vector.body:
 ; VF8-NEXT:  [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
 ; VF8-NEXT:  [[I0:%.+]] = add i64 [[INDEX]], 0
 ; VF8-NEXT:  [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step
 ; VF8-NEXT:  [[MUL0:%.+]] = mul i64 0, %step
 ; VF8-NEXT:  [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]]
 ; VF8:       getelementptr inbounds i32, i32* %in, i64 [[ADD]]
 ; VF8: middle.block:

 ; VF1-LABEL: @doit2
 ; VF1: vector.body:
 ; VF1-NOT: %{{.*}} = shl i64
 ; VF1: middle.block:
 ;

 ; doit2(in, out, size, step): for offset in [0, size), copy the i32 at
 ; %in[idx] to %out[offset], where idx is the sign-extended low 32 bits of the
 ; running index %w_ix.011 (initially 0), which then advances by %step.
 define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step)  {
 entry:
   ; Nothing to do when %size is 0.
   %cmp9 = icmp eq i64 %size, 0
   br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph

 for.body.lr.ph:
   br label %for.body

 ; Note: the exit blocks are laid out before the loop body in this function.
 for.cond.cleanup.loopexit:
   br label %for.cond.cleanup

 for.cond.cleanup:
   ret void

 for.body:
   ; %w_ix.011 is the index recurrence into %in; %offset.010 counts iterations
   ; and indexes %out.
   %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
   %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   ; shl by 32 followed by ashr by 32 sign-extends the low 32 bits of
   ; %w_ix.011 to i64.
   %sext = shl i64 %w_ix.011, 32
   %idxprom = ashr exact i64 %sext, 32
   ; %idxprom is used only as a GEP index for the load and to form the next
   ; index value.
   %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
   %0 = load i32, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
   store i32 %0, i32* %arrayidx1, align 4
   ; No nsw/nuw flags on this add.
   %add = add i64 %idxprom, %step
   %inc = add nuw i64 %offset.010, 1
   %exitcond = icmp eq i64 %inc, %size
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }

 ; Case3: Lastly, check also the following induction pattern:
 ;
 ;  %p.09 = phi i32 [ %val0, %scalar.ph ], [ %add, %for.body ]
 ;  %conv = and i32 %p.09, 255
 ;  %add = add nsw i32 %conv, %step
 ;
 ; This is the case in the following code:
 ;
 ; int a[N];
 ; void doit3(int n, int step) {
 ;   int i;
 ;   unsigned char p = 0;
 ;   for (i = 0; i < n; i++) {
 ;      a[i] = p;
 ;      p = p + step;
 ;   }
 ; }
 ;
 ; The "ExtTrunc" IR sequence here is:
 ;  "%conv = and i32 %p.09, 255".
 ; We check that it does not appear in the vector loop body, whether
 ; we vectorize or scalarize the induction.

 ; VF8-LABEL: @doit3
 ; VF8: vector.body:
 ; VF8: %vec.ind = phi <8 x i32>
 ; VF8: store <8 x i32> %vec.ind
 ; VF8: middle.block:

 ; VF1-LABEL: @doit3
 ; VF1: vector.body:
 ; VF1-NOT: %{{.*}} = and i32
 ; VF1: middle.block:

 ; doit3(n, step): same loop shape as doit1, but the running value is
 ; zero-extended from its low 8 bits (mask with 255) instead of sign-extended.
 define void @doit3(i32 %n, i32 %step) {
 entry:
   ; Skip the loop entirely unless %n > 0.
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body.lr.ph, label %for.end

 for.body.lr.ph:
   ; Only reached with %n > 0, so the zero-extended %n is the trip count.
   %wide.trip.count = zext i32 %n to i64
   br label %for.body

 for.body:
   ; %indvars.iv is the i64 loop counter; %p.09 is the i32 recurrence that
   ; starts at 0 and is fed back through %add.
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
   ; Masking with 255 keeps only the low 8 bits of %p.09.
   %conv = and i32 %p.09, 255
   %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %conv, i32* %arrayidx, align 4
   ; The next value is computed from the masked %conv, not from %p.09.
   %add = add nsw i32 %conv, %step
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.end.loopexit, label %for.body

 for.end.loopexit:
   br label %for.end

 for.end:
   ret void
 }

 ; VF8-LABEL: @test_conv_in_latch_block
 ; VF8: vector.body:
 ; VF8-NEXT: %index = phi i64
 ; VF8-NEXT: %vec.ind = phi <8 x i32>
 ; VF8: store <8 x i32> %vec.ind
 ; VF8: middle.block:
 ;
 ; Variant of doit1 with a multi-block loop body: the shl/ashr pair and the
 ; recurrence add sit in the latch block, after a conditional store in %then.
 ; Each iteration loads %B[iv], stores 0 to %A[iv] if the loaded value is 0,
 ; then stores the sign-extended low 8 bits of %p.09 back to %B[iv].
 define void @test_conv_in_latch_block(i32 %n, i32 %step, i32* noalias %A, i32* noalias %B) {
 entry:
   ; There is no guard on %n here; the loop is entered unconditionally and
   ; the exit test is at the bottom of the latch.
   %wide.trip.count = zext i32 %n to i64
   br label %loop

 loop:
   ; %iv is the i64 loop counter; %p.09 is the i32 recurrence that starts at 0
   ; and is fed back through %add from the latch.
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
   %p.09 = phi i32 [ 0, %entry ], [ %add, %latch ]
   %B.gep = getelementptr inbounds i32, i32* %B, i64 %iv
   %l = load i32, i32* %B.gep
   %c = icmp eq i32 %l, 0
   br i1 %c, label %then, label %latch

 then:
   ; Executed only when the value loaded from %B[iv] is 0.
   %A.gep = getelementptr inbounds i32, i32* %A, i64 %iv
   store i32 0, i32* %A.gep
   br label %latch

 latch:
   ; shl by 24 followed by ashr by 24 keeps the low 8 bits of %p.09 and
   ; sign-extends them back to i32.
   %sext = shl i32 %p.09, 24
   %conv = ashr exact i32 %sext, 24
   %add = add nsw i32 %conv, %step
   ; Reuses the address computed in the loop header.
   store i32 %conv, i32* %B.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %wide.trip.count
   br i1 %exitcond, label %exit, label %loop

 exit:
   ret void
 }
	; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
	; RUN: opt -S -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1

	; Data layout: little-endian (e), ELF name mangling (m:e), i64 aligned to
	; 64 bits, f80 aligned to 128 bits, native integer widths of 8/16/32/64
	; bits, and 128-bit natural stack alignment (S128).
	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

	; Given a loop with an induction variable which is being
	; truncated/extended using casts that had been proven to
	; be redundant under a runtime test, we want to make sure
	; that these casts do not get vectorized/scalarized/widened.
	; This is the case for inductions whose SCEV expression is
	; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
	; can be a result of the IR sequences we check below.
	;
	; See also pr30654.
	;

	; Case1: Check the following induction pattern:
	;
	; %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
	; %sext = shl i32 %p.09, 24
	; %conv = ashr exact i32 %sext, 24
	; %add = add nsw i32 %conv, %step
	;
	; This is the case in the following code:
	;
	; void doit1(int n, int step) {
	; int i;
	; char p = 0;
	; for (i = 0; i < n; i++) {
	; a[i] = p;
	; p = p + step;
	; }
	; }
	;
	; The "ExtTrunc" IR sequence here is:
	; "%sext = shl i32 %p.09, 24"
	; "%conv = ashr exact i32 %sext, 24"
	; We check that it does not appear in the vector loop body, whether
	; we vectorize or scalarize the induction.
	; In the case of widened induction, this means that the induction phi
	; is directly used, without shl/ashr on the way.

	; VF8-LABEL: @doit1
	; VF8: vector.body:
	; VF8: %vec.ind = phi <8 x i32>
	; VF8: store <8 x i32> %vec.ind
	; VF8: middle.block:

	; VF1-LABEL: @doit1
	; VF1: vector.body:
	; VF1-NOT: %{{.*}} = shl i32
	; VF1: middle.block:

	; @a: zero-initialized array of 250 i32 elements; the loops in doit1 and
	; doit3 store into it.
	@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16

	; doit1(n, step): for i in [0, n), store the sign-extended low 8 bits of the
	; running value %p.09 into @a[i], then advance the running value by %step.
	define void @doit1(i32 %n, i32 %step) {
	entry:
	; Skip the loop entirely unless %n > 0.
	%cmp7 = icmp sgt i32 %n, 0
	br i1 %cmp7, label %for.body.lr.ph, label %for.end

	for.body.lr.ph:
	; Only reached with %n > 0, so the zero-extended %n is the trip count.
	%wide.trip.count = zext i32 %n to i64
	br label %for.body

	for.body:
	; %indvars.iv is the i64 loop counter; %p.09 is the i32 recurrence that
	; starts at 0 and is fed back through %add.
	%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
	%p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
	; shl by 24 followed by ashr by 24 keeps the low 8 bits of %p.09 and
	; sign-extends them back to i32.
	%sext = shl i32 %p.09, 24
	%conv = ashr exact i32 %sext, 24
	%arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
	store i32 %conv, i32* %arrayidx, align 4
	; The next value is computed from the extended %conv, not from %p.09.
	%add = add nsw i32 %conv, %step
	%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
	%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
	br i1 %exitcond, label %for.end.loopexit, label %for.body

	for.end.loopexit:
	br label %for.end

	for.end:
	ret void
	}


	; Case2: Another variant of the above pattern is where the induction variable
	; is used only for address computation (i.e. it is a GEP index) and therefore
	; the induction is not vectorized but rather only the step is widened.
	;
	; This is the case in the following code, where the induction variable 'w_ix'
	; is only used to access the array 'in':
	;
	; void doit2(int *in, int *out, size_t size, size_t step)
	; {
	; int w_ix = 0;
	; for (size_t offset = 0; offset < size; ++offset)
	; {
	; int w = in[w_ix];
	; out[offset] = w;
	; w_ix += step;
	; }
	; }
	;
	; The "ExtTrunc" IR sequence here is similar to the previous case:
	; "%sext = shl i64 %w_ix.011, 32"
	; "%idxprom = ashr exact i64 %sext, 32"
	; We check that it does not appear in the vector loop body, whether
	; we widen or scalarize the induction.
	; In the case of widened induction, this means that the induction phi
	; is directly used, without shl/ashr on the way.

	; VF8-LABEL: @doit2
	; VF8: vector.body:
	; VF8-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
	; VF8-NEXT: [[I0:%.+]] = add i64 [[INDEX]], 0
	; VF8-NEXT: [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step
	; VF8-NEXT: [[MUL0:%.+]] = mul i64 0, %step
	; VF8-NEXT: [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]]
	; VF8: getelementptr inbounds i32, i32* %in, i64 [[ADD]]
	; VF8: middle.block:

	; VF1-LABEL: @doit2
	; VF1: vector.body:
	; VF1-NOT: %{{.*}} = shl i64
	; VF1: middle.block:
	;

	; doit2(in, out, size, step): for offset in [0, size), copy the i32 at
	; %in[idx] to %out[offset], where idx is the sign-extended low 32 bits of the
	; running index %w_ix.011 (initially 0), which then advances by %step.
	define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step) {
	entry:
	; Nothing to do when %size is 0.
	%cmp9 = icmp eq i64 %size, 0
	br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph

	for.body.lr.ph:
	br label %for.body

	; Note: the exit blocks are laid out before the loop body in this function.
	for.cond.cleanup.loopexit:
	br label %for.cond.cleanup

	for.cond.cleanup:
	ret void

	for.body:
	; %w_ix.011 is the index recurrence into %in; %offset.010 counts iterations
	; and indexes %out.
	%w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
	%offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
	; shl by 32 followed by ashr by 32 sign-extends the low 32 bits of
	; %w_ix.011 to i64.
	%sext = shl i64 %w_ix.011, 32
	%idxprom = ashr exact i64 %sext, 32
	; %idxprom is used only as a GEP index for the load and to form the next
	; index value.
	%arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
	%0 = load i32, i32* %arrayidx, align 4
	%arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
	store i32 %0, i32* %arrayidx1, align 4
	; No nsw/nuw flags on this add.
	%add = add i64 %idxprom, %step
	%inc = add nuw i64 %offset.010, 1
	%exitcond = icmp eq i64 %inc, %size
	br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
	}

	; Case3: Lastly, check also the following induction pattern:
	;
	; %p.09 = phi i32 [ %val0, %scalar.ph ], [ %add, %for.body ]
	; %conv = and i32 %p.09, 255
	; %add = add nsw i32 %conv, %step
	;
	; This is the case in the following code:
	;
	; int a[N];
	; void doit3(int n, int step) {
	; int i;
	; unsigned char p = 0;
	; for (i = 0; i < n; i++) {
	; a[i] = p;
	; p = p + step;
	; }
	; }
	;
	; The "ExtTrunc" IR sequence here is:
	; "%conv = and i32 %p.09, 255".
	; We check that it does not appear in the vector loop body, whether
	; we vectorize or scalarize the induction.

	; VF8-LABEL: @doit3
	; VF8: vector.body:
	; VF8: %vec.ind = phi <8 x i32>
	; VF8: store <8 x i32> %vec.ind
	; VF8: middle.block:

	; VF1-LABEL: @doit3
	; VF1: vector.body:
	; VF1-NOT: %{{.*}} = and i32
	; VF1: middle.block:

	; doit3(n, step): same loop shape as doit1, but the running value is
	; zero-extended from its low 8 bits (mask with 255) instead of sign-extended.
	define void @doit3(i32 %n, i32 %step) {
	entry:
	; Skip the loop entirely unless %n > 0.
	%cmp7 = icmp sgt i32 %n, 0
	br i1 %cmp7, label %for.body.lr.ph, label %for.end

	for.body.lr.ph:
	; Only reached with %n > 0, so the zero-extended %n is the trip count.
	%wide.trip.count = zext i32 %n to i64
	br label %for.body

	for.body:
	; %indvars.iv is the i64 loop counter; %p.09 is the i32 recurrence that
	; starts at 0 and is fed back through %add.
	%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
	%p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
	; Masking with 255 keeps only the low 8 bits of %p.09.
	%conv = and i32 %p.09, 255
	%arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
	store i32 %conv, i32* %arrayidx, align 4
	; The next value is computed from the masked %conv, not from %p.09.
	%add = add nsw i32 %conv, %step
	%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
	%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
	br i1 %exitcond, label %for.end.loopexit, label %for.body

	for.end.loopexit:
	br label %for.end

	for.end:
	ret void
	}

	; VF8-LABEL: @test_conv_in_latch_block
	; VF8: vector.body:
	; VF8-NEXT: %index = phi i64
	; VF8-NEXT: %vec.ind = phi <8 x i32>
	; VF8: store <8 x i32> %vec.ind
	; VF8: middle.block:
	;
	; Variant of doit1 with a multi-block loop body: the shl/ashr pair and the
	; recurrence add sit in the latch block, after a conditional store in %then.
	; Each iteration loads %B[iv], stores 0 to %A[iv] if the loaded value is 0,
	; then stores the sign-extended low 8 bits of %p.09 back to %B[iv].
	define void @test_conv_in_latch_block(i32 %n, i32 %step, i32* noalias %A, i32* noalias %B) {
	entry:
	; There is no guard on %n here; the loop is entered unconditionally and
	; the exit test is at the bottom of the latch.
	%wide.trip.count = zext i32 %n to i64
	br label %loop

	loop:
	; %iv is the i64 loop counter; %p.09 is the i32 recurrence that starts at 0
	; and is fed back through %add from the latch.
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
	%p.09 = phi i32 [ 0, %entry ], [ %add, %latch ]
	%B.gep = getelementptr inbounds i32, i32* %B, i64 %iv
	%l = load i32, i32* %B.gep
	%c = icmp eq i32 %l, 0
	br i1 %c, label %then, label %latch

	then:
	; Executed only when the value loaded from %B[iv] is 0.
	%A.gep = getelementptr inbounds i32, i32* %A, i64 %iv
	store i32 0, i32* %A.gep
	br label %latch

	latch:
	; shl by 24 followed by ashr by 24 keeps the low 8 bits of %p.09 and
	; sign-extends them back to i32.
	%sext = shl i32 %p.09, 24
	%conv = ashr exact i32 %sext, 24
	%add = add nsw i32 %conv, %step
	; Reuses the address computed in the loop header.
	store i32 %conv, i32* %B.gep, align 4
	%iv.next = add nuw nsw i64 %iv, 1
	%exitcond = icmp eq i64 %iv.next, %wide.trip.count
	br i1 %exitcond, label %exit, label %loop

	exit:
	ret void
	}