|  | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | 
|  | ; RUN: opt -passes='default<O3>' -rotation-max-header-size=0 -S < %s  | FileCheck %s --check-prefix=HOIST | 
|  | ; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s  | FileCheck %s --check-prefix=HOIST | 
|  | ; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s  | FileCheck %s --check-prefix=ROTATE | 
|  | ; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s  | FileCheck %s --check-prefix=ROTATE | 
|  |  | 
|  | ; This example is produced from very basic C code: | 
|  | ; | 
|  | ;   void f0(); | 
|  | ;   void f1(); | 
|  | ;   void f2(); | 
|  | ; | 
|  | ;   void loop(int width) { | 
|  | ;       if(width < 1) | 
|  | ;           return; | 
|  | ;       for(int i = 0; i < width - 1; ++i) { | 
|  | ;           f0(); | 
|  | ;           f1(); | 
|  | ;       } | 
|  | ;       f0(); | 
|  | ;       f2(); | 
|  | ;   } | 
|  |  | 
|  | ; We have a choice here. We can either | 
|  | ; * hoist the f0() call into the loop header, | 
|  | ;   * which potentially makes loop rotation unprofitable since the loop header | 
|  | ;     might have grown above a certain threshold, and such unrotated loops will | 
|  | ;     be ignored by LoopVectorizer, preventing vectorization | 
|  | ;   * or loop rotation will succeed, resulting in some weird PHIs that will also | 
|  | ;     harm vectorization | 
|  | ; * or not hoist the f0() call before performing loop rotation, | 
|  | ;   at the cost of potential code bloat and/or potentially successfully rotating | 
|  | ;   the loops, vectorizing them at the cost of compile time. | 
|  |  | 
|  | target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" | 
|  |  | 
|  | declare void @f0() | 
|  | declare void @f1() | 
|  | declare void @f2() | 
|  |  | 
|  | declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) | 
|  | declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) | 
|  |  | 
|  | define void @_Z4loopi(i32 %width) { | 
|  | ; HOIST-LABEL: @_Z4loopi( | 
|  | ; HOIST-NEXT:  entry: | 
|  | ; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 | 
|  | ; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] | 
|  | ; HOIST:       for.cond.preheader: | 
|  | ; HOIST-NEXT:    [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 | 
|  | ; HOIST-NEXT:    br label [[FOR_COND:%.*]] | 
|  | ; HOIST:       for.cond: | 
|  | ; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ] | 
|  | ; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]] | 
|  | ; HOIST-NEXT:    tail call void @f0() | 
|  | ; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] | 
|  | ; HOIST:       for.cond.cleanup: | 
|  | ; HOIST-NEXT:    tail call void @f2() | 
|  | ; HOIST-NEXT:    br label [[RETURN]] | 
|  | ; HOIST:       for.body: | 
|  | ; HOIST-NEXT:    tail call void @f1() | 
|  | ; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1 | 
|  | ; HOIST-NEXT:    br label [[FOR_COND]] | 
|  | ; HOIST:       return: | 
|  | ; HOIST-NEXT:    ret void | 
|  | ; | 
|  | ; ROTATE-LABEL: @_Z4loopi( | 
|  | ; ROTATE-NEXT:  entry: | 
|  | ; ROTATE-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 | 
|  | ; ROTATE-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] | 
|  | ; ROTATE:       for.cond.preheader: | 
|  | ; ROTATE-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 | 
|  | ; ROTATE-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] | 
|  | ; ROTATE:       for.body.preheader: | 
|  | ; ROTATE-NEXT:    [[TMP0:%.*]] = add i32 [[WIDTH]], -2 | 
|  | ; ROTATE-NEXT:    br label [[FOR_BODY:%.*]] | 
|  | ; ROTATE:       for.cond.cleanup: | 
|  | ; ROTATE-NEXT:    tail call void @f0() | 
|  | ; ROTATE-NEXT:    tail call void @f2() | 
|  | ; ROTATE-NEXT:    br label [[RETURN]] | 
|  | ; ROTATE:       for.body: | 
|  | ; ROTATE-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] | 
|  | ; ROTATE-NEXT:    tail call void @f0() | 
|  | ; ROTATE-NEXT:    tail call void @f1() | 
|  | ; ROTATE-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1 | 
|  | ; ROTATE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_04]], [[TMP0]] | 
|  | ; ROTATE-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] | 
|  | ; ROTATE:       return: | 
|  | ; ROTATE-NEXT:    ret void | 
|  | ; | 
|  | entry: | 
|  | ; The input keeps %width and the loop counter %i in stack slots (allocas) and | 
|  | ; accesses them through explicit loads and stores. | 
|  | %width.addr = alloca i32, align 4 | 
|  | %i = alloca i32, align 4 | 
|  | store i32 %width, i32* %width.addr, align 4 | 
|  | %i1 = load i32, i32* %width.addr, align 4 | 
|  | ; Early exit: branch to if.then (and on to return) when width < 1. | 
|  | %cmp = icmp slt i32 %i1, 1 | 
|  | br i1 %cmp, label %if.then, label %if.end | 
|  |  | 
|  | if.then: | 
|  | br label %return | 
|  |  | 
|  | if.end: | 
|  | ; Begin the lifetime of %i and initialize it to 0 before entering the loop. | 
|  | %i2 = bitcast i32* %i to i8* | 
|  | call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2) | 
|  | store i32 0, i32* %i, align 4 | 
|  | br label %for.cond | 
|  |  | 
|  | for.cond: | 
|  | ; Loop header: keep iterating while i < width - 1 (signed compare). | 
|  | %i3 = load i32, i32* %i, align 4 | 
|  | %i4 = load i32, i32* %width.addr, align 4 | 
|  | %sub = sub nsw i32 %i4, 1 | 
|  | %cmp1 = icmp slt i32 %i3, %sub | 
|  | br i1 %cmp1, label %for.body, label %for.cond.cleanup | 
|  |  | 
|  | for.cond.cleanup: | 
|  | ; Loop exit: end the lifetime of %i, then continue to for.end. | 
|  | %i5 = bitcast i32* %i to i8* | 
|  | call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5) | 
|  | br label %for.end | 
|  |  | 
|  | for.body: | 
|  | ; Loop body: f0() followed by f1(). | 
|  | call void @f0() | 
|  | call void @f1() | 
|  | br label %for.inc | 
|  |  | 
|  | for.inc: | 
|  | ; Increment i and write it back to its stack slot. | 
|  | %i6 = load i32, i32* %i, align 4 | 
|  | %inc = add nsw i32 %i6, 1 | 
|  | store i32 %inc, i32* %i, align 4 | 
|  | br label %for.cond | 
|  |  | 
|  | for.end: | 
|  | ; After the loop: f0() once more, then f2(). This f0() call and the one in | 
|  | ; for.body are the calls that can either be hoisted into the loop header or | 
|  | ; left in place, depending on the rotation threshold set in the RUN lines. | 
|  | call void @f0() | 
|  | call void @f2() | 
|  | br label %return | 
|  |  | 
|  | return: | 
|  | ret void | 
|  | } | 