; blob: 4d630a9bbb5011198f3f4b3fef8a06361fc904c2 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=indvars -indvars-predicate-loops=1 -S | FileCheck %s
; Nested loops with body using loop convergence token should be skipped by IndVarSimplify.
; Convergence-control entry intrinsic. @nested calls it once in its entry block;
; the returned token is the parent of the outer loop's convergence.loop call.
declare token @llvm.experimental.convergence.entry() #0
; @nested: two nested counted loops (outer counter %i.0.i, inner counter
; %j.0.i, each compared `ult 8`). Every loop header obtains a convergence
; token; the inner token is anchored to the outer one. The inner loop has an
; early exit to %if.then.i, and that exit block performs a wave reduction
; controlled by the inner loop's token.
;
; The autogenerated assertions below expect both loops, both exit compares
; and both convergence.loop calls to still be present after the pass runs;
; the only differences from the input are added flags (`samesign` on the
; compares, `nuw` on the increments).
define void @nested(i32 %tidx, i32 %tidy, ptr %array) #0 {
; CHECK-LABEL: @nested(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
; CHECK-NEXT: [[MUL_I:%.*]] = shl nsw i32 [[TIDX:%.*]], 3
; CHECK-NEXT: [[ADD_I:%.*]] = add nsw i32 [[MUL_I]], [[TIDY:%.*]]
; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
; CHECK: for.cond.i:
; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10_I:%.*]], [[CLEANUP_I:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
; CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 8
; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_COND1_I_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: for.cond1.i.preheader:
; CHECK-NEXT: [[CMP5_I:%.*]] = icmp eq i32 [[I_0_I]], [[TIDX]]
; CHECK-NEXT: br label [[FOR_COND1_I:%.*]]
; CHECK: for.cond1.i:
; CHECK-NEXT: [[J_0_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY4_I:%.*]] ], [ 0, [[FOR_COND1_I_PREHEADER]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP1]]) ]
; CHECK-NEXT: [[CMP2_I:%.*]] = icmp samesign ult i32 [[J_0_I]], 8
; CHECK-NEXT: br i1 [[CMP2_I]], label [[FOR_BODY4_I]], label [[CLEANUP_I_LOOPEXIT:%.*]]
; CHECK: for.body4.i:
; CHECK-NEXT: [[CMP6_I:%.*]] = icmp eq i32 [[J_0_I]], [[TIDY]]
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP5_I]], i1 [[CMP6_I]], i1 false
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[J_0_I]], 1
; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]]
; CHECK: cleanup.i.loopexit:
; CHECK-NEXT: br label [[CLEANUP_I]]
; CHECK: if.then.i:
; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX7_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 [[ADD_I]]) [ "convergencectrl"(token [[TMP2]]) ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 [[ADD_I]]
; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX7_I]], ptr [[TMP3]], align 4
; CHECK-NEXT: br label [[CLEANUP_I]]
; CHECK: cleanup.i:
; CHECK-NEXT: [[INC10_I]] = add nuw nsw i32 [[I_0_I]], 1
; CHECK-NEXT: br label [[FOR_COND_I]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; Obtain the function-level convergence token and precompute the store index
; %add.i = (%tidx << 3) + %tidy.
entry:
%0 = tail call token @llvm.experimental.convergence.entry()
%mul.i = shl nsw i32 %tidx, 3
%add.i = add nsw i32 %mul.i, %tidy
br label %for.cond.i
; Outer loop header: %i.0.i starts at 0 and is incremented in %cleanup.i.
; Loop token %2 is anchored to the entry token %0. Leaves for %exit once
; %i.0.i is no longer below 8.
for.cond.i:
%i.0.i = phi i32 [ 0, %entry ], [ %inc10.i, %cleanup.i ]
%2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
%cmp.i = icmp ult i32 %i.0.i, 8
br i1 %cmp.i, label %for.cond1.i.preheader, label %exit
; %cmp5.i (i == tidx) is computed once per outer iteration, ahead of the
; inner loop, and consumed inside the inner loop body.
for.cond1.i.preheader:
%cmp5.i = icmp eq i32 %i.0.i, %tidx
br label %for.cond1.i
; Inner loop header: %j.0.i starts at 0 and is incremented in %for.body4.i.
; Loop token %3 is anchored to the outer loop's token %2.
for.cond1.i:
%j.0.i = phi i32 [ %inc.i, %for.body4.i ], [ 0, %for.cond1.i.preheader ]
%3 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %2) ]
%cmp2.i = icmp ult i32 %j.0.i, 8
br i1 %cmp2.i, label %for.body4.i, label %cleanup.i.loopexit
; Inner loop body and latch: take the early exit to %if.then.i when both
; i == tidx and j == tidy hold; otherwise continue with the incremented j.
for.body4.i:
%cmp6.i = icmp eq i32 %j.0.i, %tidy
%or.cond = select i1 %cmp5.i, i1 %cmp6.i, i1 false
%inc.i = add nsw i32 %j.0.i, 1
br i1 %or.cond, label %if.then.i, label %for.cond1.i
; Normal inner-loop exit: the header compare failed without the early exit
; having been taken.
cleanup.i.loopexit:
br label %cleanup.i
; Reached only through the inner loop's early exit. The reduction call is
; controlled by the inner loop token %3; its result is stored to element
; %add.i of %array (i32 elements).
if.then.i:
%hlsl.wave.active.max7.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 %add.i) [ "convergencectrl"(token %3) ]
%4 = getelementptr inbounds i32, ptr %array, i32 %add.i
store i32 %hlsl.wave.active.max7.i, ptr %4, align 4
br label %cleanup.i
; Outer loop latch: both inner-loop exits merge here; increment i and branch
; back to the outer header.
cleanup.i:
%inc10.i = add nsw i32 %i.0.i, 1
br label %for.cond.i
exit:
ret void
}
; Loop convergence-control intrinsic. @nested calls it in each loop header,
; passing the parent token through a "convergencectrl" operand bundle.
declare token @llvm.experimental.convergence.loop() #0
; Reduction intrinsic called from %if.then.i in @nested under the inner loop's
; convergence token. NOTE(review): by its name this is a SPIR-V wave-wide
; unsigned-max reduction; its semantics are not shown in this file.
declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0
; Attribute group shared by @nested and every declaration in this file.
attributes #0 = { convergent }