| ; RUN: opt -mtriple=amdgcn -O2 -S %s -o - -stop-after=sroa | FileCheck %s |
| |
| ; This testcase is derived from warp's reduce.cu. |
| ; |
| ; Pathological testcase where SROA increases the number of phi-nodes from ~250 to 32K. |
| ; This increase in phi-nodes led to a large increase in compile time in SSAUpdater |
| ; when called by StructurizeCFG when compiling reduce.cu. See large-phi-search.ll for a |
| ; minimized testcase for the SSAUpdater slowdown. |
| ; |
| ; Note that this large increase in phi count by SROA depends on allowing jump-threading |
| ; through blocks that define values that are live outside of the block. |
| ; There is no large increase in phi count with: |
| ; |
| ; opt -mtriple=amdgcn -O2 -S %s -o - -stop-after=sroa -max-jump-threading-live-blocks=0 |
| |
| |
| ; CHECK-LABEL: @_func7( |
| |
| ; Packed struct with six fields: two pointers, three i32 values and a 4-byte |
| ; i8 array. @_func8 returns it by value; @_func12 extracts fields 2 and 4. |
| %"cub_inner_product_iterator" = type <{ ptr, ptr, i32, i32, i32, [4 x i8] }> |
| |
| ; Function Attrs: convergent inlinehint mustprogress nounwind |
| ; Loads one double through each pointer argument and returns their sum, |
| ; computed with a `contract`-flagged fadd. |
| define noundef double @_func(ptr noundef nonnull align 8 dereferenceable(8) %var0, ptr noundef nonnull align 8 dereferenceable(8) %var1) #0 { |
| %var3 = load double, ptr %var0, align 8 |
| %var4 = load double, ptr %var1, align 8 |
| %var5 = fadd contract double %var3, %var4 |
| ret double %var5 |
| } |
| |
| ; Function Attrs: convergent inlinehint mustprogress nounwind |
| ; Thin wrapper: forwards both pointer arguments unchanged to @_func and |
| ; returns its result. |
| define noundef double @_func3(ptr noundef nonnull align 8 dereferenceable(8) %var0, ptr noundef nonnull align 8 dereferenceable(8) %var1) #0 { |
| %var3 = call contract noundef double @_func(ptr noundef nonnull align 8 dereferenceable(8) %var0, ptr noundef nonnull align 8 dereferenceable(8) %var1) #3 |
| ret double %var3 |
| } |
| |
| ; Function Attrs: convergent inlinehint mustprogress nounwind |
| ; Declaration only (no body in this file). @_func6 passes it the final |
| ; accumulated double after its loop exits. |
| declare void @_func2(double noundef) #0 |
| |
| ; Function Attrs: alwaysinline convergent mustprogress nounwind |
| ; Thin wrapper using attribute group #1 (alwaysinline): forwards %var0 |
| ; unchanged to @_func5. |
| define void @_func4(ptr noundef nonnull align 8 dereferenceable(2048) %var0) #1 { |
| call void @_func5(ptr noundef nonnull align 8 dereferenceable(2048) %var0) #3 |
| ret void |
| } |
| |
| ; Function Attrs: alwaysinline convergent mustprogress nounwind |
| ; Thin wrapper using attribute group #1 (alwaysinline): forwards %var0 |
| ; unchanged to @_func6. |
| define void @_func5(ptr noundef nonnull align 8 dereferenceable(2048) %var0) #1 { |
| call void @_func6(ptr noundef nonnull align 8 dereferenceable(2048) %var0) #3 |
| ret void |
| } |
| |
| ; Function Attrs: convergent inlinehint mustprogress nounwind |
| ; Accumulates 256 doubles read from %var0 and hands the total to @_func2. |
| ; |
| ; A double accumulator slot is allocated in addrspace(5), addrspacecast to a |
| ; generic pointer and initialised to 0.0. For each index 0..255 the loop calls |
| ; @_func3(accumulator slot, pointer to 8-byte element %.0 of %var0) and stores |
| ; the returned value back into the slot. |
| define void @_func6(ptr noundef nonnull align 8 dereferenceable(2048) %var0) #0 { |
| block1: |
| %var2 = alloca double, align 8, addrspace(5) |
| %var3 = addrspacecast ptr addrspace(5) %var2 to ptr |
| store double 0.000000e+00, ptr %var3, align 8 |
| br label %block4 |
| |
| ; Loop header: %var5 carries the running value (0.0 on entry, otherwise the |
| ; last @_func3 result); %.0 is the loop counter. |
| block4: ; preds = %block8, %block1 |
| %var5 = phi double [ 0.000000e+00, %block1 ], [ %var11, %block8 ] |
| %.0 = phi i32 [ 0, %block1 ], [ %var12, %block8 ] |
| %var6 = icmp samesign ult i32 %.0, 256 |
| br i1 %var6, label %block8, label %block7 |
| |
| ; Loop exit: pass the accumulated value to @_func2. |
| block7: ; preds = %block4 |
| call void @_func2(double noundef %var5) #3 |
| ret void |
| |
| ; Loop body: the GEP steps through %var0 in 8-byte ([8 x i8]) units. The |
| ; back edge carries loop metadata !0, which includes llvm.loop.unroll.enable. |
| block8: ; preds = %block4 |
| %var9 = zext nneg i32 %.0 to i64 |
| %var10 = getelementptr inbounds nuw [8 x i8], ptr %var0, i64 %var9 |
| %var11 = call contract noundef double @_func3(ptr noundef nonnull align 8 dereferenceable(8) %var3, ptr noundef nonnull align 8 dereferenceable(8) %var10) #3 |
| store double %var11, ptr %var3, align 8 |
| %var12 = add nuw nsw i32 %.0, 1 |
| br label %block4, !llvm.loop !0 |
| } |
| |
| ; Function Attrs: convergent mustprogress nounwind |
| ; Declaration only (no body in this file). Returns a |
| ; %"cub_inner_product_iterator" by value; called from @_func12. |
| declare %"cub_inner_product_iterator" @_func8() #2 |
| |
| ; Function Attrs: convergent mustprogress nounwind |
| ; Thin wrapper: forwards %var0 and %var1 unchanged to @_func10 and returns |
| ; its result. |
| define noundef double @_func9(ptr noundef nonnull align 8 dereferenceable(28) %var0, i64 noundef %var1) #2 { |
| %var3 = call contract noundef double @_func10(ptr noundef nonnull align 8 dereferenceable(28) %var0, i64 noundef %var1) #3 |
| ret double %var3 |
| } |
| |
| ; Function Attrs: convergent mustprogress nounwind |
| ; Loop that repeatedly calls @_func11 and returns either 0.0 (no iteration |
| ; taken) or 1.0 (at least one iteration taken). |
| ; |
| ; Reads two i32 values from %var0: the one at byte offset 16 is sign-extended |
| ; and multiplied by %var1 to form an element index; the one at byte offset 24 |
| ; is re-loaded on every pass through the loop header and used as the bound. |
| ; NOTE(review): several values below are constants or unused; presumably these |
| ; are artifacts of test-case reduction and must be kept as-is - confirm before |
| ; "cleaning up". |
| define noundef double @_func10(ptr noundef nonnull align 8 dereferenceable(28) %var0, i64 noundef %var1) #2 { |
| block2: |
| %var3 = getelementptr inbounds nuw i8, ptr %var0, i64 16 |
| %var4 = load i32, ptr %var3, align 8 |
| %var5 = sext i32 %var4 to i64 |
| %var6 = mul nsw i64 %var1, %var5 |
| ; The GEP base is the literal null pointer; only the 8-byte-scaled index varies. |
| %var7 = getelementptr inbounds [8 x i8], ptr null, i64 %var6 |
| br label %block8 |
| |
| ; Loop header: %.05 is the value eventually returned; %.0 is compared (signed) |
| ; against the i32 loaded from byte offset 24 of %var0. |
| block8: ; preds = %block13, %block2 |
| %.05 = phi double [ 0.000000e+00, %block2 ], [ 1.000000e+00, %block13 ] |
| %.0 = phi i32 [ 0, %block2 ], [ %var16, %block13 ] |
| %var9 = getelementptr inbounds nuw i8, ptr %var0, i64 24 |
| %var10 = load i32, ptr %var9, align 8 |
| %var11 = icmp slt i32 %.0, %var10 |
| br i1 %var11, label %block13, label %block12 |
| |
| block12: ; preds = %block8 |
| ret double %.05 |
| |
| ; Loop body: the results %var14 and %var15 are not used anywhere in this |
| ; function, and %var16 is computed from constants only (it does not depend on |
| ; %.0). |
| block13: ; preds = %block8 |
| %var14 = call contract noundef double @_func11(ptr noundef nonnull align 8 dereferenceable(8) %var7) #3 |
| %var15 = fadd contract double 1.000000e+00, 0.000000e+00 |
| %var16 = add nuw nsw i32 1, 1 |
| br label %block8 |
| } |
| |
| ; Function Attrs: convergent mustprogress nounwind |
| ; Declaration only (no body in this file). Takes a pointer and returns a |
| ; double; called from the loop body of @_func10. |
| declare noundef double @_func11(ptr noundef nonnull align 8 dereferenceable(8)) #2 |
| |
| ; Function Attrs: alwaysinline convergent mustprogress nounwind |
| ; Function matched by the CHECK-LABEL directive in this test. |
| ; Allocates a [256 x double] buffer in addrspace(5), addrspacecasts it to a |
| ; generic pointer, then passes that pointer first to @_func12 and afterwards |
| ; to @_func4. |
| define void @_func7() #1 { |
| %var1 = alloca [256 x double], align 16, addrspace(5) |
| %var2 = addrspacecast ptr addrspace(5) %var1 to ptr |
| call void @_func12(ptr noundef nonnull align 8 dereferenceable(2048) %var2) #3 |
| call void @_func4(ptr noundef nonnull align 8 dereferenceable(2048) %var2) #3 |
| ret void |
| } |
| |
| ; Function Attrs: convergent inlinehint mustprogress nounwind |
| ; Writes 256 doubles into %var0, one per loop iteration. |
| ; |
| ; Setup: allocates a %"cub_inner_product_iterator" in addrspace(5), obtains a |
| ; value of that type from @_func8, and stores its fields 2 and 4 at byte |
| ; offsets 16 and 24 of the alloca. Only those two fields are stored here. |
| ; Loop: for each index 0..255, calls @_func9(iterator pointer, index) and |
| ; stores the returned double into 8-byte element %.0 of %var0. |
| define void @_func12(ptr noundef nonnull align 8 dereferenceable(2048) %var0) #0 { |
| block1: |
| %var2 = alloca %"cub_inner_product_iterator", align 8, addrspace(5) |
| %var3 = addrspacecast ptr addrspace(5) %var2 to ptr |
| %var4 = call %"cub_inner_product_iterator" @_func8() #3 |
| %.fca.2.extract = extractvalue %"cub_inner_product_iterator" %var4, 2 |
| %.fca.4.extract = extractvalue %"cub_inner_product_iterator" %var4, 4 |
| %.sroa.3.0..sroa_idx = getelementptr inbounds nuw i8, ptr %var3, i64 16 |
| store i32 %.fca.2.extract, ptr %.sroa.3.0..sroa_idx, align 8 |
| %.sroa.5.0..sroa_idx = getelementptr inbounds nuw i8, ptr %var3, i64 24 |
| store i32 %.fca.4.extract, ptr %.sroa.5.0..sroa_idx, align 8 |
| br label %block5 |
| |
| ; Loop header: %.0 is the loop counter, bounded by 256. |
| block5: ; preds = %block8, %block1 |
| %.0 = phi i32 [ 0, %block1 ], [ %var14, %block8 ] |
| %var6 = icmp samesign ult i32 %.0, 256 |
| br i1 %var6, label %block8, label %block7 |
| |
| block7: ; preds = %block5 |
| ret void |
| |
| ; Loop body. %var9 is a shift-left by 0, so it carries the same value as %.0. |
| ; NOTE(review): presumably a leftover from test-case reduction - keep as-is. |
| ; The back edge carries loop metadata !3, which includes |
| ; llvm.loop.unroll.enable. |
| block8: ; preds = %block5 |
| %var9 = shl nuw nsw i32 %.0, 0 |
| %var10 = zext nneg i32 %var9 to i64 |
| %var11 = call contract noundef double @_func9(ptr noundef nonnull align 8 dereferenceable(28) %var3, i64 noundef %var10) #3 |
| %var12 = zext nneg i32 %.0 to i64 |
| %var13 = getelementptr inbounds nuw [8 x i8], ptr %var0, i64 %var12 |
| store double %var11, ptr %var13, align 8 |
| %var14 = add nuw nsw i32 %.0, 1 |
| br label %block5, !llvm.loop !3 |
| } |
| |
| ; Attribute groups. #0, #1 and #2 share the same target-cpu (gfx942), |
| ; target-features and string attributes; they differ only in the inlining |
| ; attribute: #0 has inlinehint, #1 has alwaysinline, #2 has neither. |
| ; #3 is the group attached to the call sites in this file. |
| attributes #0 = { convergent inlinehint mustprogress nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx942" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts" } |
| attributes #1 = { alwaysinline convergent mustprogress nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx942" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts" } |
| attributes #2 = { convergent mustprogress nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx942" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts" } |
| attributes #3 = { convergent nounwind } |
| |
| ; Loop metadata. !0 (loop in @_func6) and !3 (loop in @_func12) are distinct |
| ; self-referential loop IDs; both reference the same two property nodes, |
| ; !1 (llvm.loop.mustprogress) and !2 (llvm.loop.unroll.enable). |
| !0 = distinct !{!0, !1, !2} |
| !1 = !{!"llvm.loop.mustprogress"} |
| !2 = !{!"llvm.loop.unroll.enable"} |
| !3 = distinct !{!3, !1, !2} |