Matt Arsenault | 1f1f820 | 2025-03-17 16:10:01 +0700 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
Fangrui Song | 9e9907f | 2024-01-16 21:54:58 -0800 | [diff] [blame] | 2 | ; RUN: llc -mtriple=amdgcn < %s | FileCheck %s |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 3 | |
| 4 | ; Check we can compile this bugpoint-reduced test without an |
| 5 | ; infinite loop in TLI.SimplifyDemandedBits() due to failure |
| 6 | ; to use the return value of TLO.DAG.UpdateNodeOperands(). |
| 7 | |
| 8 | ; Check that code was generated; we know there will be |
| 9 | ; an s_endpgm, so check for it. |
| 10 | |
; Unnamed external global in addrspace(3) holding 462 floats. Within @foo it is
; stored to in %bb21 and loaded from in %bb31.
| 11 | @0 = external unnamed_addr addrspace(3) global [462 x float], align 4 |
| 12 | |
; Intrinsics called by @foo: the two workitem-id queries in the entry block and
; fmuladd in %bb31. All three use attribute group #0.
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 13 | declare i32 @llvm.amdgcn.workitem.id.y() #0 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 14 | declare i32 @llvm.amdgcn.workitem.id.x() #0 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 15 | declare float @llvm.fmuladd.f32(float, float, float) #0 |
| 16 | |
; @foo: bugpoint-reduced kernel whose only job is to compile. Its control flow is
; steered by the i1 kernel arguments %c1..%c5 (%c0 merely picks between bb37 and
; bb38, which both reach the same `ret void`). The loop nest is:
;   %.loopexit145 (outer) -> %bb13 (middle) -> %bb21 (inner store loop),
;   followed by the %bb31 self-loop and the %bb30 outer latch.
; The assertions that follow are autogenerated (see the NOTE on the first line of
; the file); prefer regenerating them with utils/update_llc_test_checks.py over
; editing them by hand.
Kriti Gupta | a3dfa4e | 2023-04-18 08:46:54 +0100 | [diff] [blame] | 17 | define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 { |
Matt Arsenault | 1f1f820 | 2025-03-17 16:10:01 +0700 | [diff] [blame] | 18 | ; CHECK-LABEL: foo: |
| 19 | ; CHECK: ; %bb.0: ; %bb |
| 20 | ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x10 |
| 21 | ; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 |
| 22 | ; CHECK-NEXT: s_load_dword s10, s[4:5], 0x11 |
| 23 | ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
| 24 | ; CHECK-NEXT: s_movk_i32 s0, 0x54 |
| 25 | ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| 26 | ; CHECK-NEXT: v_mad_u32_u24 v1, v1, s0, v2 |
| 27 | ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 28 | ; CHECK-NEXT: s_bitcmp1_b32 s6, 8 |
| 29 | ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 |
| 30 | ; CHECK-NEXT: s_bitcmp1_b32 s6, 16 |
| 31 | ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] |
| 32 | ; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0 |
| 33 | ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v2 |
| 34 | ; CHECK-NEXT: s_xor_b64 s[4:5], s[4:5], -1 |
| 35 | ; CHECK-NEXT: s_bitcmp1_b32 s2, 24 |
| 36 | ; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0 |
| 37 | ; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], -1 |
| 38 | ; CHECK-NEXT: s_bitcmp1_b32 s3, 0 |
| 39 | ; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0 |
| 40 | ; CHECK-NEXT: s_bitcmp1_b32 s10, 8 |
| 41 | ; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0 |
| 42 | ; CHECK-NEXT: s_and_b64 s[2:3], exec, s[6:7] |
| 43 | ; CHECK-NEXT: s_and_b64 s[4:5], exec, s[4:5] |
| 44 | ; CHECK-NEXT: s_and_b64 s[6:7], exec, s[10:11] |
| 45 | ; CHECK-NEXT: s_and_b64 s[8:9], exec, s[8:9] |
| 46 | ; CHECK-NEXT: s_mov_b32 m0, -1 |
| 47 | ; CHECK-NEXT: .LBB0_1: ; %.loopexit145 |
| 48 | ; CHECK-NEXT: ; =>This Loop Header: Depth=1 |
| 49 | ; CHECK-NEXT: ; Child Loop BB0_3 Depth 2 |
| 50 | ; CHECK-NEXT: ; Child Loop BB0_4 Depth 3 |
| 51 | ; CHECK-NEXT: ; Child Loop BB0_5 Depth 2 |
| 52 | ; CHECK-NEXT: v_mov_b32_e32 v2, v1 |
| 53 | ; CHECK-NEXT: s_branch .LBB0_3 |
| 54 | ; CHECK-NEXT: .LBB0_2: ; %.loopexit |
| 55 | ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=2 |
| 56 | ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x540, v2 |
| 57 | ; CHECK-NEXT: s_mov_b64 vcc, s[4:5] |
| 58 | ; CHECK-NEXT: s_cbranch_vccnz .LBB0_5 |
| 59 | ; CHECK-NEXT: .LBB0_3: ; %bb13 |
| 60 | ; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1 |
| 61 | ; CHECK-NEXT: ; => This Loop Header: Depth=2 |
| 62 | ; CHECK-NEXT: ; Child Loop BB0_4 Depth 3 |
| 63 | ; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] |
| 64 | ; CHECK-NEXT: v_mov_b32_e32 v3, v2 |
| 65 | ; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 |
| 66 | ; CHECK-NEXT: .LBB0_4: ; %bb21 |
| 67 | ; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1 |
| 68 | ; CHECK-NEXT: ; Parent Loop BB0_3 Depth=2 |
| 69 | ; CHECK-NEXT: ; => This Inner Loop Header: Depth=3 |
| 70 | ; CHECK-NEXT: ds_write_b32 v3, v0 |
| 71 | ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 32, v3 |
| 72 | ; CHECK-NEXT: s_mov_b64 vcc, s[2:3] |
| 73 | ; CHECK-NEXT: s_cbranch_vccz .LBB0_4 |
| 74 | ; CHECK-NEXT: s_branch .LBB0_2 |
| 75 | ; CHECK-NEXT: .LBB0_5: ; %bb31 |
| 76 | ; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1 |
| 77 | ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 |
| 78 | ; CHECK-NEXT: s_mov_b64 vcc, s[6:7] |
| 79 | ; CHECK-NEXT: s_cbranch_vccz .LBB0_5 |
| 80 | ; CHECK-NEXT: ; %bb.6: ; %bb30 |
| 81 | ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 |
| 82 | ; CHECK-NEXT: s_mov_b64 vcc, s[8:9] |
| 83 | ; CHECK-NEXT: s_cbranch_vccz .LBB0_1 |
| 84 | ; CHECK-NEXT: ; %bb.7: ; %bb11 |
| 85 | ; CHECK-NEXT: s_endpgm |
; Entry block: reads the y/x workitem ids. %tmp7..%tmp9 only subtract/add 0, so
; %tmp9 carries the same value as %tmp6 = (%tmp & 15) * 21.
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 86 | bb: |
| 87 | %tmp = tail call i32 @llvm.amdgcn.workitem.id.y() |
| 88 | %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x() |
| 89 | %tmp5 = and i32 %tmp, 15 |
| 90 | %tmp6 = mul nuw nsw i32 %tmp5, 21 |
| 91 | %tmp7 = sub i32 %tmp6, 0 |
| 92 | %tmp8 = add i32 %tmp7, 0 |
| 93 | %tmp9 = add i32 %tmp8, 0 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 94 | br label %bb12 |
| 95 | |
; Exit path: both successors end at bb38's `ret void`, so the value of %c0 does
; not change what the kernel does.
| 96 | bb11: ; preds = %bb30 |
Kriti Gupta | a3dfa4e | 2023-04-18 08:46:54 +0100 | [diff] [blame] | 97 | br i1 %c0, label %bb37, label %bb38 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 98 | |
; Outer-loop entry. The branch condition is the constant false, so control
; always continues to %.loopexit145 and never enters %.preheader.
| 99 | bb12: ; preds = %bb30, %bb |
| 100 | br i1 false, label %.preheader, label %.loopexit145 |
| 101 | |
| 102 | .loopexit145: ; preds = %.preheader, %bb12 |
| 103 | br label %bb13 |
| 104 | |
; Middle loop header: %tmp14 starts at %tmp5 and advances by 16 per trip (via
; %tmp20 in %.loopexit). %c1 decides whether the store loop in %bb21 runs.
| 105 | bb13: ; preds = %.loopexit, %.loopexit145 |
| 106 | %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ] |
| 107 | %tmp15 = add nsw i32 %tmp14, -3 |
| 108 | %tmp16 = mul i32 %tmp14, 21 |
Kriti Gupta | a3dfa4e | 2023-04-18 08:46:54 +0100 | [diff] [blame] | 109 | br i1 %c1, label %bb17, label %.loopexit |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 110 | |
; %tmp19 has no users in this function (and %tmp15/%tmp18 only feed it).
| 111 | bb17: ; preds = %bb13 |
| 112 | %tmp18 = mul i32 %tmp15, 224 |
Matt Arsenault | 1f1f820 | 2025-03-17 16:10:01 +0700 | [diff] [blame] | 113 | %tmp19 = add i32 0, %tmp18 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 114 | br label %bb21 |
| 115 | |
; Latch of the %bb13 loop: repeat while %c2 is true, otherwise leave to %bb26.
| 116 | .loopexit: ; preds = %bb21, %bb13 |
| 117 | %tmp20 = add nuw nsw i32 %tmp14, 16 |
Kriti Gupta | a3dfa4e | 2023-04-18 08:46:54 +0100 | [diff] [blame] | 118 | br i1 %c2, label %bb13, label %bb26 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 119 | |
; Innermost loop: stores 0.0 to @0 at float index %tmp22 + %tmp16, stepping
; %tmp22 by 8 (starting from the x workitem id) for as long as %c3 is true.
| 120 | bb21: ; preds = %bb21, %bb17 |
| 121 | %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ] |
| 122 | %tmp23 = add i32 %tmp22, %tmp16 |
Matt Arsenault | d85e849 | 2022-12-01 21:33:26 -0500 | [diff] [blame] | 123 | %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23 |
Matt Arsenault | 1f1f820 | 2025-03-17 16:10:01 +0700 | [diff] [blame] | 124 | store float 0.0, ptr addrspace(3) %tmp24, align 4 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 125 | %tmp25 = add nuw i32 %tmp22, 8 |
Kriti Gupta | a3dfa4e | 2023-04-18 08:46:54 +0100 | [diff] [blame] | 126 | br i1 %c3, label %bb21, label %.loopexit |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 127 | |
| 128 | bb26: ; preds = %.loopexit |
| 129 | br label %bb31 |
| 130 | |
; Only entered through bb12's `br i1 false`; its induction phi starts from
; poison. No %.preheader block appears in the expected output above.
| 131 | .preheader: ; preds = %.preheader, %bb12 |
Matt Arsenault | 8ce612f | 2025-03-08 07:14:29 +0700 | [diff] [blame] | 132 | %tmp27 = phi i32 [ %tmp28, %.preheader ], [ poison, %bb12 ] |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 133 | %tmp28 = add nuw i32 %tmp27, 128 |
| 134 | %tmp29 = icmp ult i32 %tmp28, 1568 |
| 135 | br i1 %tmp29, label %.preheader, label %.loopexit145 |
| 136 | |
; Outer-loop latch: leave to %bb11 when %c4 is true, otherwise restart at %bb12.
| 137 | bb30: ; preds = %bb31 |
Kriti Gupta | a3dfa4e | 2023-04-18 08:46:54 +0100 | [diff] [blame] | 138 | br i1 %c4, label %bb11, label %bb12 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 139 | |
; Self-loop that repeats while %c5 is false. The index phi is poison on the
; back edge, the fmuladd operands are mostly poison, and %tmp36 has no users.
| 140 | bb31: ; preds = %bb31, %bb26 |
Matt Arsenault | 8ce612f | 2025-03-08 07:14:29 +0700 | [diff] [blame] | 141 | %tmp32 = phi i32 [ %tmp9, %bb26 ], [ poison, %bb31 ] |
Matt Arsenault | d85e849 | 2022-12-01 21:33:26 -0500 | [diff] [blame] | 142 | %tmp33 = getelementptr inbounds [462 x float], ptr addrspace(3) @0, i32 0, i32 %tmp32 |
| 143 | %tmp34 = load float, ptr addrspace(3) %tmp33, align 4 |
Matt Arsenault | 024df9c | 2025-03-13 20:07:48 +0700 | [diff] [blame] | 144 | %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float poison, float poison) |
| 145 | %tmp36 = tail call float @llvm.fmuladd.f32(float poison, float poison, float %tmp35) |
Kriti Gupta | a3dfa4e | 2023-04-18 08:46:54 +0100 | [diff] [blame] | 146 | br i1 %c5, label %bb30, label %bb31 |
Mark Searles | 4e3d616 | 2017-10-16 23:38:53 +0000 | [diff] [blame] | 147 | |
| 148 | bb37: ; preds = %bb11 |
| 149 | br label %bb38 |
| 150 | |
| 151 | bb38: ; preds = %bb37, %bb11 |
| 152 | ret void |
| 153 | } |
| 154 | |
; Attribute group #0 is referenced by the three intrinsic declarations.
| 155 | attributes #0 = { nounwind readnone speculatable } |
| 156 | |
; !0 is the operand of the !reqd_work_group_size attachment on @foo.
; NOTE(review): presumably ordered (x, y, z) = (8, 16, 1) - confirm against the
; AMDGPU usage guide.
| 157 | !0 = !{i32 8, i32 16, i32 1} |