blob: a5299ea36958ddcc4276e0b7cebf745624918127 [file] [log] [blame]
Matt Arsenault1f1f8202025-03-17 16:10:01 +07001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
Fangrui Song9e9907f2024-01-16 21:54:58 -08002; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
Mark Searles4e3d6162017-10-16 23:38:53 +00003
4; Check that this bugpoint-reduced test compiles without hitting an
5; infinite loop in TLI.SimplifyDemandedBits(), which was caused by a
6; failure to use the return value of TLO.DAG.UpdateNodeOperands().
7
8; Check that code was generated; we know there will be
9; an s_endpgm, so check for it.
10
; External addrspace(3) array of 462 floats; @foo stores to it in %bb21 and
; loads from it in %bb31.
11@0 = external unnamed_addr addrspace(3) global [462 x float], align 4
12
; Intrinsics called by @foo. All three carry attribute group #0.
Mark Searles4e3d6162017-10-16 23:38:53 +000013declare i32 @llvm.amdgcn.workitem.id.y() #0
Mark Searles4e3d6162017-10-16 23:38:53 +000014declare i32 @llvm.amdgcn.workitem.id.x() #0
Mark Searles4e3d6162017-10-16 23:38:53 +000015declare float @llvm.fmuladd.f32(float, float, float) #0
16
; @foo is the reduced reproducer kernel for this test.
;  - %arg, %arg1, %arg2 and %arg3 are never referenced in the body.
;  - Every conditional branch is driven by one of the i1 kernel arguments
;    %c0-%c5, except the constant-false branch in %bb12 and the icmp-driven
;    branch in %.preheader.
;  - The assertion lines that follow the signature are autogenerated; regenerate
;    them with utils/update_llc_test_checks.py instead of editing them by hand.
Kriti Guptaa3dfa4e2023-04-18 08:46:54 +010017define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 {
Matt Arsenault1f1f8202025-03-17 16:10:01 +070018; CHECK-LABEL: foo:
19; CHECK: ; %bb.0: ; %bb
20; CHECK-NEXT: s_load_dword s6, s[4:5], 0x10
21; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
22; CHECK-NEXT: s_load_dword s10, s[4:5], 0x11
23; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0
24; CHECK-NEXT: s_movk_i32 s0, 0x54
25; CHECK-NEXT: v_mov_b32_e32 v0, 0
26; CHECK-NEXT: v_mad_u32_u24 v1, v1, s0, v2
27; CHECK-NEXT: s_waitcnt lgkmcnt(0)
28; CHECK-NEXT: s_bitcmp1_b32 s6, 8
29; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
30; CHECK-NEXT: s_bitcmp1_b32 s6, 16
31; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
32; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
33; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v2
34; CHECK-NEXT: s_xor_b64 s[4:5], s[4:5], -1
35; CHECK-NEXT: s_bitcmp1_b32 s2, 24
36; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
37; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], -1
38; CHECK-NEXT: s_bitcmp1_b32 s3, 0
39; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0
40; CHECK-NEXT: s_bitcmp1_b32 s10, 8
41; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0
42; CHECK-NEXT: s_and_b64 s[2:3], exec, s[6:7]
43; CHECK-NEXT: s_and_b64 s[4:5], exec, s[4:5]
44; CHECK-NEXT: s_and_b64 s[6:7], exec, s[10:11]
45; CHECK-NEXT: s_and_b64 s[8:9], exec, s[8:9]
46; CHECK-NEXT: s_mov_b32 m0, -1
47; CHECK-NEXT: .LBB0_1: ; %.loopexit145
48; CHECK-NEXT: ; =>This Loop Header: Depth=1
49; CHECK-NEXT: ; Child Loop BB0_3 Depth 2
50; CHECK-NEXT: ; Child Loop BB0_4 Depth 3
51; CHECK-NEXT: ; Child Loop BB0_5 Depth 2
52; CHECK-NEXT: v_mov_b32_e32 v2, v1
53; CHECK-NEXT: s_branch .LBB0_3
54; CHECK-NEXT: .LBB0_2: ; %.loopexit
55; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=2
56; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x540, v2
57; CHECK-NEXT: s_mov_b64 vcc, s[4:5]
58; CHECK-NEXT: s_cbranch_vccnz .LBB0_5
59; CHECK-NEXT: .LBB0_3: ; %bb13
60; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
61; CHECK-NEXT: ; => This Loop Header: Depth=2
62; CHECK-NEXT: ; Child Loop BB0_4 Depth 3
63; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
64; CHECK-NEXT: v_mov_b32_e32 v3, v2
65; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
66; CHECK-NEXT: .LBB0_4: ; %bb21
67; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
68; CHECK-NEXT: ; Parent Loop BB0_3 Depth=2
69; CHECK-NEXT: ; => This Inner Loop Header: Depth=3
70; CHECK-NEXT: ds_write_b32 v3, v0
71; CHECK-NEXT: v_add_i32_e32 v3, vcc, 32, v3
72; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
73; CHECK-NEXT: s_cbranch_vccz .LBB0_4
74; CHECK-NEXT: s_branch .LBB0_2
75; CHECK-NEXT: .LBB0_5: ; %bb31
76; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
77; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
78; CHECK-NEXT: s_mov_b64 vcc, s[6:7]
79; CHECK-NEXT: s_cbranch_vccz .LBB0_5
80; CHECK-NEXT: ; %bb.6: ; %bb30
81; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
82; CHECK-NEXT: s_mov_b64 vcc, s[8:9]
83; CHECK-NEXT: s_cbranch_vccz .LBB0_1
84; CHECK-NEXT: ; %bb.7: ; %bb11
85; CHECK-NEXT: s_endpgm
; Entry block: read the y and x work-item ids. %tmp9 has the same value as
; %tmp6 ((y & 15) * 21) because the sub/add chain only uses 0 operands.
Mark Searles4e3d6162017-10-16 23:38:53 +000086bb:
87 %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
88 %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
89 %tmp5 = and i32 %tmp, 15
90 %tmp6 = mul nuw nsw i32 %tmp5, 21
91 %tmp7 = sub i32 %tmp6, 0
92 %tmp8 = add i32 %tmp7, 0
93 %tmp9 = add i32 %tmp8, 0
Mark Searles4e3d6162017-10-16 23:38:53 +000094 br label %bb12
95
; Exit path: both successors end at the ret in %bb38 (%bb37 just falls through).
96bb11: ; preds = %bb30
Kriti Guptaa3dfa4e2023-04-18 08:46:54 +010097 br i1 %c0, label %bb37, label %bb38
Mark Searles4e3d6162017-10-16 23:38:53 +000098
; Outer loop header. The condition is the constant false, so this always
; branches to %.loopexit145 and never to %.preheader.
99bb12: ; preds = %bb30, %bb
100 br i1 false, label %.preheader, label %.loopexit145
101
102.loopexit145: ; preds = %.preheader, %bb12
103 br label %bb13
104
; Middle loop header: %tmp14 starts at %tmp5 and is advanced by 16 (%tmp20)
; each time control comes back from %.loopexit.
105bb13: ; preds = %.loopexit, %.loopexit145
106 %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ]
107 %tmp15 = add nsw i32 %tmp14, -3
108 %tmp16 = mul i32 %tmp14, 21
Kriti Guptaa3dfa4e2023-04-18 08:46:54 +0100109 br i1 %c1, label %bb17, label %.loopexit
Mark Searles4e3d6162017-10-16 23:38:53 +0000110
; %tmp18 and %tmp19 have no users elsewhere in this function.
111bb17: ; preds = %bb13
112 %tmp18 = mul i32 %tmp15, 224
Matt Arsenault1f1f8202025-03-17 16:10:01 +0700113 %tmp19 = add i32 0, %tmp18
Mark Searles4e3d6162017-10-16 23:38:53 +0000114 br label %bb21
115
116.loopexit: ; preds = %bb21, %bb13
117 %tmp20 = add nuw nsw i32 %tmp14, 16
Kriti Guptaa3dfa4e2023-04-18 08:46:54 +0100118 br i1 %c2, label %bb13, label %bb26
Mark Searles4e3d6162017-10-16 23:38:53 +0000119
; Inner loop: store 0.0 to @0 at float index %tmp22 + %tmp16, where %tmp22
; starts at the x work-item id and steps by 8; repeats while %c3 is true.
120bb21: ; preds = %bb21, %bb17
121 %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
122 %tmp23 = add i32 %tmp22, %tmp16
Matt Arsenaultd85e8492022-12-01 21:33:26 -0500123 %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23
Matt Arsenault1f1f8202025-03-17 16:10:01 +0700124 store float 0.0, ptr addrspace(3) %tmp24, align 4
Mark Searles4e3d6162017-10-16 23:38:53 +0000125 %tmp25 = add nuw i32 %tmp22, 8
Kriti Guptaa3dfa4e2023-04-18 08:46:54 +0100126 br i1 %c3, label %bb21, label %.loopexit
Mark Searles4e3d6162017-10-16 23:38:53 +0000127
128bb26: ; preds = %.loopexit
129 br label %bb31
130
; Only entered from %bb12 (whose branch here is constant-false) or from itself.
131.preheader: ; preds = %.preheader, %bb12
Matt Arsenault8ce612f2025-03-08 07:14:29 +0700132 %tmp27 = phi i32 [ %tmp28, %.preheader ], [ poison, %bb12 ]
Mark Searles4e3d6162017-10-16 23:38:53 +0000133 %tmp28 = add nuw i32 %tmp27, 128
134 %tmp29 = icmp ult i32 %tmp28, 1568
135 br i1 %tmp29, label %.preheader, label %.loopexit145
136
; Back edge of the outer loop: return to %bb12 unless %c4 is true.
137bb30: ; preds = %bb31
Kriti Guptaa3dfa4e2023-04-18 08:46:54 +0100138 br i1 %c4, label %bb11, label %bb12
Mark Searles4e3d6162017-10-16 23:38:53 +0000139
; Self-loop that repeats until %c5 is true. It loads from @0 at index %tmp32
; and feeds the value into fmuladd calls whose other operands are poison;
; the final result %tmp36 has no users.
140bb31: ; preds = %bb31, %bb26
Matt Arsenault8ce612f2025-03-08 07:14:29 +0700141 %tmp32 = phi i32 [ %tmp9, %bb26 ], [ poison, %bb31 ]
Matt Arsenaultd85e8492022-12-01 21:33:26 -0500142 %tmp33 = getelementptr inbounds [462 x float], ptr addrspace(3) @0, i32 0, i32 %tmp32
143 %tmp34 = load float, ptr addrspace(3) %tmp33, align 4
Matt Arsenault024df9c2025-03-13 20:07:48 +0700144 %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float poison, float poison)
145 %tmp36 = tail call float @llvm.fmuladd.f32(float poison, float poison, float %tmp35)
Kriti Guptaa3dfa4e2023-04-18 08:46:54 +0100146 br i1 %c5, label %bb30, label %bb31
Mark Searles4e3d6162017-10-16 23:38:53 +0000147
148bb37: ; preds = %bb11
149 br label %bb38
150
151bb38: ; preds = %bb37, %bb11
152 ret void
153}
154
; Attribute group #0, referenced by the three intrinsic declarations.
155attributes #0 = { nounwind readnone speculatable }
156
; Operand of the !reqd_work_group_size attachment on @foo: 8 x 16 x 1.
157!0 = !{i32 8, i32 16, i32 1}