Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
Fangrui Song | 9e9907f | 2024-01-16 21:54:58 -0800 | [diff] [blame] | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s |
Tom Stellard | 58ac744 | 2014-04-29 23:12:48 +0000 | [diff] [blame] | 3 | ; |
Tom Stellard | 58ac744 | 2014-04-29 23:12:48 +0000 | [diff] [blame] | 4 | ; Most SALU instructions ignore control flow, so we need to make sure |
| 5 | ; they don't overwrite values from other blocks. |
| 6 | |
Tom Stellard | 744b99b | 2014-09-24 01:33:28 +0000 | [diff] [blame] | 7 | ; If the branch decision is made based on a value in an SGPR then all |
| 8 | ; threads will execute the same code paths, so we don't need to worry |
| 9 | ; about instructions in different blocks overwriting each other. |
Tom Stellard | 58ac744 | 2014-04-29 23:12:48 +0000 | [diff] [blame] | 10 | |
; @sgpr_if_else_salu_br: branches on whether the kernel argument %a is zero,
; computes %b - %c in %if or %d + %e in %else, then adds %a to the selected
; value and stores the result to %out. The SI checks expect a scalar compare
; and scalar branch (s_cmp_lg_u32 / s_cbranch_scc0), with both arms writing
; their result to s3.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 11 | define amdgpu_kernel void @sgpr_if_else_salu_br(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 12 | ; SI-LABEL: sgpr_if_else_salu_br: |
| 13 | ; SI: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 14 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb |
| 15 | ; SI-NEXT: s_load_dword s6, s[4:5], 0xf |
QingShan Zhang | 1ffb468 | 2020-08-07 10:20:58 +0000 | [diff] [blame] | 16 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 17 | ; SI-NEXT: s_cmp_lg_u32 s0, 0 |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 18 | ; SI-NEXT: s_cbranch_scc0 .LBB0_4 |
QingShan Zhang | 1ffb468 | 2020-08-07 10:20:58 +0000 | [diff] [blame] | 19 | ; SI-NEXT: ; %bb.1: ; %else |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 20 | ; SI-NEXT: s_add_i32 s3, s3, s6 |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 21 | ; SI-NEXT: s_cbranch_execnz .LBB0_3 |
| 22 | ; SI-NEXT: .LBB0_2: ; %if |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 23 | ; SI-NEXT: s_sub_i32 s3, s1, s2 |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 24 | ; SI-NEXT: .LBB0_3: ; %endif |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 25 | ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 |
| 26 | ; SI-NEXT: s_add_i32 s0, s3, s0 |
| 27 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 28 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 29 | ; SI-NEXT: v_mov_b32_e32 v0, s0 |
Carl Ritson | c316332 | 2022-10-06 09:06:32 +0900 | [diff] [blame] | 30 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 31 | ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
QingShan Zhang | 1ffb468 | 2020-08-07 10:20:58 +0000 | [diff] [blame] | 32 | ; SI-NEXT: s_endpgm |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 33 | ; SI-NEXT: .LBB0_4: |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 34 | ; SI-NEXT: ; implicit-def: $sgpr3 |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 35 | ; SI-NEXT: s_branch .LBB0_2 |
hsmahesha | 4905536 | 2020-07-17 11:40:10 +0530 | [diff] [blame] | 36 | |
; The branch condition depends only on the kernel argument %a.
Tom Stellard | 58ac744 | 2014-04-29 23:12:48 +0000 | [diff] [blame] | 37 | entry: |
| 38 | %0 = icmp eq i32 %a, 0 |
| 39 | br i1 %0, label %if, label %else |
| 40 | |
| 41 | if: |
Matt Arsenault | ad55ee5 | 2016-12-06 01:02:51 +0000 | [diff] [blame] | 42 | %1 = sub i32 %b, %c |
| 43 | br label %endif |
| 44 | |
| 45 | else: |
| 46 | %2 = add i32 %d, %e |
| 47 | br label %endif |
| 48 | |
; Merge the value produced by whichever arm ran, add %a, and store to %out.
| 49 | endif: |
| 50 | %3 = phi i32 [%1, %if], [%2, %else] |
| 51 | %4 = add i32 %3, %a |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 52 | store i32 %4, ptr addrspace(1) %out |
Matt Arsenault | ad55ee5 | 2016-12-06 01:02:51 +0000 | [diff] [blame] | 53 | ret void |
| 54 | } |
| 55 | |
; @sgpr_if_else_salu_br_opt: branches on whether the kernel argument %a is
; zero, computes %b + %c in %if or %d + %e in %else, then adds %a to the
; selected value and stores the result to %out. Every named i32 argument is
; preceded by an unnamed [8 x i32] argument; the SI checks expect the operands
; of each add to be fetched by separate s_load_dword instructions inside the
; arm that uses them.
; NOTE(review): the [8 x i32] padding presumably exists to keep the argument
; loads from being combined into one wide load -- confirm.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 56 | define amdgpu_kernel void @sgpr_if_else_salu_br_opt(ptr addrspace(1) %out, [8 x i32], i32 %a, [8 x i32], i32 %b, [8 x i32], i32 %c, [8 x i32], i32 %d, [8 x i32], i32 %e) { |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 57 | ; SI-LABEL: sgpr_if_else_salu_br_opt: |
| 58 | ; SI: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 59 | ; SI-NEXT: s_load_dword s2, s[4:5], 0x13 |
QingShan Zhang | 1ffb468 | 2020-08-07 10:20:58 +0000 | [diff] [blame] | 60 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 61 | ; SI-NEXT: s_cmp_lg_u32 s2, 0 |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 62 | ; SI-NEXT: s_cbranch_scc0 .LBB1_4 |
QingShan Zhang | 1ffb468 | 2020-08-07 10:20:58 +0000 | [diff] [blame] | 63 | ; SI-NEXT: ; %bb.1: ; %else |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 64 | ; SI-NEXT: s_load_dword s0, s[4:5], 0x2e |
| 65 | ; SI-NEXT: s_load_dword s1, s[4:5], 0x37 |
QingShan Zhang | 1ffb468 | 2020-08-07 10:20:58 +0000 | [diff] [blame] | 66 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 67 | ; SI-NEXT: s_add_i32 s3, s0, s1 |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 68 | ; SI-NEXT: s_cbranch_execnz .LBB1_3 |
| 69 | ; SI-NEXT: .LBB1_2: ; %if |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 70 | ; SI-NEXT: s_load_dword s0, s[4:5], 0x1c |
| 71 | ; SI-NEXT: s_load_dword s1, s[4:5], 0x25 |
QingShan Zhang | 1ffb468 | 2020-08-07 10:20:58 +0000 | [diff] [blame] | 72 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 73 | ; SI-NEXT: s_add_i32 s3, s0, s1 |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 74 | ; SI-NEXT: .LBB1_3: ; %endif |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 75 | ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 |
| 76 | ; SI-NEXT: s_add_i32 s0, s3, s2 |
| 77 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 78 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 79 | ; SI-NEXT: v_mov_b32_e32 v0, s0 |
Carl Ritson | c316332 | 2022-10-06 09:06:32 +0900 | [diff] [blame] | 80 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 81 | ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
QingShan Zhang | 1ffb468 | 2020-08-07 10:20:58 +0000 | [diff] [blame] | 82 | ; SI-NEXT: s_endpgm |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 83 | ; SI-NEXT: .LBB1_4: |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 84 | ; SI-NEXT: ; implicit-def: $sgpr3 |
Jay Foad | d2e5d35 | 2022-01-31 16:56:32 +0000 | [diff] [blame] | 85 | ; SI-NEXT: s_branch .LBB1_2 |
hsmahesha | 4905536 | 2020-07-17 11:40:10 +0530 | [diff] [blame] | 86 | |
; The branch condition depends only on the kernel argument %a.
Matt Arsenault | ad55ee5 | 2016-12-06 01:02:51 +0000 | [diff] [blame] | 87 | entry: |
Matt Arsenault | 8c4a352 | 2018-06-26 19:10:00 +0000 | [diff] [blame] | 88 | %cmp0 = icmp eq i32 %a, 0 |
| 89 | br i1 %cmp0, label %if, label %else |
Matt Arsenault | ad55ee5 | 2016-12-06 01:02:51 +0000 | [diff] [blame] | 90 | |
| 91 | if: |
Matt Arsenault | 8c4a352 | 2018-06-26 19:10:00 +0000 | [diff] [blame] | 92 | %add0 = add i32 %b, %c |
Tom Stellard | 58ac744 | 2014-04-29 23:12:48 +0000 | [diff] [blame] | 93 | br label %endif |
| 94 | |
| 95 | else: |
Matt Arsenault | 8c4a352 | 2018-06-26 19:10:00 +0000 | [diff] [blame] | 96 | %add1 = add i32 %d, %e |
Tom Stellard | 58ac744 | 2014-04-29 23:12:48 +0000 | [diff] [blame] | 97 | br label %endif |
| 98 | |
; Merge the value produced by whichever arm ran, add %a, and store to %out.
| 99 | endif: |
Matt Arsenault | 8c4a352 | 2018-06-26 19:10:00 +0000 | [diff] [blame] | 100 | %phi = phi i32 [%add0, %if], [%add1, %else] |
| 101 | %add2 = add i32 %phi, %a |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 102 | store i32 %add2, ptr addrspace(1) %out |
Tom Stellard | 58ac744 | 2014-04-29 23:12:48 +0000 | [diff] [blame] | 103 | ret void |
| 104 | } |
Tom Stellard | 744b99b | 2014-09-24 01:33:28 +0000 | [diff] [blame] | 105 | |
| 106 | ; The two S_ADD instructions should write to different registers, since |
| 107 | ; different threads will take different control flow paths. |
; @sgpr_if_else_valu_br: the branch condition is derived from the work-item
; id (converted to float and compared "ueq" against 0.0) rather than from a
; kernel argument; the float argument %a is not used in the body. The arms
; compute %b + %c (%if) and %d + %e (%else), and the selected value is stored
; to %out. The SI checks expect exec-mask based control flow
; (s_and_saveexec_b64 / s_or_saveexec_b64) with the two s_add_i32 results
; written to s8 and s0 respectively.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 108 | define amdgpu_kernel void @sgpr_if_else_valu_br(ptr addrspace(1) %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) { |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 109 | ; SI-LABEL: sgpr_if_else_valu_br: |
| 110 | ; SI: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 111 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xc |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 112 | ; SI-NEXT: v_cvt_f32_u32_e32 v0, v0 |
Matt Arsenault | d719f1c | 2021-08-03 19:09:44 -0400 | [diff] [blame] | 113 | ; SI-NEXT: ; implicit-def: $sgpr8 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 114 | ; SI-NEXT: v_cmp_lg_f32_e32 vcc, 0, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 115 | ; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc |
| 116 | ; SI-NEXT: s_xor_b64 s[6:7], exec, s[6:7] |
RamNalamothu | 18f93512 | 2021-11-20 01:53:38 +0530 | [diff] [blame] | 117 | ; SI-NEXT: s_cbranch_execz .LBB2_2 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 118 | ; SI-NEXT: ; %bb.1: ; %else |
| 119 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 120 | ; SI-NEXT: s_add_i32 s8, s2, s3 |
RamNalamothu | 18f93512 | 2021-11-20 01:53:38 +0530 | [diff] [blame] | 121 | ; SI-NEXT: .LBB2_2: ; %Flow |
Shilei Tian | ca33649 | 2024-11-08 16:36:10 -0500 | [diff] [blame] | 122 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 123 | ; SI-NEXT: s_or_saveexec_b64 s[2:3], s[6:7] |
| 124 | ; SI-NEXT: v_mov_b32_e32 v0, s8 |
| 125 | ; SI-NEXT: s_xor_b64 exec, exec, s[2:3] |
| 126 | ; SI-NEXT: ; %bb.3: ; %if |
| 127 | ; SI-NEXT: s_add_i32 s0, s0, s1 |
| 128 | ; SI-NEXT: v_mov_b32_e32 v0, s0 |
| 129 | ; SI-NEXT: ; %bb.4: ; %endif |
| 130 | ; SI-NEXT: s_or_b64 exec, exec, s[2:3] |
| 131 | ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
Carl Ritson | c316332 | 2022-10-06 09:06:32 +0900 | [diff] [blame] | 132 | ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 133 | ; SI-NEXT: s_mov_b32 s2, -1 |
| 134 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| 135 | ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 136 | ; SI-NEXT: s_endpgm |
; The branch condition is computed from the work-item id, not from an argument.
Tom Stellard | 744b99b | 2014-09-24 01:33:28 +0000 | [diff] [blame] | 137 | entry: |
Matt Arsenault | 9c47dd5 | 2016-02-11 06:02:01 +0000 | [diff] [blame] | 138 | %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
Tom Stellard | 744b99b | 2014-09-24 01:33:28 +0000 | [diff] [blame] | 139 | %tid_f = uitofp i32 %tid to float |
| 140 | %tmp1 = fcmp ueq float %tid_f, 0.0 |
| 141 | br i1 %tmp1, label %if, label %else |
| 142 | |
| 143 | if: |
| 144 | %tmp2 = add i32 %b, %c |
| 145 | br label %endif |
| 146 | |
| 147 | else: |
| 148 | %tmp3 = add i32 %d, %e |
| 149 | br label %endif |
| 150 | |
; Merge the value produced by whichever arm ran and store it to %out.
| 151 | endif: |
| 152 | %tmp4 = phi i32 [%tmp2, %if], [%tmp3, %else] |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 153 | store i32 %tmp4, ptr addrspace(1) %out |
Tom Stellard | 744b99b | 2014-09-24 01:33:28 +0000 | [diff] [blame] | 154 | ret void |
| 155 | } |
| 156 | |
; @sgpr_if_else_valu_cmp_phi_br: branches on whether the work-item id is zero.
; Each arm loads an i32 indexed by the work-item id (from %a in %if, from %b
; in %else) and compares it (eq 0 in %if, slt 0 in %else). The two i1 results
; are merged by a phi, sign-extended to i32 and stored to %out. The SI checks
; expect the merged condition to be held in s[8:9] and converted to 0 / -1 by
; v_cndmask_b32_e64.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 157 | define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 158 | ; SI-LABEL: sgpr_if_else_valu_cmp_phi_br: |
| 159 | ; SI: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 160 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 161 | ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 162 | ; SI-NEXT: s_mov_b32 s6, 0 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 163 | ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
Guozhi Wei | 11e8686 | 2022-11-14 19:34:59 +0000 | [diff] [blame] | 164 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Matt Arsenault | d719f1c | 2021-08-03 19:09:44 -0400 | [diff] [blame] | 165 | ; SI-NEXT: ; implicit-def: $sgpr8_sgpr9 |
| 166 | ; SI-NEXT: s_and_saveexec_b64 s[10:11], vcc |
| 167 | ; SI-NEXT: s_xor_b64 s[10:11], exec, s[10:11] |
RamNalamothu | 18f93512 | 2021-11-20 01:53:38 +0530 | [diff] [blame] | 168 | ; SI-NEXT: s_cbranch_execz .LBB3_2 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 169 | ; SI-NEXT: ; %bb.1: ; %else |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 170 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
Ruiling Song | 208332d | 2021-04-19 10:45:41 +0800 | [diff] [blame] | 171 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 172 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 173 | ; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 174 | ; SI-NEXT: s_waitcnt vmcnt(0) |
Ruiling Song | 208332d | 2021-04-19 10:45:41 +0800 | [diff] [blame] | 175 | ; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0 |
Matt Arsenault | d719f1c | 2021-08-03 19:09:44 -0400 | [diff] [blame] | 176 | ; SI-NEXT: s_and_b64 s[8:9], vcc, exec |
Ruiling Song | 208332d | 2021-04-19 10:45:41 +0800 | [diff] [blame] | 177 | ; SI-NEXT: ; implicit-def: $vgpr0 |
RamNalamothu | 18f93512 | 2021-11-20 01:53:38 +0530 | [diff] [blame] | 178 | ; SI-NEXT: .LBB3_2: ; %Flow |
Matt Arsenault | d719f1c | 2021-08-03 19:09:44 -0400 | [diff] [blame] | 179 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 180 | ; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] |
RamNalamothu | 18f93512 | 2021-11-20 01:53:38 +0530 | [diff] [blame] | 181 | ; SI-NEXT: s_cbranch_execz .LBB3_4 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 182 | ; SI-NEXT: ; %bb.3: ; %if |
Matt Arsenault | d719f1c | 2021-08-03 19:09:44 -0400 | [diff] [blame] | 183 | ; SI-NEXT: s_mov_b32 s15, 0xf000 |
| 184 | ; SI-NEXT: s_mov_b32 s14, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 185 | ; SI-NEXT: s_mov_b64 s[12:13], s[2:3] |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 186 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | d719f1c | 2021-08-03 19:09:44 -0400 | [diff] [blame] | 187 | ; SI-NEXT: buffer_load_dword v0, v[0:1], s[12:15], 0 addr64 |
| 188 | ; SI-NEXT: s_andn2_b64 s[2:3], s[8:9], exec |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 189 | ; SI-NEXT: s_waitcnt vmcnt(0) |
| 190 | ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| 191 | ; SI-NEXT: s_and_b64 s[6:7], vcc, exec |
Matt Arsenault | d719f1c | 2021-08-03 19:09:44 -0400 | [diff] [blame] | 192 | ; SI-NEXT: s_or_b64 s[8:9], s[2:3], s[6:7] |
RamNalamothu | 18f93512 | 2021-11-20 01:53:38 +0530 | [diff] [blame] | 193 | ; SI-NEXT: .LBB3_4: ; %endif |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 194 | ; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 195 | ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 196 | ; SI-NEXT: s_mov_b32 s2, -1 |
Matt Arsenault | d719f1c | 2021-08-03 19:09:44 -0400 | [diff] [blame] | 197 | ; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[8:9] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 198 | ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
Matt Arsenault | c28f1fa | 2020-01-31 18:14:50 -0500 | [diff] [blame] | 199 | ; SI-NEXT: s_endpgm |
; The branch condition is computed from the work-item id, not from an argument.
Matt Arsenault | becd656 | 2014-12-03 05:22:35 +0000 | [diff] [blame] | 200 | entry: |
Matt Arsenault | 9c47dd5 | 2016-02-11 06:02:01 +0000 | [diff] [blame] | 201 | %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
Matt Arsenault | becd656 | 2014-12-03 05:22:35 +0000 | [diff] [blame] | 202 | %tmp1 = icmp eq i32 %tid, 0 |
| 203 | br i1 %tmp1, label %if, label %else |
| 204 | |
; %if arm: load %a[%tid] and test it for equality with zero.
| 205 | if: |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 206 | %gep.if = getelementptr i32, ptr addrspace(1) %a, i32 %tid |
| 207 | %a.val = load i32, ptr addrspace(1) %gep.if |
Matt Arsenault | becd656 | 2014-12-03 05:22:35 +0000 | [diff] [blame] | 208 | %cmp.if = icmp eq i32 %a.val, 0 |
| 209 | br label %endif |
| 210 | |
; %else arm: load %b[%tid] and test whether it is negative (signed).
| 211 | else: |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 212 | %gep.else = getelementptr i32, ptr addrspace(1) %b, i32 %tid |
| 213 | %b.val = load i32, ptr addrspace(1) %gep.else |
Matt Arsenault | becd656 | 2014-12-03 05:22:35 +0000 | [diff] [blame] | 214 | %cmp.else = icmp slt i32 %b.val, 0 |
| 215 | br label %endif |
| 216 | |
; Merge the two i1 compare results, sign-extend to i32, and store to %out.
| 217 | endif: |
| 218 | %tmp4 = phi i1 [%cmp.if, %if], [%cmp.else, %else] |
| 219 | %ext = sext i1 %tmp4 to i32 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 220 | store i32 %ext, ptr addrspace(1) %out |
Matt Arsenault | becd656 | 2014-12-03 05:22:35 +0000 | [diff] [blame] | 221 | ret void |
| 222 | } |
| 223 | |
; Declaration of the llvm.amdgcn.workitem.id.x intrinsic that
; @sgpr_if_else_valu_br and @sgpr_if_else_valu_cmp_phi_br call to obtain %tid.
; Attribute group #0, defined below, marks it readnone.
Matt Arsenault | 9c47dd5 | 2016-02-11 06:02:01 +0000 | [diff] [blame] | 224 | declare i32 @llvm.amdgcn.workitem.id.x() #0 |
Tom Stellard | 744b99b | 2014-09-24 01:33:28 +0000 | [diff] [blame] | 225 | |
| 226 | attributes #0 = { readnone } |