Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | ; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=SI |
| 3 | ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=VI |
| 4 | ; RUN: llc < %s -march=r600 -mcpu=cypress -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=EG |
| 5 | ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=GFX10 |
| 6 | ; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=GFX10-GISEL |
| 7 | |
; Intrinsic declarations. Each llvm.cttz overload takes the value plus an i1
; flag operand; llvm.amdgcn.workitem.id.x takes no operands and returns i32.
| 8 | declare i7 @llvm.cttz.i7(i7, i1) nounwind readnone |
| 9 | declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone |
| 10 | declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone |
| 11 | |
| 12 | declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone |
| 13 | declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone |
| 14 | declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone |
| 15 | |
| 16 | declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone |
| 17 | declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) nounwind readnone |
| 18 | declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) nounwind readnone |
| 19 | |
| 20 | declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone |
| 21 | |
; Count trailing zeros of an i32 kernel argument (%val) and store the result
; to %out. The intrinsic flag operand is false, so a zero input must still give
; a defined result (the bit width, per the LangRef). The expected amdgcn code
; follows s_ff1_i32_b32 with s_min_u32 against 32; the expected r600 code
; picks the literal 32 through CNDE_INT instead.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 22 | define amdgpu_kernel void @s_cttz_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 23 | ; SI-LABEL: s_cttz_i32: |
| 24 | ; SI: ; %bb.0: |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 25 | ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 26 | ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| 27 | ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 28 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 29 | ; SI-NEXT: s_ff1_i32_b32 s2, s2 |
| 30 | ; SI-NEXT: s_min_u32 s4, s2, 32 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 31 | ; SI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 32 | ; SI-NEXT: v_mov_b32_e32 v0, s4 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 33 | ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| 34 | ; SI-NEXT: s_endpgm |
| 35 | ; |
| 36 | ; VI-LABEL: s_cttz_i32: |
| 37 | ; VI: ; %bb.0: |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 38 | ; VI-NEXT: s_load_dword s4, s[0:1], 0x2c |
| 39 | ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 |
| 40 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 41 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 42 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 43 | ; VI-NEXT: s_ff1_i32_b32 s4, s4 |
| 44 | ; VI-NEXT: s_min_u32 s4, s4, 32 |
| 45 | ; VI-NEXT: v_mov_b32_e32 v0, s4 |
| 46 | ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 47 | ; VI-NEXT: s_endpgm |
| 48 | ; |
| 49 | ; EG-LABEL: s_cttz_i32: |
| 50 | ; EG: ; %bb.0: |
| 51 | ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] |
| 52 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| 53 | ; EG-NEXT: CF_END |
| 54 | ; EG-NEXT: PAD |
| 55 | ; EG-NEXT: ALU clause starting at 4: |
| 56 | ; EG-NEXT: FFBL_INT * T0.W, KC0[2].Z, |
| 57 | ; EG-NEXT: CNDE_INT T0.X, KC0[2].Z, literal.x, PV.W, |
| 58 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 59 | ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) |
| 60 | ; |
| 61 | ; GFX10-LABEL: s_cttz_i32: |
| 62 | ; GFX10: ; %bb.0: |
| 63 | ; GFX10-NEXT: s_clause 0x1 |
| 64 | ; GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c |
| 65 | ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 66 | ; GFX10-NEXT: v_mov_b32_e32 v0, 0 |
| 67 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 68 | ; GFX10-NEXT: s_ff1_i32_b32 s0, s4 |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 69 | ; GFX10-NEXT: s_min_u32 s0, s0, 32 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 70 | ; GFX10-NEXT: v_mov_b32_e32 v1, s0 |
| 71 | ; GFX10-NEXT: global_store_dword v0, v1, s[2:3] |
| 72 | ; GFX10-NEXT: s_endpgm |
| 73 | ; |
| 74 | ; GFX10-GISEL-LABEL: s_cttz_i32: |
| 75 | ; GFX10-GISEL: ; %bb.0: |
| 76 | ; GFX10-GISEL-NEXT: s_clause 0x1 |
| 77 | ; GFX10-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c |
| 78 | ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 79 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 80 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 81 | ; GFX10-GISEL-NEXT: s_ff1_i32_b32 s0, s4 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 82 | ; GFX10-GISEL-NEXT: s_min_u32 s0, s0, 32 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 83 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| 84 | ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 85 | ; GFX10-GISEL-NEXT: s_endpgm |
| 86 | %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 87 | store i32 %cttz, ptr addrspace(1) %out, align 4 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 88 | ret void |
| 89 | } |
| 90 | |
; Count trailing zeros of an i32 loaded from memory. Each workitem loads the
; element of %valptr selected by llvm.amdgcn.workitem.id.x, applies cttz with
; the flag operand false, and stores to %out (the store address is not indexed
; by the workitem id). The expected amdgcn code is v_ffbl_b32 followed by
; v_min_u32 against 32; the expected r600 code uses FFBL_INT plus CNDE_INT
; with the literal 32.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 91 | define amdgpu_kernel void @v_cttz_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 92 | ; SI-LABEL: v_cttz_i32: |
| 93 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 94 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 95 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 96 | ; SI-NEXT: s_mov_b32 s10, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 97 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 98 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 99 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 100 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 101 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 102 | ; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 103 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 104 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 105 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 106 | ; SI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 107 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 108 | ; SI-NEXT: v_min_u32_e32 v0, 32, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 109 | ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 110 | ; SI-NEXT: s_endpgm |
| 111 | ; |
| 112 | ; VI-LABEL: v_cttz_i32: |
| 113 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 114 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 115 | ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 116 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 117 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 118 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 119 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 120 | ; VI-NEXT: flat_load_dword v0, v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 121 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 122 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 123 | ; VI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 124 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 125 | ; VI-NEXT: v_min_u32_e32 v0, 32, v0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 126 | ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 127 | ; VI-NEXT: s_endpgm |
| 128 | ; |
| 129 | ; EG-LABEL: v_cttz_i32: |
| 130 | ; EG: ; %bb.0: |
| 131 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 132 | ; EG-NEXT: TEX 0 @6 |
| 133 | ; EG-NEXT: ALU 3, @11, KC0[CB0:0-32], KC1[] |
| 134 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| 135 | ; EG-NEXT: CF_END |
| 136 | ; EG-NEXT: PAD |
| 137 | ; EG-NEXT: Fetch clause starting at 6: |
| 138 | ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| 139 | ; EG-NEXT: ALU clause starting at 8: |
| 140 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 141 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 142 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, |
| 143 | ; EG-NEXT: ALU clause starting at 11: |
| 144 | ; EG-NEXT: FFBL_INT * T0.W, T0.X, |
| 145 | ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, |
| 146 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 147 | ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) |
| 148 | ; |
| 149 | ; GFX10-LABEL: v_cttz_i32: |
| 150 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 151 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 152 | ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 153 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 154 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 155 | ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] |
| 156 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 157 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 158 | ; GFX10-NEXT: v_min_u32_e32 v0, 32, v0 |
| 159 | ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 160 | ; GFX10-NEXT: s_endpgm |
| 161 | ; |
| 162 | ; GFX10-GISEL-LABEL: v_cttz_i32: |
| 163 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 164 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 165 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 166 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 167 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 168 | ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] |
| 169 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 170 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0 |
| 171 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 172 | ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] |
| 173 | ; GFX10-GISEL-NEXT: s_endpgm |
| 174 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 175 | %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid |
| 176 | %val = load i32, ptr addrspace(1) %in.gep, align 4 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 177 | %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 178 | store i32 %cttz, ptr addrspace(1) %out, align 4 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 179 | ret void |
| 180 | } |
| 181 | |
; Two-element vector variant. Each workitem loads a <2 x i32> from %valptr
; indexed by llvm.amdgcn.workitem.id.x, applies llvm.cttz.v2i32 with the flag
; operand false, and stores the vector to %out. The expected code handles the
; elements separately, with one find-first-bit and one clamp or select
; against 32 per element.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 182 | define amdgpu_kernel void @v_cttz_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 183 | ; SI-LABEL: v_cttz_v2i32: |
| 184 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 185 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 186 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 187 | ; SI-NEXT: s_mov_b32 s10, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 188 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 189 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 190 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 191 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 192 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 193 | ; SI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[8:11], 0 addr64 |
| 194 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 195 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 196 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 197 | ; SI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 198 | ; SI-NEXT: v_ffbl_b32_e32 v1, v1 |
| 199 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 200 | ; SI-NEXT: v_min_u32_e32 v1, 32, v1 |
| 201 | ; SI-NEXT: v_min_u32_e32 v0, 32, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 202 | ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 203 | ; SI-NEXT: s_endpgm |
| 204 | ; |
| 205 | ; VI-LABEL: v_cttz_v2i32: |
| 206 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 207 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 208 | ; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 209 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 210 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 211 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 212 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 213 | ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 214 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 215 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 216 | ; VI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 217 | ; VI-NEXT: v_ffbl_b32_e32 v1, v1 |
| 218 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 219 | ; VI-NEXT: v_min_u32_e32 v1, 32, v1 |
| 220 | ; VI-NEXT: v_min_u32_e32 v0, 32, v0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 221 | ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 222 | ; VI-NEXT: s_endpgm |
| 223 | ; |
| 224 | ; EG-LABEL: v_cttz_v2i32: |
| 225 | ; EG: ; %bb.0: |
| 226 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 227 | ; EG-NEXT: TEX 0 @6 |
| 228 | ; EG-NEXT: ALU 6, @11, KC0[CB0:0-32], KC1[] |
| 229 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 |
| 230 | ; EG-NEXT: CF_END |
| 231 | ; EG-NEXT: PAD |
| 232 | ; EG-NEXT: Fetch clause starting at 6: |
| 233 | ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 |
| 234 | ; EG-NEXT: ALU clause starting at 8: |
| 235 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 236 | ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) |
| 237 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, |
| 238 | ; EG-NEXT: ALU clause starting at 11: |
| 239 | ; EG-NEXT: FFBL_INT * T0.W, T0.Y, |
| 240 | ; EG-NEXT: CNDE_INT T0.Y, T0.Y, literal.x, PV.W, |
| 241 | ; EG-NEXT: FFBL_INT * T0.W, T0.X, |
| 242 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 243 | ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, |
| 244 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 245 | ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) |
| 246 | ; |
| 247 | ; GFX10-LABEL: v_cttz_v2i32: |
| 248 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 249 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 250 | ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 251 | ; GFX10-NEXT: v_mov_b32_e32 v2, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 252 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 253 | ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] |
| 254 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 255 | ; GFX10-NEXT: v_ffbl_b32_e32 v1, v1 |
| 256 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 257 | ; GFX10-NEXT: v_min_u32_e32 v1, 32, v1 |
| 258 | ; GFX10-NEXT: v_min_u32_e32 v0, 32, v0 |
| 259 | ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 260 | ; GFX10-NEXT: s_endpgm |
| 261 | ; |
| 262 | ; GFX10-GISEL-LABEL: v_cttz_v2i32: |
| 263 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 264 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 265 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 266 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 267 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 268 | ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] |
| 269 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 270 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0 |
| 271 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 |
| 272 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0 |
| 273 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 274 | ; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] |
| 275 | ; GFX10-GISEL-NEXT: s_endpgm |
| 276 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 277 | %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid |
| 278 | %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 279 | %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 false) nounwind readnone |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 280 | store <2 x i32> %cttz, ptr addrspace(1) %out, align 8 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 281 | ret void |
| 282 | } |
| 283 | |
; Four-element vector variant. Each workitem loads a <4 x i32> from %valptr
; indexed by llvm.amdgcn.workitem.id.x, applies llvm.cttz.v4i32 with the flag
; operand false, and stores the vector to %out. The expected code handles the
; four elements separately, with one find-first-bit and one clamp or select
; against 32 per element.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 284 | define amdgpu_kernel void @v_cttz_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 285 | ; SI-LABEL: v_cttz_v4i32: |
| 286 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 287 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 288 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 289 | ; SI-NEXT: s_mov_b32 s10, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 290 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 4, v0 |
| 291 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 292 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 293 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 294 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 295 | ; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 |
| 296 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 297 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 298 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 299 | ; SI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 300 | ; SI-NEXT: v_ffbl_b32_e32 v3, v3 |
| 301 | ; SI-NEXT: v_ffbl_b32_e32 v2, v2 |
| 302 | ; SI-NEXT: v_ffbl_b32_e32 v1, v1 |
| 303 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 304 | ; SI-NEXT: v_min_u32_e32 v3, 32, v3 |
| 305 | ; SI-NEXT: v_min_u32_e32 v2, 32, v2 |
| 306 | ; SI-NEXT: v_min_u32_e32 v1, 32, v1 |
| 307 | ; SI-NEXT: v_min_u32_e32 v0, 32, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 308 | ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 309 | ; SI-NEXT: s_endpgm |
| 310 | ; |
| 311 | ; VI-LABEL: v_cttz_v4i32: |
| 312 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 313 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 314 | ; VI-NEXT: v_lshlrev_b32_e32 v0, 4, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 315 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 316 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 317 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 318 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 319 | ; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 320 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 321 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 322 | ; VI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 323 | ; VI-NEXT: v_ffbl_b32_e32 v3, v3 |
| 324 | ; VI-NEXT: v_ffbl_b32_e32 v2, v2 |
| 325 | ; VI-NEXT: v_ffbl_b32_e32 v1, v1 |
| 326 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 327 | ; VI-NEXT: v_min_u32_e32 v3, 32, v3 |
| 328 | ; VI-NEXT: v_min_u32_e32 v2, 32, v2 |
| 329 | ; VI-NEXT: v_min_u32_e32 v1, 32, v1 |
| 330 | ; VI-NEXT: v_min_u32_e32 v0, 32, v0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 331 | ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 332 | ; VI-NEXT: s_endpgm |
| 333 | ; |
| 334 | ; EG-LABEL: v_cttz_v4i32: |
| 335 | ; EG: ; %bb.0: |
| 336 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 337 | ; EG-NEXT: TEX 0 @6 |
| 338 | ; EG-NEXT: ALU 12, @11, KC0[CB0:0-32], KC1[] |
| 339 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 |
| 340 | ; EG-NEXT: CF_END |
| 341 | ; EG-NEXT: PAD |
| 342 | ; EG-NEXT: Fetch clause starting at 6: |
| 343 | ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 |
| 344 | ; EG-NEXT: ALU clause starting at 8: |
| 345 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 346 | ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) |
| 347 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, |
| 348 | ; EG-NEXT: ALU clause starting at 11: |
| 349 | ; EG-NEXT: FFBL_INT * T1.W, T0.W, |
| 350 | ; EG-NEXT: FFBL_INT T2.W, T0.Z, |
| 351 | ; EG-NEXT: CNDE_INT * T0.W, T0.W, literal.x, PV.W, BS:VEC_021/SCL_122 |
| 352 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 353 | ; EG-NEXT: CNDE_INT T0.Z, T0.Z, literal.x, PV.W, |
| 354 | ; EG-NEXT: FFBL_INT * T1.W, T0.Y, |
| 355 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 356 | ; EG-NEXT: CNDE_INT T0.Y, T0.Y, literal.x, PV.W, |
| 357 | ; EG-NEXT: FFBL_INT * T1.W, T0.X, |
| 358 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 359 | ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, |
| 360 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 361 | ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) |
| 362 | ; |
| 363 | ; GFX10-LABEL: v_cttz_v4i32: |
| 364 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 365 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 366 | ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 367 | ; GFX10-NEXT: v_mov_b32_e32 v4, 0 |
| 368 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 369 | ; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] |
| 370 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 371 | ; GFX10-NEXT: v_ffbl_b32_e32 v3, v3 |
| 372 | ; GFX10-NEXT: v_ffbl_b32_e32 v2, v2 |
| 373 | ; GFX10-NEXT: v_ffbl_b32_e32 v1, v1 |
| 374 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 375 | ; GFX10-NEXT: v_min_u32_e32 v3, 32, v3 |
| 376 | ; GFX10-NEXT: v_min_u32_e32 v2, 32, v2 |
| 377 | ; GFX10-NEXT: v_min_u32_e32 v1, 32, v1 |
| 378 | ; GFX10-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 379 | ; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] |
| 380 | ; GFX10-NEXT: s_endpgm |
| 381 | ; |
| 382 | ; GFX10-GISEL-LABEL: v_cttz_v4i32: |
| 383 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 384 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 385 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 386 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 387 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 388 | ; GFX10-GISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] |
| 389 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 390 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0 |
| 391 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 |
| 392 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v2 |
| 393 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v3, v3 |
| 394 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0 |
| 395 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 |
| 396 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2 |
| 397 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v3, 32, v3 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 398 | ; GFX10-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] |
| 399 | ; GFX10-GISEL-NEXT: s_endpgm |
| 400 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 401 | %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid |
| 402 | %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 403 | %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 false) nounwind readnone |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 404 | store <4 x i32> %cttz, ptr addrspace(1) %out, align 16 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 405 | ret void |
| 406 | } |
| 407 | |
; Sub-dword variant. Loads a single i8 straight from %valptr (no workitem
; indexing), applies llvm.cttz.i8 with the flag operand false, and stores the
; i8 result to %out. The expected code has no min or select here. Instead it
; ORs 0x100 (256 in the r600 output) into the loaded byte before the 32-bit
; find-first-bit, so bit 8 is always set and a zero byte produces 8.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 408 | define amdgpu_kernel void @v_cttz_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 409 | ; SI-LABEL: v_cttz_i8: |
| 410 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 411 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 412 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 413 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 414 | ; SI-NEXT: s_mov_b32 s10, s6 |
| 415 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 416 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 417 | ; SI-NEXT: s_mov_b32 s8, s2 |
| 418 | ; SI-NEXT: s_mov_b32 s9, s3 |
| 419 | ; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 |
| 420 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 421 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 422 | ; SI-NEXT: s_waitcnt vmcnt(0) |
| 423 | ; SI-NEXT: v_or_b32_e32 v0, 0x100, v0 |
| 424 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 425 | ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 426 | ; SI-NEXT: s_endpgm |
| 427 | ; |
| 428 | ; VI-LABEL: v_cttz_i8: |
| 429 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 430 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| 431 | ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| 432 | ; VI-NEXT: s_mov_b32 s6, -1 |
| 433 | ; VI-NEXT: s_mov_b32 s10, s6 |
| 434 | ; VI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 435 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 436 | ; VI-NEXT: s_mov_b32 s8, s2 |
| 437 | ; VI-NEXT: s_mov_b32 s9, s3 |
| 438 | ; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 |
| 439 | ; VI-NEXT: s_mov_b32 s4, s0 |
| 440 | ; VI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 441 | ; VI-NEXT: s_waitcnt vmcnt(0) |
| 442 | ; VI-NEXT: v_or_b32_e32 v0, 0x100, v0 |
| 443 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 444 | ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 445 | ; VI-NEXT: s_endpgm |
| 446 | ; |
| 447 | ; EG-LABEL: v_cttz_i8: |
| 448 | ; EG: ; %bb.0: |
| 449 | ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] |
| 450 | ; EG-NEXT: TEX 0 @6 |
| 451 | ; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] |
| 452 | ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X |
| 453 | ; EG-NEXT: CF_END |
| 454 | ; EG-NEXT: PAD |
| 455 | ; EG-NEXT: Fetch clause starting at 6: |
| 456 | ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 |
| 457 | ; EG-NEXT: ALU clause starting at 8: |
| 458 | ; EG-NEXT: MOV * T0.X, KC0[2].Z, |
| 459 | ; EG-NEXT: ALU clause starting at 9: |
| 460 | ; EG-NEXT: OR_INT * T0.W, T0.X, literal.x, |
| 461 | ; EG-NEXT: 256(3.587324e-43), 0(0.000000e+00) |
| 462 | ; EG-NEXT: FFBL_INT T0.W, PV.W, |
| 463 | ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x, |
| 464 | ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) |
| 465 | ; EG-NEXT: AND_INT T0.W, PV.W, literal.x, |
| 466 | ; EG-NEXT: LSHL * T1.W, PS, literal.y, |
| 467 | ; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45) |
| 468 | ; EG-NEXT: LSHL T0.X, PV.W, PS, |
| 469 | ; EG-NEXT: LSHL * T0.W, literal.x, PS, |
| 470 | ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) |
| 471 | ; EG-NEXT: MOV T0.Y, 0.0, |
| 472 | ; EG-NEXT: MOV * T0.Z, 0.0, |
| 473 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| 474 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 475 | ; |
| 476 | ; GFX10-LABEL: v_cttz_i8: |
| 477 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 478 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 479 | ; GFX10-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 480 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 481 | ; GFX10-NEXT: global_load_ubyte v1, v0, s[2:3] |
| 482 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| 483 | ; GFX10-NEXT: v_or_b32_e32 v1, 0x100, v1 |
| 484 | ; GFX10-NEXT: v_ffbl_b32_e32 v1, v1 |
| 485 | ; GFX10-NEXT: global_store_byte v0, v1, s[0:1] |
| 486 | ; GFX10-NEXT: s_endpgm |
| 487 | ; |
| 488 | ; GFX10-GISEL-LABEL: v_cttz_i8: |
| 489 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 490 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 491 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 492 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 493 | ; GFX10-GISEL-NEXT: global_load_ubyte v1, v0, s[2:3] |
| 494 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 57b9107 | 2021-08-06 11:05:42 +0100 | [diff] [blame] | 495 | ; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 496 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 |
| 497 | ; GFX10-GISEL-NEXT: global_store_byte v0, v1, s[0:1] |
| 498 | ; GFX10-GISEL-NEXT: s_endpgm |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 499 | %val = load i8, ptr addrspace(1) %valptr |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 500 | %cttz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 501 | store i8 %cttz, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 502 | ret void |
| 503 | } |
| 504 | |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 505 | define amdgpu_kernel void @s_cttz_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 506 | ; SI-LABEL: s_cttz_i64: |
| 507 | ; SI: ; %bb.0: |
| 508 | ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13 |
| 509 | ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| 510 | ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 511 | ; SI-NEXT: s_mov_b32 s2, -1 |
| 512 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 513 | ; SI-NEXT: s_ff1_i32_b32 s5, s5 |
| 514 | ; SI-NEXT: s_min_u32 s5, s5, 0xffffffdf |
| 515 | ; SI-NEXT: s_add_i32 s5, s5, 32 |
| 516 | ; SI-NEXT: s_ff1_i32_b32 s4, s4 |
| 517 | ; SI-NEXT: v_mov_b32_e32 v0, s5 |
| 518 | ; SI-NEXT: v_min3_u32 v0, s4, v0, 64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 519 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
| 520 | ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| 521 | ; SI-NEXT: s_endpgm |
| 522 | ; |
| 523 | ; VI-LABEL: s_cttz_i64: |
| 524 | ; VI: ; %bb.0: |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 525 | ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4c |
| 526 | ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 |
| 527 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 528 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 529 | ; VI-NEXT: v_mov_b32_e32 v1, 0 |
| 530 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 531 | ; VI-NEXT: s_ff1_i32_b32 s5, s5 |
| 532 | ; VI-NEXT: v_add_u32_e64 v0, s[6:7], s5, 32 clamp |
| 533 | ; VI-NEXT: s_ff1_i32_b32 s4, s4 |
| 534 | ; VI-NEXT: v_min3_u32 v0, s4, v0, 64 |
| 535 | ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 536 | ; VI-NEXT: s_endpgm |
| 537 | ; |
| 538 | ; EG-LABEL: s_cttz_i64: |
| 539 | ; EG: ; %bb.0: |
| 540 | ; EG-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[] |
| 541 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 |
| 542 | ; EG-NEXT: CF_END |
| 543 | ; EG-NEXT: PAD |
| 544 | ; EG-NEXT: ALU clause starting at 4: |
| 545 | ; EG-NEXT: FFBL_INT * T0.W, KC0[5].X, |
| 546 | ; EG-NEXT: CNDE_INT * T0.W, KC0[5].X, literal.x, PV.W, |
| 547 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 548 | ; EG-NEXT: FFBL_INT T1.W, KC0[4].W, |
| 549 | ; EG-NEXT: ADD_INT * T0.W, PV.W, literal.x, |
| 550 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 551 | ; EG-NEXT: CNDE_INT T0.X, KC0[4].W, PS, PV.W, |
| 552 | ; EG-NEXT: MOV T0.Y, 0.0, |
| 553 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| 554 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 555 | ; |
| 556 | ; GFX10-LABEL: s_cttz_i64: |
| 557 | ; GFX10: ; %bb.0: |
| 558 | ; GFX10-NEXT: s_clause 0x1 |
| 559 | ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c |
| 560 | ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 |
| 561 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
| 562 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 563 | ; GFX10-NEXT: s_ff1_i32_b32 s0, s3 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 564 | ; GFX10-NEXT: v_add_nc_u32_e64 v0, s0, 32 clamp |
| 565 | ; GFX10-NEXT: s_ff1_i32_b32 s0, s2 |
| 566 | ; GFX10-NEXT: v_min3_u32 v0, s0, v0, 64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 567 | ; GFX10-NEXT: global_store_dwordx2 v1, v[0:1], s[4:5] |
| 568 | ; GFX10-NEXT: s_endpgm |
| 569 | ; |
| 570 | ; GFX10-GISEL-LABEL: s_cttz_i64: |
| 571 | ; GFX10-GISEL: ; %bb.0: |
| 572 | ; GFX10-GISEL-NEXT: s_clause 0x1 |
| 573 | ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c |
| 574 | ; GFX10-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 |
Jay Foad | 342642d | 2021-08-06 13:09:47 +0100 | [diff] [blame] | 575 | ; GFX10-GISEL-NEXT: s_mov_b32 s1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 576 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
| 577 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 578 | ; GFX10-GISEL-NEXT: s_ff1_i32_b64 s0, s[2:3] |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 579 | ; GFX10-GISEL-NEXT: s_min_u32 s0, s0, 64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 580 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| 581 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 |
| 582 | ; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] |
| 583 | ; GFX10-GISEL-NEXT: s_endpgm |
| 584 | %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false) |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 585 | store i64 %cttz, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 586 | ret void |
| 587 | } |
| 588 | |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 589 | define amdgpu_kernel void @s_cttz_i64_trunc(ptr addrspace(1) noalias %out, i64 %val) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 590 | ; SI-LABEL: s_cttz_i64_trunc: |
| 591 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 592 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 593 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 594 | ; SI-NEXT: s_mov_b32 s6, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 595 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 596 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 597 | ; SI-NEXT: s_mov_b32 s5, s1 |
| 598 | ; SI-NEXT: s_ff1_i32_b32 s0, s3 |
| 599 | ; SI-NEXT: s_min_u32 s0, s0, 0xffffffdf |
| 600 | ; SI-NEXT: s_add_i32 s0, s0, 32 |
| 601 | ; SI-NEXT: s_ff1_i32_b32 s1, s2 |
| 602 | ; SI-NEXT: v_mov_b32_e32 v0, s0 |
| 603 | ; SI-NEXT: v_min3_u32 v0, s1, v0, 64 |
| 604 | ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 605 | ; SI-NEXT: s_endpgm |
| 606 | ; |
| 607 | ; VI-LABEL: s_cttz_i64_trunc: |
| 608 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 609 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| 610 | ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| 611 | ; VI-NEXT: s_mov_b32 s6, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 612 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 613 | ; VI-NEXT: s_mov_b32 s4, s0 |
| 614 | ; VI-NEXT: s_ff1_i32_b32 s0, s3 |
| 615 | ; VI-NEXT: s_mov_b32 s5, s1 |
| 616 | ; VI-NEXT: v_add_u32_e64 v0, s[0:1], s0, 32 clamp |
| 617 | ; VI-NEXT: s_ff1_i32_b32 s0, s2 |
| 618 | ; VI-NEXT: v_min3_u32 v0, s0, v0, 64 |
| 619 | ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 620 | ; VI-NEXT: s_endpgm |
| 621 | ; |
| 622 | ; EG-LABEL: s_cttz_i64_trunc: |
| 623 | ; EG: ; %bb.0: |
| 624 | ; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[] |
| 625 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| 626 | ; EG-NEXT: CF_END |
| 627 | ; EG-NEXT: PAD |
| 628 | ; EG-NEXT: ALU clause starting at 4: |
| 629 | ; EG-NEXT: FFBL_INT * T0.W, KC0[3].X, |
| 630 | ; EG-NEXT: CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W, |
| 631 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 632 | ; EG-NEXT: FFBL_INT T1.W, KC0[2].W, |
| 633 | ; EG-NEXT: ADD_INT * T0.W, PV.W, literal.x, |
| 634 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 635 | ; EG-NEXT: CNDE_INT T0.X, KC0[2].W, PS, PV.W, |
| 636 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| 637 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 638 | ; |
| 639 | ; GFX10-LABEL: s_cttz_i64_trunc: |
| 640 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 641 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 642 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 643 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 644 | ; GFX10-NEXT: s_ff1_i32_b32 s3, s3 |
| 645 | ; GFX10-NEXT: s_ff1_i32_b32 s2, s2 |
| 646 | ; GFX10-NEXT: v_add_nc_u32_e64 v0, s3, 32 clamp |
| 647 | ; GFX10-NEXT: v_min3_u32 v0, s2, v0, 64 |
| 648 | ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 649 | ; GFX10-NEXT: s_endpgm |
| 650 | ; |
| 651 | ; GFX10-GISEL-LABEL: s_cttz_i64_trunc: |
| 652 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 653 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 654 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 655 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 656 | ; GFX10-GISEL-NEXT: s_ff1_i32_b64 s2, s[2:3] |
| 657 | ; GFX10-GISEL-NEXT: s_min_u32 s2, s2, 64 |
| 658 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2 |
| 659 | ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 660 | ; GFX10-GISEL-NEXT: s_endpgm |
| 661 | %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false) |
| 662 | %trunc = trunc i64 %cttz to i32 |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 663 | store i32 %trunc, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 664 | ret void |
| 665 | } |
| 666 | |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 667 | define amdgpu_kernel void @v_cttz_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 668 | ; SI-LABEL: v_cttz_i64: |
| 669 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 670 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 671 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 672 | ; SI-NEXT: s_mov_b32 s6, 0 |
| 673 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 674 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
| 675 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 676 | ; SI-NEXT: s_mov_b64 s[4:5], s[2:3] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 677 | ; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 678 | ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 679 | ; SI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 680 | ; SI-NEXT: v_ffbl_b32_e32 v3, v3 |
| 681 | ; SI-NEXT: v_min_u32_e32 v3, 0xffffffdf, v3 |
| 682 | ; SI-NEXT: v_add_i32_e32 v3, vcc, 32, v3 |
| 683 | ; SI-NEXT: v_ffbl_b32_e32 v2, v2 |
| 684 | ; SI-NEXT: v_min3_u32 v2, v2, v3, 64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 685 | ; SI-NEXT: v_mov_b32_e32 v3, v1 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 686 | ; SI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 687 | ; SI-NEXT: s_endpgm |
| 688 | ; |
| 689 | ; VI-LABEL: v_cttz_i64: |
| 690 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 691 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 692 | ; VI-NEXT: v_lshlrev_b32_e32 v3, 3, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 693 | ; VI-NEXT: v_mov_b32_e32 v2, 0 |
| 694 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 695 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 696 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v3 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 697 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 698 | ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 699 | ; VI-NEXT: v_mov_b32_e32 v4, s1 |
| 700 | ; VI-NEXT: v_add_u32_e32 v3, vcc, s0, v3 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 701 | ; VI-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 702 | ; VI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 703 | ; VI-NEXT: v_ffbl_b32_e32 v1, v1 |
| 704 | ; VI-NEXT: v_add_u32_e64 v1, s[0:1], v1, 32 clamp |
| 705 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 706 | ; VI-NEXT: v_min3_u32 v1, v0, v1, 64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 707 | ; VI-NEXT: flat_store_dwordx2 v[3:4], v[1:2] |
| 708 | ; VI-NEXT: s_endpgm |
| 709 | ; |
| 710 | ; EG-LABEL: v_cttz_i64: |
| 711 | ; EG: ; %bb.0: |
| 712 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 713 | ; EG-NEXT: TEX 0 @6 |
| 714 | ; EG-NEXT: ALU 10, @11, KC0[CB0:0-32], KC1[] |
| 715 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 |
| 716 | ; EG-NEXT: CF_END |
| 717 | ; EG-NEXT: PAD |
| 718 | ; EG-NEXT: Fetch clause starting at 6: |
| 719 | ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 |
| 720 | ; EG-NEXT: ALU clause starting at 8: |
| 721 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 722 | ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) |
| 723 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, |
| 724 | ; EG-NEXT: ALU clause starting at 11: |
| 725 | ; EG-NEXT: FFBL_INT * T1.W, T0.Y, |
| 726 | ; EG-NEXT: CNDE_INT * T1.W, T0.Y, literal.x, PV.W, |
| 727 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 728 | ; EG-NEXT: FFBL_INT T2.W, T0.X, |
| 729 | ; EG-NEXT: ADD_INT * T1.W, PV.W, literal.x, |
| 730 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 731 | ; EG-NEXT: CNDE_INT T0.X, T0.X, PS, PV.W, |
| 732 | ; EG-NEXT: MOV T0.Y, 0.0, |
| 733 | ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W, |
| 734 | ; EG-NEXT: LSHR * T1.X, PV.W, literal.x, |
| 735 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 736 | ; |
| 737 | ; GFX10-LABEL: v_cttz_i64: |
| 738 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 739 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 740 | ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 741 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 742 | ; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] |
| 743 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 744 | ; GFX10-NEXT: v_ffbl_b32_e32 v1, v1 |
| 745 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 746 | ; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, 32 clamp |
| 747 | ; GFX10-NEXT: v_min3_u32 v0, v0, v1, 64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 748 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 749 | ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] |
| 750 | ; GFX10-NEXT: s_endpgm |
| 751 | ; |
| 752 | ; GFX10-GISEL-LABEL: v_cttz_i64: |
| 753 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 754 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | 24b67a90 | 2021-08-04 11:55:29 +0100 | [diff] [blame] | 755 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 756 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | 24b67a90 | 2021-08-04 11:55:29 +0100 | [diff] [blame] | 757 | ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 758 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 759 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 |
| 760 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0 |
| 761 | ; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v1, v1, 32 clamp |
| 762 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, v0, v1 |
Jay Foad | 24b67a90 | 2021-08-04 11:55:29 +0100 | [diff] [blame] | 763 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 764 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 64, v0 |
Jay Foad | 24b67a90 | 2021-08-04 11:55:29 +0100 | [diff] [blame] | 765 | ; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 766 | ; GFX10-GISEL-NEXT: s_endpgm |
| 767 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 768 | %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| 769 | %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid |
| 770 | %val = load i64, ptr addrspace(1) %in.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 771 | %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false) |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 772 | store i64 %cttz, ptr addrspace(1) %out.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 773 | ret void |
| 774 | } |
| 775 | |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 776 | define amdgpu_kernel void @v_cttz_i64_trunc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 777 | ; SI-LABEL: v_cttz_i64_trunc: |
| 778 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 779 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 780 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 781 | ; SI-NEXT: s_mov_b32 s6, 0 |
| 782 | ; SI-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| 783 | ; SI-NEXT: v_mov_b32_e32 v2, 0 |
| 784 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 785 | ; SI-NEXT: s_mov_b64 s[4:5], s[2:3] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 786 | ; SI-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 787 | ; SI-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 788 | ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 789 | ; SI-NEXT: s_waitcnt vmcnt(0) |
| 790 | ; SI-NEXT: v_ffbl_b32_e32 v0, v4 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 791 | ; SI-NEXT: v_min_u32_e32 v0, 0xffffffdf, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 792 | ; SI-NEXT: v_add_i32_e32 v0, vcc, 32, v0 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 793 | ; SI-NEXT: v_ffbl_b32_e32 v3, v3 |
| 794 | ; SI-NEXT: v_min3_u32 v0, v3, v0, 64 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 795 | ; SI-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 796 | ; SI-NEXT: s_endpgm |
| 797 | ; |
| 798 | ; VI-LABEL: v_cttz_i64_trunc: |
| 799 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 800 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 801 | ; VI-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 802 | ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 803 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 804 | ; VI-NEXT: v_mov_b32_e32 v2, s3 |
| 805 | ; VI-NEXT: v_add_u32_e32 v1, vcc, s2, v1 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 806 | ; VI-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 807 | ; VI-NEXT: flat_load_dwordx2 v[1:2], v[1:2] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 808 | ; VI-NEXT: v_add_u32_e32 v3, vcc, s0, v0 |
| 809 | ; VI-NEXT: v_mov_b32_e32 v4, s1 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 810 | ; VI-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 811 | ; VI-NEXT: s_waitcnt vmcnt(0) |
| 812 | ; VI-NEXT: v_ffbl_b32_e32 v0, v2 |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 813 | ; VI-NEXT: v_add_u32_e64 v0, s[0:1], v0, 32 clamp |
| 814 | ; VI-NEXT: v_ffbl_b32_e32 v1, v1 |
| 815 | ; VI-NEXT: v_min3_u32 v0, v1, v0, 64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 816 | ; VI-NEXT: flat_store_dword v[3:4], v0 |
| 817 | ; VI-NEXT: s_endpgm |
| 818 | ; |
| 819 | ; EG-LABEL: v_cttz_i64_trunc: |
| 820 | ; EG: ; %bb.0: |
| 821 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 822 | ; EG-NEXT: TEX 0 @6 |
| 823 | ; EG-NEXT: ALU 10, @11, KC0[CB0:0-32], KC1[] |
| 824 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| 825 | ; EG-NEXT: CF_END |
| 826 | ; EG-NEXT: PAD |
| 827 | ; EG-NEXT: Fetch clause starting at 6: |
| 828 | ; EG-NEXT: VTX_READ_64 T1.XY, T1.X, 0, #1 |
| 829 | ; EG-NEXT: ALU clause starting at 8: |
| 830 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 831 | ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) |
| 832 | ; EG-NEXT: ADD_INT * T1.X, KC0[2].Z, PV.W, |
| 833 | ; EG-NEXT: ALU clause starting at 11: |
| 834 | ; EG-NEXT: FFBL_INT * T0.W, T1.Y, |
| 835 | ; EG-NEXT: CNDE_INT * T0.W, T1.Y, literal.x, PV.W, |
| 836 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 837 | ; EG-NEXT: LSHL T0.Z, T0.X, literal.x, |
| 838 | ; EG-NEXT: FFBL_INT T1.W, T1.X, BS:VEC_120/SCL_212 |
| 839 | ; EG-NEXT: ADD_INT * T0.W, PV.W, literal.y, |
| 840 | ; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) |
| 841 | ; EG-NEXT: CNDE_INT T0.X, T1.X, PS, PV.W, |
| 842 | ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, PV.Z, |
| 843 | ; EG-NEXT: LSHR * T1.X, PV.W, literal.x, |
| 844 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 845 | ; |
| 846 | ; GFX10-LABEL: v_cttz_i64_trunc: |
| 847 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 848 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 849 | ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 850 | ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 851 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 852 | ; GFX10-NEXT: global_load_dwordx2 v[1:2], v1, s[2:3] |
| 853 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e6c364a | 2021-08-05 09:58:29 +0100 | [diff] [blame] | 854 | ; GFX10-NEXT: v_ffbl_b32_e32 v2, v2 |
| 855 | ; GFX10-NEXT: v_ffbl_b32_e32 v1, v1 |
| 856 | ; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, 32 clamp |
| 857 | ; GFX10-NEXT: v_min3_u32 v1, v1, v2, 64 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 858 | ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] |
| 859 | ; GFX10-NEXT: s_endpgm |
| 860 | ; |
| 861 | ; GFX10-GISEL-LABEL: v_cttz_i64_trunc: |
| 862 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 863 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 864 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 865 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 866 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 867 | ; GFX10-GISEL-NEXT: global_load_dwordx2 v[1:2], v1, s[2:3] |
| 868 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 869 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v2 |
| 870 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 |
| 871 | ; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v2, v2, 32 clamp |
| 872 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, v1, v2 |
| 873 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 64, v1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 874 | ; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] |
| 875 | ; GFX10-GISEL-NEXT: s_endpgm |
| 876 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 877 | %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| 878 | %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| 879 | %val = load i64, ptr addrspace(1) %in.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 880 | %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false) |
| 881 | %trunc = trunc i64 %cttz to i32 |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 882 | store i32 %trunc, ptr addrspace(1) %out.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 883 | ret void |
| 884 | } |
| 885 | |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 886 | define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 887 | ; SI-LABEL: v_cttz_i32_sel_eq_neg1: |
| 888 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 889 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 890 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 891 | ; SI-NEXT: s_mov_b32 s10, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 892 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 893 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 894 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 895 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 896 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 897 | ; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 898 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 899 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 900 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 901 | ; SI-NEXT: s_waitcnt vmcnt(0) |
| 902 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 903 | ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 904 | ; SI-NEXT: s_endpgm |
| 905 | ; |
| 906 | ; VI-LABEL: v_cttz_i32_sel_eq_neg1: |
| 907 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 908 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 909 | ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 910 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 911 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 912 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 913 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 914 | ; VI-NEXT: flat_load_dword v0, v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 915 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 916 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 917 | ; VI-NEXT: s_waitcnt vmcnt(0) |
| 918 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 919 | ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 920 | ; VI-NEXT: s_endpgm |
| 921 | ; |
| 922 | ; EG-LABEL: v_cttz_i32_sel_eq_neg1: |
| 923 | ; EG: ; %bb.0: |
| 924 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 925 | ; EG-NEXT: TEX 0 @6 |
| 926 | ; EG-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[] |
| 927 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| 928 | ; EG-NEXT: CF_END |
| 929 | ; EG-NEXT: PAD |
| 930 | ; EG-NEXT: Fetch clause starting at 6: |
| 931 | ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| 932 | ; EG-NEXT: ALU clause starting at 8: |
| 933 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 934 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 935 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, |
| 936 | ; EG-NEXT: ALU clause starting at 11: |
| 937 | ; EG-NEXT: FFBL_INT * T0.W, T0.X, |
| 938 | ; EG-NEXT: CNDE_INT * T0.W, T0.X, literal.x, PV.W, |
| 939 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 940 | ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, |
| 941 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 942 | ; EG-NEXT: -1(nan), 2(2.802597e-45) |
| 943 | ; |
| 944 | ; GFX10-LABEL: v_cttz_i32_sel_eq_neg1: |
| 945 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 946 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 947 | ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 948 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
| 949 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 950 | ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] |
| 951 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| 952 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 953 | ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] |
| 954 | ; GFX10-NEXT: s_endpgm |
| 955 | ; |
| 956 | ; GFX10-GISEL-LABEL: v_cttz_i32_sel_eq_neg1: |
| 957 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 958 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 959 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 960 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 961 | ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] |
| 962 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 963 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0 |
| 964 | ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 965 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 |
| 966 | ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 967 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 968 | ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] |
| 969 | ; GFX10-GISEL-NEXT: s_endpgm |
| 970 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 971 | %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid |
| 972 | %val = load i32, ptr addrspace(1) %in.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 973 | %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone |
| 974 | %cmp = icmp eq i32 %val, 0 |
| 975 | %sel = select i1 %cmp, i32 -1, i32 %cttz |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 976 | store i32 %sel, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 977 | ret void |
| 978 | } |
| 979 | |
; v_cttz_i32_sel_ne_neg1: each work-item loads the i32 at %valptr[%tid], calls
; llvm.cttz.i32 with its i1 flag set to false, and stores
; `%val != 0 ? %cttz : -1` to %out.
; Expected output per prefix below: SI, VI and GFX10 have only v_ffbl_b32
; between the load and the store (no compare/select); EG keeps two CNDE_INT
; ops (literals 32 and -1); GFX10-GISEL keeps v_min_u32 + v_cmp_ne_u32 +
; v_cndmask_b32.
; The `; <PREFIX>` lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 980 | define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 981 | ; SI-LABEL: v_cttz_i32_sel_ne_neg1: |
| 982 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 983 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 984 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 985 | ; SI-NEXT: s_mov_b32 s10, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 986 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 987 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 988 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 989 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 990 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 991 | ; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 992 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 993 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 994 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 995 | ; SI-NEXT: s_waitcnt vmcnt(0) |
| 996 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 997 | ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 998 | ; SI-NEXT: s_endpgm |
| 999 | ; |
| 1000 | ; VI-LABEL: v_cttz_i32_sel_ne_neg1: |
| 1001 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1002 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1003 | ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1004 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1005 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 1006 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1007 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1008 | ; VI-NEXT: flat_load_dword v0, v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1009 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 1010 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1011 | ; VI-NEXT: s_waitcnt vmcnt(0) |
| 1012 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1013 | ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1014 | ; VI-NEXT: s_endpgm |
| 1015 | ; |
| 1016 | ; EG-LABEL: v_cttz_i32_sel_ne_neg1: |
| 1017 | ; EG: ; %bb.0: |
| 1018 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 1019 | ; EG-NEXT: TEX 0 @6 |
| 1020 | ; EG-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[] |
| 1021 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| 1022 | ; EG-NEXT: CF_END |
| 1023 | ; EG-NEXT: PAD |
| 1024 | ; EG-NEXT: Fetch clause starting at 6: |
| 1025 | ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| 1026 | ; EG-NEXT: ALU clause starting at 8: |
| 1027 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 1028 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 1029 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, |
| 1030 | ; EG-NEXT: ALU clause starting at 11: |
| 1031 | ; EG-NEXT: FFBL_INT * T0.W, T0.X, |
| 1032 | ; EG-NEXT: CNDE_INT * T0.W, T0.X, literal.x, PV.W, |
| 1033 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 1034 | ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, |
| 1035 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 1036 | ; EG-NEXT: -1(nan), 2(2.802597e-45) |
| 1037 | ; |
| 1038 | ; GFX10-LABEL: v_cttz_i32_sel_ne_neg1: |
| 1039 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1040 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1041 | ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1042 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
| 1043 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 1044 | ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] |
| 1045 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| 1046 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1047 | ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] |
| 1048 | ; GFX10-NEXT: s_endpgm |
| 1049 | ; |
| 1050 | ; GFX10-GISEL-LABEL: v_cttz_i32_sel_ne_neg1: |
| 1051 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1052 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1053 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1054 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1055 | ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] |
| 1056 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1057 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1058 | ; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 1059 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1060 | ; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo |
| 1061 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 1062 | ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] |
| 1063 | ; GFX10-GISEL-NEXT: s_endpgm |
; IR under test: per-work-item load, cttz, then select on `%val != 0`.
| 1064 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1065 | %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid |
| 1066 | %val = load i32, ptr addrspace(1) %in.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1067 | %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone |
| 1068 | %cmp = icmp ne i32 %val, 0 |
| 1069 | %sel = select i1 %cmp, i32 %cttz, i32 -1 |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1070 | store i32 %sel, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1071 | ret void |
| 1072 | } |
| 1073 | |
| 1074 | ; TODO: Should be able to eliminate select here as well. |
; v_cttz_i32_sel_eq_bitwidth: each work-item loads the i32 at %valptr[%tid],
; calls llvm.cttz.i32 with its i1 flag set to false, and stores
; `%cttz == 32 ? -1 : %cttz` to %out. The compare is on the cttz result,
; not on the loaded value.
; Expected output per prefix below: every target still emits the clamp and the
; select. SI, VI and GFX10 use v_min_u32 + v_cmp_ne_u32 + v_cndmask_b32;
; GFX10-GISEL uses v_min_u32 + v_cmp_eq_u32 + v_cndmask_b32_e64; EG uses
; SETE_INT + CNDE_INT.
; The `; <PREFIX>` lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1075 | define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1076 | ; SI-LABEL: v_cttz_i32_sel_eq_bitwidth: |
| 1077 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1078 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 1079 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 1080 | ; SI-NEXT: s_mov_b32 s10, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1081 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1082 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1083 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1084 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1085 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 1086 | ; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 1087 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 1088 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 1089 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1090 | ; SI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1091 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1092 | ; SI-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1093 | ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 32, v0 |
| 1094 | ; SI-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1095 | ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1096 | ; SI-NEXT: s_endpgm |
| 1097 | ; |
| 1098 | ; VI-LABEL: v_cttz_i32_sel_eq_bitwidth: |
| 1099 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1100 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1101 | ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1102 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1103 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 1104 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1105 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1106 | ; VI-NEXT: flat_load_dword v0, v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1107 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 1108 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1109 | ; VI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1110 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1111 | ; VI-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1112 | ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 32, v0 |
| 1113 | ; VI-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1114 | ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1115 | ; VI-NEXT: s_endpgm |
| 1116 | ; |
| 1117 | ; EG-LABEL: v_cttz_i32_sel_eq_bitwidth: |
| 1118 | ; EG: ; %bb.0: |
| 1119 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 1120 | ; EG-NEXT: TEX 0 @6 |
| 1121 | ; EG-NEXT: ALU 7, @11, KC0[CB0:0-32], KC1[] |
| 1122 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| 1123 | ; EG-NEXT: CF_END |
| 1124 | ; EG-NEXT: PAD |
| 1125 | ; EG-NEXT: Fetch clause starting at 6: |
| 1126 | ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| 1127 | ; EG-NEXT: ALU clause starting at 8: |
| 1128 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 1129 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 1130 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, |
| 1131 | ; EG-NEXT: ALU clause starting at 11: |
| 1132 | ; EG-NEXT: FFBL_INT * T0.W, T0.X, |
| 1133 | ; EG-NEXT: CNDE_INT * T0.W, T0.X, literal.x, PV.W, |
| 1134 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 1135 | ; EG-NEXT: SETE_INT * T1.W, PV.W, literal.x, |
| 1136 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 1137 | ; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, literal.x, |
| 1138 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 1139 | ; EG-NEXT: -1(nan), 2(2.802597e-45) |
| 1140 | ; |
| 1141 | ; GFX10-LABEL: v_cttz_i32_sel_eq_bitwidth: |
| 1142 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1143 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1144 | ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1145 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1146 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 1147 | ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] |
| 1148 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1149 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1150 | ; GFX10-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1151 | ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0 |
| 1152 | ; GFX10-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo |
| 1153 | ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] |
| 1154 | ; GFX10-NEXT: s_endpgm |
| 1155 | ; |
| 1156 | ; GFX10-GISEL-LABEL: v_cttz_i32_sel_eq_bitwidth: |
| 1157 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1158 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1159 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 1160 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1161 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1162 | ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] |
| 1163 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 1164 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1165 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1166 | ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0 |
| 1167 | ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo |
| 1168 | ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] |
| 1169 | ; GFX10-GISEL-NEXT: s_endpgm |
; IR under test: per-work-item load, cttz, then select on `%cttz == 32`.
| 1170 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1171 | %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid |
| 1172 | %val = load i32, ptr addrspace(1) %in.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1173 | %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone |
| 1174 | %cmp = icmp eq i32 %cttz, 32 |
| 1175 | %sel = select i1 %cmp, i32 -1, i32 %cttz |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1176 | store i32 %sel, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1177 | ret void |
| 1178 | } |
| 1179 | |
; v_cttz_i32_sel_ne_bitwidth: each work-item loads the i32 at %valptr[%tid],
; calls llvm.cttz.i32 with its i1 flag set to false, and stores
; `%cttz != 32 ? %cttz : -1` to %out. The compare is on the cttz result,
; not on the loaded value.
; Expected output per prefix below: every target still emits the clamp and the
; select. SI, VI, GFX10 and GFX10-GISEL all use v_min_u32 + v_cmp_ne_u32 +
; v_cndmask_b32; EG uses SETNE_INT + CNDE_INT.
; The `; <PREFIX>` lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1180 | define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1181 | ; SI-LABEL: v_cttz_i32_sel_ne_bitwidth: |
| 1182 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1183 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 1184 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 1185 | ; SI-NEXT: s_mov_b32 s10, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1186 | ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1187 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1188 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1189 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1190 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 1191 | ; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 1192 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 1193 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 1194 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1195 | ; SI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1196 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1197 | ; SI-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1198 | ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 32, v0 |
| 1199 | ; SI-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1200 | ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1201 | ; SI-NEXT: s_endpgm |
| 1202 | ; |
| 1203 | ; VI-LABEL: v_cttz_i32_sel_ne_bitwidth: |
| 1204 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1205 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1206 | ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1207 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1208 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 1209 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1210 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1211 | ; VI-NEXT: flat_load_dword v0, v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1212 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 1213 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1214 | ; VI-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1215 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1216 | ; VI-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1217 | ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 32, v0 |
| 1218 | ; VI-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1219 | ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1220 | ; VI-NEXT: s_endpgm |
| 1221 | ; |
| 1222 | ; EG-LABEL: v_cttz_i32_sel_ne_bitwidth: |
| 1223 | ; EG: ; %bb.0: |
| 1224 | ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] |
| 1225 | ; EG-NEXT: TEX 0 @6 |
| 1226 | ; EG-NEXT: ALU 7, @11, KC0[CB0:0-32], KC1[] |
| 1227 | ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| 1228 | ; EG-NEXT: CF_END |
| 1229 | ; EG-NEXT: PAD |
| 1230 | ; EG-NEXT: Fetch clause starting at 6: |
| 1231 | ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| 1232 | ; EG-NEXT: ALU clause starting at 8: |
| 1233 | ; EG-NEXT: LSHL * T0.W, T0.X, literal.x, |
| 1234 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 1235 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, |
| 1236 | ; EG-NEXT: ALU clause starting at 11: |
| 1237 | ; EG-NEXT: FFBL_INT * T0.W, T0.X, |
| 1238 | ; EG-NEXT: CNDE_INT * T0.W, T0.X, literal.x, PV.W, |
| 1239 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 1240 | ; EG-NEXT: SETNE_INT * T1.W, PV.W, literal.x, |
| 1241 | ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) |
| 1242 | ; EG-NEXT: CNDE_INT T0.X, PV.W, literal.x, T0.W, |
| 1243 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 1244 | ; EG-NEXT: -1(nan), 2(2.802597e-45) |
| 1245 | ; |
| 1246 | ; GFX10-LABEL: v_cttz_i32_sel_ne_bitwidth: |
| 1247 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1248 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1249 | ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1250 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1251 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 1252 | ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] |
| 1253 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1254 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1255 | ; GFX10-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1256 | ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0 |
| 1257 | ; GFX10-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo |
| 1258 | ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] |
| 1259 | ; GFX10-NEXT: s_endpgm |
| 1260 | ; |
| 1261 | ; GFX10-GISEL-LABEL: v_cttz_i32_sel_ne_bitwidth: |
| 1262 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1263 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1264 | ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 1265 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1266 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1267 | ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] |
| 1268 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 83610d4 | 2021-08-03 17:11:08 +0100 | [diff] [blame] | 1269 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1270 | ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1271 | ; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0 |
| 1272 | ; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo |
| 1273 | ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] |
| 1274 | ; GFX10-GISEL-NEXT: s_endpgm |
; IR under test: per-work-item load, cttz, then select on `%cttz != 32`.
| 1275 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1276 | %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid |
| 1277 | %val = load i32, ptr addrspace(1) %in.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1278 | %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone |
| 1279 | %cmp = icmp ne i32 %cttz, 32 |
| 1280 | %sel = select i1 %cmp, i32 %cttz, i32 -1 |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1281 | store i32 %sel, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1282 | ret void |
| 1283 | } |
| 1284 | |
; v_cttz_i8_sel_eq_neg1: i8 variant. Each work-item loads the byte at
; %valptr + %tid, calls llvm.cttz.i8 with its i1 flag set to false, and stores
; `%val == 0 ? -1 : %cttz` to %out as an i8.
; Expected output per prefix below: SI, VI and GFX10 are a ubyte load, a bare
; v_ffbl_b32 and a byte store (no compare/select); EG uses FFBL_INT followed
; by the mask/shift sequence that feeds MEM_RAT MSKOR; GFX10-GISEL ORs 0x100
; into the loaded value before v_ffbl_b32 and keeps an SDWA byte compare plus
; v_cndmask_b32.
; The `; <PREFIX>` lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1285 | define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1286 | ; SI-LABEL: v_cttz_i8_sel_eq_neg1: |
| 1287 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1288 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 1289 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1290 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1291 | ; SI-NEXT: s_mov_b32 s10, 0 |
| 1292 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1293 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1294 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 1295 | ; SI-NEXT: buffer_load_ubyte v0, v[0:1], s[8:11], 0 addr64 |
| 1296 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 1297 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 1298 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1299 | ; SI-NEXT: s_waitcnt vmcnt(0) |
| 1300 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1301 | ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1302 | ; SI-NEXT: s_endpgm |
| 1303 | ; |
| 1304 | ; VI-LABEL: v_cttz_i8_sel_eq_neg1: |
| 1305 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1306 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1307 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1308 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 1309 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1310 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1311 | ; VI-NEXT: flat_load_ubyte v0, v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1312 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 1313 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1314 | ; VI-NEXT: s_waitcnt vmcnt(0) |
| 1315 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1316 | ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1317 | ; VI-NEXT: s_endpgm |
| 1318 | ; |
| 1319 | ; EG-LABEL: v_cttz_i8_sel_eq_neg1: |
| 1320 | ; EG: ; %bb.0: |
| 1321 | ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] |
| 1322 | ; EG-NEXT: TEX 0 @6 |
| 1323 | ; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] |
| 1324 | ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X |
| 1325 | ; EG-NEXT: CF_END |
| 1326 | ; EG-NEXT: PAD |
| 1327 | ; EG-NEXT: Fetch clause starting at 6: |
| 1328 | ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 |
| 1329 | ; EG-NEXT: ALU clause starting at 8: |
| 1330 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, T0.X, |
| 1331 | ; EG-NEXT: ALU clause starting at 9: |
| 1332 | ; EG-NEXT: FFBL_INT T0.W, T0.X, |
| 1333 | ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x, |
| 1334 | ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) |
| 1335 | ; EG-NEXT: AND_INT T0.W, PV.W, literal.x, |
| 1336 | ; EG-NEXT: LSHL * T1.W, PS, literal.y, |
| 1337 | ; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45) |
| 1338 | ; EG-NEXT: LSHL T0.X, PV.W, PS, |
| 1339 | ; EG-NEXT: LSHL * T0.W, literal.x, PS, |
| 1340 | ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) |
| 1341 | ; EG-NEXT: MOV T0.Y, 0.0, |
| 1342 | ; EG-NEXT: MOV * T0.Z, 0.0, |
| 1343 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| 1344 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 1345 | ; |
| 1346 | ; GFX10-LABEL: v_cttz_i8_sel_eq_neg1: |
| 1347 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1348 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1349 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1350 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 1351 | ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] |
| 1352 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| 1353 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1354 | ; GFX10-NEXT: global_store_byte v1, v0, s[0:1] |
| 1355 | ; GFX10-NEXT: s_endpgm |
| 1356 | ; |
| 1357 | ; GFX10-GISEL-LABEL: v_cttz_i8_sel_eq_neg1: |
| 1358 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1359 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1360 | ; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1361 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1362 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s2 |
| 1363 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s3 |
| 1364 | ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| 1365 | ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v2, v3, vcc_lo |
Jay Foad | 57b9107 | 2021-08-06 11:05:42 +0100 | [diff] [blame] | 1366 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1367 | ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off |
| 1368 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1369 | ; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v0 |
Jay Foad | 57b9107 | 2021-08-06 11:05:42 +0100 | [diff] [blame] | 1370 | ; GFX10-GISEL-NEXT: v_cmp_eq_u32_sdwa s2, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1371 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 |
Jay Foad | 57b9107 | 2021-08-06 11:05:42 +0100 | [diff] [blame] | 1372 | ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, s2 |
| 1373 | ; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[0:1] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1374 | ; GFX10-GISEL-NEXT: s_endpgm |
; IR under test: per-work-item i8 load, cttz.i8, then select on `%val == 0`.
| 1375 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1376 | %valptr.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid |
| 1377 | %val = load i8, ptr addrspace(1) %valptr.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1378 | %cttz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone |
| 1379 | %cmp = icmp eq i8 %val, 0 |
| 1380 | %sel = select i1 %cmp, i8 -1, i8 %cttz |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1381 | store i8 %sel, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1382 | ret void |
| 1383 | } |
| 1384 | |
; Test: cttz of an i16 loaded directly from %valptr (no per-workitem GEP), with
; is_zero_undef = false, followed by a select that yields -1 when the input is 0.
; Per the checks below: SI emits only v_ffbl_b32 between the load and the store
; (no compare/select), while VI, GFX10 and GFX10-GISEL OR 0x10000 into the
; value before v_ffbl_b32 and keep a compare + v_cndmask that supplies 0xffff
; for a zero input.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1385 | define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1386 | ; SI-LABEL: v_cttz_i16_sel_eq_neg1: |
| 1387 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1388 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 1389 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 1390 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 1391 | ; SI-NEXT: s_mov_b32 s10, s6 |
| 1392 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1393 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1394 | ; SI-NEXT: s_mov_b32 s8, s2 |
| 1395 | ; SI-NEXT: s_mov_b32 s9, s3 |
| 1396 | ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 |
| 1397 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 1398 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1399 | ; SI-NEXT: s_waitcnt vmcnt(0) |
| 1400 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1401 | ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1402 | ; SI-NEXT: s_endpgm |
| 1403 | ; |
| 1404 | ; VI-LABEL: v_cttz_i16_sel_eq_neg1: |
| 1405 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1406 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| 1407 | ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| 1408 | ; VI-NEXT: s_mov_b32 s6, -1 |
| 1409 | ; VI-NEXT: s_mov_b32 s10, s6 |
| 1410 | ; VI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1411 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1412 | ; VI-NEXT: s_mov_b32 s8, s2 |
| 1413 | ; VI-NEXT: s_mov_b32 s9, s3 |
| 1414 | ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1415 | ; VI-NEXT: v_mov_b32_e32 v1, 0xffff |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1416 | ; VI-NEXT: s_mov_b32 s4, s0 |
| 1417 | ; VI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1418 | ; VI-NEXT: s_waitcnt vmcnt(0) |
| 1419 | ; VI-NEXT: v_or_b32_e32 v2, 0x10000, v0 |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1420 | ; VI-NEXT: v_ffbl_b32_e32 v2, v2 |
| 1421 | ; VI-NEXT: v_min_u32_e32 v2, 32, v2 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1422 | ; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 |
| 1423 | ; VI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1424 | ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1425 | ; VI-NEXT: s_endpgm |
| 1426 | ; |
| 1427 | ; EG-LABEL: v_cttz_i16_sel_eq_neg1: |
| 1428 | ; EG: ; %bb.0: |
| 1429 | ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] |
| 1430 | ; EG-NEXT: TEX 0 @6 |
| 1431 | ; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] |
| 1432 | ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X |
| 1433 | ; EG-NEXT: CF_END |
| 1434 | ; EG-NEXT: PAD |
| 1435 | ; EG-NEXT: Fetch clause starting at 6: |
| 1436 | ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 |
| 1437 | ; EG-NEXT: ALU clause starting at 8: |
| 1438 | ; EG-NEXT: MOV * T0.X, KC0[2].Z, |
| 1439 | ; EG-NEXT: ALU clause starting at 9: |
| 1440 | ; EG-NEXT: FFBL_INT T0.W, T0.X, |
| 1441 | ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x, |
| 1442 | ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) |
| 1443 | ; EG-NEXT: AND_INT T0.W, PV.W, literal.x, |
| 1444 | ; EG-NEXT: LSHL * T1.W, PS, literal.y, |
| 1445 | ; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) |
| 1446 | ; EG-NEXT: LSHL T0.X, PV.W, PS, |
| 1447 | ; EG-NEXT: LSHL * T0.W, literal.x, PS, |
| 1448 | ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) |
| 1449 | ; EG-NEXT: MOV T0.Y, 0.0, |
| 1450 | ; EG-NEXT: MOV * T0.Z, 0.0, |
| 1451 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| 1452 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 1453 | ; |
| 1454 | ; GFX10-LABEL: v_cttz_i16_sel_eq_neg1: |
| 1455 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1456 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1457 | ; GFX10-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1458 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 1459 | ; GFX10-NEXT: global_load_ushort v1, v0, s[2:3] |
| 1460 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| 1461 | ; GFX10-NEXT: v_or_b32_e32 v2, 0x10000, v1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1462 | ; GFX10-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v1 |
Jay Foad | 2b63933 | 2021-08-05 14:32:25 +0100 | [diff] [blame] | 1463 | ; GFX10-NEXT: v_ffbl_b32_e32 v2, v2 |
| 1464 | ; GFX10-NEXT: v_min_u32_e32 v2, 32, v2 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1465 | ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo |
| 1466 | ; GFX10-NEXT: global_store_short v0, v1, s[0:1] |
| 1467 | ; GFX10-NEXT: s_endpgm |
| 1468 | ; |
| 1469 | ; GFX10-GISEL-LABEL: v_cttz_i16_sel_eq_neg1: |
| 1470 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1471 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1472 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1473 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1474 | ; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3] |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1475 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1476 | ; GFX10-GISEL-NEXT: v_or_b32_e32 v2, 0x10000, v1 |
| 1477 | ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 |
| 1478 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v2 |
Jay Foad | 3eb2281 | 2022-05-16 15:48:11 +0100 | [diff] [blame] | 1479 | ; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
| 1480 | ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1481 | ; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1] |
| 1482 | ; GFX10-GISEL-NEXT: s_endpgm |
; The load address is the %valptr kernel argument itself; no workitem-id offset
; is applied in this test.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1483 | %val = load i16, ptr addrspace(1) %valptr |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1484 | %cttz = call i16 @llvm.cttz.i16(i16 %val, i1 false) nounwind readnone |
; Replace the cttz result with -1 (0xffff as i16) when the input is zero.
| 1485 | %cmp = icmp eq i16 %val, 0 |
| 1486 | %sel = select i1 %cmp, i16 -1, i16 %cttz |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1487 | store i16 %sel, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1488 | ret void |
| 1489 | } |
| 1490 | |
| 1491 | ; FIXME: Need to handle non-uniform case for function below (load without gep). |
; Test: cttz of an i7 loaded through a GEP indexed by the workitem id, with
; is_zero_undef = false, followed by a select that yields -1 when the input is 0.
; Per the checks below: SI, VI and GFX10 emit v_ffbl_b32 followed by a mask
; with 0x7f and no compare/select; GFX10-GISEL ORs 0x80 into the value before
; v_ffbl_b32 and keeps a compare + v_cndmask that supplies 0x7f for a zero
; input.
; NOTE(review): the FIXME preceding this function mentions "load without gep",
; but this function loads via %valptr.gep -- confirm the FIXME still applies.
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1492 | define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind { |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1493 | ; SI-LABEL: v_cttz_i7_sel_eq_neg1: |
| 1494 | ; SI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1495 | ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 1496 | ; SI-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1497 | ; SI-NEXT: v_mov_b32_e32 v1, 0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1498 | ; SI-NEXT: s_mov_b32 s10, 0 |
| 1499 | ; SI-NEXT: s_mov_b32 s11, s7 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1500 | ; SI-NEXT: s_waitcnt lgkmcnt(0) |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1501 | ; SI-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 1502 | ; SI-NEXT: buffer_load_ubyte v0, v[0:1], s[8:11], 0 addr64 |
| 1503 | ; SI-NEXT: s_mov_b32 s6, -1 |
| 1504 | ; SI-NEXT: s_mov_b32 s4, s0 |
| 1505 | ; SI-NEXT: s_mov_b32 s5, s1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1506 | ; SI-NEXT: s_waitcnt vmcnt(0) |
| 1507 | ; SI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1508 | ; SI-NEXT: v_and_b32_e32 v0, 0x7f, v0 |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1509 | ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1510 | ; SI-NEXT: s_endpgm |
| 1511 | ; |
| 1512 | ; VI-LABEL: v_cttz_i7_sel_eq_neg1: |
| 1513 | ; VI: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1514 | ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1515 | ; VI-NEXT: s_waitcnt lgkmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1516 | ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| 1517 | ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1518 | ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1519 | ; VI-NEXT: flat_load_ubyte v0, v[0:1] |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1520 | ; VI-NEXT: s_mov_b32 s3, 0xf000 |
| 1521 | ; VI-NEXT: s_mov_b32 s2, -1 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1522 | ; VI-NEXT: s_waitcnt vmcnt(0) |
| 1523 | ; VI-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1524 | ; VI-NEXT: v_and_b32_e32 v0, 0x7f, v0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1525 | ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1526 | ; VI-NEXT: s_endpgm |
| 1527 | ; |
| 1528 | ; EG-LABEL: v_cttz_i7_sel_eq_neg1: |
| 1529 | ; EG: ; %bb.0: |
| 1530 | ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] |
| 1531 | ; EG-NEXT: TEX 0 @6 |
| 1532 | ; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] |
| 1533 | ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X |
| 1534 | ; EG-NEXT: CF_END |
| 1535 | ; EG-NEXT: PAD |
| 1536 | ; EG-NEXT: Fetch clause starting at 6: |
| 1537 | ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 |
| 1538 | ; EG-NEXT: ALU clause starting at 8: |
| 1539 | ; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, T0.X, |
| 1540 | ; EG-NEXT: ALU clause starting at 9: |
| 1541 | ; EG-NEXT: FFBL_INT T0.W, T0.X, |
| 1542 | ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x, |
| 1543 | ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) |
| 1544 | ; EG-NEXT: AND_INT T0.W, PV.W, literal.x, |
| 1545 | ; EG-NEXT: LSHL * T1.W, PS, literal.y, |
| 1546 | ; EG-NEXT: 127(1.779649e-43), 3(4.203895e-45) |
| 1547 | ; EG-NEXT: LSHL T0.X, PV.W, PS, |
| 1548 | ; EG-NEXT: LSHL * T0.W, literal.x, PS, |
| 1549 | ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) |
| 1550 | ; EG-NEXT: MOV T0.Y, 0.0, |
| 1551 | ; EG-NEXT: MOV * T0.Z, 0.0, |
| 1552 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| 1553 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 1554 | ; |
| 1555 | ; GFX10-LABEL: v_cttz_i7_sel_eq_neg1: |
| 1556 | ; GFX10: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1557 | ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1558 | ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1559 | ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 1560 | ; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] |
| 1561 | ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| 1562 | ; GFX10-NEXT: v_ffbl_b32_e32 v0, v0 |
| 1563 | ; GFX10-NEXT: v_and_b32_e32 v0, 0x7f, v0 |
| 1564 | ; GFX10-NEXT: global_store_byte v1, v0, s[0:1] |
| 1565 | ; GFX10-NEXT: s_endpgm |
| 1566 | ; |
| 1567 | ; GFX10-GISEL-LABEL: v_cttz_i7_sel_eq_neg1: |
| 1568 | ; GFX10-GISEL: ; %bb.0: |
Carl Ritson | 4c4db81 | 2022-07-30 11:13:20 +0900 | [diff] [blame] | 1569 | ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1570 | ; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1571 | ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1572 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s2 |
| 1573 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s3 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1574 | ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| 1575 | ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v2, v3, vcc_lo |
| 1576 | ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off |
| 1577 | ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 57b9107 | 2021-08-06 11:05:42 +0100 | [diff] [blame] | 1578 | ; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x80, v0 |
Jay Foad | 3eb2281 | 2022-05-16 15:48:11 +0100 | [diff] [blame] | 1579 | ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1580 | ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 |
| 1581 | ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| 1582 | ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo |
| 1583 | ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | 3eb2281 | 2022-05-16 15:48:11 +0100 | [diff] [blame] | 1584 | ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1585 | ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1] |
| 1586 | ; GFX10-GISEL-NEXT: s_endpgm |
; The load address is %valptr offset by the workitem id, so each workitem reads
; its own i7 element; the store below goes to %out with no such offset.
| 1587 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1588 | %valptr.gep = getelementptr i7, ptr addrspace(1) %valptr, i32 %tid |
| 1589 | %val = load i7, ptr addrspace(1) %valptr.gep |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1590 | %cttz = call i7 @llvm.cttz.i7(i7 %val, i1 false) nounwind readnone |
; Replace the cttz result with -1 (0x7f as i7) when the input is zero.
| 1591 | %cmp = icmp eq i7 %val, 0 |
| 1592 | %sel = select i1 %cmp, i7 -1, i7 %cttz |
Matt Arsenault | b5bc205 | 2022-11-29 18:26:06 -0500 | [diff] [blame] | 1593 | store i7 %sel, ptr addrspace(1) %out |
Jay Foad | ba5c4ac | 2021-08-03 17:13:02 +0100 | [diff] [blame] | 1594 | ret void |
| 1595 | } |