; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=GFX7
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030

; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030

; f64 raw-pointer buffer atomic intrinsics exercised by the functions below.
; Both take the data value, a ptr addrspace(8) buffer resource, two i32
; operands and a trailing i32 immarg, and return a double.
declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg)


; fmin with the result unused, in an amdgpu_kernel: the call passes 0 for both
; trailing i32 operands and %ret is never read. Every prefix expects the
; arguments to be loaded with s_load_* and a buffer_atomic_fmin_x2 without glc.
define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT: s_load_dword s8, s[4:5], 0xf
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s6
; SI-NEXT: v_mov_b32_e32 v1, s7
; SI-NEXT: v_mov_b32_e32 v2, s8
; SI-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; SI-NEXT: s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT: s_load_dword s8, s[4:5], 0xf
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: v_mov_b32_e32 v2, s8
; GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; GFX7-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT: s_load_dword s8, s[4:5], 0x3c
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s6
; GFX10-NEXT: v_mov_b32_e32 v1, s7
; GFX10-NEXT: v_mov_b32_e32 v2, s8
; GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_clause 0x2
; GFX1030-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT: s_load_dword s8, s[4:5], 0x3c
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
; GFX1030-NEXT: v_mov_b32_e32 v1, s7
; GFX1030-NEXT: v_mov_b32_e32 v2, s8
; GFX1030-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; GFX1030-NEXT: s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT: s_load_dword s8, s[4:5], 0xf
; G_SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT: s_waitcnt lgkmcnt(0)
; G_SI-NEXT: v_mov_b32_e32 v0, s6
; G_SI-NEXT: v_mov_b32_e32 v1, s7
; G_SI-NEXT: v_mov_b32_e32 v2, s8
; G_SI-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_SI-NEXT: s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT: s_load_dword s8, s[4:5], 0xf
; G_GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT: v_mov_b32_e32 v0, s6
; G_GFX7-NEXT: v_mov_b32_e32 v1, s7
; G_GFX7-NEXT: v_mov_b32_e32 v2, s8
; G_GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX7-NEXT: s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: s_clause 0x2
; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT: s_load_dword s8, s[4:5], 0x3c
; G_GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v0, s6
; G_GFX10-NEXT: v_mov_b32_e32 v1, s7
; G_GFX10-NEXT: v_mov_b32_e32 v2, s8
; G_GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: s_clause 0x2
; G_GFX1030-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT: s_load_dword s8, s[4:5], 0x3c
; G_GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT: v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT: v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT: v_mov_b32_e32 v2, s8
; G_GFX1030-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX1030-NEXT: s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmin with the result used, in an amdgpu_ps shader: the returned double is
; stored to LDS (addrspace 3), so every prefix expects the glc form of
; buffer_atomic_fmin_x2 followed by a vmcnt wait and a ds_write_b64.
; NOTE(review): the store address is `undef`; the expected ds_write_b64 uses
; v0 as its address register. Switching to `poison` would need the checks
; regenerated -- confirm before changing.
define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: ds_write_b64 v0, v[0:1]
; SI-NEXT: s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ds_write_b64 v0, v[0:1]
; GFX7-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ds_write_b64 v0, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: ds_write_b64 v0, v[0:1]
; GFX1030-NEXT: s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_SI-NEXT: s_mov_b32 m0, -1
; G_SI-NEXT: s_waitcnt vmcnt(0)
; G_SI-NEXT: ds_write_b64 v0, v[0:1]
; G_SI-NEXT: s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX7-NEXT: s_mov_b32 m0, -1
; G_GFX7-NEXT: s_waitcnt vmcnt(0)
; G_GFX7-NEXT: ds_write_b64 v0, v[0:1]
; G_GFX7-NEXT: s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX10-NEXT: s_waitcnt vmcnt(0)
; G_GFX10-NEXT: ds_write_b64 v0, v[0:1]
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX1030-NEXT: s_waitcnt vmcnt(0)
; G_GFX1030-NEXT: ds_write_b64 v0, v[0:1]
; G_GFX1030-NEXT: s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, ptr addrspace(3) undef
  ret void
}

; fmin with the result used, a non-zero fourth operand and a non-zero immarg:
; the call passes i32 4 and i32 2, and every prefix expects
; `4 offen glc slc` on the buffer_atomic_fmin_x2. The result is stored to the
; LDS pointer %out, which the checks show arriving in v3.
define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: ds_write_b64 v3, v[0:1]
; SI-NEXT: s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ds_write_b64 v3, v[0:1]
; GFX7-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ds_write_b64 v3, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: ds_write_b64 v3, v[0:1]
; GFX1030-NEXT: s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_SI-NEXT: s_mov_b32 m0, -1
; G_SI-NEXT: s_waitcnt vmcnt(0)
; G_SI-NEXT: ds_write_b64 v3, v[0:1]
; G_SI-NEXT: s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX7-NEXT: s_mov_b32 m0, -1
; G_GFX7-NEXT: s_waitcnt vmcnt(0)
; G_GFX7-NEXT: ds_write_b64 v3, v[0:1]
; G_GFX7-NEXT: s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX10-NEXT: s_waitcnt vmcnt(0)
; G_GFX10-NEXT: ds_write_b64 v3, v[0:1]
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT: s_waitcnt vmcnt(0)
; G_GFX1030-NEXT: ds_write_b64 v3, v[0:1]
; G_GFX1030-NEXT: s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, ptr addrspace(3) %out, align 8
  ret void
}

; fmax with the result unused, in an amdgpu_kernel: the call passes 0 for both
; trailing i32 operands and %ret is never read. Every prefix expects the
; arguments to be loaded with s_load_* and a buffer_atomic_fmax_x2 without glc.
define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT: s_load_dword s8, s[4:5], 0xf
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s6
; SI-NEXT: v_mov_b32_e32 v1, s7
; SI-NEXT: v_mov_b32_e32 v2, s8
; SI-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; SI-NEXT: s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT: s_load_dword s8, s[4:5], 0xf
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: v_mov_b32_e32 v2, s8
; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; GFX7-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT: s_load_dword s8, s[4:5], 0x3c
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s6
; GFX10-NEXT: v_mov_b32_e32 v1, s7
; GFX10-NEXT: v_mov_b32_e32 v2, s8
; GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_clause 0x2
; GFX1030-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT: s_load_dword s8, s[4:5], 0x3c
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
; GFX1030-NEXT: v_mov_b32_e32 v1, s7
; GFX1030-NEXT: v_mov_b32_e32 v2, s8
; GFX1030-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; GFX1030-NEXT: s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT: s_load_dword s8, s[4:5], 0xf
; G_SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT: s_waitcnt lgkmcnt(0)
; G_SI-NEXT: v_mov_b32_e32 v0, s6
; G_SI-NEXT: v_mov_b32_e32 v1, s7
; G_SI-NEXT: v_mov_b32_e32 v2, s8
; G_SI-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_SI-NEXT: s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT: s_load_dword s8, s[4:5], 0xf
; G_GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT: v_mov_b32_e32 v0, s6
; G_GFX7-NEXT: v_mov_b32_e32 v1, s7
; G_GFX7-NEXT: v_mov_b32_e32 v2, s8
; G_GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX7-NEXT: s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: s_clause 0x2
; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT: s_load_dword s8, s[4:5], 0x3c
; G_GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v0, s6
; G_GFX10-NEXT: v_mov_b32_e32 v1, s7
; G_GFX10-NEXT: v_mov_b32_e32 v2, s8
; G_GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: s_clause 0x2
; G_GFX1030-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT: s_load_dword s8, s[4:5], 0x3c
; G_GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT: v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT: v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT: v_mov_b32_e32 v2, s8
; G_GFX1030-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX1030-NEXT: s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmax with the result used, in an amdgpu_ps shader: the returned double is
; stored to LDS (addrspace 3), so every prefix expects the glc form of
; buffer_atomic_fmax_x2 followed by a vmcnt wait and a ds_write_b64.
; NOTE(review): the store address is `undef`; the expected ds_write_b64 uses
; v0 as its address register. Switching to `poison` would need the checks
; regenerated -- confirm before changing.
define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: ds_write_b64 v0, v[0:1]
; SI-NEXT: s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ds_write_b64 v0, v[0:1]
; GFX7-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ds_write_b64 v0, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: ds_write_b64 v0, v[0:1]
; GFX1030-NEXT: s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_SI-NEXT: s_mov_b32 m0, -1
; G_SI-NEXT: s_waitcnt vmcnt(0)
; G_SI-NEXT: ds_write_b64 v0, v[0:1]
; G_SI-NEXT: s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX7-NEXT: s_mov_b32 m0, -1
; G_GFX7-NEXT: s_waitcnt vmcnt(0)
; G_GFX7-NEXT: ds_write_b64 v0, v[0:1]
; G_GFX7-NEXT: s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX10-NEXT: s_waitcnt vmcnt(0)
; G_GFX10-NEXT: ds_write_b64 v0, v[0:1]
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX1030-NEXT: s_waitcnt vmcnt(0)
; G_GFX1030-NEXT: ds_write_b64 v0, v[0:1]
; G_GFX1030-NEXT: s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, ptr addrspace(3) undef
  ret void
}

; fmax with the result used, a non-zero fourth operand and a non-zero immarg:
; the call passes i32 4 and i32 2, and every prefix expects
; `4 offen glc slc` on the buffer_atomic_fmax_x2. The result is stored to the
; LDS pointer %out.
; Unlike the fmin counterpart (amdgpu_ps), this one is an amdgpu_kernel, so
; the checks also cover loading all arguments with a single s_load_dwordx8.
define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: v_mov_b32_e32 v2, s6
; SI-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; SI-NEXT: v_mov_b32_e32 v2, s7
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: ds_write_b64 v2, v[0:1]
; SI-NEXT: s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s6
; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX7-NEXT: v_mov_b32_e32 v2, s7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ds_write_b64 v2, v[0:1]
; GFX7-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s12
; GFX10-NEXT: v_mov_b32_e32 v1, s13
; GFX10-NEXT: v_mov_b32_e32 v2, s14
; GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[8:11], 4 offen glc slc
; GFX10-NEXT: v_mov_b32_e32 v2, s15
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v0, s4
; GFX1030-NEXT: v_mov_b32_e32 v1, s5
; GFX1030-NEXT: v_mov_b32_e32 v2, s6
; GFX1030-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX1030-NEXT: v_mov_b32_e32 v2, s7
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: ds_write_b64 v2, v[0:1]
; GFX1030-NEXT: s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_SI-NEXT: s_mov_b32 m0, -1
; G_SI-NEXT: s_waitcnt lgkmcnt(0)
; G_SI-NEXT: v_mov_b32_e32 v0, s4
; G_SI-NEXT: v_mov_b32_e32 v1, s5
; G_SI-NEXT: v_mov_b32_e32 v2, s6
; G_SI-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_SI-NEXT: v_mov_b32_e32 v2, s7
; G_SI-NEXT: s_waitcnt vmcnt(0)
; G_SI-NEXT: ds_write_b64 v2, v[0:1]
; G_SI-NEXT: s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_GFX7-NEXT: s_mov_b32 m0, -1
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT: v_mov_b32_e32 v0, s4
; G_GFX7-NEXT: v_mov_b32_e32 v1, s5
; G_GFX7-NEXT: v_mov_b32_e32 v2, s6
; G_GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX7-NEXT: v_mov_b32_e32 v2, s7
; G_GFX7-NEXT: s_waitcnt vmcnt(0)
; G_GFX7-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX7-NEXT: s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v0, s12
; G_GFX10-NEXT: v_mov_b32_e32 v1, s13
; G_GFX10-NEXT: v_mov_b32_e32 v2, s14
; G_GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[8:11], 4 offen glc slc
; G_GFX10-NEXT: v_mov_b32_e32 v2, s15
; G_GFX10-NEXT: s_waitcnt vmcnt(0)
; G_GFX10-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT: v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT: v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT: v_mov_b32_e32 v2, s6
; G_GFX1030-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT: v_mov_b32_e32 v2, s7
; G_GFX1030-NEXT: s_waitcnt vmcnt(0)
; G_GFX1030-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX1030-NEXT: s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, ptr addrspace(3) %out, align 8
  ret void
}