; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=GFX7
; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
; RUN: llc < %s -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030

; RUN: llc < %s -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
; RUN: llc < %s -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030

; f64 raw-buffer atomic min/max intrinsics called by every function in this test.
; The call sites pass, in order: the f64 data value, the <4 x i32> buffer
; resource, two i32 offset operands, and a trailing immediate i32.
declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg)
declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg)

; f64 buffer fmin whose result is discarded, in an amdgpu_kernel function.
; The expected code fetches the arguments with scalar loads from s[0:1], copies
; data and offset into VGPRs, and issues the atomic without glc (%ret is unused).
; Despite its name, %vindex feeds the intrinsic's third operand, which the
; expected output shows being used as an `offen` VGPR offset.
define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_min_noret_f64:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; SI-NEXT:    s_load_dword s6, s[0:1], 0xf
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_mov_b32_e32 v2, s6
; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_noret_f64:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xf
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_noret_f64:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    v_mov_b32_e32 v2, s8
; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[4:7], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_noret_f64:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x2
; GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; GFX1030-NEXT:    s_load_dword s6, s[0:1], 0x3c
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    v_mov_b32_e32 v2, s6
; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_noret_f64:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; G_SI-NEXT:    s_load_dword s6, s[0:1], 0xf
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    v_mov_b32_e32 v2, s6
; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_noret_f64:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; G_GFX7-NEXT:    s_load_dword s6, s[0:1], 0xf
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_noret_f64:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x2
; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX10-NEXT:    s_load_dword s8, s[0:1], 0x3c
; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX10-NEXT:    v_mov_b32_e32 v2, s8
; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[4:7], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_noret_f64:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x2
; G_GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; G_GFX1030-NEXT:    s_load_dword s6, s[0:1], 0x3c
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; f64 buffer fmin whose result is consumed, in an amdgpu_ps function.
; The expected output uses the arguments directly from s[0:3] / v[0:2] with no
; loads. Because %ret is stored (through an undef addrspace(3) pointer, seen as
; ds_write_b64), the atomic is expected with glc followed by a vmcnt wait.
; %vindex feeds the operand that the expected output uses as the `offen` offset.
define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_min_rtn_f64:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b64 v0, v[0:1]
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_rtn_f64:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b64 v0, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_rtn_f64:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b64 v0, v[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f64:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f64:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b64 v0, v[0:1]
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f64:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f64:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f64:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, ptr addrspace(3) undef
  ret void
}

; Returning f64 buffer fmin with non-zero trailing operands (i32 4, i32 2), in
; an amdgpu_ps function. In the expected output the 4 appears as the offset
; printed after the resource registers, and the final immediate 2 appears as
; the slc modifier next to glc. The result is written to %out (v3) with
; ds_write_b64. %vindex feeds the operand used as the `offen` offset.
define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b64 v3, v[0:1]
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b64 v3, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b64 v3, v[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b64 v3, v[0:1]
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b64 v3, v[0:1]
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b64 v3, v[0:1]
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b64 v3, v[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b64 v3, v[0:1]
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, ptr addrspace(3) %out, align 8
  ret void
}

; f64 buffer fmax whose result is discarded, in an amdgpu_kernel function.
; The expected code fetches the arguments with scalar loads from s[0:1], copies
; data and offset into VGPRs, and issues the atomic without glc (%ret is unused).
; Despite its name, %vindex feeds the intrinsic's third operand, which the
; expected output shows being used as an `offen` VGPR offset.
define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_max_noret_f64:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; SI-NEXT:    s_load_dword s6, s[0:1], 0xf
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_mov_b32_e32 v2, s6
; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_noret_f64:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xf
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_noret_f64:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    v_mov_b32_e32 v2, s8
; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[4:7], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_noret_f64:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x2
; GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; GFX1030-NEXT:    s_load_dword s6, s[0:1], 0x3c
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    v_mov_b32_e32 v2, s6
; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_noret_f64:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; G_SI-NEXT:    s_load_dword s6, s[0:1], 0xf
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    v_mov_b32_e32 v2, s6
; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_noret_f64:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; G_GFX7-NEXT:    s_load_dword s6, s[0:1], 0xf
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_noret_f64:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x2
; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX10-NEXT:    s_load_dword s8, s[0:1], 0x3c
; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX10-NEXT:    v_mov_b32_e32 v2, s8
; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[4:7], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_noret_f64:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x2
; G_GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; G_GFX1030-NEXT:    s_load_dword s6, s[0:1], 0x3c
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; f64 buffer fmax whose result is consumed, in an amdgpu_ps function.
; The expected output uses the arguments directly from s[0:3] / v[0:2] with no
; loads. Because %ret is stored (through an undef addrspace(3) pointer, seen as
; ds_write_b64), the atomic is expected with glc followed by a vmcnt wait.
; %vindex feeds the operand that the expected output uses as the `offen` offset.
define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_max_rtn_f64:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b64 v0, v[0:1]
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_rtn_f64:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b64 v0, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_rtn_f64:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b64 v0, v[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f64:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f64:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b64 v0, v[0:1]
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f64:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f64:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f64:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, ptr addrspace(3) undef
  ret void
}

; Returning f64 buffer fmax with non-zero trailing operands (i32 4, i32 2).
; This variant is declared amdgpu_kernel, so the expected output first loads
; all arguments with a single s_load_dwordx8 from s[0:1] and copies data,
; offset and %out into VGPRs. In the expected atomic the 4 appears as the
; offset printed after the resource registers and the immediate 2 appears as
; the slc modifier next to glc. The result is written to %out with ds_write_b64.
; %vindex feeds the operand used as the `offen` offset.
define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_mov_b32_e32 v2, s6
; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; SI-NEXT:    v_mov_b32_e32 v2, s7
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b64 v2, v[0:1]
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x9
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX7-NEXT:    v_mov_b32_e32 v2, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b64 v2, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-NEXT:    v_mov_b32_e32 v2, s6
; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v2, s7
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b64 v2, v[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    v_mov_b32_e32 v2, s6
; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v2, s7
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b64 v2, v[0:1]
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x9
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    v_mov_b32_e32 v2, s6
; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_SI-NEXT:    v_mov_b32_e32 v2, s7
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b64 v2, v[0:1]
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x9
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    v_mov_b32_e32 v2, s7
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b64 v2, v[0:1]
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX10-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX10-NEXT:    v_mov_b32_e32 v2, s7
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b64 v2, v[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s7
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b64 v2, v[0:1]
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, ptr addrspace(3) %out, align 8
  ret void
}