; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s

; Scalar f16 multiply-add: r = (a * b) + c, with a, b and c each loaded as a
; single half through an addrspace(1) pointer.
; The tahiti run (SI prefix) expects all three inputs to be converted to f32,
; accumulated with v_mac_f32_e32 into the converted c register, and converted
; back to f16 before the store.
; The fiji run (VI prefix) expects a direct v_mac_f16_e32 that accumulates into
; the register that was loaded with c, which is then stored.
; GCN-LABEL: {{^}}mac_f16:
; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort v[[B_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort v[[C_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI:  v_mac_f32_e32 v[[C_F32]], v[[A_F32]], v[[B_F32]]
; SI:  v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI:  buffer_store_short v[[R_F16]]
; VI:  v_mac_f16_e32 v[[C_F16]], v[[A_F16]], v[[B_F16]]
; VI:  buffer_store_short v[[C_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Two scalar f16 multiply-adds that share the same addend:
;   r0 = (a * b) + c
;   r1 = (d * e) + c
; Both runs expect a three-operand mad that reads the addend from a register
; captured as ADD, followed by a mac that accumulates into that same register.
; The mad destination is matched with [0-9]+ (not a single digit) so the check
; does not depend on the allocator picking a register below v10.
; GCN-LABEL: {{^}}mac_f16_same_add:
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; SI: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}

; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; VI: v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_same_add(
    ptr addrspace(1) %r0,
    ptr addrspace(1) %r1,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c,
    ptr addrspace(1) %d,
    ptr addrspace(1) %e) #0 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c
  %d.val = load half, ptr addrspace(1) %d
  %e.val = load half, ptr addrspace(1) %e

  %t0.val = fmul half %a.val, %b.val
  %r0.val = fadd half %t0.val, %c.val

  %t1.val = fmul half %d.val, %e.val
  %r1.val = fadd half %t1.val, %c.val

  store half %r0.val, ptr addrspace(1) %r0
  store half %r1.val, ptr addrspace(1) %r1
  ret void
}

; Scalar f16 multiply-add with the first multiplicand negated by fneg:
;   r = (-a * b) + c
; The checks expect the negation to appear as a "-" source modifier on a
; three-operand mad. The fiji run additionally requires that no v_mac_f16 is
; emitted before that v_mad_f16.
; GCN-LABEL: {{^}}mac_f16_neg_a:
; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -[[CVT_A]], [[CVT_B]], [[CVT_C]]

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_a(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %a.neg = fneg half %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Scalar f16 multiply-add with the second multiplicand negated by fneg:
;   r = (a * -b) + c
; The checks expect a three-operand mad whose first source carries the "-"
; modifier. The CVT_A/CVT_B/CVT_C captures are bound in the order the convert
; instructions appear in the output; they are not tied to the IR value names.
; The fiji run additionally requires that no v_mac_f16 is emitted before the
; v_mad_f16.
; GCN-LABEL: {{^}}mac_f16_neg_b:
; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -[[CVT_A]], [[CVT_B]], [[CVT_C]]

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_b(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %b.neg = fneg half %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Scalar f16 multiply-add with the addend negated by fneg:
;   r = (a * b) + (-c)
; The checks expect a three-operand mad whose last source carries the "-"
; modifier. The fiji run additionally requires that no v_mac_f16 is emitted
; before the v_mad_f16.
; GCN-LABEL: {{^}}mac_f16_neg_c:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_c(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %c.neg = fneg half %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Scalar f16 multiply-add where the first multiplicand is formed by
; "fsub 0.0, a" instead of fneg:
;   r = ((0.0 - a) * b) + c
; 0.0 - x is not a pure sign flip (0.0 - 0.0 yields +0.0, not -0.0), and the
; checks expect an explicit subtract whose result feeds the first source of a
; mac, rather than a "-" source modifier.
; Attribute group #0 is defined outside the visible part of this file.
; GCN-LABEL: {{^}}mac_f16_neg_a_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_a_safe_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %a.neg = fsub half 0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Scalar f16 multiply-add where the second multiplicand is formed by
; "fsub 0.0, b" instead of fneg:
;   r = (a * (0.0 - b)) + c
; The checks expect an explicit subtract whose result feeds the second source
; of a mac, rather than a "-" source modifier. The capture is named NEG_B
; because it holds the subtract result derived from b.
; Attribute group #0 is defined outside the visible part of this file.
; GCN-LABEL: {{^}}mac_f16_neg_b_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_B:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_B]]
; VI: v_sub_f16_e32 v[[NEG_B:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_B]]
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_b_safe_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %b.neg = fsub half 0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Scalar f16 multiply-add where the addend is formed by "fsub 0.0, c" instead
; of fneg:
;   r = (a * b) + (0.0 - c)
; The checks expect an explicit subtract whose destination register is then
; used as the accumulator of a mac. The capture is named NEG_C because it
; holds the subtract result derived from c.
; Attribute group #0 is defined outside the visible part of this file.
; GCN-LABEL: {{^}}mac_f16_neg_c_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_C:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_mac_f32_e32 v[[NEG_C]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_C:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v[[NEG_C]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_c_safe_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %c.neg = fsub half 0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Same IR as the "fsub 0.0, a" variant, but the kernel uses attribute group #1:
;   r = ((0.0 - a) * b) + c
; Here the checks expect the subtract to disappear into a "-" source modifier
; on a three-operand mad, and the fiji run requires that no v_mac_f16 precedes
; the v_mad_f16.
; NOTE(review): #1 is defined outside the visible part of this file; judging
; by the test name it presumably allows the sign of zero to be ignored -
; confirm against the attribute definition.
; The last mad operand is matched with [0-9]+ like the other operands.
; GCN-LABEL: {{^}}mac_f16_neg_a_nsz_fp_math:
; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -[[CVT_A]], [[CVT_B]], [[CVT_C]]

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_a_nsz_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #1 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %a.neg = fsub half 0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Same IR as the "fsub 0.0, b" variant, but the kernel uses attribute group #1:
;   r = (a * (0.0 - b)) + c
; The checks expect the subtract to disappear into a "-" modifier on the first
; source of a three-operand mad, and the fiji run requires that no v_mac_f16
; precedes the v_mad_f16. The CVT_* captures follow the order of the convert
; instructions in the output, not the IR value names.
; NOTE(review): #1 is defined outside the visible part of this file; judging
; by the test name it presumably allows the sign of zero to be ignored -
; confirm against the attribute definition.
; The last mad operand is matched with [0-9]+ like the other operands.
; GCN-LABEL: {{^}}mac_f16_neg_b_nsz_fp_math:
; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -[[CVT_A]], [[CVT_B]], [[CVT_C]]

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_b_nsz_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #1 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %b.neg = fsub half 0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Same IR as the "fsub 0.0, c" variant, but the kernel uses attribute group #1:
;   r = (a * b) + (0.0 - c)
; The checks expect the subtract to disappear into a "-" modifier on the last
; source of a three-operand mad, and the fiji run requires that no v_mac_f16
; precedes the v_mad_f16.
; NOTE(review): #1 is defined outside the visible part of this file; judging
; by the test name it presumably allows the sign of zero to be ignored -
; confirm against the attribute definition.
; The last mad operand is matched with [0-9]+ like the other operands.
; GCN-LABEL: {{^}}mac_f16_neg_c_nsz_fp_math:
; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, [[CVT_A]], [[CVT_B]], -[[CVT_C]]

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_c_nsz_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #1 {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %c.val = load half, ptr addrspace(1) %c

  %c.neg = fsub half 0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add: r = (a * b) + c, each operand loaded as one dword.
; The tahiti run (SI prefix) expects each input to be split into its low half
; and a 16-bit-right-shifted high half, both converted to f32, two
; v_mac_f32_e32 accumulations, conversion back to f16, and the result rebuilt
; with a 16-bit left shift of the high part plus an or.
; The fiji run (VI prefix) expects the high halves to be handled by an SDWA
; mac selecting WORD_1 of both sources, the low halves by a plain
; v_mac_f16_e32, and the result rebuilt with a shift and an or, with no "and"
; in between.
; NOTE(review): the first "no and" check for the VI prefix sits in the middle
; of the SI sequence; it may have been intended for the SI prefix - confirm
; before changing it.
; NOTE(review): the s.barrier calls separate the three loads; presumably they
; keep the loads in program order so the A/B/C captures bind predictably -
; confirm.
; GCN-LABEL: {{^}}mac_v2f16:
; GCN: {{buffer|flat}}_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]]

; SI:     v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]

; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]

; SI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]

; SI-DAG: v_mac_f32_e32 v[[C_F32_0]], v[[A_F32_0]], v[[B_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]]
; SI-DAG: v_mac_f32_e32 v[[C_F32_1]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[C_F32_1]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; VI-NOT: and
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_LO]], v[[R_F16_HI]]

; VI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; VI-DAG: v_mac_f16_sdwa v[[C_F16_1]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mac_f16_e32 v[[C_V2_F16]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]
; VI-NOT: and
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[C_V2_F16]], v[[R_F16_HI]]

; GCN: {{buffer|flat}}_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  call void @llvm.amdgcn.s.barrier() #2
  %b.val = load <2 x half>, ptr addrspace(1) %b
  call void @llvm.amdgcn.s.barrier() #2
  %c.val = load <2 x half>, ptr addrspace(1) %c

  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; Two <2 x half> multiply-adds that share the same addend:
;   r0 = (a * b) + c
;   r1 = (d * e) + c
; The checks expect a mix of three-operand mad and accumulating mac
; instructions, in any order: two mad plus two mac on the tahiti run, and two
; SDWA mac, one mad and one plain mac on the fiji run.
; Destination registers are matched with [0-9]+ (not a single digit) so the
; checks do not depend on the allocator picking registers below v10.
; GCN-LABEL: {{^}}mac_v2f16_same_add:
; SI-DAG: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_same_add(
    ptr addrspace(1) %r0,
    ptr addrspace(1) %r1,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c,
    ptr addrspace(1) %d,
    ptr addrspace(1) %e) #0 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c
  %d.val = load <2 x half>, ptr addrspace(1) %d
  %e.val = load <2 x half>, ptr addrspace(1) %e

  %t0.val = fmul <2 x half> %a.val, %b.val
  %r0.val = fadd <2 x half> %t0.val, %c.val

  %t1.val = fmul <2 x half> %d.val, %e.val
  %r1.val = fadd <2 x half> %t1.val, %c.val

  store <2 x half> %r0.val, ptr addrspace(1) %r0
  store <2 x half> %r1.val, ptr addrspace(1) %r1
  ret void
}

; <2 x half> multiply-add with the first multiplicand negated by fneg:
;   r = (-a * b) + c
; The checks expect two three-operand mads (one per lane), each with a "-"
; modifier on its first source. The fiji run additionally requires that no
; v_mac_f16 is emitted before the first v_mad_f16.
; GCN-LABEL: {{^}}mac_v2f16_neg_a:
; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}

; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_a(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  %a.neg = fneg <2 x half> %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add with the second multiplicand negated by fneg:
;   r = (a * -b) + c
; The checks expect two three-operand mads (one per lane), each with a "-"
; modifier on its first source. The fiji run additionally requires that no
; v_mac_f16 is emitted before the first v_mad_f16.
; The label pattern ends with ":" like every other label in this file, so it
; can only anchor on this function's own symbol and not on a longer name that
; merely starts with the same text.
; GCN-LABEL: {{^}}mac_v2f16_neg_b:
; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_b(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  %b.neg = fneg <2 x half> %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add with the addend negated by fneg:
;   r = (a * b) + (-c)
; The tahiti run expects six f16-to-f32 conversions followed by two
; three-operand mads (in either order) with a "-" modifier on the last source.
; The fiji run expects two v_mad_f16 with the same modifier and requires that
; no v_mac_f16 is emitted before the first of them.
; GCN-LABEL: {{^}}mac_v2f16_neg_c:
; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT3:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT4:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT5:v[0-9]+]], {{v[0-9]+}}

; SI-DAG: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; SI-DAG: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_c(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  %c.neg = fneg <2 x half> %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add where the first multiplicand is formed by
; subtracting a from a zero vector instead of using fneg:
;   r = ((0.0 - a) * b) + c
; 0.0 - x is not a pure sign flip (0.0 - 0.0 yields +0.0, not -0.0), and the
; checks expect explicit subtracts whose results feed the first source of the
; macs. On the fiji run the high lane uses SDWA forms: the subtract takes zero
; from a register materialised by v_mov_b32 and selects WORD_1 of the input,
; and the mac reads the subtract result as a full DWORD.
; Attribute group #0 is defined outside the visible part of this file.
; GCN-LABEL: {{^}}mac_v2f16_neg_a_safe_fp_math:

; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}

; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI-DAG: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI-DAG: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}

; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_a_safe_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add where the second multiplicand is formed by
; subtracting b from a zero vector instead of using fneg:
;   r = (a * (0.0 - b)) + c
; The checks expect explicit subtracts whose results feed the second source of
; the macs. On the fiji run the high lane uses SDWA forms: the subtract takes
; zero from a register materialised by v_mov_b32 and selects WORD_1 of the
; input, and the mac selects WORD_1 of its first source while reading the
; subtract result as a full DWORD.
; The captures are named NEG_B0/NEG_B1 because they hold the subtract results
; derived from b.
; Attribute group #0 is defined outside the visible part of this file.
; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math:

; SI: v_sub_f32_e32 v[[NEG_B0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_B1:[0-9]+]], 0, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_B0]]
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_B1]]

; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: v_sub_f16_e32 v[[NEG_B1:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_sub_f16_sdwa v[[NEG_B0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_B0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_B1]]

; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_b_safe_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add r = a * b + (0.0 - c), compiled with attribute
; group #0 (signed zeros must be honoured). The checks expect an explicit
; subtract from zero per element, and the register holding that result is
; then used as the accumulator (destination) operand of the mac.
; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math:

; SI: v_sub_f32_e32 v[[NEG_C0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_C1:[0-9]+]], 0, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v[[NEG_C0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v[[NEG_C1]], v{{[0-9]+}}, v{{[0-9]+}}

; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: v_sub_f16_e32 v[[NEG_C1:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_sub_f16_sdwa v[[NEG_C0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_sdwa v[[NEG_C0]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mac_f16_e32 v[[NEG_C1]], v{{[0-9]+}}, v{{[0-9]+}}

; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_c_safe_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #0 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  ; The addend is negated via 0.0 - c rather than fneg.
  %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add r = (0.0 - a) * b + c, compiled with attribute
; group #1 (signed zeros may be ignored). The checks expect the negation to
; be folded into a source negate modifier on a mad instruction (one mad per
; element), and the fiji run must not emit v_mac_f16 at all. The tahiti run
; first converts the six half inputs to f32.
; GCN-LABEL: {{^}}mac_v2f16_neg_a_nsz_fp_math:
; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT3:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT4:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT5:v[0-9]+]], {{v[0-9]+}}

; SI-DAG: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_a_nsz_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #1 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  ; 0.0 - a: expected to be folded into a negate modifier under attribute
  ; group #1.
  %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add r = a * (0.0 - b) + c, compiled with attribute
; group #1 (signed zeros may be ignored). The checks expect one mad per
; element with a negate modifier on a multiplicand, and the fiji run must
; not emit v_mac_f16 at all. The tahiti run first converts the six half
; inputs to f32.
; GCN-LABEL: {{^}}mac_v2f16_neg_b_nsz_fp_math:
; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT3:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT4:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT5:v[0-9]+]], {{v[0-9]+}}

; SI-DAG: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_b_nsz_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #1 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  ; 0.0 - b: expected to be folded into a negate modifier under attribute
  ; group #1.
  %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> multiply-add r = a * b + (0.0 - c), compiled with attribute
; group #1 (signed zeros may be ignored). The checks expect one mad per
; element with the negate modifier on the addend (last) operand, and the
; fiji run must not emit v_mac_f16 at all. The tahiti run first converts the
; six half inputs to f32.
; GCN-LABEL: {{^}}mac_v2f16_neg_c_nsz_fp_math:
; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT3:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT4:v[0-9]+]], {{v[0-9]+}}
; SI: v_cvt_f32_f16_e32 [[CVT5:v[0-9]+]], {{v[0-9]+}}

; SI-DAG: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; SI-DAG: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}

; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_c_nsz_fp_math(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) #1 {
entry:
  %a.val = load <2 x half>, ptr addrspace(1) %a
  %b.val = load <2 x half>, ptr addrspace(1) %b
  %c.val = load <2 x half>, ptr addrspace(1) %c

  ; 0.0 - c: expected to be folded into a negate modifier on the addend under
  ; attribute group #1.
  %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; Workgroup barrier intrinsic; declared convergent through attribute group #2.
declare void @llvm.amdgcn.s.barrier() #2

; #0: signed zeros must be honoured -- used by the *_safe_fp_math kernels.
; #1: signed zeros may be ignored   -- used by the *_nsz_fp_math kernels.
; Both groups select the "preserve-sign" denormal mode for inputs and outputs.
attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind convergent }