; blob: 8573cd4d1fe136af1a8f23c4b6faae88269833c9
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
; RUN: llc -march=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
; RUN: llc -march=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
; (fadd (fmul S1.x, S2.x), (fadd (fmul S1.y, S2.y), z)) -> (fdot2 S1, S2, z)

; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
; are not converted from f16 to f32.
;
; The kernel evaluates src1[0]*src2[0] + (src1[1]*src2[1] + *dst) with every operation
; in half precision (there is no fpext anywhere), then stores the half result to dst.
; GCN-LABEL: {{^}}dotproduct_f16
; GFX900: v_fma_f16
; GFX900: v_fma_f16

; GFX906: v_mul_f16_e32
; GFX906: v_mul_f16_e32

; GFX906-DL-UNSAFE: v_fma_f16
; GFX10-CONTRACT: v_fmac_f16

; GFX906-CONTRACT: v_mac_f16_e32
; GFX906-DENORM-CONTRACT: v_fma_f16
define amdgpu_kernel void @dotproduct_f16(ptr addrspace(1) %src1,
                                          ptr addrspace(1) %src2,
                                          ptr addrspace(1) nocapture %dst) {
entry:
  %src1.vec = load <2 x half>, ptr addrspace(1) %src1
  %src2.vec = load <2 x half>, ptr addrspace(1) %src2

  ; Element 0 of each source vector, left as half.
  %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
  %src2.el1 = extractelement <2 x half> %src2.vec, i64 0

  ; Element 1 of each source vector, left as half.
  %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
  %src2.el2 = extractelement <2 x half> %src2.vec, i64 1

  %mul2 = fmul half %src1.el2, %src2.el2
  %mul1 = fmul half %src1.el1, %src2.el1
  ; dst supplies the accumulator input and also receives the result.
  %acc = load half, ptr addrspace(1) %dst, align 2
  %acc1 = fadd half %mul2, %acc
  %acc2 = fadd half %mul1, %acc1
  store half %acc2, ptr addrspace(1) %dst, align 2
  ret void
}


; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
; and the vectors are of type <2 x half>
;
; The kernel extends both elements of two <2 x half> vectors to float and evaluates
; src1[0]*src2[0] + (src1[1]*src2[1] + *dst) in float, storing the float result to dst.
; GCN-LABEL: {{^}}dotproduct_f16_f32
; GFX900: v_mad_mix_f32
; GFX900: v_mad_mix_f32

; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32

; GFX906-DL-UNSAFE: v_dot2_f32_f16
; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32

; GFX906-CONTRACT: v_dot2_f32_f16

; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
define amdgpu_kernel void @dotproduct_f16_f32(ptr addrspace(1) %src1,
                                              ptr addrspace(1) %src2,
                                              ptr addrspace(1) nocapture %dst) {
entry:
  %src1.vec = load <2 x half>, ptr addrspace(1) %src1
  %src2.vec = load <2 x half>, ptr addrspace(1) %src2

  ; Element 0 of each source vector, extended to float.
  %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
  %csrc1.el1 = fpext half %src1.el1 to float
  %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
  %csrc2.el1 = fpext half %src2.el1 to float

  ; Element 1 of each source vector, extended to float.
  %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
  %csrc1.el2 = fpext half %src1.el2 to float
  %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
  %csrc2.el2 = fpext half %src2.el2 to float

  ; Each product pairs the same element index of src1 and src2.
  %mul2 = fmul float %csrc1.el2, %csrc2.el2
  %mul1 = fmul float %csrc1.el1, %csrc2.el1
  ; dst supplies the accumulator input and also receives the result.
  %acc = load float, ptr addrspace(1) %dst, align 4
  %acc1 = fadd float %mul2, %acc
  %acc2 = fadd float %mul1, %acc1
  store float %acc2, ptr addrspace(1) %dst, align 4
  ret void
}

; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
; and the vectors are of type <2 x half>
;
; Same expression as a plain <2 x half> dot product accumulated in float, except that the
; element-1 product lists its operands in the opposite vector order (src2 first, then src1).
; GCN-LABEL: {{^}}dotproduct_diffvecorder
; GFX900: v_mad_mix_f32
; GFX900: v_mad_mix_f32

; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32

; GFX906-DL-UNSAFE: v_dot2_f32_f16
; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32

; GFX906-CONTRACT: v_dot2_f32_f16
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
define amdgpu_kernel void @dotproduct_diffvecorder(ptr addrspace(1) %src1,
                                                   ptr addrspace(1) %src2,
                                                   ptr addrspace(1) nocapture %dst) {
entry:
  %src1.vec = load <2 x half>, ptr addrspace(1) %src1
  %src2.vec = load <2 x half>, ptr addrspace(1) %src2

  ; Element 0 of each source vector, extended to float.
  %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
  %csrc1.el1 = fpext half %src1.el1 to float
  %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
  %csrc2.el1 = fpext half %src2.el1 to float

  ; Element 1 of each source vector, extended to float.
  %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
  %csrc1.el2 = fpext half %src1.el2 to float
  %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
  %csrc2.el2 = fpext half %src2.el2 to float

  ; Operand order differs between the two products: (src2, src1) here, (src1, src2) below.
  %mul2 = fmul float %csrc2.el2, %csrc1.el2
  %mul1 = fmul float %csrc1.el1, %csrc2.el1
  ; dst supplies the accumulator input and also receives the result.
  %acc = load float, ptr addrspace(1) %dst, align 4
  %acc1 = fadd float %mul2, %acc
  %acc2 = fadd float %mul1, %acc1
  store float %acc2, ptr addrspace(1) %dst, align 4
  ret void
}

; Tests to make sure dot product is not generated when the vectors are not of <2 x half>.
;
; The sources are loaded as <4 x half>; only elements 0 and 1 are extracted, extended to
; float, multiplied pairwise and accumulated with the float loaded from dst.
; GCN-LABEL: {{^}}dotproduct_v4f16
; GFX900: v_mad_mix_f32

; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32

; GCN-DL-UNSAFE: v_fma_mix_f32

; GFX906-CONTRACT: v_fma_mix_f32
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
define amdgpu_kernel void @dotproduct_v4f16(ptr addrspace(1) %src1,
                                            ptr addrspace(1) %src2,
                                            ptr addrspace(1) nocapture %dst) {
entry:
  ; Four-element vectors: this is what distinguishes the test from the <2 x half> cases.
  %src1.vec = load <4 x half>, ptr addrspace(1) %src1
  %src2.vec = load <4 x half>, ptr addrspace(1) %src2

  ; Element 0 of each source vector, extended to float.
  %src1.el1 = extractelement <4 x half> %src1.vec, i64 0
  %csrc1.el1 = fpext half %src1.el1 to float
  %src2.el1 = extractelement <4 x half> %src2.vec, i64 0
  %csrc2.el1 = fpext half %src2.el1 to float

  ; Element 1 of each source vector, extended to float.
  %src1.el2 = extractelement <4 x half> %src1.vec, i64 1
  %csrc1.el2 = fpext half %src1.el2 to float
  %src2.el2 = extractelement <4 x half> %src2.vec, i64 1
  %csrc2.el2 = fpext half %src2.el2 to float

  %mul2 = fmul float %csrc1.el2, %csrc2.el2
  %mul1 = fmul float %csrc1.el1, %csrc2.el1
  ; dst supplies the accumulator input and also receives the result.
  %acc = load float, ptr addrspace(1) %dst, align 4
  %acc1 = fadd float %mul2, %acc
  %acc2 = fadd float %mul1, %acc1
  store float %acc2, ptr addrspace(1) %dst, align 4
  ret void
}

; Each product here multiplies the two elements of a single source vector
; (src1[1]*src1[0] and src2[0]*src2[1]) instead of pairing src1 with src2, so the
; expression does not have the shape of a dot product of src1 and src2. The checks
; below expect mad/fma-mix style instructions for every configuration.
; GCN-LABEL: {{^}}NotAdotproduct
; GFX900: v_mad_mix_f32
; GFX900: v_mad_mix_f32

; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32

; GCN-DL-UNSAFE: v_fma_mix_f32

; GFX906-CONTRACT: v_fma_mix_f32
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
define amdgpu_kernel void @NotAdotproduct(ptr addrspace(1) %src1,
                                          ptr addrspace(1) %src2,
                                          ptr addrspace(1) nocapture %dst) {
entry:
  %src1.vec = load <2 x half>, ptr addrspace(1) %src1
  %src2.vec = load <2 x half>, ptr addrspace(1) %src2

  ; Element 0 of each source vector, extended to float.
  %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
  %csrc1.el1 = fpext half %src1.el1 to float
  %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
  %csrc2.el1 = fpext half %src2.el1 to float

  ; Element 1 of each source vector, extended to float.
  %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
  %csrc1.el2 = fpext half %src1.el2 to float
  %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
  %csrc2.el2 = fpext half %src2.el2 to float

  ; Both operands of each product come from the same vector.
  %mul2 = fmul float %csrc1.el2, %csrc1.el1
  %mul1 = fmul float %csrc2.el1, %csrc2.el2
  ; dst supplies the accumulator input and also receives the result.
  %acc = load float, ptr addrspace(1) %dst, align 4
  %acc1 = fadd float %mul2, %acc
  %acc2 = fadd float %mul1, %acc1
  store float %acc2, ptr addrspace(1) %dst, align 4
  ret void
}

; Each product here pairs src1 with src2 but at mismatched element indices
; (src1[1]*src2[0] and src1[0]*src2[1]), so the expression does not have the shape of
; a dot product. The checks below expect mad/fma-mix style instructions for every
; configuration.
; GCN-LABEL: {{^}}Diff_Idx_NotAdotproduct
; GFX900: v_mad_mix_f32
; GFX900: v_mad_mix_f32

; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32

; GCN-DL-UNSAFE: v_fma_mix_f32

; GFX906-CONTRACT: v_fma_mix_f32
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
define amdgpu_kernel void @Diff_Idx_NotAdotproduct(ptr addrspace(1) %src1,
                                                   ptr addrspace(1) %src2,
                                                   ptr addrspace(1) nocapture %dst) {
entry:
  %src1.vec = load <2 x half>, ptr addrspace(1) %src1
  %src2.vec = load <2 x half>, ptr addrspace(1) %src2

  ; Element 0 of each source vector, extended to float.
  %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
  %csrc1.el1 = fpext half %src1.el1 to float
  %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
  %csrc2.el1 = fpext half %src2.el1 to float

  ; Element 1 of each source vector, extended to float.
  %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
  %csrc1.el2 = fpext half %src1.el2 to float
  %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
  %csrc2.el2 = fpext half %src2.el2 to float

  ; Element indices are crossed between the two vectors in both products.
  %mul2 = fmul float %csrc1.el2, %csrc2.el1
  %mul1 = fmul float %csrc1.el1, %csrc2.el2
  ; dst supplies the accumulator input and also receives the result.
  %acc = load float, ptr addrspace(1) %dst, align 4
  %acc1 = fadd float %mul2, %acc
  %acc2 = fadd float %mul1, %acc1
  store float %acc2, ptr addrspace(1) %dst, align 4
  ret void
}