Matt Arsenault | ffa8be3 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900 |
| 2 | ; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE |
| 3 | ; RUN: llc -march=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT |
| 4 | ; RUN: llc -march=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT |
| 5 | ; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906 |
| 6 | ; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT |
| 7 | ; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 8 | ; (fadd (fmul S1.x, S2.x), (fadd (fmul S1.y, S2.y), z)) -> (fdot2 S1, S2, z) |
| 9 | |
| 10 | ; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions |
| 11 | ; are not converted from f16 to f32. |
| 12 | ; GCN-LABEL: {{^}}dotproduct_f16 |
Konstantin Zhuravlyov | b288869 | 2018-11-19 21:10:16 +0000 | [diff] [blame] | 13 | ; GFX900: v_fma_f16 |
| 14 | ; GFX900: v_fma_f16 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 15 | |
| 16 | ; GFX906: v_mul_f16_e32 |
| 17 | ; GFX906: v_mul_f16_e32 |
| 18 | |
Stanislav Mekhanoshin | 1f91cba | 2019-06-14 00:33:31 +0000 | [diff] [blame] | 19 | ; GFX906-DL-UNSAFE: v_fma_f16 |
| 20 | ; GFX10-CONTRACT: v_fmac_f16 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 21 | |
| 22 | ; GFX906-CONTRACT: v_mac_f16_e32 |
Konstantin Zhuravlyov | b288869 | 2018-11-19 21:10:16 +0000 | [diff] [blame] | 23 | ; GFX906-DENORM-CONTRACT: v_fma_f16 |
; Kernel under test: loads one <2 x half> from each of %src1 and %src2,
; multiplies matching lanes in half precision, adds both products to the half
; value loaded from %dst, and stores the sum back to %dst.
; No fpext to float appears anywhere in this body.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 24 | define amdgpu_kernel void @dotproduct_f16(ptr addrspace(1) %src1, |
| 25 | ptr addrspace(1) %src2, |
| 26 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 27 | entry: |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 28 | %src1.vec = load <2 x half>, ptr addrspace(1) %src1 |
| 29 | %src2.vec = load <2 x half>, ptr addrspace(1) %src2 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 30 | |
; Lane 0 of each input vector.
| 31 | %src1.el1 = extractelement <2 x half> %src1.vec, i64 0 |
| 32 | %src2.el1 = extractelement <2 x half> %src2.vec, i64 0 |
| 33 | |
; Lane 1 of each input vector.
| 34 | %src1.el2 = extractelement <2 x half> %src1.vec, i64 1 |
| 35 | %src2.el2 = extractelement <2 x half> %src2.vec, i64 1 |
| 36 | |
; Lane products are formed directly on the half operands.
| 37 | %mul2 = fmul half %src1.el2, %src2.el2 |
| 38 | %mul1 = fmul half %src1.el1, %src2.el1 |
; %dst supplies the initial accumulator and receives the final sum.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 39 | %acc = load half, ptr addrspace(1) %dst, align 2 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 40 | %acc1 = fadd half %mul2, %acc |
| 41 | %acc2 = fadd half %mul1, %acc1 |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 42 | store half %acc2, ptr addrspace(1) %dst, align 2 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 43 | ret void |
| 44 | } |
| 45 | |
| 46 | |
| 47 | ; We only want to generate fdot2 if the vector elements of the dot product are converted from f16 to f32 |
| 48 | ; and the vectors are of type <2 x half>. |
| 49 | ; GCN-LABEL: {{^}}dotproduct_f16_f32 |
| 50 | ; GFX900: v_mad_mix_f32 |
Konstantin Zhuravlyov | 0d15b42 | 2018-11-15 02:42:04 +0000 | [diff] [blame] | 51 | ; GFX900: v_mad_mix_f32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 52 | |
| 53 | ; GFX906: v_mad_f32 |
| 54 | ; GFX906: v_mac_f32_e32 |
| 55 | |
Stanislav Mekhanoshin | 1f91cba | 2019-06-14 00:33:31 +0000 | [diff] [blame] | 56 | ; GFX906-DL-UNSAFE: v_dot2_f32_f16 |
| 57 | ; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 58 | |
| 59 | ; GFX906-CONTRACT: v_dot2_f32_f16 |
| 60 | |
| 61 | ; GFX906-DENORM-CONTRACT: v_dot2_f32_f16 |
; Kernel under test: loads one <2 x half> from each of %src1 and %src2,
; extends every extracted lane to float, multiplies matching lanes in float,
; adds both products to the float loaded from %dst, and stores the sum back.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 62 | define amdgpu_kernel void @dotproduct_f16_f32(ptr addrspace(1) %src1, |
| 63 | ptr addrspace(1) %src2, |
| 64 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 65 | entry: |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 66 | %src1.vec = load <2 x half>, ptr addrspace(1) %src1 |
| 67 | %src2.vec = load <2 x half>, ptr addrspace(1) %src2 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 68 | |
; Lane 0 of each vector, extended from half to float.
| 69 | %src1.el1 = extractelement <2 x half> %src1.vec, i64 0 |
| 70 | %csrc1.el1 = fpext half %src1.el1 to float |
| 71 | %src2.el1 = extractelement <2 x half> %src2.vec, i64 0 |
| 72 | %csrc2.el1 = fpext half %src2.el1 to float |
| 73 | |
; Lane 1 of each vector, extended from half to float.
| 74 | %src1.el2 = extractelement <2 x half> %src1.vec, i64 1 |
| 75 | %csrc1.el2 = fpext half %src1.el2 to float |
| 76 | %src2.el2 = extractelement <2 x half> %src2.vec, i64 1 |
| 77 | %csrc2.el2 = fpext half %src2.el2 to float |
| 78 | |
; Each product pairs the same lane index of %src1 and %src2, %src1 operand first.
| 79 | %mul2 = fmul float %csrc1.el2, %csrc2.el2 |
| 80 | %mul1 = fmul float %csrc1.el1, %csrc2.el1 |
; %dst supplies the initial float accumulator and receives the final sum.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 81 | %acc = load float, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 82 | %acc1 = fadd float %mul2, %acc |
| 83 | %acc2 = fadd float %mul1, %acc1 |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 84 | store float %acc2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 85 | ret void |
| 86 | } |
| 87 | |
| 88 | ; We only want to generate fdot2 if the vector elements of the dot product are converted from f16 to f32 |
| 89 | ; and the vectors are of type <2 x half>; here one product lists its operands in swapped vector order. |
| 90 | ; GCN-LABEL: {{^}}dotproduct_diffvecorder |
| 91 | ; GFX900: v_mad_mix_f32 |
Konstantin Zhuravlyov | 0d15b42 | 2018-11-15 02:42:04 +0000 | [diff] [blame] | 92 | ; GFX900: v_mad_mix_f32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 93 | |
| 94 | ; GFX906: v_mad_f32 |
| 95 | ; GFX906: v_mac_f32_e32 |
| 96 | |
Stanislav Mekhanoshin | 1f91cba | 2019-06-14 00:33:31 +0000 | [diff] [blame] | 97 | ; GFX906-DL-UNSAFE: v_dot2_f32_f16 |
| 98 | ; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 99 | |
| 100 | ; GFX906-CONTRACT: v_dot2_f32_f16 |
| 101 | ; GFX906-DENORM-CONTRACT: v_dot2_f32_f16 |
; Kernel under test: float dot product of two <2 x half> inputs with every lane
; extended to float, accumulated into the float at %dst. The lane-1 product
; names its %src2 operand first while the lane-0 product names %src1 first.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 102 | define amdgpu_kernel void @dotproduct_diffvecorder(ptr addrspace(1) %src1, |
| 103 | ptr addrspace(1) %src2, |
| 104 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 105 | entry: |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 106 | %src1.vec = load <2 x half>, ptr addrspace(1) %src1 |
| 107 | %src2.vec = load <2 x half>, ptr addrspace(1) %src2 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 108 | |
; Lane 0 of each vector, extended from half to float.
| 109 | %src1.el1 = extractelement <2 x half> %src1.vec, i64 0 |
| 110 | %csrc1.el1 = fpext half %src1.el1 to float |
| 111 | %src2.el1 = extractelement <2 x half> %src2.vec, i64 0 |
| 112 | %csrc2.el1 = fpext half %src2.el1 to float |
| 113 | |
; Lane 1 of each vector, extended from half to float.
| 114 | %src1.el2 = extractelement <2 x half> %src1.vec, i64 1 |
| 115 | %csrc1.el2 = fpext half %src1.el2 to float |
| 116 | %src2.el2 = extractelement <2 x half> %src2.vec, i64 1 |
| 117 | %csrc2.el2 = fpext half %src2.el2 to float |
| 118 | |
; Operand order differs between the two products: %mul2 is (src2, src1),
; %mul1 is (src1, src2). Lane indices still match within each product.
| 119 | %mul2 = fmul float %csrc2.el2, %csrc1.el2 |
| 120 | %mul1 = fmul float %csrc1.el1, %csrc2.el1 |
; %dst supplies the initial float accumulator and receives the final sum.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 121 | %acc = load float, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 122 | %acc1 = fadd float %mul2, %acc |
| 123 | %acc2 = fadd float %mul1, %acc1 |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 124 | store float %acc2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 125 | ret void |
| 126 | } |
| 127 | |
| 128 | ; Tests to make sure dot product is not generated when the vectors are not of type <2 x half>. |
| 129 | ; GCN-LABEL: {{^}}dotproduct_v4f16 |
| 130 | ; GFX900: v_mad_mix_f32 |
| 131 | |
| 132 | ; GFX906: v_mad_f32 |
| 133 | ; GFX906: v_mac_f32_e32 |
| 134 | |
Stanislav Mekhanoshin | 1f91cba | 2019-06-14 00:33:31 +0000 | [diff] [blame] | 135 | ; GCN-DL-UNSAFE: v_fma_mix_f32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 136 | |
| 137 | ; GFX906-CONTRACT: v_fma_mix_f32 |
| 138 | ; GFX906-DENORM-CONTRACT: v_fma_mix_f32 |
; Kernel under test: same multiply-accumulate shape as a two-lane float dot
; product, but the inputs are loaded as <4 x half>. Only lanes 0 and 1 of each
; vector are extracted; lanes 2 and 3 are never read.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 139 | define amdgpu_kernel void @dotproduct_v4f16(ptr addrspace(1) %src1, |
| 140 | ptr addrspace(1) %src2, |
| 141 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 142 | entry: |
; Four-element loads are what distinguish this case from the <2 x half> kernels.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 143 | %src1.vec = load <4 x half>, ptr addrspace(1) %src1 |
| 144 | %src2.vec = load <4 x half>, ptr addrspace(1) %src2 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 145 | |
; Lane 0 of each vector, extended from half to float.
| 146 | %src1.el1 = extractelement <4 x half> %src1.vec, i64 0 |
| 147 | %csrc1.el1 = fpext half %src1.el1 to float |
| 148 | %src2.el1 = extractelement <4 x half> %src2.vec, i64 0 |
| 149 | %csrc2.el1 = fpext half %src2.el1 to float |
| 150 | |
; Lane 1 of each vector, extended from half to float.
| 151 | %src1.el2 = extractelement <4 x half> %src1.vec, i64 1 |
| 152 | %csrc1.el2 = fpext half %src1.el2 to float |
| 153 | %src2.el2 = extractelement <4 x half> %src2.vec, i64 1 |
| 154 | %csrc2.el2 = fpext half %src2.el2 to float |
| 155 | |
; Each product pairs the same lane index of %src1 and %src2.
| 156 | %mul2 = fmul float %csrc1.el2, %csrc2.el2 |
| 157 | %mul1 = fmul float %csrc1.el1, %csrc2.el1 |
; %dst supplies the initial float accumulator and receives the final sum.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 158 | %acc = load float, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 159 | %acc1 = fadd float %mul2, %acc |
| 160 | %acc2 = fadd float %mul1, %acc1 |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 161 | store float %acc2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 162 | ret void |
| 163 | } |
| 164 | |
| 165 | ; GCN-LABEL: {{^}}NotAdotproduct |
| 166 | ; GFX900: v_mad_mix_f32 |
Konstantin Zhuravlyov | 0d15b42 | 2018-11-15 02:42:04 +0000 | [diff] [blame] | 167 | ; GFX900: v_mad_mix_f32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 168 | |
| 169 | ; GFX906: v_mad_f32 |
| 170 | ; GFX906: v_mac_f32_e32 |
| 171 | |
Stanislav Mekhanoshin | 1f91cba | 2019-06-14 00:33:31 +0000 | [diff] [blame] | 172 | ; GCN-DL-UNSAFE: v_fma_mix_f32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 173 | |
| 174 | ; GFX906-CONTRACT: v_fma_mix_f32 |
| 175 | ; GFX906-DENORM-CONTRACT: v_fma_mix_f32 |
; Kernel under test: two float products accumulated into the float at %dst,
; where each product multiplies two lanes of the SAME input vector
; (src1 lane 1 * src1 lane 0, and src2 lane 0 * src2 lane 1).
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 176 | define amdgpu_kernel void @NotAdotproduct(ptr addrspace(1) %src1, |
| 177 | ptr addrspace(1) %src2, |
| 178 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 179 | entry: |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 180 | %src1.vec = load <2 x half>, ptr addrspace(1) %src1 |
| 181 | %src2.vec = load <2 x half>, ptr addrspace(1) %src2 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 182 | |
; Lane 0 of each vector, extended from half to float.
| 183 | %src1.el1 = extractelement <2 x half> %src1.vec, i64 0 |
| 184 | %csrc1.el1 = fpext half %src1.el1 to float |
| 185 | %src2.el1 = extractelement <2 x half> %src2.vec, i64 0 |
| 186 | %csrc2.el1 = fpext half %src2.el1 to float |
| 187 | |
; Lane 1 of each vector, extended from half to float.
| 188 | %src1.el2 = extractelement <2 x half> %src1.vec, i64 1 |
| 189 | %csrc1.el2 = fpext half %src1.el2 to float |
| 190 | %src2.el2 = extractelement <2 x half> %src2.vec, i64 1 |
| 191 | %csrc2.el2 = fpext half %src2.el2 to float |
| 192 | |
; Both operands of %mul2 come from %src1; both operands of %mul1 come from %src2.
| 193 | %mul2 = fmul float %csrc1.el2, %csrc1.el1 |
| 194 | %mul1 = fmul float %csrc2.el1, %csrc2.el2 |
; %dst supplies the initial float accumulator and receives the final sum.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 195 | %acc = load float, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 196 | %acc1 = fadd float %mul2, %acc |
| 197 | %acc2 = fadd float %mul1, %acc1 |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 198 | store float %acc2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 199 | ret void |
| 200 | } |
| 201 | |
| 202 | ; GCN-LABEL: {{^}}Diff_Idx_NotAdotproduct |
| 203 | ; GFX900: v_mad_mix_f32 |
Konstantin Zhuravlyov | 0d15b42 | 2018-11-15 02:42:04 +0000 | [diff] [blame] | 204 | ; GFX900: v_mad_mix_f32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 205 | |
| 206 | ; GFX906: v_mad_f32 |
| 207 | ; GFX906: v_mac_f32_e32 |
| 208 | |
Stanislav Mekhanoshin | 1f91cba | 2019-06-14 00:33:31 +0000 | [diff] [blame] | 209 | ; GCN-DL-UNSAFE: v_fma_mix_f32 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 210 | |
| 211 | ; GFX906-CONTRACT: v_fma_mix_f32 |
| 212 | ; GFX906-DENORM-CONTRACT: v_fma_mix_f32 |
; Kernel under test: two float products accumulated into the float at %dst,
; where each product takes one lane from %src1 and one from %src2 but with
; DIFFERENT lane indices (src1 lane 1 * src2 lane 0, src1 lane 0 * src2 lane 1).
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 213 | define amdgpu_kernel void @Diff_Idx_NotAdotproduct(ptr addrspace(1) %src1, |
| 214 | ptr addrspace(1) %src2, |
| 215 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 216 | entry: |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 217 | %src1.vec = load <2 x half>, ptr addrspace(1) %src1 |
| 218 | %src2.vec = load <2 x half>, ptr addrspace(1) %src2 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 219 | |
; Lane 0 of each vector, extended from half to float.
| 220 | %src1.el1 = extractelement <2 x half> %src1.vec, i64 0 |
| 221 | %csrc1.el1 = fpext half %src1.el1 to float |
| 222 | %src2.el1 = extractelement <2 x half> %src2.vec, i64 0 |
| 223 | %csrc2.el1 = fpext half %src2.el1 to float |
| 224 | |
; Lane 1 of each vector, extended from half to float.
| 225 | %src1.el2 = extractelement <2 x half> %src1.vec, i64 1 |
| 226 | %csrc1.el2 = fpext half %src1.el2 to float |
| 227 | %src2.el2 = extractelement <2 x half> %src2.vec, i64 1 |
| 228 | %csrc2.el2 = fpext half %src2.el2 to float |
| 229 | |
; Lane indices are crossed between the two vectors in both products.
| 230 | %mul2 = fmul float %csrc1.el2, %csrc2.el1 |
| 231 | %mul1 = fmul float %csrc1.el1, %csrc2.el2 |
; %dst supplies the initial float accumulator and receives the final sum.
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 232 | %acc = load float, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 233 | %acc1 = fadd float %mul2, %acc |
| 234 | %acc2 = fadd float %mul1, %acc1 |
Nikita Popov | ee8670b | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 235 | store float %acc2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | d9dbd89 | 2018-07-16 18:19:59 +0000 | [diff] [blame] | 236 | ret void |
Konstantin Zhuravlyov | 0d15b42 | 2018-11-15 02:42:04 +0000 | [diff] [blame] | 237 | } |