Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
Matt Arsenault | 5a3299a | 2024-11-26 12:59:15 -0500 | [diff] [blame] | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s |
| 3 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8 %s |
| 4 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX9-NODL %s |
| 5 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck --check-prefixes=GFX9-DL %s |
| 6 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 < %s | FileCheck --check-prefixes=GFX10-DL %s |
| 7 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck --check-prefixes=GFX10-DL %s |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 8 | |
| 9 | ; add(mul(S0.x, S1.x), |
| 10 | ; add (mul (S0.y, S1.y), S3)) -> v_dot2_{I|U}32_{I|U}16(S0, S1, S3) |
| 11 | |
; udot2: for the work-item index returned by llvm.amdgcn.workitem.id.x, loads
; one <2 x i16> from %src1 and one from %src2, zero-extends the elements to
; i32, multiplies them element-wise and stores (%mul2 + load(%dst)) + %mul1
; back to %dst.
; The GFX9-DL and GFX10-DL checks below expect a single v_dot2_u32_u16; the
; GFX7/GFX8 checks expect two v_mad_u32_u24 and the GFX9-NODL checks expect
; two v_mul_u32_u24_sdwa followed by v_add3_u32.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 12 | define amdgpu_kernel void @udot2(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 13 | ; GFX7-LABEL: udot2: |
| 14 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 15 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 16 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 17 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 18 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 19 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 20 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 21 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 22 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 23 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 24 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 25 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 26 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 27 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 28 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 29 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 30 | ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 31 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 32 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 33 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 34 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 35 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 36 | ; GFX7-NEXT: v_mad_u32_u24 v1, v3, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 37 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 38 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 39 | ; GFX7-NEXT: s_endpgm |
| 40 | ; |
| 41 | ; GFX8-LABEL: udot2: |
| 42 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 43 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 44 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 45 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 46 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 47 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 48 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 49 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 50 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 51 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 52 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 53 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 54 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 55 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 56 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 57 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 58 | ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| 59 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 60 | ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 61 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 62 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 63 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 64 | ; GFX8-NEXT: v_mad_u32_u24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 65 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 66 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 67 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 68 | ; GFX8-NEXT: s_endpgm |
| 69 | ; |
| 70 | ; GFX9-NODL-LABEL: udot2: |
| 71 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 72 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 73 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 74 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 75 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 76 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 77 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 78 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 79 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 80 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 81 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 82 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 83 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 84 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 85 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 86 | ; GFX9-NODL-NEXT: s_endpgm |
| 87 | ; |
| 88 | ; GFX9-DL-LABEL: udot2: |
| 89 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 90 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 91 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 92 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 93 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 94 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 95 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 96 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 97 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 98 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 99 | ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, v2, v1, s0 |
| 100 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 101 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 102 | ; |
| 103 | ; GFX10-DL-LABEL: udot2: |
| 104 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 105 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 106 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 107 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 108 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 109 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 110 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 111 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 112 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 113 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 114 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 115 | ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 116 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 117 | ; GFX10-DL-NEXT: v_dot2_u32_u16 v1, v2, v1, s0 |
| 118 | ; GFX10-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 119 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 120 | ptr addrspace(1) %src2, |
| 121 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 122 | entry: |
; Each work-item indexes both source arrays with its own workitem.id.x.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 123 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 124 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 125 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 126 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 127 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 128 | |
; %mul1 = zext(%vec2[0]) * zext(%vec1[0])
| 129 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 130 | %conv = zext i16 %s1.elt1 to i32 |
| 131 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 132 | %conv2 = zext i16 %s2.elt1 to i32 |
| 133 | %mul1 = mul nuw i32 %conv2, %conv |
| 134 | |
; %mul2 = zext(%vec2[1]) * zext(%vec1[1])
| 135 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 136 | %conv3 = zext i16 %s1.elt2 to i32 |
| 137 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 138 | %conv4 = zext i16 %s2.elt2 to i32 |
| 139 | %mul2 = mul nuw i32 %conv4, %conv3 |
| 140 | |
; %s3 is the accumulator read from %dst. It is added to %mul2 first and %mul1
; is added last, i.e. add(add(mul2, s3), mul1); the result overwrites %dst.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 141 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 142 | %add = add i32 %mul2, %s3 |
| 143 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 144 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 145 | ret void |
| 146 | } |
| 147 | |
| 148 | ; TODO: Support this pattern |
| 149 | ; add(S3, |
| 150 | ; add (mul (S0.x, S1.x), mul (S0.y, S1.y))) -> v_dot2_{I|U}32_{I|U}16(S0, S1, S3) |
; udot2_MulMul: for the work-item index returned by llvm.amdgcn.workitem.id.x,
; loads one <2 x i16> from %src1 and one from %src2, zero-extends the elements
; to i32 and multiplies them element-wise. The two products are added together
; first and the i32 loaded from %dst is added last: (%mul2 + %mul1) + %s3.
; The result is stored back to %dst.
; None of the check prefixes below (including GFX9-DL and GFX10-DL) expect
; v_dot2_u32_u16 for this operand order; they expect multiply/mad plus add
; sequences instead.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 151 | define amdgpu_kernel void @udot2_MulMul(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 152 | ; GFX7-LABEL: udot2_MulMul: |
| 153 | ; GFX7: ; %bb.0: ; %entry |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 154 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 155 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 156 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
| 157 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
| 158 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 159 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 160 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 161 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 162 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 163 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
| 164 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 165 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 166 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 167 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 168 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 169 | ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 170 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 171 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 172 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 173 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 174 | ; GFX7-NEXT: v_mul_u32_u24_e32 v0, v0, v2 |
| 175 | ; GFX7-NEXT: v_mad_u32_u24 v0, v3, v1, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 176 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 177 | ; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0 |
| 178 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 179 | ; GFX7-NEXT: s_endpgm |
| 180 | ; |
| 181 | ; GFX8-LABEL: udot2_MulMul: |
| 182 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 183 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 184 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 185 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 186 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 187 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 188 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 189 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 190 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 191 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 192 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 193 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 194 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 195 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 196 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
| 197 | ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3 |
| 198 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| 199 | ; GFX8-NEXT: v_mul_u32_u24_sdwa v1, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 200 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 201 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v2, v1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 202 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 203 | ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s0, v0 |
| 204 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 205 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 206 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 207 | ; GFX8-NEXT: s_endpgm |
| 208 | ; |
| 209 | ; GFX9-NODL-LABEL: udot2_MulMul: |
| 210 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 211 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 212 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 213 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 214 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 215 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 216 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 217 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 218 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 219 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 220 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 221 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 222 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 223 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, v3, s0 |
| 224 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 225 | ; GFX9-NODL-NEXT: s_endpgm |
| 226 | ; |
| 227 | ; GFX9-DL-LABEL: udot2_MulMul: |
| 228 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 229 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 230 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 231 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 232 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 233 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 234 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 235 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 236 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 237 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 238 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 239 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 240 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 241 | ; GFX9-DL-NEXT: v_add3_u32 v1, v1, v3, s0 |
| 242 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 243 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 244 | ; |
| 245 | ; GFX10-DL-LABEL: udot2_MulMul: |
| 246 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 247 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 248 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 249 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 250 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 251 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 252 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 253 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 254 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 255 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 256 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 257 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 258 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v0, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 259 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 260 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 261 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 262 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, v0, s0 |
| 263 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 264 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 265 | ptr addrspace(1) %src2, |
| 266 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 267 | entry: |
; Each work-item indexes both source arrays with its own workitem.id.x.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 268 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 269 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 270 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 271 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 272 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 273 | |
; %mul1 = zext(%vec2[0]) * zext(%vec1[0])
| 274 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 275 | %conv = zext i16 %s1.elt1 to i32 |
| 276 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 277 | %conv2 = zext i16 %s2.elt1 to i32 |
| 278 | %mul1 = mul nuw i32 %conv2, %conv |
| 279 | |
; %mul2 = zext(%vec2[1]) * zext(%vec1[1])
| 280 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 281 | %conv3 = zext i16 %s1.elt2 to i32 |
| 282 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 283 | %conv4 = zext i16 %s2.elt2 to i32 |
| 284 | %mul2 = mul nuw i32 %conv4, %conv3 |
; %s3 is the accumulator read from %dst. The two products are summed first and
; %s3 is added last, i.e. add(add(mul2, mul1), s3); the result overwrites %dst.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 285 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 286 | %add = add i32 %mul2, %mul1 |
| 287 | %add6 = add i32 %add, %s3 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 288 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 289 | ret void |
| 290 | } |
| 291 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 292 | define amdgpu_kernel void @idot2(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 293 | ; GFX7-LABEL: idot2: |
| 294 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 295 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 296 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 297 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 298 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 299 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 300 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 301 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 302 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 303 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 304 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 305 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 306 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 307 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 308 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 309 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 310 | ; GFX7-NEXT: v_bfe_i32 v1, v2, 0, 16 |
| 311 | ; GFX7-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
| 312 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 313 | ; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16 |
| 314 | ; GFX7-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 315 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 316 | ; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 317 | ; GFX7-NEXT: v_mad_i32_i24 v0, v3, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 318 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 319 | ; GFX7-NEXT: s_endpgm |
| 320 | ; |
| 321 | ; GFX8-LABEL: idot2: |
| 322 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 323 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 324 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 325 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 326 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 327 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 328 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 329 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 330 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 331 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 332 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 333 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 334 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 335 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 336 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
| 337 | ; GFX8-NEXT: v_bfe_i32 v1, v3, 0, 16 |
| 338 | ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 16, v3 |
| 339 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| 340 | ; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 16 |
| 341 | ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 342 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 343 | ; GFX8-NEXT: v_mad_i32_i24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 344 | ; GFX8-NEXT: v_mad_i32_i24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 345 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 346 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 347 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 348 | ; GFX8-NEXT: s_endpgm |
| 349 | ; |
| 350 | ; GFX9-NODL-LABEL: idot2: |
| 351 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 352 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 353 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 354 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 355 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 356 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 357 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 358 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 359 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 360 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 361 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 362 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 363 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 364 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 365 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 366 | ; GFX9-NODL-NEXT: s_endpgm |
| 367 | ; |
| 368 | ; GFX9-DL-LABEL: idot2: |
| 369 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 370 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 371 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 372 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 373 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 374 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 375 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 376 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 377 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 378 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 379 | ; GFX9-DL-NEXT: v_dot2_i32_i16 v1, v2, v1, s0 |
| 380 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 381 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 382 | ; |
| 383 | ; GFX10-DL-LABEL: idot2: |
| 384 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 385 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 386 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 387 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 388 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 389 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 390 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 391 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 392 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 393 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 394 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 395 | ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 396 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 397 | ; GFX10-DL-NEXT: v_dot2_i32_i16 v1, v2, v1, s0 |
| 398 | ; GFX10-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 399 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 400 | ptr addrspace(1) %src2, |
| 401 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 402 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 403 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 404 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 405 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 406 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 407 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 408 | |
| 409 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 410 | %conv = sext i16 %s1.elt1 to i32 |
| 411 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 412 | %conv2 = sext i16 %s2.elt1 to i32 |
| 413 | %mul1 = mul nuw i32 %conv2, %conv |
| 414 | |
| 415 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 416 | %conv3 = sext i16 %s1.elt2 to i32 |
| 417 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 418 | %conv4 = sext i16 %s2.elt2 to i32 |
| 419 | %mul2 = mul nuw i32 %conv4, %conv3 |
| 420 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 421 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 422 | %add = add i32 %mul2, %s3 |
| 423 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 424 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 425 | ret void |
| 426 | } |
| 427 | |
; idot2_MixedTypedMul: 2 x i16 dot-product-shaped pattern with mixed extensions.
; Element 0 of each loaded <2 x i16> is sign-extended before its multiply, while
; element 1 is zero-extended before its multiply. Both products are added to the
; i32 loaded from %dst and the sum is stored back to %dst.
; The autogenerated assertions below expect separate signed and unsigned 24-bit
; multiplies (v_mad_i32_i24/v_mad_u32_u24, or SDWA v_mul_* plus v_add3_u32) for
; every RUN line; none of them expects a v_dot2 instruction for this function.
; The assertion lines are produced by utils/update_llc_test_checks.py and should
; be regenerated with that script rather than edited by hand.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 428 | define amdgpu_kernel void @idot2_MixedTypedMul(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 429 | ; GFX7-LABEL: idot2_MixedTypedMul: |
| 430 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 431 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 432 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 433 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 434 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 435 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 436 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 437 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 438 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 439 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 440 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 441 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 442 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 443 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 444 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 445 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 446 | ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 |
| 447 | ; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16 |
| 448 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 449 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 |
| 450 | ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 451 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 452 | ; GFX7-NEXT: v_mad_u32_u24 v1, v3, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 453 | ; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 454 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 455 | ; GFX7-NEXT: s_endpgm |
| 456 | ; |
| 457 | ; GFX8-LABEL: idot2_MixedTypedMul: |
| 458 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 459 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 460 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 461 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 462 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 463 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 464 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 465 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 466 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 467 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 468 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 469 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 470 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 471 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 472 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
| 473 | ; GFX8-NEXT: v_bfe_i32 v1, v3, 0, 16 |
| 474 | ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| 475 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| 476 | ; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 16 |
| 477 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 478 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 479 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 480 | ; GFX8-NEXT: v_mad_i32_i24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 481 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 482 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 483 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 484 | ; GFX8-NEXT: s_endpgm |
| 485 | ; |
| 486 | ; GFX9-NODL-LABEL: idot2_MixedTypedMul: |
| 487 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 488 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 489 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 490 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 491 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 492 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 493 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 494 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 495 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 496 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 497 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 498 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 499 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 500 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 501 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 502 | ; GFX9-NODL-NEXT: s_endpgm |
| 503 | ; |
| 504 | ; GFX9-DL-LABEL: idot2_MixedTypedMul: |
| 505 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 506 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 507 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 508 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 509 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 510 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 511 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 512 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 513 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 514 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 515 | ; GFX9-DL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 516 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 517 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 518 | ; GFX9-DL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 519 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 520 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 521 | ; |
| 522 | ; GFX10-DL-LABEL: idot2_MixedTypedMul: |
| 523 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 524 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 525 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 526 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 527 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 528 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 529 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 530 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 531 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 532 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 533 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 534 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 535 | ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v0, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 536 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 537 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 538 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 539 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 540 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 541 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 542 | ptr addrspace(1) %src2, |
| 543 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 544 | entry: |
; The work-item id selects which <2 x i16> element of %src1 and %src2 is loaded.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 545 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 546 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 547 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 548 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 549 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 550 | |
; Element 0 of both vectors is sign-extended to i32 and multiplied.
| 551 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 552 | %conv = sext i16 %s1.elt1 to i32 |
| 553 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 554 | %conv2 = sext i16 %s2.elt1 to i32 |
| 555 | %mul1 = mul nuw i32 %conv2, %conv |
| 556 | |
; Element 1 of both vectors is zero-extended instead, so the two multiplies
; use different extension kinds.
| 557 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 558 | %conv3 = zext i16 %s1.elt2 to i32 |
| 559 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 560 | %conv4 = zext i16 %s2.elt2 to i32 |
| 561 | %mul2 = mul nuw i32 %conv4, %conv3 |
| 562 | |
; Accumulate both products into the value currently stored at %dst.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 563 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 564 | %add = add i32 %mul2, %s3 |
| 565 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 566 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 567 | ret void |
| 568 | } |
| 569 | |
; udot2_alt_AddOperands: unsigned 2 x i16 dot-product pattern. Both elements of
; each loaded <2 x i16> are zero-extended to i32 and multiplied pairwise; the
; products are added to the i32 loaded from %dst and the sum is stored back.
; The two adds are written with the accumulator-side operand in the other
; position, as add(%s3, %mul2) followed by add(%mul1, %add).
; NOTE(review): presumably this checks that the add operand order does not
; affect pattern matching - confirm against the combine being tested.
; The autogenerated assertions below expect a single v_dot2_u32_u16 for the
; gfx906 and gfx1011/gfx1012 RUN lines, and a pair of v_mad_u32_u24 for the
; gfx700, gfx803 and gfx900 RUN lines.
; The assertion lines are produced by utils/update_llc_test_checks.py and should
; be regenerated with that script rather than edited by hand.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 570 | define amdgpu_kernel void @udot2_alt_AddOperands(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 571 | ; GFX7-LABEL: udot2_alt_AddOperands: |
| 572 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 573 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 574 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 575 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 576 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 577 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 578 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 579 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 580 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 581 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 582 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 583 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 584 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 585 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 586 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 587 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 588 | ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 589 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 590 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 591 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 592 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 593 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 594 | ; GFX7-NEXT: v_mad_u32_u24 v1, v3, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 595 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 596 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 597 | ; GFX7-NEXT: s_endpgm |
| 598 | ; |
| 599 | ; GFX8-LABEL: udot2_alt_AddOperands: |
| 600 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 601 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 602 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 603 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 604 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 605 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 606 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 607 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 608 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 609 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 610 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 611 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 612 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 613 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 614 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 615 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 616 | ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| 617 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 618 | ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 619 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 620 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 621 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 622 | ; GFX8-NEXT: v_mad_u32_u24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 623 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 624 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 625 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 626 | ; GFX8-NEXT: s_endpgm |
| 627 | ; |
| 628 | ; GFX9-NODL-LABEL: udot2_alt_AddOperands: |
| 629 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 630 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 631 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 632 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 633 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 634 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 635 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 636 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 637 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 638 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 639 | ; GFX9-NODL-NEXT: v_and_b32_e32 v3, 0xffff, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 640 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 641 | ; GFX9-NODL-NEXT: v_and_b32_e32 v4, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 642 | ; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| 643 | ; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 644 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 645 | ; GFX9-NODL-NEXT: v_mad_u32_u24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 646 | ; GFX9-NODL-NEXT: v_mad_u32_u24 v1, v4, v3, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 647 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 648 | ; GFX9-NODL-NEXT: s_endpgm |
| 649 | ; |
| 650 | ; GFX9-DL-LABEL: udot2_alt_AddOperands: |
| 651 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 652 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 653 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 654 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 655 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 656 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 657 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 658 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 659 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 660 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 661 | ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, v2, v1, s0 |
| 662 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 663 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 664 | ; |
| 665 | ; GFX10-DL-LABEL: udot2_alt_AddOperands: |
| 666 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 667 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 668 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 669 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 670 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 671 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 672 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 673 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 674 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 675 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 676 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 677 | ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 678 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 679 | ; GFX10-DL-NEXT: v_dot2_u32_u16 v1, v2, v1, s0 |
| 680 | ; GFX10-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 681 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 682 | ptr addrspace(1) %src2, |
| 683 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 684 | entry: |
; The work-item id selects which <2 x i16> element of %src1 and %src2 is loaded.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 685 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 686 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 687 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 688 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 689 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 690 | |
; Element 0 of both vectors is zero-extended to i32 and multiplied.
| 691 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 692 | %conv = zext i16 %s1.elt1 to i32 |
| 693 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 694 | %conv2 = zext i16 %s2.elt1 to i32 |
| 695 | %mul1 = mul nuw i32 %conv2, %conv |
| 696 | |
; Element 1 of both vectors is zero-extended to i32 and multiplied.
| 697 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 698 | %conv3 = zext i16 %s1.elt2 to i32 |
| 699 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 700 | %conv4 = zext i16 %s2.elt2 to i32 |
| 701 | %mul2 = mul nuw i32 %conv4, %conv3 |
| 702 | |
; Accumulate into %dst; note the loaded value is the first add operand here and
; the first product is the first operand of the second add.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 703 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 704 | %add = add i32 %s3, %mul2 |
| 705 | %add6 = add i32 %mul1, %add |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 706 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 707 | ret void |
| 708 | } |
| 709 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 710 | define amdgpu_kernel void @idot2_MixedExt(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 711 | ; GFX7-LABEL: idot2_MixedExt: |
| 712 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 713 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 714 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 715 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 716 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 717 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 718 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 719 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 720 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 721 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 722 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 723 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 724 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 725 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 726 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 727 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 728 | ; GFX7-NEXT: v_bfe_i32 v1, v2, 0, 16 |
| 729 | ; GFX7-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
| 730 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 731 | ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v0 |
| 732 | ; GFX7-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 733 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 734 | ; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 735 | ; GFX7-NEXT: v_mad_i32_i24 v0, v3, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 736 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 737 | ; GFX7-NEXT: s_endpgm |
| 738 | ; |
| 739 | ; GFX8-LABEL: idot2_MixedExt: |
| 740 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 741 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 742 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 743 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 744 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 745 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 746 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 747 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 748 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 749 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 750 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 751 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 752 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 753 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 754 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
| 755 | ; GFX8-NEXT: v_bfe_i32 v1, v3, 0, 16 |
| 756 | ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 16, v3 |
| 757 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| 758 | ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
| 759 | ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 760 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 761 | ; GFX8-NEXT: v_mad_i32_i24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 762 | ; GFX8-NEXT: v_mad_i32_i24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 763 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 764 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 765 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 766 | ; GFX8-NEXT: s_endpgm |
| 767 | ; |
| 768 | ; GFX9-NODL-LABEL: idot2_MixedExt: |
| 769 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 770 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 771 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 772 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 773 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 774 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 775 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 776 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 777 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 778 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 779 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v3, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 780 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 781 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 782 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 783 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 784 | ; GFX9-NODL-NEXT: s_endpgm |
| 785 | ; |
| 786 | ; GFX9-DL-LABEL: idot2_MixedExt: |
| 787 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 788 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 789 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 790 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 791 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 792 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 793 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 794 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 795 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 796 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 797 | ; GFX9-DL-NEXT: v_mul_i32_i24_sdwa v3, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 798 | ; GFX9-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 799 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 800 | ; GFX9-DL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 801 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 802 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 803 | ; |
| 804 | ; GFX10-DL-LABEL: idot2_MixedExt: |
| 805 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 806 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 807 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 808 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 809 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 810 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 811 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 812 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 813 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 814 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 815 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 816 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 817 | ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v0, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 818 | ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 819 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 820 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 821 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 822 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 823 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 824 | ptr addrspace(1) %src2, |
| 825 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 826 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 827 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 828 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 829 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 830 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 831 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 832 | |
| 833 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 834 | %conv = sext i16 %s1.elt1 to i32 |
| 835 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 836 | %conv2 = zext i16 %s2.elt1 to i32 |
| 837 | %mul1 = mul nuw i32 %conv2, %conv |
| 838 | |
| 839 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 840 | %conv3 = sext i16 %s1.elt2 to i32 |
| 841 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 842 | %conv4 = sext i16 %s2.elt2 to i32 |
| 843 | %mul2 = mul nuw i32 %conv4, %conv3 |
| 844 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 845 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 846 | %add = add i32 %mul2, %s3 |
| 847 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 848 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 849 | ret void |
| 850 | } |
| 851 | |
; notudot2_SameVec: negative case for the dot2 pattern. Each product below
; multiplies one zero-extended i16 element by itself (%vec1[0] * %vec1[0] and
; %vec2[1] * %vec2[1]) instead of pairing an element of %vec1 with one of %vec2.
; None of the generated check blocks contains a v_dot2 instruction: the
; GFX7/GFX8 checks use two v_mad_u32_u24, and the GFX9/GFX10 checks use two
; v_mul_u32_u24_sdwa followed by v_add3_u32.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 852 | define amdgpu_kernel void @notudot2_SameVec(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 853 | ; GFX7-LABEL: notudot2_SameVec: |
| 854 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 855 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 856 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 857 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 858 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 859 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 860 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 861 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 862 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 863 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 864 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 865 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 866 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 867 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 868 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 869 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 870 | ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v2 |
| 871 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 872 | ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 873 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 874 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v0, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 875 | ; GFX7-NEXT: v_mad_u32_u24 v0, v1, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 876 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 877 | ; GFX7-NEXT: s_endpgm |
| 878 | ; |
| 879 | ; GFX8-LABEL: notudot2_SameVec: |
| 880 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 881 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 882 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 883 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 884 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 885 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 886 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 887 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 888 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 889 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 890 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 891 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 892 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 893 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 894 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 895 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 896 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 897 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Jay Foad | 4383079 | 2019-10-07 15:33:59 +0100 | [diff] [blame] | 898 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 899 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v0, s0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 900 | ; GFX8-NEXT: v_mad_u32_u24 v2, v1, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 901 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 902 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 903 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 904 | ; GFX8-NEXT: s_endpgm |
| 905 | ; |
| 906 | ; GFX9-NODL-LABEL: notudot2_SameVec: |
| 907 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 908 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 909 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 910 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 911 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 912 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 913 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 914 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 915 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 916 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(1) |
| 917 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 918 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 919 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 920 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 921 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v2, s0, v1 |
| 922 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 923 | ; GFX9-NODL-NEXT: s_endpgm |
| 924 | ; |
| 925 | ; GFX9-DL-LABEL: notudot2_SameVec: |
| 926 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 927 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 928 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 929 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 930 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 931 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 932 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 933 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 934 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 935 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(1) |
| 936 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 937 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 938 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 939 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 940 | ; GFX9-DL-NEXT: v_add3_u32 v1, v2, s0, v1 |
| 941 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 942 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 943 | ; |
| 944 | ; GFX10-DL-LABEL: notudot2_SameVec: |
| 945 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 946 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 947 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 948 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 949 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 950 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 951 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 952 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 953 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 954 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 955 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 956 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
| 957 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 958 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 959 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 960 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 961 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 962 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 963 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 964 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 965 | ptr addrspace(1) %src2, |
| 966 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 967 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 968 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 969 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 970 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 971 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 972 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 973 | |
; First product: element 0 of %vec1, zero-extended, multiplied by itself
; (both extractelements read %vec1, not %vec1 and %vec2).
| 974 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 975 | %conv = zext i16 %s1.elt1 to i32 |
| 976 | %s2.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 977 | %conv2 = zext i16 %s2.elt1 to i32 |
| 978 | %mul1 = mul i32 %conv2, %conv |
| 979 | |
; Second product: element 1 of %vec2, zero-extended, multiplied by itself.
| 980 | %s1.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 981 | %conv3 = zext i16 %s1.elt2 to i32 |
| 982 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 983 | %conv4 = zext i16 %s2.elt2 to i32 |
| 984 | %mul2 = mul i32 %conv4, %conv3 |
| 985 | |
; Add both products to the i32 currently stored at %dst and write it back.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 986 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 987 | %add = add i32 %mul2, %s3 |
| 988 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 989 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 990 | ret void |
| 991 | } |
| 992 | |
; udot2_v4i16: unsigned dot2 pattern fed from <4 x i16> vectors. Only elements
; 0 and 1 of each vector are used; they are zero-extended, multiplied pairwise
; (%vec1[i] * %vec2[i]) and added to the i32 loaded from %dst. The GFX9-DL and
; GFX10-DL checks expect a single v_dot2_u32_u16. The GFX7/GFX8 checks expect
; two v_mad_u32_u24, and the GFX9-NODL checks expect two v_mul_u32_u24_sdwa
; followed by v_add3_u32. Every check block loads one dword per source vector.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 993 | define amdgpu_kernel void @udot2_v4i16(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 994 | ; GFX7-LABEL: udot2_v4i16: |
| 995 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 996 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 997 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 998 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 999 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1000 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1001 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1002 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1003 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 1004 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1005 | ; GFX7-NEXT: s_mov_b64 s[0:1], s[2:3] |
| 1006 | ; GFX7-NEXT: s_mov_b64 s[2:3], s[10:11] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1007 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1008 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 |
| 1009 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 1010 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1011 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1012 | ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1013 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1014 | ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1015 | ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
| 1016 | ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1017 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1018 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1019 | ; GFX7-NEXT: v_mad_u32_u24 v0, v3, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1020 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1021 | ; GFX7-NEXT: s_endpgm |
| 1022 | ; |
| 1023 | ; GFX8-LABEL: udot2_v4i16: |
| 1024 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1025 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1026 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1027 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1028 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1029 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 1030 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1031 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1032 | ; GFX8-NEXT: v_mov_b32_e32 v3, s3 |
| 1033 | ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1034 | ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| 1035 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
| 1036 | ; GFX8-NEXT: flat_load_dword v1, v[2:3] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1037 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1038 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1039 | ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1040 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1041 | ; GFX8-NEXT: v_and_b32_e32 v3, 0xffff, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1042 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 1043 | ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1044 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1045 | ; GFX8-NEXT: v_mad_u32_u24 v0, v1, v0, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1046 | ; GFX8-NEXT: v_mad_u32_u24 v2, v3, v2, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1047 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 1048 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1049 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 1050 | ; GFX8-NEXT: s_endpgm |
| 1051 | ; |
| 1052 | ; GFX9-NODL-LABEL: udot2_v4i16: |
| 1053 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1054 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1055 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1056 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 1057 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1058 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1059 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1060 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1061 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1062 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 1063 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1064 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1065 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1066 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 1067 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1068 | ; GFX9-NODL-NEXT: s_endpgm |
| 1069 | ; |
| 1070 | ; GFX9-DL-LABEL: udot2_v4i16: |
| 1071 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1072 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1073 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1074 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1075 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1076 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1077 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1078 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1079 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1080 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1081 | ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, v2, v1, s0 |
| 1082 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1083 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1084 | ; |
| 1085 | ; GFX10-DL-LABEL: udot2_v4i16: |
| 1086 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1087 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1088 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1089 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1090 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1091 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1092 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1093 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1094 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1095 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 1096 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1097 | ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1098 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1099 | ; GFX10-DL-NEXT: v_dot2_u32_u16 v1, v2, v1, s0 |
| 1100 | ; GFX10-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1101 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1102 | ptr addrspace(1) %src2, |
| 1103 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1104 | entry: |
; Index both sources by the workitem id; the GEP element type is <4 x i16>.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1105 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1106 | %gep1 = getelementptr <4 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 1107 | %vec1 = load <4 x i16>, ptr addrspace(1) %gep1 |
| 1108 | %gep2 = getelementptr <4 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 1109 | %vec2 = load <4 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1110 | |
; First product: zext(%vec2[0]) * zext(%vec1[0]).
| 1111 | %s1.elt1 = extractelement <4 x i16> %vec1, i64 0 |
| 1112 | %conv = zext i16 %s1.elt1 to i32 |
| 1113 | %s2.elt1 = extractelement <4 x i16> %vec2, i64 0 |
| 1114 | %conv2 = zext i16 %s2.elt1 to i32 |
| 1115 | %mul1 = mul i32 %conv2, %conv |
| 1116 | |
; Second product: zext(%vec2[1]) * zext(%vec1[1]). Elements 2 and 3 are unused.
| 1117 | %s1.elt2 = extractelement <4 x i16> %vec1, i64 1 |
| 1118 | %conv3 = zext i16 %s1.elt2 to i32 |
| 1119 | %s2.elt2 = extractelement <4 x i16> %vec2, i64 1 |
| 1120 | %conv4 = zext i16 %s2.elt2 to i32 |
| 1121 | %mul2 = mul i32 %conv4, %conv3 |
| 1122 | |
; Add both products to the i32 currently stored at %dst and write it back.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1123 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1124 | %add = add i32 %mul2, %s3 |
| 1125 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1126 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1127 | ret void |
| 1128 | } |
| 1129 | |
; udot2_v4i16_Hi: unsigned dot2 pattern built from the two high lanes (2 and 3)
; of a pair of <4 x i16> vectors.
; Each workitem loads one <4 x i16> from %src1 and one from %src2, multiplies
; the zero-extended lanes 2 and 3 pairwise, adds both products to the i32 read
; from %dst, and stores the sum back to %dst.
; The GFX9-DL and GFX10-DL checks expect the whole computation to become a
; single v_dot2_u32_u16; the GFX7, GFX8 and GFX9-NODL checks expect separate
; multiply/add instructions. In every variant only the dword at byte offset 4
; of each 8-byte vector is loaded.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1130 | define amdgpu_kernel void @udot2_v4i16_Hi(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1131 | ; GFX7-LABEL: udot2_v4i16_Hi: |
| 1132 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1133 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 1134 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 1135 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1136 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 1137 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1138 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1139 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Joe Nash | 3ce1b96 | 2021-09-08 13:22:15 -0400 | [diff] [blame] | 1140 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1141 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1142 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 offset:4 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1143 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1144 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1145 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 1146 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1147 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1148 | ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1149 | ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
| 1150 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1151 | ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1152 | ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1153 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1154 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1155 | ; GFX7-NEXT: v_mad_u32_u24 v0, v3, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1156 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1157 | ; GFX7-NEXT: s_endpgm |
| 1158 | ; |
| 1159 | ; GFX8-LABEL: udot2_v4i16_Hi: |
| 1160 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1161 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1162 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1163 | ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1164 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1165 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 1166 | ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s0, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1167 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1168 | ; GFX8-NEXT: v_mov_b32_e32 v3, s3 |
| 1169 | ; GFX8-NEXT: v_add_u32_e32 v4, vcc, s2, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1170 | ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| 1171 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v2 |
| 1172 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1173 | ; GFX8-NEXT: flat_load_dword v2, v[0:1] |
| 1174 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v4 |
| 1175 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| 1176 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1177 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1178 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1179 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1180 | ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
| 1181 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1182 | ; GFX8-NEXT: v_and_b32_e32 v3, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1183 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1184 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1185 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1186 | ; GFX8-NEXT: v_mad_u32_u24 v2, v3, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1187 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 1188 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1189 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 1190 | ; GFX8-NEXT: s_endpgm |
| 1191 | ; |
| 1192 | ; GFX9-NODL-LABEL: udot2_v4i16_Hi: |
| 1193 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1194 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1195 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1196 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 1197 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1198 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] offset:4 |
| 1199 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] offset:4 |
| 1200 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1201 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1202 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 1203 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1204 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1205 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1206 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 1207 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1208 | ; GFX9-NODL-NEXT: s_endpgm |
| 1209 | ; |
| 1210 | ; GFX9-DL-LABEL: udot2_v4i16_Hi: |
| 1211 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1212 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1213 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1214 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1215 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1216 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] offset:4 |
| 1217 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] offset:4 |
| 1218 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1219 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1220 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1221 | ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, v2, v1, s0 |
| 1222 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1223 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1224 | ; |
| 1225 | ; GFX10-DL-LABEL: udot2_v4i16_Hi: |
| 1226 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1227 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1228 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1229 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1230 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1231 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1232 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1233 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] offset:4 |
| 1234 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] offset:4 |
| 1235 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 1236 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1237 | ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1238 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1239 | ; GFX10-DL-NEXT: v_dot2_u32_u16 v1, v2, v1, s0 |
| 1240 | ; GFX10-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1241 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1242 | ptr addrspace(1) %src2, |
| 1243 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1244 | entry: |
; Per-workitem addressing: element %idx of each <4 x i16> array.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1245 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1246 | %gep1 = getelementptr <4 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 1247 | %vec1 = load <4 x i16>, ptr addrspace(1) %gep1 |
| 1248 | %gep2 = getelementptr <4 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 1249 | %vec2 = load <4 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1250 | |
; First product: lane 2 of both vectors, zero-extended to i32.
| 1251 | %s1.elt1 = extractelement <4 x i16> %vec1, i64 2 |
| 1252 | %conv = zext i16 %s1.elt1 to i32 |
| 1253 | %s2.elt1 = extractelement <4 x i16> %vec2, i64 2 |
| 1254 | %conv2 = zext i16 %s2.elt1 to i32 |
| 1255 | %mul1 = mul i32 %conv2, %conv |
| 1256 | |
; Second product: lane 3 of both vectors, zero-extended to i32.
| 1257 | %s1.elt2 = extractelement <4 x i16> %vec1, i64 3 |
| 1258 | %conv3 = zext i16 %s1.elt2 to i32 |
| 1259 | %s2.elt2 = extractelement <4 x i16> %vec2, i64 3 |
| 1260 | %conv4 = zext i16 %s2.elt2 to i32 |
| 1261 | %mul2 = mul i32 %conv4, %conv3 |
| 1262 | |
; Accumulate both products onto the current value of %dst and write it back.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1263 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1264 | %add = add i32 %mul2, %s3 |
| 1265 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1266 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1267 | ret void |
| 1268 | } |
| 1269 | |
; notudot2_v4i16_Even: same multiply-accumulate shape as a udot2 pattern, but
; the two products use the even lanes (0 and 2) of the <4 x i16> vectors.
; Each workitem loads one <4 x i16> from %src1 and one from %src2, multiplies
; the zero-extended lanes 0 and 2 pairwise, adds both products to the i32 read
; from %dst, and stores the sum back to %dst.
; None of the check prefixes, including GFX9-DL and GFX10-DL, expect a
; v_dot2_u32_u16 here: they all check for two separate 24-bit multiplies
; followed by the accumulation. In the SDWA variants each multiply reads
; WORD_0 of a different loaded dword.
; NOTE(review): presumably no dot2 is formed because the two lanes do not
; share a dword - confirm against the dot2 combine in the AMDGPU backend.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1270 | define amdgpu_kernel void @notudot2_v4i16_Even(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1271 | ; GFX7-LABEL: notudot2_v4i16_Even: |
| 1272 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1273 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 1274 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 1275 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1276 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1277 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1278 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1279 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1280 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 1281 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1282 | ; GFX7-NEXT: s_mov_b64 s[0:1], s[2:3] |
| 1283 | ; GFX7-NEXT: s_mov_b64 s[2:3], s[10:11] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1284 | ; GFX7-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1285 | ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 |
| 1286 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 1287 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1288 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1289 | ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v3 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 1290 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1291 | ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
| 1292 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
| 1293 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1294 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1295 | ; GFX7-NEXT: v_mad_u32_u24 v1, v1, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1296 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1297 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1298 | ; GFX7-NEXT: s_endpgm |
| 1299 | ; |
| 1300 | ; GFX8-LABEL: notudot2_v4i16_Even: |
| 1301 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1302 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1303 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1304 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1305 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1306 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 1307 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1308 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1309 | ; GFX8-NEXT: v_mov_b32_e32 v3, s3 |
| 1310 | ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1311 | ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| 1312 | ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| 1313 | ; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[2:3] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1314 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1315 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1316 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1317 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1318 | ; GFX8-NEXT: v_and_b32_e32 v3, 0xffff, v3 |
| 1319 | ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 1320 | ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1321 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1322 | ; GFX8-NEXT: v_mad_u32_u24 v1, v3, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1323 | ; GFX8-NEXT: v_mad_u32_u24 v2, v2, v0, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1324 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 1325 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1326 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 1327 | ; GFX8-NEXT: s_endpgm |
| 1328 | ; |
| 1329 | ; GFX9-NODL-LABEL: notudot2_v4i16_Even: |
| 1330 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1331 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1332 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1333 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1334 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1335 | ; GFX9-NODL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1336 | ; GFX9-NODL-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] |
| 1337 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1338 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v4, 0 |
| 1339 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 1340 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1341 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1342 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1343 | ; GFX9-NODL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 1344 | ; GFX9-NODL-NEXT: global_store_dword v4, v0, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1345 | ; GFX9-NODL-NEXT: s_endpgm |
| 1346 | ; |
| 1347 | ; GFX9-DL-LABEL: notudot2_v4i16_Even: |
| 1348 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1349 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1350 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1351 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1352 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1353 | ; GFX9-DL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1354 | ; GFX9-DL-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] |
| 1355 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1356 | ; GFX9-DL-NEXT: v_mov_b32_e32 v4, 0 |
| 1357 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 1358 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1359 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1360 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1361 | ; GFX9-DL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 1362 | ; GFX9-DL-NEXT: global_store_dword v4, v0, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1363 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1364 | ; |
| 1365 | ; GFX10-DL-LABEL: notudot2_v4i16_Even: |
| 1366 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1367 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1368 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1369 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1370 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1371 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1372 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1373 | ; GFX10-DL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1374 | ; GFX10-DL-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] |
| 1375 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 1376 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1377 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 1378 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1379 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1380 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 1381 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1382 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 1383 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1384 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1385 | ptr addrspace(1) %src2, |
| 1386 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1387 | entry: |
; Per-workitem addressing: element %idx of each <4 x i16> array.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1388 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1389 | %gep1 = getelementptr <4 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 1390 | %vec1 = load <4 x i16>, ptr addrspace(1) %gep1 |
| 1391 | %gep2 = getelementptr <4 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 1392 | %vec2 = load <4 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1393 | |
; First product: lane 0 of both vectors, zero-extended to i32.
| 1394 | %s1.elt1 = extractelement <4 x i16> %vec1, i64 0 |
| 1395 | %conv = zext i16 %s1.elt1 to i32 |
| 1396 | %s2.elt1 = extractelement <4 x i16> %vec2, i64 0 |
| 1397 | %conv2 = zext i16 %s2.elt1 to i32 |
| 1398 | %mul1 = mul i32 %conv2, %conv |
| 1399 | |
; Second product: lane 2 of both vectors (skipping lane 1), zero-extended to i32.
| 1400 | %s1.elt2 = extractelement <4 x i16> %vec1, i64 2 |
| 1401 | %conv3 = zext i16 %s1.elt2 to i32 |
| 1402 | %s2.elt2 = extractelement <4 x i16> %vec2, i64 2 |
| 1403 | %conv4 = zext i16 %s2.elt2 to i32 |
| 1404 | %mul2 = mul i32 %conv4, %conv3 |
| 1405 | |
; Accumulate both products onto the current value of %dst and write it back.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1406 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1407 | %add = add i32 %mul2, %s3 |
| 1408 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1409 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1410 | ret void |
| 1411 | } |
| 1412 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1413 | define amdgpu_kernel void @notudot2_v4i16_Middle(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1414 | ; GFX7-LABEL: notudot2_v4i16_Middle: |
| 1415 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1416 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 1417 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 1418 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1419 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1420 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1421 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1422 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1423 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 1424 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1425 | ; GFX7-NEXT: s_mov_b64 s[0:1], s[2:3] |
| 1426 | ; GFX7-NEXT: s_mov_b64 s[2:3], s[10:11] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1427 | ; GFX7-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1428 | ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 |
| 1429 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 1430 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1431 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1432 | ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1433 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1434 | ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1435 | ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
| 1436 | ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1437 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1438 | ; GFX7-NEXT: v_mad_u32_u24 v1, v1, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1439 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1440 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1441 | ; GFX7-NEXT: s_endpgm |
| 1442 | ; |
| 1443 | ; GFX8-LABEL: notudot2_v4i16_Middle: |
| 1444 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1445 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1446 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1447 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1448 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1449 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 1450 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1451 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1452 | ; GFX8-NEXT: v_mov_b32_e32 v3, s3 |
| 1453 | ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1454 | ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| 1455 | ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| 1456 | ; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[2:3] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1457 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1458 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1459 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1460 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1461 | ; GFX8-NEXT: v_and_b32_e32 v3, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1462 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 1463 | ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1464 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1465 | ; GFX8-NEXT: v_mad_u32_u24 v1, v3, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1466 | ; GFX8-NEXT: v_mad_u32_u24 v2, v2, v0, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1467 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 1468 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1469 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 1470 | ; GFX8-NEXT: s_endpgm |
| 1471 | ; |
| 1472 | ; GFX9-NODL-LABEL: notudot2_v4i16_Middle: |
| 1473 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1474 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1475 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1476 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1477 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1478 | ; GFX9-NODL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1479 | ; GFX9-NODL-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] |
| 1480 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1481 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v4, 0 |
| 1482 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 1483 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 1484 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1485 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1486 | ; GFX9-NODL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 1487 | ; GFX9-NODL-NEXT: global_store_dword v4, v0, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1488 | ; GFX9-NODL-NEXT: s_endpgm |
| 1489 | ; |
| 1490 | ; GFX9-DL-LABEL: notudot2_v4i16_Middle: |
| 1491 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1492 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1493 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1494 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1495 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1496 | ; GFX9-DL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1497 | ; GFX9-DL-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] |
| 1498 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1499 | ; GFX9-DL-NEXT: v_mov_b32_e32 v4, 0 |
| 1500 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 1501 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 1502 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1503 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1504 | ; GFX9-DL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 1505 | ; GFX9-DL-NEXT: global_store_dword v4, v0, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1506 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1507 | ; |
| 1508 | ; GFX10-DL-LABEL: notudot2_v4i16_Middle: |
| 1509 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1510 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1511 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1512 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1513 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1514 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1515 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1516 | ; GFX10-DL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1517 | ; GFX10-DL-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] |
| 1518 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 1519 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1520 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 1521 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 1522 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1523 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 1524 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1525 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 1526 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1527 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1528 | ptr addrspace(1) %src2, |
| 1529 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1530 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1531 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1532 | %gep1 = getelementptr <4 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 1533 | %vec1 = load <4 x i16>, ptr addrspace(1) %gep1 |
| 1534 | %gep2 = getelementptr <4 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 1535 | %vec2 = load <4 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1536 | |
| 1537 | %s1.elt1 = extractelement <4 x i16> %vec1, i64 1 |
| 1538 | %conv = zext i16 %s1.elt1 to i32 |
| 1539 | %s2.elt1 = extractelement <4 x i16> %vec2, i64 1 |
| 1540 | %conv2 = zext i16 %s2.elt1 to i32 |
| 1541 | %mul1 = mul i32 %conv2, %conv |
| 1542 | |
| 1543 | %s1.elt2 = extractelement <4 x i16> %vec1, i64 2 |
| 1544 | %conv3 = zext i16 %s1.elt2 to i32 |
| 1545 | %s2.elt2 = extractelement <4 x i16> %vec2, i64 2 |
| 1546 | %conv4 = zext i16 %s2.elt2 to i32 |
| 1547 | %mul2 = mul i32 %conv4, %conv3 |
| 1548 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1549 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1550 | %add = add i32 %mul2, %s3 |
| 1551 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1552 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1553 | ret void |
| 1554 | } |
| 1555 | |
; notudot2_DiffIndex: the two <2 x i16> operands are multiplied with crossed
; element indices (element 0 of one vector with element 1 of the other, and
; vice versa). Both products are zero-extended i32 multiplies and are added to
; the i32 loaded from %dst; the sum is stored back to %dst.
; The autogenerated CHECK lines contain no v_dot2 instruction for any prefix:
; GFX7/GFX8 expect two v_mad_u32_u24, the GFX9/GFX10 prefixes expect two
; v_mul_u32_u24_sdwa followed by v_add3_u32.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1556 | define amdgpu_kernel void @notudot2_DiffIndex(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1557 | ; GFX7-LABEL: notudot2_DiffIndex: |
| 1558 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1559 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 1560 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 1561 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1562 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1563 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1564 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1565 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1566 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1567 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 1568 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1569 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1570 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1571 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 1572 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1573 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 1574 | ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1575 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1576 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 1577 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1578 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1579 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1580 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1581 | ; GFX7-NEXT: v_mad_u32_u24 v0, v3, v2, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1582 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1583 | ; GFX7-NEXT: s_endpgm |
| 1584 | ; |
| 1585 | ; GFX8-LABEL: notudot2_DiffIndex: |
| 1586 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1587 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1588 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1589 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1590 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1591 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 1592 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1593 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1594 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1595 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 1596 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1597 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1598 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1599 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1600 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1601 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1602 | ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| 1603 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| 1604 | ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1605 | ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1606 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1607 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1608 | ; GFX8-NEXT: v_mad_u32_u24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1609 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 1610 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1611 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 1612 | ; GFX8-NEXT: s_endpgm |
| 1613 | ; |
| 1614 | ; GFX9-NODL-LABEL: notudot2_DiffIndex: |
| 1615 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1616 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1617 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1618 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1619 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1620 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1621 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1622 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1623 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1624 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 1625 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_0 |
| 1626 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1627 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1628 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 1629 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1630 | ; GFX9-NODL-NEXT: s_endpgm |
| 1631 | ; |
| 1632 | ; GFX9-DL-LABEL: notudot2_DiffIndex: |
| 1633 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1634 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1635 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1636 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1637 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1638 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1639 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1640 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1641 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1642 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 1643 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_0 |
| 1644 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1645 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1646 | ; GFX9-DL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 1647 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1648 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1649 | ; |
| 1650 | ; GFX10-DL-LABEL: notudot2_DiffIndex: |
| 1651 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1652 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1653 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1654 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1655 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1656 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1657 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1658 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1659 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1660 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 1661 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1662 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 1663 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v0, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_0 |
| 1664 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_1 |
| 1665 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1666 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1667 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, s0, v0 |
| 1668 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1669 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1670 | ptr addrspace(1) %src2, |
| 1671 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1672 | entry: |
; Each work item loads one <2 x i16> from %src1 and one from %src2, both
; indexed by the workitem id in x.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1673 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1674 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 1675 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 1676 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 1677 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1678 | |
; First product: element 0 of %vec1 times element 1 of %vec2 (indices differ).
| 1679 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 1680 | %conv = zext i16 %s1.elt1 to i32 |
| 1681 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 1 |
| 1682 | %conv2 = zext i16 %s2.elt1 to i32 |
| 1683 | %mul1 = mul i32 %conv2, %conv |
| 1684 | |
; Second product: element 1 of %vec1 times element 0 of %vec2 (indices differ).
| 1685 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 1686 | %conv3 = zext i16 %s1.elt2 to i32 |
| 1687 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 0 |
| 1688 | %conv4 = zext i16 %s2.elt2 to i32 |
| 1689 | %mul2 = mul i32 %conv4, %conv3 |
| 1690 | |
; %dst supplies the i32 addend and receives the final sum.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1691 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1692 | %add = add i32 %mul2, %s3 |
| 1693 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1694 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1695 | ret void |
| 1696 | } |
| 1697 | |
; udot2_MultipleUses_add1: matching elements of two <2 x i16> vectors are
; zero-extended and multiplied, then accumulated onto the i32 loaded from %dst.
; The inner sum %add1 (= %mul2 + %s3) has a second use: it is added again to
; the full sum %add2 to form %res, which is stored back to %dst.
; The autogenerated CHECK lines contain no v_dot2 instruction for any prefix:
; GFX7/GFX8 expect two v_mad_u32_u24 plus an add, the GFX9/GFX10 prefixes
; expect v_mul_u32_u24_sdwa, v_mad_u32_u24 and v_add3_u32.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1698 | define amdgpu_kernel void @udot2_MultipleUses_add1(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1699 | ; GFX7-LABEL: udot2_MultipleUses_add1: |
| 1700 | ; GFX7: ; %bb.0: ; %entry |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1701 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 1702 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 1703 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
| 1704 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
| 1705 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1706 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1707 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1708 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1709 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1710 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
| 1711 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 1712 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 1713 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 1714 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1715 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 1716 | ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1717 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1718 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 1719 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1720 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1721 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1722 | ; GFX7-NEXT: v_mad_u32_u24 v1, v3, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1723 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1724 | ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1725 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1726 | ; GFX7-NEXT: s_endpgm |
| 1727 | ; |
| 1728 | ; GFX8-LABEL: udot2_MultipleUses_add1: |
| 1729 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1730 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1731 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1732 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1733 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1734 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 1735 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1736 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1737 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1738 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 1739 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1740 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1741 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1742 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1743 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1744 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1745 | ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| 1746 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 1747 | ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1748 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1749 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1750 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1751 | ; GFX8-NEXT: v_mad_u32_u24 v1, v2, v1, v0 |
| 1752 | ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1753 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 1754 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1755 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 1756 | ; GFX8-NEXT: s_endpgm |
| 1757 | ; |
| 1758 | ; GFX9-NODL-LABEL: udot2_MultipleUses_add1: |
| 1759 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1760 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1761 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1762 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1763 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1764 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1765 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1766 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1767 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1768 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 1769 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1770 | ; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| 1771 | ; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1772 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1773 | ; GFX9-NODL-NEXT: v_mad_u32_u24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1774 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, v3, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1775 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1776 | ; GFX9-NODL-NEXT: s_endpgm |
| 1777 | ; |
| 1778 | ; GFX9-DL-LABEL: udot2_MultipleUses_add1: |
| 1779 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1780 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1781 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1782 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1783 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1784 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1785 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1786 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1787 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1788 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 1789 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1790 | ; GFX9-DL-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| 1791 | ; GFX9-DL-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1792 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1793 | ; GFX9-DL-NEXT: v_mad_u32_u24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1794 | ; GFX9-DL-NEXT: v_add3_u32 v1, v1, v3, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1795 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1796 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1797 | ; |
| 1798 | ; GFX10-DL-LABEL: udot2_MultipleUses_add1: |
| 1799 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1800 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1801 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1802 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1803 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1804 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
| 1805 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1806 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1807 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1808 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 1809 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1810 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
| 1811 | ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 1812 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 1813 | ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 |
| 1814 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1815 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1816 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1817 | ; GFX10-DL-NEXT: v_mad_u32_u24 v0, v3, v0, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1818 | ; GFX10-DL-NEXT: v_add3_u32 v0, v0, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1819 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1820 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1821 | ptr addrspace(1) %src2, |
| 1822 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1823 | entry: |
; Each work item loads one <2 x i16> from %src1 and one from %src2, both
; indexed by the workitem id in x.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1824 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1825 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 1826 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 1827 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 1828 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1829 | |
; First product: element 0 of %vec1 times element 0 of %vec2.
| 1830 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 1831 | %conv = zext i16 %s1.elt1 to i32 |
| 1832 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 1833 | %conv2 = zext i16 %s2.elt1 to i32 |
| 1834 | %mul1 = mul i32 %conv2, %conv |
| 1835 | |
; Second product: element 1 of %vec1 times element 1 of %vec2.
| 1836 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 1837 | %conv3 = zext i16 %s1.elt2 to i32 |
| 1838 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 1839 | %conv4 = zext i16 %s2.elt2 to i32 |
| 1840 | %mul2 = mul i32 %conv4, %conv3 |
| 1841 | |
; %dst supplies the i32 addend and receives the final result.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1842 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1843 | %add1 = add i32 %mul2, %s3 |
| 1844 | %add2 = add i32 %add1, %mul1 |
| 1845 | |
; Second use of %add1: it feeds both %add2 above and %res here.
| 1846 | %res = add i32 %add2, %add1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1847 | store i32 %res, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1848 | ret void |
| 1849 | } |
| 1850 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1851 | define amdgpu_kernel void @idot2_MultipleUses_add1(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1852 | ; GFX7-LABEL: idot2_MultipleUses_add1: |
| 1853 | ; GFX7: ; %bb.0: ; %entry |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1854 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 1855 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 1856 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
| 1857 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
| 1858 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1859 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1860 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1861 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1862 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1863 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
| 1864 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 1865 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 1866 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 1867 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1868 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 1869 | ; GFX7-NEXT: v_bfe_i32 v1, v2, 0, 16 |
| 1870 | ; GFX7-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
| 1871 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 1872 | ; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16 |
| 1873 | ; GFX7-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1874 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1875 | ; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1876 | ; GFX7-NEXT: v_mad_i32_i24 v1, v3, v1, v0 |
| 1877 | ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 1878 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1879 | ; GFX7-NEXT: s_endpgm |
| 1880 | ; |
| 1881 | ; GFX8-LABEL: idot2_MultipleUses_add1: |
| 1882 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1883 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1884 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1885 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1886 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1887 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 1888 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1889 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1890 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1891 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 1892 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1893 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 1894 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1895 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1896 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
| 1897 | ; GFX8-NEXT: v_bfe_i32 v1, v3, 0, 16 |
| 1898 | ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 16, v3 |
| 1899 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| 1900 | ; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 16 |
| 1901 | ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 1902 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1903 | ; GFX8-NEXT: v_mad_i32_i24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1904 | ; GFX8-NEXT: v_mad_i32_i24 v1, v2, v1, v0 |
| 1905 | ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1906 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 1907 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1908 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 1909 | ; GFX8-NEXT: s_endpgm |
| 1910 | ; |
| 1911 | ; GFX9-NODL-LABEL: idot2_MultipleUses_add1: |
| 1912 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1913 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1914 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1915 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1916 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1917 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1918 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1919 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1920 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1921 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 1922 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1923 | ; GFX9-NODL-NEXT: v_ashrrev_i32_e32 v1, 16, v1 |
| 1924 | ; GFX9-NODL-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1925 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1926 | ; GFX9-NODL-NEXT: v_mad_i32_i24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1927 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, v3, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1928 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1929 | ; GFX9-NODL-NEXT: s_endpgm |
| 1930 | ; |
| 1931 | ; GFX9-DL-LABEL: idot2_MultipleUses_add1: |
| 1932 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1933 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1934 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1935 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1936 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1937 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1938 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1939 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1940 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1941 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 1942 | ; GFX9-DL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 1943 | ; GFX9-DL-NEXT: v_ashrrev_i32_e32 v1, 16, v1 |
| 1944 | ; GFX9-DL-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1945 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1946 | ; GFX9-DL-NEXT: v_mad_i32_i24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1947 | ; GFX9-DL-NEXT: v_add3_u32 v1, v1, v3, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1948 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 1949 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1950 | ; |
| 1951 | ; GFX10-DL-LABEL: idot2_MultipleUses_add1: |
| 1952 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 1953 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1954 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 1955 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1956 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1957 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
| 1958 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1959 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 1960 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 1961 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 1962 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1963 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
| 1964 | ; GFX10-DL-NEXT: v_ashrrev_i32_e32 v0, 16, v1 |
| 1965 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 1966 | ; GFX10-DL-NEXT: v_ashrrev_i32_e32 v3, 16, v2 |
| 1967 | ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 1968 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1969 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1970 | ; GFX10-DL-NEXT: v_mad_i32_i24 v0, v3, v0, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1971 | ; GFX10-DL-NEXT: v_add3_u32 v0, v0, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 1972 | ; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 1973 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1974 | ptr addrspace(1) %src2, |
| 1975 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1976 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 1977 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1978 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 1979 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 1980 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 1981 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1982 | |
| 1983 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 1984 | %conv = sext i16 %s1.elt1 to i32 |
| 1985 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 1986 | %conv2 = sext i16 %s2.elt1 to i32 |
| 1987 | %mul1 = mul i32 %conv2, %conv |
| 1988 | |
| 1989 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 1990 | %conv3 = sext i16 %s1.elt2 to i32 |
| 1991 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 1992 | %conv4 = sext i16 %s2.elt2 to i32 |
| 1993 | %mul2 = mul i32 %conv4, %conv3 |
| 1994 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 1995 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 1996 | %add1 = add i32 %mul2, %s3 |
| 1997 | %add2 = add i32 %add1, %mul1 |
| 1998 | |
| 1999 | %res = add i32 %add2, %add1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2000 | store i32 %res, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2001 | ret void |
| 2002 | } |
| 2003 | |
; Test @udot2_MultipleUses_mul1 -- unsigned dot-product-shaped pattern over two
; <2 x i16> vectors in which %mul1 (the product of the element-0 lanes) is
; used twice:
;   mul1 = zext(vec2[0]) * zext(vec1[0])
;   mul2 = zext(vec2[1]) * zext(vec1[1])
;   add2 = (mul2 + (mul1 + s3)) + mul1, where s3 is the i32 loaded from %dst
; The result is stored back to %dst. None of the autogenerated assertions
; below contain a v_dot2 instruction, including those for the two DL check
; prefixes; presumably the second use of %mul1 is what prevents the fold
; (inferred from the test name -- confirm against the combine's source).
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2004 | define amdgpu_kernel void @udot2_MultipleUses_mul1(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2005 | ; GFX7-LABEL: udot2_MultipleUses_mul1: |
| 2006 | ; GFX7: ; %bb.0: ; %entry |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2007 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 2008 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 2009 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
| 2010 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
| 2011 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2012 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2013 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2014 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2015 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2016 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
| 2017 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 2018 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 2019 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 2020 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2021 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 2022 | ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2023 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2024 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 2025 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2026 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2027 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2028 | ; GFX7-NEXT: v_mad_u32_u24 v4, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2029 | ; GFX7-NEXT: v_mad_u32_u24 v1, v3, v1, v4 |
| 2030 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2031 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2032 | ; GFX7-NEXT: s_endpgm |
| 2033 | ; |
| 2034 | ; GFX8-LABEL: udot2_MultipleUses_mul1: |
| 2035 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2036 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2037 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2038 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2039 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2040 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 2041 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2042 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2043 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2044 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 2045 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2046 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2047 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2048 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2049 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2050 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2051 | ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| 2052 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2053 | ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2054 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2055 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2056 | ; GFX8-NEXT: v_mad_u32_u24 v4, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2057 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, v4 |
| 2058 | ; GFX8-NEXT: v_mad_u32_u24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2059 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 2060 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2061 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 2062 | ; GFX8-NEXT: s_endpgm |
| 2063 | ; |
| 2064 | ; GFX9-NODL-LABEL: udot2_MultipleUses_mul1: |
| 2065 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2066 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2067 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2068 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2069 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2070 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2071 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2072 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2073 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2074 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2075 | ; GFX9-NODL-NEXT: v_and_b32_e32 v3, 0xffff, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2076 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2077 | ; GFX9-NODL-NEXT: v_and_b32_e32 v4, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2078 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 2079 | ; GFX9-NODL-NEXT: v_mul_u32_u24_e32 v2, v4, v3 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2080 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2081 | ; GFX9-NODL-NEXT: v_mad_u32_u24 v3, v4, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2082 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, v3, v2 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2083 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2084 | ; GFX9-NODL-NEXT: s_endpgm |
| 2085 | ; |
| 2086 | ; GFX9-DL-LABEL: udot2_MultipleUses_mul1: |
| 2087 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2088 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2089 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2090 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2091 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2092 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2093 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2094 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2095 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2096 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2097 | ; GFX9-DL-NEXT: v_and_b32_e32 v3, 0xffff, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2098 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2099 | ; GFX9-DL-NEXT: v_and_b32_e32 v4, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2100 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 2101 | ; GFX9-DL-NEXT: v_mul_u32_u24_e32 v2, v4, v3 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2102 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2103 | ; GFX9-DL-NEXT: v_mad_u32_u24 v3, v4, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2104 | ; GFX9-DL-NEXT: v_add3_u32 v1, v1, v3, v2 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2105 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2106 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2107 | ; |
| 2108 | ; GFX10-DL-LABEL: udot2_MultipleUses_mul1: |
| 2109 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 2110 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2111 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2112 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2113 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2114 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2115 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2116 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2117 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2118 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 2119 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2120 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | 3eb2281 | 2022-05-16 15:48:11 +0100 | [diff] [blame] | 2121 | ; GFX10-DL-NEXT: v_and_b32_e32 v0, 0xffff, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2122 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 3eb2281 | 2022-05-16 15:48:11 +0100 | [diff] [blame] | 2123 | ; GFX10-DL-NEXT: v_and_b32_e32 v3, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2124 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 2125 | ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v2, v3, v0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 2126 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2127 | ; GFX10-DL-NEXT: v_mad_u32_u24 v0, v3, v0, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2128 | ; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0 |
| 2129 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, v0, v2 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2130 | ; GFX10-DL-NEXT: global_store_dword v3, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2131 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2132 | ptr addrspace(1) %src2, |
| 2133 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2134 | entry: |
; Each work item loads one <2 x i16> from %src1 and one from %src2, both
; indexed by the x work-item id.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2135 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2136 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 2137 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 2138 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 2139 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2140 | |
; %mul1 = product of the zero-extended element-0 lanes.
| 2141 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 2142 | %conv = zext i16 %s1.elt1 to i32 |
| 2143 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 2144 | %conv2 = zext i16 %s2.elt1 to i32 |
| 2145 | %mul1 = mul i32 %conv2, %conv |
| 2146 | |
; %mul2 = product of the zero-extended element-1 lanes.
| 2147 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 2148 | %conv3 = zext i16 %s1.elt2 to i32 |
| 2149 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 2150 | %conv4 = zext i16 %s2.elt2 to i32 |
| 2151 | %mul2 = mul i32 %conv4, %conv3 |
| 2152 | |
; The accumulator %s3 is read from %dst; first use of %mul1.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2153 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2154 | %add0 = add i32 %mul1, %s3 |
| 2155 | |
; %add1 completes the dot-product-shaped chain; %add2 adds %mul1 again,
; giving %mul1 its second use.
| 2156 | %add1 = add i32 %mul2, %add0 |
| 2157 | %add2 = add i32 %add1, %mul1 |
| 2158 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2159 | store i32 %add2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2160 | ret void |
| 2161 | } |
| 2162 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2163 | define amdgpu_kernel void @idot2_MultipleUses_mul1(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2164 | ; GFX7-LABEL: idot2_MultipleUses_mul1: |
| 2165 | ; GFX7: ; %bb.0: ; %entry |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2166 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 2167 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 2168 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
| 2169 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
| 2170 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2171 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2172 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2173 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2174 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2175 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
| 2176 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 2177 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 2178 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 2179 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2180 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 2181 | ; GFX7-NEXT: v_bfe_i32 v1, v2, 0, 16 |
| 2182 | ; GFX7-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
| 2183 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 2184 | ; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16 |
| 2185 | ; GFX7-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2186 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2187 | ; GFX7-NEXT: v_mad_i32_i24 v4, v3, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2188 | ; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, v4 |
| 2189 | ; GFX7-NEXT: v_mad_i32_i24 v0, v3, v1, v0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2190 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2191 | ; GFX7-NEXT: s_endpgm |
| 2192 | ; |
| 2193 | ; GFX8-LABEL: idot2_MultipleUses_mul1: |
| 2194 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2195 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2196 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2197 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2198 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2199 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 2200 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2201 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2202 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2203 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 2204 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2205 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2206 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2207 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2208 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
| 2209 | ; GFX8-NEXT: v_bfe_i32 v1, v3, 0, 16 |
| 2210 | ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 16, v3 |
| 2211 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| 2212 | ; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 16 |
| 2213 | ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
| 2214 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2215 | ; GFX8-NEXT: v_mad_i32_i24 v4, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2216 | ; GFX8-NEXT: v_mad_i32_i24 v0, v0, v3, v4 |
| 2217 | ; GFX8-NEXT: v_mad_i32_i24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2218 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 2219 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2220 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 2221 | ; GFX8-NEXT: s_endpgm |
| 2222 | ; |
| 2223 | ; GFX9-NODL-LABEL: idot2_MultipleUses_mul1: |
| 2224 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2225 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2226 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2227 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2228 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2229 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2230 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2231 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2232 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2233 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(1) |
| 2234 | ; GFX9-NODL-NEXT: v_bfe_i32 v3, v1, 0, 16 |
| 2235 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 2236 | ; GFX9-NODL-NEXT: v_bfe_i32 v4, v2, 0, 16 |
| 2237 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 2238 | ; GFX9-NODL-NEXT: v_mul_i32_i24_e32 v2, v4, v3 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2239 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2240 | ; GFX9-NODL-NEXT: v_mad_i32_i24 v3, v4, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2241 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, v3, v2 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2242 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2243 | ; GFX9-NODL-NEXT: s_endpgm |
| 2244 | ; |
| 2245 | ; GFX9-DL-LABEL: idot2_MultipleUses_mul1: |
| 2246 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2247 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2248 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2249 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2250 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2251 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2252 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2253 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2254 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2255 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(1) |
| 2256 | ; GFX9-DL-NEXT: v_bfe_i32 v3, v1, 0, 16 |
| 2257 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 2258 | ; GFX9-DL-NEXT: v_bfe_i32 v4, v2, 0, 16 |
| 2259 | ; GFX9-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 2260 | ; GFX9-DL-NEXT: v_mul_i32_i24_e32 v2, v4, v3 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2261 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2262 | ; GFX9-DL-NEXT: v_mad_i32_i24 v3, v4, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2263 | ; GFX9-DL-NEXT: v_add3_u32 v1, v1, v3, v2 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2264 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2265 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2266 | ; |
| 2267 | ; GFX10-DL-LABEL: idot2_MultipleUses_mul1: |
| 2268 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 2269 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2270 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2271 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2272 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2273 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2274 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2275 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2276 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2277 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 2278 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2279 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
| 2280 | ; GFX10-DL-NEXT: v_bfe_i32 v0, v1, 0, 16 |
| 2281 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 2282 | ; GFX10-DL-NEXT: v_bfe_i32 v3, v2, 0, 16 |
| 2283 | ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 2284 | ; GFX10-DL-NEXT: v_mul_i32_i24_e32 v2, v3, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2285 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2286 | ; GFX10-DL-NEXT: v_mad_i32_i24 v0, v3, v0, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2287 | ; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0 |
| 2288 | ; GFX10-DL-NEXT: v_add3_u32 v0, v1, v0, v2 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2289 | ; GFX10-DL-NEXT: global_store_dword v3, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2290 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2291 | ptr addrspace(1) %src2, |
| 2292 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2293 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2294 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2295 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 2296 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 2297 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 2298 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2299 | |
| 2300 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 2301 | %conv = sext i16 %s1.elt1 to i32 |
| 2302 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 2303 | %conv2 = sext i16 %s2.elt1 to i32 |
| 2304 | %mul1 = mul i32 %conv2, %conv |
| 2305 | |
| 2306 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 2307 | %conv3 = sext i16 %s1.elt2 to i32 |
| 2308 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 2309 | %conv4 = sext i16 %s2.elt2 to i32 |
| 2310 | %mul2 = mul i32 %conv4, %conv3 |
| 2311 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2312 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2313 | %add0 = add i32 %mul1, %s3 |
| 2314 | |
| 2315 | %add1 = add i32 %mul2, %add0 |
| 2316 | %add2 = add i32 %add1, %mul1 |
| 2317 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2318 | store i32 %add2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2319 | ret void |
| 2320 | } |
| 2321 | |
; Unsigned 2 x i16 multiply-accumulate in which the element-1 product (%mul2)
; is consumed by two adds (%add0 and %add1).
; Each work-item loads one <2 x i16> from %src1 and one from %src2 at index
; workitem.id.x, zero-extends both elements to i32, and stores
; mul2 + (mul2 + s3) + mul1 back to %dst, where s3 is the i32 read from %dst.
; The expected output for every checked target below is built from separate
; mul/mad/add instructions; no v_dot2 instruction appears in any of them.
; The check lines are autogenerated (see the NOTE on the first line of the
; file), so regenerate them with the script instead of editing them by hand.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2322 | define amdgpu_kernel void @udot2_MultipleUses_mul2(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2323 | ; GFX7-LABEL: udot2_MultipleUses_mul2: |
| 2324 | ; GFX7: ; %bb.0: ; %entry |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2325 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 2326 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 2327 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
| 2328 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
| 2329 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2330 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2331 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2332 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2333 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2334 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
| 2335 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 2336 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 2337 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 2338 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2339 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 2340 | ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2341 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2342 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 2343 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2344 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2345 | ; GFX7-NEXT: v_mad_u32_u24 v4, v3, v1, s0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2346 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2347 | ; GFX7-NEXT: v_mad_u32_u24 v1, v3, v1, v4 |
| 2348 | ; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2349 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2350 | ; GFX7-NEXT: s_endpgm |
| 2351 | ; |
| 2352 | ; GFX8-LABEL: udot2_MultipleUses_mul2: |
| 2353 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2354 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2355 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2356 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2357 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2358 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 2359 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2360 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2361 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2362 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 2363 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2364 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2365 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2366 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2367 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2368 | ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2369 | ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| 2370 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2371 | ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2372 | ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2373 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2374 | ; GFX8-NEXT: v_mad_u32_u24 v4, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2375 | ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, v4 |
| 2376 | ; GFX8-NEXT: v_mad_u32_u24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2377 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 2378 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2379 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 2380 | ; GFX8-NEXT: s_endpgm |
| 2381 | ; |
| 2382 | ; GFX9-NODL-LABEL: udot2_MultipleUses_mul2: |
| 2383 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2384 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2385 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2386 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2387 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2388 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2389 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2390 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2391 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2392 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 2393 | ; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 2394 | ; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| 2395 | ; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
| 2396 | ; GFX9-NODL-NEXT: v_mul_u32_u24_e32 v4, v2, v1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2397 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2398 | ; GFX9-NODL-NEXT: v_mad_u32_u24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2399 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v4, v1, v3 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2400 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2401 | ; GFX9-NODL-NEXT: s_endpgm |
| 2402 | ; |
| 2403 | ; GFX9-DL-LABEL: udot2_MultipleUses_mul2: |
| 2404 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2405 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2406 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2407 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2408 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2409 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2410 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2411 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2412 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2413 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 2414 | ; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 2415 | ; GFX9-DL-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| 2416 | ; GFX9-DL-NEXT: v_lshrrev_b32_e32 v2, 16, v2 |
| 2417 | ; GFX9-DL-NEXT: v_mul_u32_u24_e32 v4, v2, v1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2418 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2419 | ; GFX9-DL-NEXT: v_mad_u32_u24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2420 | ; GFX9-DL-NEXT: v_add3_u32 v1, v4, v1, v3 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2421 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2422 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2423 | ; |
| 2424 | ; GFX10-DL-LABEL: udot2_MultipleUses_mul2: |
| 2425 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 2426 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2427 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2428 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2429 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2430 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2431 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2432 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2433 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2434 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 2435 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2436 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
| 2437 | ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 2438 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 2439 | ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 |
| 2440 | ; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 2441 | ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v2, v3, v0 |
Austin Kerbow | 2291bd1 | 2020-11-30 09:06:35 -0800 | [diff] [blame] | 2442 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2443 | ; GFX10-DL-NEXT: v_mad_u32_u24 v0, v3, v0, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2444 | ; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0 |
| 2445 | ; GFX10-DL-NEXT: v_add3_u32 v0, v2, v0, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2446 | ; GFX10-DL-NEXT: global_store_dword v3, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2447 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2448 | ptr addrspace(1) %src2, |
| 2449 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2450 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2451 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2452 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 2453 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 2454 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 2455 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2456 | |
; Element 0 of each vector, zero-extended to i32; %mul1 is their product.
| 2457 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 2458 | %conv = zext i16 %s1.elt1 to i32 |
| 2459 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 2460 | %conv2 = zext i16 %s2.elt1 to i32 |
| 2461 | %mul1 = mul i32 %conv2, %conv |
| 2462 | |
; Element 1 of each vector, zero-extended to i32; %mul2 is their product.
| 2463 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 2464 | %conv3 = zext i16 %s1.elt2 to i32 |
| 2465 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 2466 | %conv4 = zext i16 %s2.elt2 to i32 |
| 2467 | %mul2 = mul i32 %conv4, %conv3 |
| 2468 | |
; %mul2 is added twice (in %add0 and again in %add1), giving it two uses.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2469 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2470 | %add0 = add i32 %mul2, %s3 |
| 2471 | |
| 2472 | %add1 = add i32 %mul2, %add0 |
| 2473 | %add2 = add i32 %add1, %mul1 |
| 2474 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2475 | store i32 %add2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2476 | ret void |
| 2477 | } |
| 2478 | |
; Signed 2 x i16 multiply-accumulate in which the element-1 product (%mul2)
; is consumed by two adds (%add0 and %add1).
; Each work-item loads one <2 x i16> from %src1 and one from %src2 at index
; workitem.id.x, sign-extends both elements to i32, and stores
; mul2 + (mul2 + s3) + mul1 back to %dst, where s3 is the i32 read from %dst.
; The expected output for every checked target below is built from separate
; mul/mad/add instructions; no v_dot2 instruction appears in any of them.
; The check lines are autogenerated (see the NOTE on the first line of the
; file), so regenerate them with the script instead of editing them by hand.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2479 | define amdgpu_kernel void @idot2_MultipleUses_mul2(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2480 | ; GFX7-LABEL: idot2_MultipleUses_mul2: |
| 2481 | ; GFX7: ; %bb.0: ; %entry |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2482 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 2483 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 2484 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
| 2485 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
| 2486 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2487 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2488 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2489 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2490 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2491 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
| 2492 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 2493 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
| 2494 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 2495 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2496 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 2497 | ; GFX7-NEXT: v_bfe_i32 v1, v2, 0, 16 |
| 2498 | ; GFX7-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
| 2499 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 2500 | ; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16 |
| 2501 | ; GFX7-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2502 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2503 | ; GFX7-NEXT: v_mad_i32_i24 v4, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2504 | ; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, v4 |
| 2505 | ; GFX7-NEXT: v_mad_i32_i24 v0, v3, v1, v0 |
Matt Arsenault | d21fc58 | 2025-02-07 12:31:14 +0700 | [diff] [blame] | 2506 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2507 | ; GFX7-NEXT: s_endpgm |
| 2508 | ; |
| 2509 | ; GFX8-LABEL: idot2_MultipleUses_mul2: |
| 2510 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2511 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2512 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2513 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2514 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2515 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 2516 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2517 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2518 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2519 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 2520 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2521 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2522 | ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2523 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2524 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
| 2525 | ; GFX8-NEXT: v_bfe_i32 v1, v3, 0, 16 |
| 2526 | ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 16, v3 |
| 2527 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| 2528 | ; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 16 |
| 2529 | ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 16, v0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 2530 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2531 | ; GFX8-NEXT: v_mad_i32_i24 v4, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2532 | ; GFX8-NEXT: v_mad_i32_i24 v0, v0, v3, v4 |
| 2533 | ; GFX8-NEXT: v_mad_i32_i24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2534 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 2535 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2536 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 2537 | ; GFX8-NEXT: s_endpgm |
| 2538 | ; |
| 2539 | ; GFX9-NODL-LABEL: idot2_MultipleUses_mul2: |
| 2540 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2541 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2542 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2543 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2544 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2545 | ; GFX9-NODL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2546 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2547 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2548 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2549 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
| 2550 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 2551 | ; GFX9-NODL-NEXT: v_ashrrev_i32_e32 v1, 16, v1 |
| 2552 | ; GFX9-NODL-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
| 2553 | ; GFX9-NODL-NEXT: v_mul_i32_i24_e32 v4, v2, v1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2554 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2555 | ; GFX9-NODL-NEXT: v_mad_i32_i24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2556 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v4, v1, v3 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2557 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2558 | ; GFX9-NODL-NEXT: s_endpgm |
| 2559 | ; |
| 2560 | ; GFX9-DL-LABEL: idot2_MultipleUses_mul2: |
| 2561 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2562 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2563 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2564 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2565 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2566 | ; GFX9-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2567 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2568 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2569 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2570 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
| 2571 | ; GFX9-DL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 2572 | ; GFX9-DL-NEXT: v_ashrrev_i32_e32 v1, 16, v1 |
| 2573 | ; GFX9-DL-NEXT: v_ashrrev_i32_e32 v2, 16, v2 |
| 2574 | ; GFX9-DL-NEXT: v_mul_i32_i24_e32 v4, v2, v1 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2575 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2576 | ; GFX9-DL-NEXT: v_mad_i32_i24 v1, v2, v1, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2577 | ; GFX9-DL-NEXT: v_add3_u32 v1, v4, v1, v3 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2578 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2579 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2580 | ; |
| 2581 | ; GFX10-DL-LABEL: idot2_MultipleUses_mul2: |
| 2582 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 2583 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2584 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2585 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2586 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2587 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2588 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2589 | ; GFX10-DL-NEXT: global_load_dword v1, v0, s[0:1] |
| 2590 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[2:3] |
| 2591 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 2592 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2593 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
| 2594 | ; GFX10-DL-NEXT: v_ashrrev_i32_e32 v0, 16, v1 |
| 2595 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
| 2596 | ; GFX10-DL-NEXT: v_ashrrev_i32_e32 v3, 16, v2 |
| 2597 | ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 |
| 2598 | ; GFX10-DL-NEXT: v_mul_i32_i24_e32 v2, v3, v0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2599 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2600 | ; GFX10-DL-NEXT: v_mad_i32_i24 v0, v3, v0, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2601 | ; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0 |
| 2602 | ; GFX10-DL-NEXT: v_add3_u32 v0, v2, v0, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2603 | ; GFX10-DL-NEXT: global_store_dword v3, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2604 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2605 | ptr addrspace(1) %src2, |
| 2606 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2607 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2608 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2609 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 2610 | %vec1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 2611 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 2612 | %vec2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2613 | |
; Element 0 of each vector, sign-extended to i32; %mul1 is their product.
| 2614 | %s1.elt1 = extractelement <2 x i16> %vec1, i64 0 |
| 2615 | %conv = sext i16 %s1.elt1 to i32 |
| 2616 | %s2.elt1 = extractelement <2 x i16> %vec2, i64 0 |
| 2617 | %conv2 = sext i16 %s2.elt1 to i32 |
| 2618 | %mul1 = mul i32 %conv2, %conv |
| 2619 | |
; Element 1 of each vector, sign-extended to i32; %mul2 is their product.
| 2620 | %s1.elt2 = extractelement <2 x i16> %vec1, i64 1 |
| 2621 | %conv3 = sext i16 %s1.elt2 to i32 |
| 2622 | %s2.elt2 = extractelement <2 x i16> %vec2, i64 1 |
| 2623 | %conv4 = sext i16 %s2.elt2 to i32 |
| 2624 | %mul2 = mul i32 %conv4, %conv3 |
| 2625 | |
; %mul2 is added twice (in %add0 and again in %add1), giving it two uses.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2626 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2627 | %add0 = add i32 %mul2, %s3 |
| 2628 | |
| 2629 | %add1 = add i32 %mul2, %add0 |
| 2630 | %add2 = add i32 %add1, %mul1 |
| 2631 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2632 | store i32 %add2, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2633 | ret void |
| 2634 | } |
| 2635 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2636 | define amdgpu_kernel void @udot2_acc16(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2637 | ; GFX7-LABEL: udot2_acc16: |
| 2638 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2639 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 2640 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 2641 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2642 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2643 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2644 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2645 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2646 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2647 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 2648 | ; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2649 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Joe Nash | 3ce1b96 | 2021-09-08 13:22:15 -0400 | [diff] [blame] | 2650 | ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2651 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
| 2652 | ; GFX7-NEXT: buffer_load_ushort v1, off, s[4:7], 0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2653 | ; GFX7-NEXT: s_waitcnt vmcnt(2) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2654 | ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2655 | ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2656 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2657 | ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
Jay Foad | e292650 | 2022-05-16 15:53:03 +0100 | [diff] [blame] | 2658 | ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2659 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2660 | ; GFX7-NEXT: v_mad_u32_u24 v1, v3, v4, v1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2661 | ; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2662 | ; GFX7-NEXT: buffer_store_short v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2663 | ; GFX7-NEXT: s_endpgm |
| 2664 | ; |
| 2665 | ; GFX8-LABEL: udot2_acc16: |
| 2666 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2667 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2668 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2669 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2670 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2671 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 2672 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2673 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2674 | ; GFX8-NEXT: flat_load_dword v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2675 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 2676 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2677 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2678 | ; GFX8-NEXT: flat_load_dword v2, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2679 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 2680 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2681 | ; GFX8-NEXT: flat_load_ushort v4, v[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2682 | ; GFX8-NEXT: s_waitcnt vmcnt(2) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2683 | ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v3 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2684 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2685 | ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2686 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2687 | ; GFX8-NEXT: v_mad_u16 v4, v5, v6, v4 |
| 2688 | ; GFX8-NEXT: v_mad_u16 v2, v3, v2, v4 |
| 2689 | ; GFX8-NEXT: flat_store_short v[0:1], v2 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2690 | ; GFX8-NEXT: s_endpgm |
| 2691 | ; |
| 2692 | ; GFX9-NODL-LABEL: udot2_acc16: |
| 2693 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2694 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2695 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2696 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2697 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2698 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2699 | ; GFX9-NODL-NEXT: global_load_dword v2, v0, s[0:1] |
| 2700 | ; GFX9-NODL-NEXT: global_load_dword v3, v0, s[2:3] |
| 2701 | ; GFX9-NODL-NEXT: global_load_ushort v4, v1, s[6:7] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2702 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(2) |
| 2703 | ; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v0, 16, v2 |
| 2704 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(1) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2705 | ; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v5, 16, v3 |
Austin Kerbow | 2291bd1 | 2020-11-30 09:06:35 -0800 | [diff] [blame] | 2706 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2707 | ; GFX9-NODL-NEXT: v_mad_legacy_u16 v0, v0, v5, v4 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2708 | ; GFX9-NODL-NEXT: v_mad_legacy_u16 v0, v2, v3, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2709 | ; GFX9-NODL-NEXT: global_store_short v1, v0, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2710 | ; GFX9-NODL-NEXT: s_endpgm |
| 2711 | ; |
| 2712 | ; GFX9-DL-LABEL: udot2_acc16: |
| 2713 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2714 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2715 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2716 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2717 | ; GFX9-DL-NEXT: v_mov_b32_e32 v1, 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2718 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2719 | ; GFX9-DL-NEXT: global_load_dword v2, v0, s[0:1] |
| 2720 | ; GFX9-DL-NEXT: global_load_dword v3, v0, s[2:3] |
| 2721 | ; GFX9-DL-NEXT: global_load_ushort v4, v1, s[6:7] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2722 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(2) |
| 2723 | ; GFX9-DL-NEXT: v_lshrrev_b32_e32 v0, 16, v2 |
| 2724 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(1) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2725 | ; GFX9-DL-NEXT: v_lshrrev_b32_e32 v5, 16, v3 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2726 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
Austin Kerbow | da067ed | 2021-11-10 09:59:31 -0800 | [diff] [blame] | 2727 | ; GFX9-DL-NEXT: v_mad_legacy_u16 v0, v0, v5, v4 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2728 | ; GFX9-DL-NEXT: v_mad_legacy_u16 v0, v2, v3, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2729 | ; GFX9-DL-NEXT: global_store_short v1, v0, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2730 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2731 | ; |
| 2732 | ; GFX10-DL-LABEL: udot2_acc16: |
| 2733 | ; GFX10-DL: ; %bb.0: ; %entry |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2734 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2735 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2736 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2737 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 2738 | ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2739 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2740 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2741 | ; GFX10-DL-NEXT: global_load_dword v2, v0, s[0:1] |
| 2742 | ; GFX10-DL-NEXT: global_load_dword v3, v0, s[2:3] |
| 2743 | ; GFX10-DL-NEXT: global_load_ushort v4, v1, s[6:7] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2744 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(2) |
Baptiste Saleil | caf1294 | 2021-04-26 15:48:12 -0400 | [diff] [blame] | 2745 | ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v0, 16, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2746 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
| 2747 | ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v5, 16, v3 |
| 2748 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
Baptiste Saleil | caf1294 | 2021-04-26 15:48:12 -0400 | [diff] [blame] | 2749 | ; GFX10-DL-NEXT: v_mad_u16 v0, v0, v5, v4 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2750 | ; GFX10-DL-NEXT: v_mad_u16 v0, v2, v3, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2751 | ; GFX10-DL-NEXT: global_store_short v1, v0, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2752 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2753 | ptr addrspace(1) %src2, |
| 2754 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2755 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2756 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2757 | %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %src1, i32 %idx |
| 2758 | %v1 = load <2 x i16>, ptr addrspace(1) %gep1 |
| 2759 | %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %src2, i32 %idx |
| 2760 | %v2 = load <2 x i16>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2761 | |
| 2762 | %v1e1 = extractelement <2 x i16> %v1, i64 0 |
| 2763 | %v2e1 = extractelement <2 x i16> %v2, i64 0 |
| 2764 | %mul1 = mul i16 %v1e1, %v2e1 |
| 2765 | |
| 2766 | %v1e2 = extractelement <2 x i16> %v1, i64 1 |
| 2767 | %v2e2 = extractelement <2 x i16> %v2, i64 1 |
| 2768 | %mul2 = mul i16 %v1e2, %v2e2 |
| 2769 | |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2770 | %s2 = load i16, ptr addrspace(1) %dst, align 2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2771 | %add1 = add i16 %mul2, %s2 |
| 2772 | %add2 = add i16 %add1, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2773 | store i16 %add2, ptr addrspace(1) %dst, align 2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2774 | ret void |
| 2775 | } |
| 2776 | |
; notsdot2_sext8: two-element dot product whose inputs are <2 x i8> vectors.
; Each i8 element is sign-extended to i32, the element pairs are multiplied,
; and both products are added to the i32 loaded from %dst before being stored
; back to %dst.
;
; None of the expected outputs below contain a v_dot2 instruction:
;   - the GFX7 and GFX8 runs expect two chained v_mad_i32_i24;
;   - the GFX9-NODL run expects two v_mul_i32_i24_sdwa followed by v_add3_u32;
;   - the GFX9-DL run expects v_perm_b32 on each loaded value, then v_dot4_i32_i8;
;   - the GFX10-DL run expects v_perm_b32 on each loaded value, then v_dot4c_i32_i8.
;
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2777 | define amdgpu_kernel void @notsdot2_sext8(ptr addrspace(1) %src1, |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2778 | ; GFX7-LABEL: notsdot2_sext8: |
| 2779 | ; GFX7: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2780 | ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| 2781 | ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd |
| 2782 | ; GFX7-NEXT: s_mov_b32 s7, 0xf000 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2783 | ; GFX7-NEXT: s_mov_b32 s10, 0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2784 | ; GFX7-NEXT: s_mov_b32 s11, s7 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2785 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2786 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[0:1] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2787 | ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 2788 | ; GFX7-NEXT: v_mov_b32_e32 v1, 0 |
| 2789 | ; GFX7-NEXT: buffer_load_ushort v2, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2790 | ; GFX7-NEXT: s_mov_b64 s[8:9], s[2:3] |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2791 | ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[8:11], 0 addr64 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2792 | ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 2793 | ; GFX7-NEXT: s_mov_b32 s6, -1 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2794 | ; GFX7-NEXT: s_waitcnt vmcnt(1) |
| 2795 | ; GFX7-NEXT: v_bfe_i32 v1, v2, 0, 8 |
| 2796 | ; GFX7-NEXT: v_bfe_i32 v2, v2, 8, 8 |
| 2797 | ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| 2798 | ; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 8 |
| 2799 | ; GFX7-NEXT: v_bfe_i32 v0, v0, 8, 8 |
| 2800 | ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2801 | ; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2802 | ; GFX7-NEXT: v_mad_i32_i24 v0, v3, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2803 | ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2804 | ; GFX7-NEXT: s_endpgm |
| 2805 | ; |
| 2806 | ; GFX8-LABEL: notsdot2_sext8: |
| 2807 | ; GFX8: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2808 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2809 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2810 | ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2811 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2812 | ; GFX8-NEXT: v_mov_b32_e32 v1, s1 |
| 2813 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2814 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| 2815 | ; GFX8-NEXT: flat_load_ushort v3, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2816 | ; GFX8-NEXT: v_mov_b32_e32 v1, s3 |
| 2817 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2818 | ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 2819 | ; GFX8-NEXT: flat_load_ushort v0, v[0:1] |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2820 | ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 |
Tony | 1bc7bff | 2020-10-16 07:09:38 +0000 | [diff] [blame] | 2821 | ; GFX8-NEXT: s_waitcnt vmcnt(1) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2822 | ; GFX8-NEXT: v_bfe_i32 v1, v3, 0, 8 |
| 2823 | ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 8, v3 |
| 2824 | ; GFX8-NEXT: v_bfe_i32 v3, v3, 0, 8 |
| 2825 | ; GFX8-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | 62fd7f7 | 2020-01-07 15:43:46 +0000 | [diff] [blame] | 2826 | ; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 8 |
Stanislav Mekhanoshin | 71ed66d | 2020-05-12 14:18:53 -0700 | [diff] [blame] | 2827 | ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 8, v0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 2828 | ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 8 |
Tony | 1bc7bff | 2020-10-16 07:09:38 +0000 | [diff] [blame] | 2829 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2830 | ; GFX8-NEXT: v_mad_i32_i24 v0, v0, v3, s0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2831 | ; GFX8-NEXT: v_mad_i32_i24 v2, v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2832 | ; GFX8-NEXT: v_mov_b32_e32 v0, s4 |
| 2833 | ; GFX8-NEXT: v_mov_b32_e32 v1, s5 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2834 | ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| 2835 | ; GFX8-NEXT: s_endpgm |
| 2836 | ; |
| 2837 | ; GFX9-NODL-LABEL: notsdot2_sext8: |
| 2838 | ; GFX9-NODL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2839 | ; GFX9-NODL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2840 | ; GFX9-NODL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2841 | ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2842 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2843 | ; GFX9-NODL-NEXT: global_load_ushort v1, v0, s[0:1] |
| 2844 | ; GFX9-NODL-NEXT: global_load_ushort v2, v0, s[2:3] |
| 2845 | ; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2846 | ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2847 | ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2848 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 |
| 2849 | ; GFX9-NODL-NEXT: v_lshrrev_b16_e32 v1, 8, v1 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2850 | ; GFX9-NODL-NEXT: v_lshrrev_b16_e32 v2, 8, v2 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2851 | ; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 2852 | ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2853 | ; GFX9-NODL-NEXT: v_add3_u32 v1, v1, s0, v3 |
| 2854 | ; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2855 | ; GFX9-NODL-NEXT: s_endpgm |
| 2856 | ; |
| 2857 | ; GFX9-DL-LABEL: notsdot2_sext8: |
| 2858 | ; GFX9-DL: ; %bb.0: ; %entry |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2859 | ; GFX9-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2860 | ; GFX9-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2861 | ; GFX9-DL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2862 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2863 | ; GFX9-DL-NEXT: global_load_ushort v1, v0, s[0:1] |
| 2864 | ; GFX9-DL-NEXT: global_load_ushort v2, v0, s[2:3] |
| 2865 | ; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
| 2866 | ; GFX9-DL-NEXT: s_mov_b32 s1, 0xc0c0001 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2867 | ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0 |
Jeffrey Byrnes | 7794e16 | 2023-08-28 15:44:23 -0700 | [diff] [blame] | 2868 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(1) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2869 | ; GFX9-DL-NEXT: v_perm_b32 v1, v1, v1, s1 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2870 | ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2871 | ; GFX9-DL-NEXT: v_perm_b32 v2, v2, v2, s1 |
Jay Foad | 0412f51 | 2019-12-17 16:09:02 +0000 | [diff] [blame] | 2872 | ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2873 | ; GFX9-DL-NEXT: v_dot4_i32_i8 v1, v2, v1, s0 |
| 2874 | ; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7] |
Matt Arsenault | 6f35f0c | 2018-08-31 15:05:06 +0000 | [diff] [blame] | 2875 | ; GFX9-DL-NEXT: s_endpgm |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2876 | ; |
| 2877 | ; GFX10-DL-LABEL: notsdot2_sext8: |
| 2878 | ; GFX10-DL: ; %bb.0: ; %entry |
Matt Arsenault | b1bcb7c | 2024-07-15 09:59:07 +0400 | [diff] [blame] | 2879 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2880 | ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2881 | ; GFX10-DL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2882 | ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
Jeffrey Byrnes | 7794e16 | 2023-08-28 15:44:23 -0700 | [diff] [blame] | 2883 | ; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2884 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2885 | ; GFX10-DL-NEXT: s_clause 0x1 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2886 | ; GFX10-DL-NEXT: global_load_ushort v1, v0, s[0:1] |
| 2887 | ; GFX10-DL-NEXT: global_load_ushort v2, v0, s[2:3] |
| 2888 | ; GFX10-DL-NEXT: s_waitcnt_depctr 0xffe3 |
| 2889 | ; GFX10-DL-NEXT: s_load_dword s0, s[6:7], 0x0 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2890 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) |
Jeffrey Byrnes | 7794e16 | 2023-08-28 15:44:23 -0700 | [diff] [blame] | 2891 | ; GFX10-DL-NEXT: v_perm_b32 v0, v1, v1, 0xc0c0001 |
Matt Arsenault | d2e52ee | 2020-11-10 11:06:59 -0500 | [diff] [blame] | 2892 | ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) |
Jeffrey Byrnes | 7794e16 | 2023-08-28 15:44:23 -0700 | [diff] [blame] | 2893 | ; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc0c0001 |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2894 | ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2895 | ; GFX10-DL-NEXT: v_mov_b32_e32 v2, s0 |
Shilei Tian | d63c2e5 | 2024-01-15 23:11:50 -0500 | [diff] [blame] | 2896 | ; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0 |
Shilei Tian | 6548b63 | 2024-11-08 20:21:16 -0500 | [diff] [blame] | 2897 | ; GFX10-DL-NEXT: global_store_dword v3, v2, s[6:7] |
Stanislav Mekhanoshin | e917b3b | 2019-06-20 16:29:40 +0000 | [diff] [blame] | 2898 | ; GFX10-DL-NEXT: s_endpgm |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2899 | ptr addrspace(1) %src2, |
| 2900 | ptr addrspace(1) nocapture %dst) { |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2901 | entry: |
; Load one <2 x i8> vector from each source buffer, indexed by the value
; returned from llvm.amdgcn.workitem.id.x.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2902 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2903 | %gep1 = getelementptr <2 x i8>, ptr addrspace(1) %src1, i32 %idx |
| 2904 | %vec1 = load <2 x i8>, ptr addrspace(1) %gep1 |
| 2905 | %gep2 = getelementptr <2 x i8>, ptr addrspace(1) %src2, i32 %idx |
| 2906 | %vec2 = load <2 x i8>, ptr addrspace(1) %gep2 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2907 | |
; Element 0 of each vector: sign-extend i8 to i32, then multiply (mul carries nuw).
| 2908 | %s1.elt1 = extractelement <2 x i8> %vec1, i64 0 |
| 2909 | %conv = sext i8 %s1.elt1 to i32 |
| 2910 | %s2.elt1 = extractelement <2 x i8> %vec2, i64 0 |
| 2911 | %conv2 = sext i8 %s2.elt1 to i32 |
| 2912 | %mul1 = mul nuw i32 %conv2, %conv |
| 2913 | |
; Element 1 of each vector: same sign-extend and multiply sequence.
| 2914 | %s1.elt2 = extractelement <2 x i8> %vec1, i64 1 |
| 2915 | %conv3 = sext i8 %s1.elt2 to i32 |
| 2916 | %s2.elt2 = extractelement <2 x i8> %vec2, i64 1 |
| 2917 | %conv4 = sext i8 %s2.elt2 to i32 |
| 2918 | %mul2 = mul nuw i32 %conv4, %conv3 |
| 2919 | |
; Accumulate: %dst = (%mul2 + old value of %dst) + %mul1.
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2920 | %s3 = load i32, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2921 | %add = add i32 %mul2, %s3 |
| 2922 | %add6 = add i32 %add, %mul1 |
Nikita Popov | bdf2fbb | 2022-12-19 12:39:01 +0100 | [diff] [blame] | 2923 | store i32 %add6, ptr addrspace(1) %dst, align 4 |
Farhana Aleen | 3528c80 | 2018-08-21 16:21:15 +0000 | [diff] [blame] | 2924 | ret void |
| 2925 | } |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 2926 | |
; Intrinsic called by the kernels in this file to obtain %idx, the i32 index
; used in their getelementptr instructions.
| 2927 | declare i32 @llvm.amdgcn.workitem.id.x() |