Matt Arsenault | 5660bb6 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 1 | ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=NOSDWA,GCN %s |
| 2 | ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,SDWA,GCN %s |
| 3 | ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_10,SDWA,GCN %s |
| 4 | ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX9_10,SDWA,GCN %s |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 5 | |
; add_shr_i32: 32-bit add whose second operand is the loaded value shifted
; right by 16. The runs with the SDWA peephole enabled expect the shift to be
; folded into the add as a WORD_1 source select; the run with the peephole
; disabled expects a separate shift followed by a plain add.
; GCN-LABEL: {{^}}add_shr_i32:
; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
; The optional "_co" group must follow "v_add" directly. With an extra
; underscore in front of it the pattern could only match "v_add__u32_sdwa"
; or "v_add__co_u32_sdwa", which made the negative check vacuous.
; NOSDWA-NOT: v_add{{(_co)?}}_u32_sdwa

; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9: v_add_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX10: v_add_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %a = load i32, i32 addrspace(1)* %in, align 4
  ; Upper half of %a; this is the shift the peephole is expected to absorb.
  %shr = lshr i32 %a, 16
  %add = add i32 %a, %shr
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}
| 22 | |
; sub_shr_i32: 32-bit subtract whose left operand is the loaded value shifted
; right by 16. The runs with the SDWA peephole enabled expect the shift to be
; folded into the subtract as a WORD_1 source select; the run with the
; peephole disabled expects a separate shift followed by a plain subtract.
; GCN-LABEL: {{^}}sub_shr_i32:
; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_subrev_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
; The optional "_co" group must follow "v_subrev" directly. With an extra
; underscore in front of it the pattern could only match "v_subrev__u32_sdwa"
; or "v_subrev__co_u32_sdwa", which made the negative check vacuous.
; NOSDWA-NOT: v_subrev{{(_co)?}}_u32_sdwa

; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9: v_sub_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10: v_sub_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %a = load i32, i32 addrspace(1)* %in, align 4
  ; Upper half of %a; this is the shift the peephole is expected to absorb.
  %shr = lshr i32 %a, 16
  %sub = sub i32 %shr, %a
  store i32 %sub, i32 addrspace(1)* %out, align 4
  ret void
}
| 38 | |
; mul_shr_i32: multiplies the upper 16 bits (lshr by 16) of two values loaded
; per work-item. The runs with the SDWA peephole enabled expect a single
; v_mul_u32_u24_sdwa selecting WORD_1 for both sources; the run with the
; peephole disabled expects two explicit shifts feeding a plain multiply.
; GCN-LABEL: {{^}}mul_shr_i32:
; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_mul_u32_u24_e32 v{{[0-9]+}}, v[[DST0]], v[[DST1]]
; NOSDWA-NOT: v_mul_u32_u24_sdwa

; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1

define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) #0 {
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr i32, i32 addrspace(1)* %in1, i32 %tid
  %rhs.ptr = getelementptr i32, i32 addrspace(1)* %in2, i32 %tid
  %lhs = load i32, i32 addrspace(1)* %lhs.ptr, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.ptr, align 4
  ; Keep only the high halves; these shifts are the SDWA candidates.
  %lhs.hi = lshr i32 %lhs, 16
  %rhs.hi = lshr i32 %rhs, 16
  %prod = mul i32 %lhs.hi, %rhs.hi
  store i32 %prod, i32 addrspace(1)* %out, align 4
  ret void
}
| 59 | |
; mul_i16: scalar i16 multiply of two values loaded per work-item. The
; positive checks expect a plain v_mul_lo_u16 and the negative checks expect
; no SDWA multiply.
; NOTE(review): the negative checks below still name v_mul_u32_u24_sdwa while
; the positive checks expect v_mul_lo_u16 - confirm whether they should name
; v_mul_lo_u16_sdwa instead.
; GCN-LABEL: {{^}}mul_i16:
; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_u32_u24_sdwa
; GFX89: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX10: v_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_u32_u24_sdwa

define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) #0 {
entry:
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr i16, i16 addrspace(1)* %ina, i32 %tid
  %rhs.ptr = getelementptr i16, i16 addrspace(1)* %inb, i32 %tid
  %lhs = load i16, i16 addrspace(1)* %lhs.ptr, align 4
  %rhs = load i16, i16 addrspace(1)* %rhs.ptr, align 4
  %prod = mul i16 %lhs, %rhs
  store i16 %prod, i16 addrspace(1)* %out, align 4
  ret void
}
| 78 | |
; mul_v2i16: <2 x i16> multiply of two vectors loaded per work-item. The run
; with the SDWA peephole disabled expects the high lanes to be shifted down,
; multiplied, shifted back and or'ed in. The fiji run with the peephole
; expects the high-lane multiply as an SDWA instruction writing WORD_1. The
; gfx900 and gfx1010 runs expect a packed multiply.
; GCN-LABEL: {{^}}mul_v2i16:
; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_mul_lo_u16_e32 v[[DST_MUL:[0-9]+]], v[[DST1]], v[[DST0]]
; NOSDWA: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MUL]]
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[DST_SHL]]
; NOSDWA-NOT: v_mul_u32_u24_sdwa

; VI-DAG: v_mul_lo_u16_e32 v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_mul_lo_u16_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]]

; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
entry:
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %ina, i32 %tid
  %rhs.ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %inb, i32 %tid
  %lhs = load <2 x i16>, <2 x i16> addrspace(1)* %lhs.ptr, align 4
  %rhs = load <2 x i16>, <2 x i16> addrspace(1)* %rhs.ptr, align 4
  %prod = mul <2 x i16> %lhs, %rhs
  store <2 x i16> %prod, <2 x i16> addrspace(1)* %out, align 4
  ret void
}
| 104 | |
; mul_v4i16: <4 x i16> multiply of two vectors loaded per work-item. The fiji
; run with the SDWA peephole expects, for each 32-bit pair, one plain
; low-lane multiply and one SDWA high-lane multiply joined by an or. The
; gfx900 and gfx1010 runs expect two packed multiplies.
; GCN-LABEL: {{^}}mul_v4i16:
; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_u32_u24_sdwa

; VI-DAG: v_mul_lo_u16_e32 v[[DST_MUL0:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_mul_lo_u16_sdwa v[[DST_MUL1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_lo_u16_e32 v[[DST_MUL2:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_mul_lo_u16_sdwa v[[DST_MUL3:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]]
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]]

; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) #0 {
entry:
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %ina, i32 %tid
  %rhs.ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %inb, i32 %tid
  %lhs = load <4 x i16>, <4 x i16> addrspace(1)* %lhs.ptr, align 4
  %rhs = load <4 x i16>, <4 x i16> addrspace(1)* %rhs.ptr, align 4
  %prod = mul <4 x i16> %lhs, %rhs
  store <4 x i16> %prod, <4 x i16> addrspace(1)* %out, align 4
  ret void
}
| 134 | |
; mul_v8i16: <8 x i16> multiply of two vectors loaded per work-item. The fiji
; run with the SDWA peephole expects four plain low-lane multiplies, four
; SDWA high-lane multiplies and four ors pairing them up. The gfx900 and
; gfx1010 runs expect four packed multiplies.
; GCN-LABEL: {{^}}mul_v8i16:
; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_u32_u24_sdwa

; VI-DAG: v_mul_lo_u16_e32 v[[DST_MUL0:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_mul_lo_u16_sdwa v[[DST_MUL1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_lo_u16_e32 v[[DST_MUL2:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_mul_lo_u16_sdwa v[[DST_MUL3:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_lo_u16_e32 v[[DST_MUL4:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_mul_lo_u16_sdwa v[[DST_MUL5:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_lo_u16_e32 v[[DST_MUL6:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_mul_lo_u16_sdwa v[[DST_MUL7:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL6]], v[[DST_MUL7]]
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL4]], v[[DST_MUL5]]
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]]
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]]

; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) #0 {
entry:
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %ina, i32 %tid
  %rhs.ptr = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %inb, i32 %tid
  %lhs = load <8 x i16>, <8 x i16> addrspace(1)* %lhs.ptr, align 4
  %rhs = load <8 x i16>, <8 x i16> addrspace(1)* %rhs.ptr, align 4
  %prod = mul <8 x i16> %lhs, %rhs
  store <8 x i16> %prod, <8 x i16> addrspace(1)* %out, align 4
  ret void
}
| 172 | |
; mul_half: scalar half-precision multiply. Every run expects a plain
; v_mul_f16_e32 and no SDWA form of it.
; GCN-LABEL: {{^}}mul_half:
; NOSDWA: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_f16_sdwa
; SDWA: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_f16_sdwa

define amdgpu_kernel void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) #0 {
entry:
  %lhs = load half, half addrspace(1)* %ina, align 4
  %rhs = load half, half addrspace(1)* %inb, align 4
  %prod = fmul half %lhs, %rhs
  store half %prod, half addrspace(1)* %out, align 4
  ret void
}
| 187 | |
; mul_v2half: <2 x half> multiply. The run with the SDWA peephole disabled
; expects the high lanes to be shifted down, multiplied, shifted back and
; or'ed in. The fiji run with the peephole expects the high-lane multiply as
; an SDWA instruction writing WORD_1. The gfx900 and gfx1010 runs expect a
; packed multiply.
; GCN-LABEL: {{^}}mul_v2half:
; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_mul_f16_e32 v[[DST_MUL:[0-9]+]], v[[DST0]], v[[DST1]]
; NOSDWA: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MUL]]
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[DST_SHL]]
; NOSDWA-NOT: v_mul_f16_sdwa

; VI-DAG: v_mul_f16_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]]

; GFX9_10: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) #0 {
entry:
  %lhs = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
  %rhs = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
  %prod = fmul <2 x half> %lhs, %rhs
  store <2 x half> %prod, <2 x half> addrspace(1)* %out, align 4
  ret void
}
| 210 | |
; mul_v4half: <4 x half> multiply. The fiji run with the SDWA peephole
; expects two SDWA high-lane multiplies and two ors. The gfx900 and gfx1010
; runs expect two packed multiplies.
; GCN-LABEL: {{^}}mul_v4half:
; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_f16_sdwa

; VI-DAG: v_mul_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) #0 {
entry:
  %lhs = load <4 x half>, <4 x half> addrspace(1)* %ina, align 4
  %rhs = load <4 x half>, <4 x half> addrspace(1)* %inb, align 4
  %prod = fmul <4 x half> %lhs, %rhs
  store <4 x half> %prod, <4 x half> addrspace(1)* %out, align 4
  ret void
}
| 235 | |
; mul_v8half: <8 x half> multiply. The fiji run with the SDWA peephole
; expects four SDWA high-lane multiplies and four ors. The gfx900 and
; gfx1010 runs expect four packed multiplies.
; GCN-LABEL: {{^}}mul_v8half:
; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_f16_sdwa

; VI-DAG: v_mul_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) #0 {
entry:
  %lhs = load <8 x half>, <8 x half> addrspace(1)* %ina, align 4
  %rhs = load <8 x half>, <8 x half> addrspace(1)* %inb, align 4
  %prod = fmul <8 x half> %lhs, %rhs
  store <8 x half> %prod, <8 x half> addrspace(1)* %out, align 4
  ret void
}
| 266 | |
; mul_i8: scalar i8 multiply of two values loaded per work-item. The positive
; checks expect a plain v_mul_lo_u16 and the negative checks expect no SDWA
; multiply.
; NOTE(review): the negative checks below still name v_mul_u32_u24_sdwa while
; the positive checks expect v_mul_lo_u16 - confirm whether they should name
; v_mul_lo_u16_sdwa instead.
; GCN-LABEL: {{^}}mul_i8:
; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_u32_u24_sdwa
; GFX89: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX10: v_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_u32_u24_sdwa

define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) #0 {
entry:
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr i8, i8 addrspace(1)* %ina, i32 %tid
  %rhs.ptr = getelementptr i8, i8 addrspace(1)* %inb, i32 %tid
  %lhs = load i8, i8 addrspace(1)* %lhs.ptr, align 4
  %rhs = load i8, i8 addrspace(1)* %rhs.ptr, align 4
  %prod = mul i8 %lhs, %rhs
  store i8 %prod, i8 addrspace(1)* %out, align 4
  ret void
}
| 285 | |
; mul_v2i8: <2 x i8> multiply of two vectors loaded per work-item. The run
; with the SDWA peephole disabled expects 8-bit shifts around a plain
; multiply. The fiji and gfx900 runs with the peephole expect the high-byte
; multiply as an SDWA instruction using BYTE_1 selects. The gfx1010 run
; expects two non-SDWA multiplies, a shift by 8 and an SDWA or.
; GCN-LABEL: {{^}}mul_v2i8:
; NOSDWA: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_u32_u24_sdwa

; VI: v_mul_lo_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1

; GFX9-DAG: v_mul_lo_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
; GFX9-DAG: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16

; GFX9: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD

; GFX10: v_lshlrev_b16 v{{[0-9]+}}, 8, v
; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) #0 {
entry:
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %ina, i32 %tid
  %rhs.ptr = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %inb, i32 %tid
  %lhs = load <2 x i8>, <2 x i8> addrspace(1)* %lhs.ptr, align 4
  %rhs = load <2 x i8>, <2 x i8> addrspace(1)* %rhs.ptr, align 4
  %prod = mul <2 x i8> %lhs, %rhs
  store <2 x i8> %prod, <2 x i8> addrspace(1)* %out, align 4
  ret void
}
| 317 | |
; mul_v4i8: <4 x i8> multiply of two vectors loaded per work-item. The fiji
; and gfx900 runs with the SDWA peephole each expect three SDWA multiplies;
; the gfx1010 run expects four non-SDWA multiplies.
; GCN-LABEL: {{^}}mul_v4i8:
; NOSDWA: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_u32_u24_sdwa

; VI-DAG: v_mul_lo_u16_sdwa
; VI-DAG: v_mul_lo_u16_sdwa
; VI-DAG: v_mul_lo_u16_sdwa

; GFX9-DAG: v_mul_lo_u16_sdwa
; GFX9-DAG: v_mul_lo_u16_sdwa
; GFX9-DAG: v_mul_lo_u16_sdwa

; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16

define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) #0 {
entry:
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %ina, i32 %tid
  %rhs.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %inb, i32 %tid
  %lhs = load <4 x i8>, <4 x i8> addrspace(1)* %lhs.ptr, align 4
  %rhs = load <4 x i8>, <4 x i8> addrspace(1)* %rhs.ptr, align 4
  %prod = mul <4 x i8> %lhs, %rhs
  store <4 x i8> %prod, <4 x i8> addrspace(1)* %out, align 4
  ret void
}
| 350 | |
; mul_v8i8: <8 x i8> multiply of two vectors loaded per work-item. The fiji
; and gfx900 runs with the SDWA peephole each expect six SDWA multiplies; the
; gfx1010 run expects eight non-SDWA multiplies.
; GCN-LABEL: {{^}}mul_v8i8:
; NOSDWA: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_u32_u24_sdwa

; VI-DAG: v_mul_lo_u16_sdwa
; VI-DAG: v_mul_lo_u16_sdwa
; VI-DAG: v_mul_lo_u16_sdwa
; VI-DAG: v_mul_lo_u16_sdwa
; VI-DAG: v_mul_lo_u16_sdwa
; VI-DAG: v_mul_lo_u16_sdwa

; GFX9-DAG: v_mul_lo_u16_sdwa
; GFX9-DAG: v_mul_lo_u16_sdwa
; GFX9-DAG: v_mul_lo_u16_sdwa
; GFX9-DAG: v_mul_lo_u16_sdwa
; GFX9-DAG: v_mul_lo_u16_sdwa
; GFX9-DAG: v_mul_lo_u16_sdwa

; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16
; GFX10-DAG: v_mul_lo_u16

define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) #0 {
entry:
  ; Index both inputs by the work-item id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lhs.ptr = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %ina, i32 %tid
  %rhs.ptr = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %inb, i32 %tid
  %lhs = load <8 x i8>, <8 x i8> addrspace(1)* %lhs.ptr, align 4
  %rhs = load <8 x i8>, <8 x i8> addrspace(1)* %rhs.ptr, align 4
  %prod = mul <8 x i8> %lhs, %rhs
  store <8 x i8> %prod, <8 x i8> addrspace(1)* %out, align 4
  ret void
}
| 393 | |
; sitofp_v2i16_to_v2f16: signed <2 x i16> to <2 x half> conversion. The run
; with the SDWA peephole disabled expects a shift plus two plain conversions;
; the runs with the peephole expect one plain conversion and one SDWA
; conversion reading WORD_1.
; GCN-LABEL: {{^}}sitofp_v2i16_to_v2f16:
; NOSDWA-DAG: v_cvt_f16_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; NOSDWA-DAG: v_cvt_f16_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_cvt_f16_i16_sdwa

; SDWA-DAG: v_cvt_f16_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-DAG: v_cvt_f16_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}} dst_sel:{{(WORD_1|DWORD)?}} dst_unused:UNUSED_PAD src0_sel:WORD_1

; FIXME: It should be possible to avoid the or.
define amdgpu_kernel void @sitofp_v2i16_to_v2f16(<2 x half> addrspace(1)* %r, <2 x i16> addrspace(1)* %a) #0 {
entry:
  %ints = load <2 x i16>, <2 x i16> addrspace(1)* %a
  %halves = sitofp <2 x i16> %ints to <2 x half>
  store <2 x half> %halves, <2 x half> addrspace(1)* %r
  ret void
}
| 413 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 414 | |
| 415 | ; GCN-LABEL: {{^}}mac_v2half: |
| 416 | ; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}} |
| 417 | ; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}} |
Matt Arsenault | 6c29c5a | 2017-07-10 19:53:57 +0000 | [diff] [blame] | 418 | ; NOSDWA: v_mac_f16_e32 v[[DST_MAC:[0-9]+]], v[[DST0]], v[[DST1]] |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 419 | ; NOSDWA: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MAC]] |
Matt Arsenault | 6c29c5a | 2017-07-10 19:53:57 +0000 | [diff] [blame] | 420 | ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[DST_SHL]] |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 421 | ; NOSDWA-NOT: v_mac_f16_sdwa |
| 422 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 423 | ; VI: v_mac_f16_sdwa v[[DST_MAC:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| 424 | ; VI: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MAC]] |
| 425 | |
Stanislav Mekhanoshin | 971cb8b | 2019-05-06 22:27:05 +0000 | [diff] [blame] | 426 | ; GFX9_10: v_pk_mul_f16 v[[DST_MUL:[0-9]+]], v{{[0-9]+}}, v[[SRC:[0-9]+]] |
| 427 | ; GFX9_10: v_pk_add_f16 v{{[0-9]+}}, v[[DST_MUL]], v[[SRC]] |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 428 | |
; Multiply-accumulate on <2 x half>: computes (%a * %b) + %b, so %b is both a
; multiplicand and the addend. Unlike the neighbouring kernels, the loads use
; the pointer arguments directly rather than a work-item-indexed address.
Matt Arsenault | 5660bb6 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 429 | define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) #0 { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 430 | entry: |
| 431 | %a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4 |
| 432 | %b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4 |
| 433 | %mul = fmul <2 x half> %a, %b |
| 434 | %mac = fadd <2 x half> %mul, %b |
| 435 | store <2 x half> %mac, <2 x half> addrspace(1)* %out, align 4 |
| 436 | ret void |
| 437 | } |
| 438 | |
| 439 | ; GCN-LABEL: {{^}}immediate_mul_v2i16: |
; The negative check covers both the older v_mul_u32_u24 selection and the
; v_mul_lo_u16 selection that the positive checks below expect, so it still
; guards the no-peephole run against an SDWA multiply.
| 440 | ; NOSDWA-NOT: v_mul_{{(u32_u24|lo_u16)}}_sdwa |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 441 | ; VI-DAG: v_mov_b32_e32 v[[M321:[0-9]+]], 0x141 |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 442 | ; VI-DAG: v_mul_lo_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}} |
| 443 | ; VI-DAG: v_mul_lo_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M321]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 444 | |
| 445 | ; GFX9: s_mov_b32 s[[IMM:[0-9]+]], 0x141007b |
| 446 | ; GFX9: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, s[[IMM]] |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 447 | |
Stanislav Mekhanoshin | 971cb8b | 2019-05-06 22:27:05 +0000 | [diff] [blame] | 448 | ; GFX10: v_pk_mul_lo_u16 v{{[0-9]+}}, 0x141007b, v{{[0-9]+}} |
| 449 | |
; Multiplies a work-item-indexed <2 x i16> by the constant vector <123, 321>
; (0x7b and 0x141, packed as 0x141007b in the packed-multiply checks) and
; stores the product through %out.
Matt Arsenault | 5660bb6 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 450 | define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 451 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 452 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| 453 | %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %idx |
| 454 | %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep, align 4 |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 455 | %mul = mul <2 x i16> %a, <i16 123, i16 321> |
| 456 | store <2 x i16> %mul, <2 x i16> addrspace(1)* %out, align 4 |
| 457 | ret void |
| 458 | } |
| 459 | |
| 460 | ; Double use of same src - should not convert it |
| 461 | ; GCN-LABEL: {{^}}mulmul_v2i16: |
| 462 | ; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} |
| 463 | ; NOSDWA: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 464 | ; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 465 | ; NOSDWA: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} |
| 466 | ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} |
; The negative check covers both the older v_mul_u32_u24 selection and the
; v_mul_lo_u16 selection expected by the positive check above, so an SDWA
; multiply in the no-peephole run is actually detected.
| 467 | ; NOSDWA-NOT: v_mul_{{(u32_u24|lo_u16)}}_sdwa |
| 468 | |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 469 | ; VI: v_mul_lo_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 470 | |
Stanislav Mekhanoshin | 971cb8b | 2019-05-06 22:27:05 +0000 | [diff] [blame] | 471 | ; GFX9_10: v_pk_mul_lo_u16 v[[DST1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} |
| 472 | ; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v[[DST1]], v{{[0-9]+}} |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 473 | |
; Computes (%a * %b) * %b on work-item-indexed <2 x i16> vectors, so %b feeds
; both multiplies, and stores the result through %out.
Matt Arsenault | 5660bb6 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 474 | define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 475 | entry: |
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 476 | %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| 477 | %gepa = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %ina, i32 %idx |
| 478 | %gepb = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %inb, i32 %idx |
| 479 | %a = load <2 x i16>, <2 x i16> addrspace(1)* %gepa, align 4 |
| 480 | %b = load <2 x i16>, <2 x i16> addrspace(1)* %gepb, align 4 |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 481 | %mul = mul <2 x i16> %a, %b |
| 482 | %mul2 = mul <2 x i16> %mul, %b |
| 483 | store <2 x i16> %mul2, <2 x i16> addrspace(1)* %out, align 4 |
| 484 | ret void |
| 485 | } |
| 486 | |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 487 | ; GCN-LABEL: {{^}}add_bb_v2i16: |
; The optional group carries its own trailing underscore so the pattern
; matches v_add_u32_sdwa and v_add_co_u32_sdwa. The earlier spelling put an
; underscore on both sides of the group and could only match a doubled
; underscore, which made this negative check unable to fire.
Matt Arsenault | 9a7e29a | 2017-11-29 02:25:14 +0000 | [diff] [blame] | 488 | ; NOSDWA-NOT: v_add_{{(co_)?}}u32_sdwa |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 489 | |
Dmitry Preobrazhensky | a0342dc | 2017-11-20 18:24:21 +0000 | [diff] [blame] | 490 | ; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 491 | |
Stanislav Mekhanoshin | 971cb8b | 2019-05-06 22:27:05 +0000 | [diff] [blame] | 492 | ; GFX9_10: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 493 | |
; <2 x i16> add whose loads, add and store each sit in a separate basic block
; (entry, add_label, store_label) joined by unconditional branches.
Matt Arsenault | 5660bb6 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 494 | define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 495 | entry: |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 496 | %a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4 |
| 497 | %b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4 |
| 498 | br label %add_label |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 499 | add_label: |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 500 | %add = add <2 x i16> %a, %b |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 501 | br label %store_label |
| 502 | store_label: |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 503 | store <2 x i16> %add, <2 x i16> addrspace(1)* %out, align 4 |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 504 | ret void |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 505 | } |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 506 | |
| 507 | |
| 508 | ; Check that "pulling out" SDWA operands works correctly. |
| 509 | ; GCN-LABEL: {{^}}pulled_out_test: |
Stanislav Mekhanoshin | 465a1ff | 2017-06-20 18:32:42 +0000 | [diff] [blame] | 510 | ; NOSDWA-DAG: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 511 | ; NOSDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} |
Stanislav Mekhanoshin | 465a1ff | 2017-06-20 18:32:42 +0000 | [diff] [blame] | 512 | ; NOSDWA-DAG: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 513 | ; NOSDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} |
| 514 | ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} |
| 515 | ; NOSDWA-NOT: v_and_b32_sdwa |
| 516 | ; NOSDWA-NOT: v_or_b32_sdwa |
| 517 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 518 | ; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
Stanislav Mekhanoshin | 971cb8b | 2019-05-06 22:27:05 +0000 | [diff] [blame] | 519 | ; GFX9_10-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| 520 | ; GFX89-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} |
| 521 | ; |
| 522 | ; GFX10-DAG: v_lshrrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 523 | ; |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 524 | ; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
Stanislav Mekhanoshin | 971cb8b | 2019-05-06 22:27:05 +0000 | [diff] [blame] | 525 | ; GFX9_10-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| 526 | ; GFX89-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} |
| 527 | ; |
| 528 | ; GFX10-DAG: v_lshrrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 529 | ; |
| 530 | ; GFX89: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 531 | ; |
Matt Arsenault | 190a17b | 2019-10-08 17:36:38 +0000 | [diff] [blame] | 532 | ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| 533 | ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| 534 | ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 535 | ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 536 | |
; Loads one <8 x i8> from %sourceA, takes it apart into eight scalars,
; reassembles them in the original element order, and stores the rebuilt
; vector to %destValues at the same element index.
Matt Arsenault | 5660bb6 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 537 | define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) #0 { |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 538 | entry: |
; NOTE(review): this constant shift discards non-zero bits of 15 while marked
; `exact`, which the LangRef defines as poison - confirm the index is meant to
; be written this way.
| 539 | %idxprom = ashr exact i64 15, 32 |
| 540 | %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %sourceA, i64 %idxprom |
| 541 | %tmp = load <8 x i8>, <8 x i8> addrspace(1)* %arrayidx, align 8 |
| 542 | |
; Split the loaded vector into its eight byte elements.
| 543 | %tmp1 = extractelement <8 x i8> %tmp, i32 0 |
| 544 | %tmp2 = extractelement <8 x i8> %tmp, i32 1 |
| 545 | %tmp3 = extractelement <8 x i8> %tmp, i32 2 |
| 546 | %tmp4 = extractelement <8 x i8> %tmp, i32 3 |
| 547 | %tmp5 = extractelement <8 x i8> %tmp, i32 4 |
| 548 | %tmp6 = extractelement <8 x i8> %tmp, i32 5 |
| 549 | %tmp7 = extractelement <8 x i8> %tmp, i32 6 |
| 550 | %tmp8 = extractelement <8 x i8> %tmp, i32 7 |
| 551 | |
; Re-pair adjacent elements into four <2 x i8> vectors: (0,1) (2,3) (4,5) (6,7).
| 552 | %tmp9 = insertelement <2 x i8> undef, i8 %tmp1, i32 0 |
| 553 | %tmp10 = insertelement <2 x i8> %tmp9, i8 %tmp2, i32 1 |
| 554 | %tmp11 = insertelement <2 x i8> undef, i8 %tmp3, i32 0 |
| 555 | %tmp12 = insertelement <2 x i8> %tmp11, i8 %tmp4, i32 1 |
| 556 | %tmp13 = insertelement <2 x i8> undef, i8 %tmp5, i32 0 |
| 557 | %tmp14 = insertelement <2 x i8> %tmp13, i8 %tmp6, i32 1 |
| 558 | %tmp15 = insertelement <2 x i8> undef, i8 %tmp7, i32 0 |
| 559 | %tmp16 = insertelement <2 x i8> %tmp15, i8 %tmp8, i32 1 |
| 560 | |
; Concatenate the pairs back together with in-order shuffle masks
; (2+2 -> 4, then 4+4 -> 8), preserving the original element order.
| 561 | %tmp17 = shufflevector <2 x i8> %tmp10, <2 x i8> %tmp12, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| 562 | %tmp18 = shufflevector <2 x i8> %tmp14, <2 x i8> %tmp16, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| 563 | %tmp19 = shufflevector <4 x i8> %tmp17, <4 x i8> %tmp18, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
Matt Arsenault | 6c29c5a | 2017-07-10 19:53:57 +0000 | [diff] [blame] | 564 | |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 565 | %arrayidx5 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %destValues, i64 %idxprom |
| 566 | store <8 x i8> %tmp19, <8 x i8> addrspace(1)* %arrayidx5, align 8 |
| 567 | ret void |
| 568 | } |
Matt Arsenault | 8ae38bc | 2017-12-05 20:32:01 +0000 | [diff] [blame] | 569 | |
Matt Arsenault | c24d5e2 | 2018-02-08 22:46:38 +0000 | [diff] [blame] | 570 | ; GCN-LABEL: {{^}}sdwa_crash_inlineasm_def: |
Matt Arsenault | 8ae38bc | 2017-12-05 20:32:01 +0000 | [diff] [blame] | 571 | ; GCN: s_mov_b32 s{{[0-9]+}}, 0xffff |
| 572 | ; GCN: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} |
Nicolai Haehnle | a9cc92c | 2018-11-30 22:55:29 +0000 | [diff] [blame] | 573 | ; |
| 574 | ; TODO: Why is the constant not peepholed into the v_or_b32_e32? |
| 575 | ; |
Jay Foad | 8a52bd8 | 2021-11-19 16:40:29 +0000 | [diff] [blame] | 576 | ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, 0x10000, |
Nicolai Haehnle | a9cc92c | 2018-11-30 22:55:29 +0000 | [diff] [blame] | 577 | ; SDWA: v_or_b32_e32 v{{[0-9]+}}, 0x10000, |
; Kernel whose 16-bit mask (and with 65535) is produced by inline asm rather
; than by an IR `and`; the asm result is then or'ed with 65536 and inserted
; into a loop-carried <2 x i32>. The function has no `ret`, because bb11
; always branches back to bb1. Presumably a regression test for a peephole
; crash, going by the function name - confirm against the commit history.
Matt Arsenault | 8ae38bc | 2017-12-05 20:32:01 +0000 | [diff] [blame] | 578 | define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 { |
| 579 | bb: |
| 580 | br label %bb1 |
| 581 | |
| 582 | bb1: ; preds = %bb11, %bb |
| 583 | %tmp = phi <2 x i32> [ %tmp12, %bb11 ], [ undef, %bb ] |
; The condition is the constant true, so the edge to bb11 is never taken here.
| 584 | br i1 true, label %bb2, label %bb11 |
| 585 | |
| 586 | bb2: ; preds = %bb1 |
; Constraints "=v,s,v": VGPR result, the constant 65535 in an SGPR, and an
; undef VGPR input.
| 587 | %tmp3 = call i32 asm "v_and_b32_e32 $0, $1, $2", "=v,s,v"(i32 65535, i32 undef) #1 |
| 588 | %tmp5 = or i32 %tmp3, 65536 |
| 589 | %tmp6 = insertelement <2 x i32> %tmp, i32 %tmp5, i64 0 |
| 590 | br label %bb11 |
| 591 | |
| 592 | bb11: ; preds = %bb1, %bb2 |
| 593 | %tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ] |
Stanislav Mekhanoshin | c8f78f8 | 2019-04-05 20:11:32 +0000 | [diff] [blame] | 594 | store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef |
Matt Arsenault | 8ae38bc | 2017-12-05 20:32:01 +0000 | [diff] [blame] | 595 | br label %bb1 |
| 596 | } |
Matt Arsenault | 5660bb6 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 597 | |
; Intrinsic used by several kernels above to obtain the per-work-item index
; that offsets their loads.
Jay Foad | fdaa2d0 | 2021-02-19 15:04:03 +0000 | [diff] [blame] | 598 | declare i32 @llvm.amdgcn.workitem.id.x() |
| 599 | |
; Attribute group #0 is attached to the kernels above and selects the
; "preserve-sign" denormal mode for both the output and input components.
Matt Arsenault | 5660bb6 | 2019-11-18 16:48:07 +0530 | [diff] [blame] | 600 | attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" } |