Fangrui Song | 9e9907f | 2024-01-16 21:54:58 -0800 | [diff] [blame] | 1 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s |
Fabian Ritter | a33a84e | 2025-02-13 15:17:12 +0100 | [diff] [blame] | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942 %s |
Stanislav Mekhanoshin | a8d9d50 | 2021-02-17 13:37:46 -0800 | [diff] [blame] | 3 | |
| 4 | declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32) |
| 5 | declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32) |
| 6 | declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16>, <4 x i16>, <4 x float>, i32, i32, i32) |
| 7 | declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32) |
| 8 | declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16>, <4 x i16>, <4 x float>, i32, i32, i32) |
| 9 | declare <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double, double, <4 x double>, i32, i32, i32) |
| 10 | declare double @llvm.amdgcn.mfma.f64.4x4x4f64(double, double, double, i32, i32, i32) |
| 11 | declare i32 @llvm.amdgcn.workitem.id.x() |
| 12 | |
; llvm.amdgcn.mfma.f32.32x32x4bf16.1k with a <32 x float> accumulator that is
; loaded from %arg and whose result is stored back to %arg. The A/B operands
; are the constants i64 1 and i64 2 bitcast to <4 x i16>; the three trailing
; immediates are 1, 2, 3.
;
; Expected code: the accumulator is copied from SGPRs into AGPRs with 32
; v_accvgpr_write_b32, one MFMA is emitted with cbsz:1 abid:2 blgp:3 (the
; mnemonic differs between the gfx90a and gfx942 runs), no v_accvgpr_read_b32
; appears between the MFMA and the stores, and the result is written with 8
; global_store_dwordx4 directly from AGPRs.
;
; The upper register index of each VGPR pair is matched with [0-9]+ (one or
; more digits) on both targets so the check does not depend on the allocator
; picking a single-digit register.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x4bf16_1k:
; GCN-DAG: s_load_dwordx16
; GCN-DAG: s_load_dwordx16
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GFX942: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 {
bb:
  %in.1 = load <32 x float>, ptr addrspace(1) %arg
  %a = bitcast i64 1 to <4 x i16>
  %b = bitcast i64 2 to <4 x i16>
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <32 x float> %in.1, i32 1, i32 2, i32 3)
  store <32 x float> %mai.1, ptr addrspace(1) %arg
  ret void
}
| 32 | |
; llvm.amdgcn.mfma.f32.16x16x4bf16.1k with a <16 x float> accumulator that is
; loaded from %arg and whose result is stored back to %arg. The A/B operands
; are the constants i64 1 and i64 2 bitcast to <4 x i16>; the three trailing
; immediates are 1, 2, 3.
;
; Expected code: 16 v_accvgpr_write_b32 from SGPRs, one MFMA with
; cbsz:1 abid:2 blgp:3 (mnemonic differs between the gfx90a and gfx942 runs),
; no v_accvgpr_read_b32 between the MFMA and the stores, and 4
; global_store_dwordx4 directly from AGPRs.
;
; The upper register index of each VGPR pair is matched with [0-9]+ (one or
; more digits) on both targets so the check does not depend on the allocator
; picking a single-digit register.
; GCN-LABEL: {{^}}test_mfma_f32_16x16x4bf16_1k:
; GCN-DAG: s_load_dwordx16
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GFX942: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #0 {
bb:
  %in.1 = load <16 x float>, ptr addrspace(1) %arg
  %a = bitcast i64 1 to <4 x i16>
  %b = bitcast i64 2 to <4 x i16>
  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3)
  store <16 x float> %mai.1, ptr addrspace(1) %arg
  ret void
}
| 51 | |
; llvm.amdgcn.mfma.f32.4x4x4bf16.1k with a <4 x float> accumulator that is
; loaded from %arg and whose result is stored back to %arg. The A/B operands
; are the constants i64 1 and i64 2 bitcast to <4 x i16>; the three trailing
; immediates are 1, 2, 3.
;
; Expected code: 4 v_accvgpr_write_b32 from SGPRs, one MFMA with
; cbsz:1 abid:2 blgp:3 (mnemonic differs between the gfx90a and gfx942 runs)
; whose destination AGPR range is captured as RES, no v_accvgpr_read_b32
; between the MFMA and the store, and a single global_store_dwordx4 that
; stores RES directly.
;
; The upper register index of each VGPR pair is matched with [0-9]+ (one or
; more digits) on both targets so the check does not depend on the allocator
; picking a single-digit register.
; GCN-LABEL: {{^}}test_mfma_f32_4x4x4bf16_1k:
; GCN-DAG: s_load_dwordx4
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GFX942: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 {
bb:
  %in.1 = load <4 x float>, ptr addrspace(1) %arg
  %a = bitcast i64 1 to <4 x i16>
  %b = bitcast i64 2 to <4 x i16>
  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3)
  store <4 x float> %mai.1, ptr addrspace(1) %arg
  ret void
}
| 70 | |
; llvm.amdgcn.mfma.f32.32x32x8bf16.1k with a <16 x float> accumulator that is
; loaded from %arg and whose result is stored back to %arg. The A/B operands
; are the constants i64 1 and i64 2 bitcast to <4 x i16>; the three trailing
; immediates are 1, 2, 3.
;
; Expected code: 16 v_accvgpr_write_b32 from SGPRs, one MFMA with
; cbsz:1 abid:2 blgp:3 (mnemonic differs between the gfx90a and gfx942 runs),
; no v_accvgpr_read_b32 between the MFMA and the stores, and 4
; global_store_dwordx4 directly from AGPRs.
;
; The upper register index of each VGPR pair is matched with [0-9]+ (one or
; more digits) on both targets so the check does not depend on the allocator
; picking a single-digit register.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x8bf16_1k:
; GCN-DAG: s_load_dwordx16
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GFX942: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #0 {
bb:
  %in.1 = load <16 x float>, ptr addrspace(1) %arg
  %a = bitcast i64 1 to <4 x i16>
  %b = bitcast i64 2 to <4 x i16>
  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3)
  store <16 x float> %mai.1, ptr addrspace(1) %arg
  ret void
}
| 89 | |
; llvm.amdgcn.mfma.f32.16x16x16bf16.1k with a <4 x float> accumulator that is
; loaded from %arg and whose result is stored back to %arg. The A/B operands
; are the constants i64 1 and i64 2 bitcast to <4 x i16>; the three trailing
; immediates are 1, 2, 3.
;
; Expected code: 4 v_accvgpr_write_b32 from SGPRs, one MFMA with
; cbsz:1 abid:2 blgp:3 (mnemonic differs between the gfx90a and gfx942 runs)
; whose destination AGPR range is captured as RES, no v_accvgpr_read_b32
; between the MFMA and the store, and a single global_store_dwordx4 that
; stores RES directly.
;
; The upper register index of each VGPR pair is matched with [0-9]+ (one or
; more digits) on both targets so the check does not depend on the allocator
; picking a single-digit register.
; GCN-LABEL: {{^}}test_mfma_f32_16x16x16bf16_1k:
; GCN-DAG: s_load_dwordx4
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GFX942: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) #0 {
bb:
  %in.1 = load <4 x float>, ptr addrspace(1) %arg
  %a = bitcast i64 1 to <4 x i16>
  %b = bitcast i64 2 to <4 x i16>
  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3)
  store <4 x float> %mai.1, ptr addrspace(1) %arg
  ret void
}
| 108 | |
; Two chained llvm.amdgcn.mfma.f64.4x4x4f64 calls on the kernel arguments %a
; and %b. The first call uses a 0.0 accumulator and all-zero modifiers; the
; second consumes the first result and passes modifiers 1, 2, 3. The final
; double is stored to %arg.
;
; Expected code: the first MFMA takes an inline 0 as its accumulator operand
; with nothing after it on the line, and its destination (M1) feeds the second
; MFMA. For the second MFMA the gfx90a run expects cbsz:1 abid:2 blgp:3, while
; the gfx942 run expects the last modifier printed as neg:[1,1,0]. The result
; is written with global_store_dwordx2.
; GCN-LABEL: {{^}}test_mfma_f64_4x4x4f64:
; GFX90A: v_mfma_f64_4x4x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
; GFX90A: v_mfma_f64_4x4x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
; GFX942: v_mfma_f64_4x4x4_4b_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
; GFX942: v_mfma_f64_4x4x4_4b_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx2
define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 {
entry:
  %first = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double 0.0, i32 0, i32 0, i32 0)
  %second = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double %first, i32 1, i32 2, i32 3)
  store double %second, ptr addrspace(1) %arg
  ret void
}
| 122 | |
; llvm.amdgcn.mfma.f64.16x16x4f64 on the kernel arguments %a and %b with a
; <4 x double> accumulator loaded from %arg and modifiers 1, 2, 3; the result
; is stored back to %arg.
;
; Expected code: an s_load_dwordx8, then one MFMA with an AGPR accumulator.
; The gfx90a run expects cbsz:1 abid:2 blgp:3, while the gfx942 run expects
; the last modifier printed as neg:[1,1,0]. The result is written with two
; global_store_dwordx4.
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64:
; GCN: s_load_dwordx8
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 {
entry:
  %acc = load <4 x double>, ptr addrspace(1) %arg
  %result = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %acc, i32 1, i32 2, i32 3)
  store <4 x double> %result, ptr addrspace(1) %arg
  ret void
}
| 136 | |
; Two chained llvm.amdgcn.mfma.f64.16x16x4f64 calls on the kernel arguments
; %a and %b. The first call uses an all-zero <4 x double> accumulator and
; all-zero modifiers; the second consumes the first result and passes
; modifiers 1, 2, 3. The final vector is stored to %arg.
;
; Expected code: the first MFMA takes an inline 0 as its accumulator operand
; with nothing after it on the line, and its destination (M1) feeds the second
; MFMA. For the second MFMA the gfx90a run expects cbsz:1 abid:2 blgp:3, while
; the gfx942 run expects the last modifier printed as neg:[1,1,0]. The result
; is written with two global_store_dwordx4.
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm:
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) #0 {
entry:
  %first = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> zeroinitializer, i32 0, i32 0, i32 0)
  %second = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %first, i32 1, i32 2, i32 3)
  store <4 x double> %second, ptr addrspace(1) %arg
  ret void
}
| 151 | |
; llvm.amdgcn.mfma.f64.16x16x4f64 on the kernel arguments %a and %b with the
; constant accumulator <0.0, 0.0, 0.0, 1.0> and all-zero modifiers; the result
; is stored to %arg.
;
; Expected code: because the accumulator is not a splat, the MFMA reads it
; from an AGPR range rather than an inline constant, and the line ends right
; after that operand (no modifiers printed). The result is written with two
; global_store_dwordx4.
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_imm:
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 {
entry:
  %result = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 0.0, double 0.0, double 0.0, double 1.0>, i32 0, i32 0, i32 0)
  store <4 x double> %result, ptr addrspace(1) %arg
  ret void
}
| 163 | |
; llvm.amdgcn.mfma.f64.16x16x4f64 on the kernel arguments %a and %b with a
; <4 x double> accumulator whose lanes are all 123.0 and all-zero modifiers;
; the result is stored to %arg.
;
; Expected code: the accumulator is materialized into AGPRs (a
; v_accvgpr_write_b32 of 0 and a v_mov_b32 of 0x405ec000, which is the high
; 32 bits of the f64 encoding of 123.0), the MFMA reads it from an AGPR range,
; and the line ends right after that operand (no modifiers printed). The
; result is written with two global_store_dwordx4.
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_lit:
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x405ec000
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) #0 {
entry:
  %result = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 123.0, double 123.0, double 123.0, double 123.0>, i32 0, i32 0, i32 0)
  store <4 x double> %result, ptr addrspace(1) %arg
  ret void
}
Stanislav Mekhanoshin | aeaf85b | 2022-01-12 16:03:16 -0800 | [diff] [blame] | 177 | |
; Attribute group #0, referenced by the kernel definitions visible in this
; file: sets the "amdgpu-flat-work-group-size" string attribute to "1,256".
| 178 | attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } |