# Source blob: 222fcdcf643a5a90e0959ffbb8da5a328dfbd4e4 (web-viewer header, not part of the test input)
# RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
# Scenario: two V_MOV_B32_e32 write vgpr0 and vgpr1, then V_MFMA_F32_4X4X1F32_e64
# reads vgpr1 and vgpr0 as its first two source operands.
# Expected: `S_NOP 1` directly after the second V_MOV and directly before the MFMA.
# GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read
# GCN: V_MOV_B32
# GCN: V_MOV_B32
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: valu_write_vgpr_sgemm_mfma_read
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_ACCVGPR_WRITE_B32_e64 writes agpr4, then V_MFMA_F32_4X4X1F32_e64 reads
# agpr4 as its first source operand. vgpr0 is only IMPLICIT_DEF'd.
# Expected: `S_NOP 1` between the accvgpr write and the MFMA.
# GCN-LABEL: name: valu_write_agpr_sgemm_mfma_read
# GCN: V_ACCVGPR_WRITE_B32_e64
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: valu_write_agpr_sgemm_mfma_read
body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
$agpr4 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two V_MOV_B32_e32 write vgpr0 and vgpr1, then V_MFMA_F64_4X4X4F64_vgprcd_e64
# reads the pair vgpr0_vgpr1 in all three source positions.
# Expected: `S_NOP 1` directly after the second V_MOV and directly before the MFMA.
# GCN-LABEL: name: valu_write_vgpr_dgemm_mfma_read
# GCN: V_MOV_B32
# GCN: V_MOV_B32
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: valu_write_vgpr_dgemm_mfma_read
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MOV_B32_e32 writes vgpr32, then V_SMFMAC_F32_16X16X32_F16_e64 reads
# vgpr32 as its third source operand.
# Expected: `S_NOP 1` between the V_MOV and the SMFMAC.
# GCN-LABEL: name: valu_write_vgpr_smfmac_read
# GCN: V_MOV_B32
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_SMFMAC
name: valu_write_vgpr_smfmac_read
body: |
bb.0:
$vgpr32 = V_MOV_B32_e32 1, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Scenario: V_ACCVGPR_MOV_B32 copies agpr5 into agpr4, then V_MFMA_F32_4X4X1F32_e64
# reads agpr4 as its first source operand. vgpr0 is only IMPLICIT_DEF'd.
# Expected: `S_NOP 1` between the accvgpr move and the MFMA.
# GCN-LABEL: name: accmov_write_agpr_sgemm_mfma_read
# GCN: V_ACCVGPR_MOV_B32
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: accmov_write_agpr_sgemm_mfma_read
body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
$agpr4 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two identical V_MFMA_F32_4X4X1F32_e64 back to back; each writes agpr0-3
# and reads exactly the same agpr0-3 tuple as its third source operand.
# Expected: `S_NOP 1` between the two MFMAs.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two identical V_MFMA_F32_4X4X1F32_vgprcd_e64 back to back; each writes
# vgpr0-3 and reads exactly the same vgpr0-3 tuple as its third source operand.
# Expected: `S_NOP 1` between the two MFMAs.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two identical V_MFMA_F32_16X16X1F32_vgprcd_e64 back to back; each writes
# vgpr0-15 and reads exactly the same vgpr0-15 tuple as its third source operand.
# Expected: the two MFMAs stay adjacent (no S_NOP is checked for between them).
# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_4X4X1F32_e64 writes agpr0-3, then V_SMFMAC_F32_16X16X32_F16_e64
# uses the same agpr0-3 tuple as both its destination and its trailing register operand.
# Expected: `S_NOP 1` between the MFMA and the SMFMAC.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_smfmac_read_same_agpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_SMFMAC
name: nonxdl_sgemm4x4_mfma_write_agpr_smfmac_read_same_agpr_as_srcc
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr5, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Scenario: two identical V_MFMA_F64_16X16X4F64_vgprcd_e64 back to back; each writes
# vgpr0-7 and reads exactly the same vgpr0-7 tuple as its third source operand.
# Expected: the two MFMAs stay adjacent (no S_NOP is checked for between them).
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two identical V_MFMA_F64_4X4X4F64_vgprcd_e64 back to back; each writes
# vgpr2_vgpr3 and reads the same pair as its third source operand.
# Expected: `S_NOP 3` between the two MFMAs.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body: |
bb.0:
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two identical V_MFMA_I32_4X4X4I8_vgprcd_e64 back to back; each writes
# vgpr0-3 and reads exactly the same vgpr0-3 tuple as its third source operand.
# Expected: `S_NOP 1` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two identical V_MFMA_I32_16X16X32I8_e64 back to back; each writes agpr0-3
# and reads exactly the same agpr0-3 tuple as its third source operand.
# NOTE(review): the test name says "vgpr" but the accumulator registers used are AGPRs.
# Expected: the two MFMAs stay adjacent (no S_NOP is checked for between them).
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two identical V_SMFMAC_F32_16X16X32_F16_e64 back to back; each uses agpr0-3
# as both its destination and its trailing register operand.
# NOTE(review): the test name says "vgpr" but the accumulator registers used are AGPRs.
# Expected: the two SMFMACs stay adjacent (no S_NOP is checked for between them).
# GCN-LABEL: name: smfmac16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN: V_SMFMAC
# GCN-NEXT: V_SMFMAC
name: smfmac16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_e64 writes agpr2-5, then a second V_MFMA_I32_4X4X4I8_e64
# reads agpr0-3 as its third source operand, so the tuples overlap only in agpr2/agpr3.
# Expected: `S_NOP 2` between the two MFMAs.
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: sgemm4x4_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_vgprcd_e64 writes vgpr2-5, then V_MFMA_F32_4X4X1F32_vgprcd_e64
# reads vgpr0-3 as its third source operand, so the tuples overlap only in vgpr2/vgpr3.
# Expected: `S_NOP 2` between the two MFMAs.
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr6, $vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr6, $vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_e64 writes agpr2-5, then V_SMFMAC_F32_16X16X32_F16_e64
# uses agpr0-3 as destination and trailing register operand (overlap in agpr2/agpr3).
# Expected: `S_NOP 2` between the MFMA and the SMFMAC.
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_SMFMAC
name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_16X16X4I8_e64 writes agpr2-17, then V_MFMA_F32_16X16X1F32_e64
# reads the shifted, overlapping tuple agpr0-15 as its third source operand.
# Expected: `S_NOP 7` followed by `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_16X16X4I8_vgprcd_e64 writes vgpr2-17, then a second
# V_MFMA_I32_16X16X4I8_vgprcd_e64 reads the overlapping tuple vgpr0-15 as its third source.
# Expected: `S_NOP 7` followed by `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_16X16X1F32_e64 writes agpr2-17, then V_MFMA_I32_16X16X4I8_e64
# reads the overlapping tuple agpr0-15 as its third source operand.
# Expected: the two MFMAs stay adjacent (no S_NOP is checked for between them).
# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_xdl_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm16x16_mfma_write_agpr_xdl_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_16X16X1F32_e64 writes agpr2-17, then a second
# V_MFMA_F32_16X16X1F32_e64 reads the overlapping tuple agpr0-15 as its third source.
# Expected: a single `S_NOP 7` between the two MFMAs.
# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_nonxdl_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm16x16_mfma_write_agpr_nonxdl_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_16X16X4I8_e64 writes agpr2-17, then V_SMFMAC_F32_16X16X32_F16_e64
# uses agpr0-3 as destination and trailing register operand (overlap in agpr2/agpr3).
# Expected: `S_NOP 7` followed by `S_NOP 0` between the MFMA and the SMFMAC.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_SMFMAC
name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_32X32X4F16_e64 writes agpr2-33, then V_MFMA_F32_32X32X2F32_e64
# reads the overlapping tuple agpr0-15 as its third source operand.
# Expected: `S_NOP 7`, `S_NOP 7`, `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_32X32X4F16_vgprcd_e64 writes vgpr2-33, then
# V_MFMA_F32_32X32X2F32_vgprcd_e64 reads the overlapping tuple vgpr0-15 as its third source.
# Expected: `S_NOP 7`, `S_NOP 7`, `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr126, $vgpr127, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_32X32X1F32_e64 writes agpr2-33, then V_MFMA_I32_4X4X4I8_e64
# reads agpr0-3 as its third source operand (overlap in agpr2/agpr3).
# Expected: the two MFMAs stay adjacent (no S_NOP is checked for between them).
# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_xdl_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm32x32_mfma_write_agpr_xdl_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_32X32X1F32_e64 writes agpr2-33, then V_MFMA_F32_32X32X2F32_e64
# reads the overlapping tuple agpr0-15 as its third source operand.
# Expected: `S_NOP 7`, `S_NOP 7` between the two MFMAs.
# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_nonxdl_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm32x32_mfma_write_agpr_nonxdl_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_32X32X4F16_e64 writes agpr2-33, then V_SMFMAC_F32_16X16X32_F16_e64
# uses agpr0-3 as destination and trailing register operand (overlap in agpr2/agpr3).
# Expected: `S_NOP 7`, `S_NOP 7`, `S_NOP 0` between the MFMA and the SMFMAC.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_SMFMAC
name: xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F64_16X16X4F64_vgprcd_e64 writes vgpr2-9, then a second
# V_MFMA_F64_16X16X4F64_vgprcd_e64 reads the overlapping tuple vgpr0-7 as its third source.
# Expected: `S_NOP 7` followed by `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F64_4X4X4F64_vgprcd_e64 writes vgpr2_vgpr3, then
# V_MFMA_F64_16X16X4F64_vgprcd_e64 reads vgpr0-7 (which contains that pair) as its third source.
# Expected: `S_NOP 3` between the two MFMAs.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_vgpr_mfma_read_overlap
body: |
bb.0:
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F64_16X16X4F64_vgprcd_e64 writes vgpr2-9, then
# V_MFMA_F32_4X4X1F32_vgprcd_e64 reads vgpr0-3 as its third source (overlap in vgpr2/vgpr3).
# Expected: `S_NOP 7` followed by `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F64_4X4X4F64_vgprcd_e64 writes vgpr2_vgpr3, then
# V_MFMA_F32_4X4X1F32_vgprcd_e64 reads vgpr0-3 (which contains that pair) as its third source.
# Expected: `S_NOP 3` between the two MFMAs.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_read_overlap
body: |
bb.0:
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_vgprcd_e64 writes vgpr0-3, then
# V_MFMA_F64_16X16X4F64_vgprcd_e64 reads vgpr0-7 (which contains that tuple) as its third source.
# Expected: `S_NOP 2` between the two MFMAs.
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_16X16X4I8_vgprcd_e64 writes vgpr0-15, then
# V_MFMA_F64_16X16X4F64_vgprcd_e64 reads vgpr0-7 (a subset of that tuple) as its third source.
# Expected: `S_NOP 7` followed by `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr20_vgpr21, $vgpr20_vgpr21, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_32X32X4F16_vgprcd_e64 writes vgpr0-31, then
# V_MFMA_F64_16X16X4F64_vgprcd_e64 reads vgpr0-7 (a subset of that tuple) as its third source.
# Expected: `S_NOP 7`, `S_NOP 7`, `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr120_vgpr121, $vgpr120_vgpr121, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_16X16X4I8_e64 writes agpr0-15, then V_MFMA_F32_4X4X1F32_e64
# reads only agpr0-3 (the low part of that tuple) as its third source operand.
# Expected: `S_NOP 7` followed by `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_16X16X4I8_vgprcd_e64 writes vgpr0-15, then
# V_MFMA_F32_4X4X1F32_vgprcd_e64 reads only vgpr0-3 (the low part) as its third source.
# Expected: `S_NOP 7` followed by `S_NOP 0` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_e64 writes agpr0-3, then V_MFMA_F32_4X4X1F32_e64 reads
# agpr1 as its first source operand (it also reads agpr0-3 as its third source).
# Expected: `S_NOP 4` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_16X16X4I8_e64 writes agpr0-15, then V_MFMA_F32_4X4X1F32_e64 reads
# agpr1 as its first source operand; its accumulator agpr20-23 lies outside the written range.
# Expected: `S_NOP 7` followed by `S_NOP 2` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_16X16X1F32_e64 writes agpr0-15, then V_MFMA_F32_4X4X1F32_e64 reads
# agpr1 as its first source operand; its accumulator agpr20-23 lies outside the written range.
# Expected: `S_NOP 7` followed by `S_NOP 1` between the two MFMAs.
# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_SMFMAC_I32_32X32X32_I8_e64 writes agpr0-15, then V_MFMA_F32_4X4X1F32_e64 reads
# agpr1 as its first source operand; its accumulator agpr20-23 lies outside the written range.
# Expected: `S_NOP 7` followed by `S_NOP 2` between the SMFMAC and the MFMA.
# GCN-LABEL: name: smfmac32x32_write_agpr_mfma_srca_read_overlap
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: smfmac32x32_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
$agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: two identical V_SMFMAC_I32_32X32X32_I8_e64 back to back; each writes vgpr0-15,
# reads vgpr2 as its third source operand and vgpr0-15 as its trailing register operand.
# NOTE(review): the test name says "agpr" but the registers written here are VGPRs.
# Expected: `S_NOP 7` followed by `S_NOP 2` between the two SMFMACs.
# GCN-LABEL: name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_SMFMAC
name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_32X32X4F16_e64 writes agpr0-31, then V_MFMA_F32_4X4X1F32_e64 reads
# agpr1 as its first source operand.
# NOTE(review): unlike the 16x16 variants, the consumer's accumulator agpr20-23 also falls
# inside the written agpr0-31 range, so the third source overlaps the producer as well.
# Expected: `S_NOP 7`, `S_NOP 7`, `S_NOP 2` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
$agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F32_32X32X1F32_e64 writes agpr0-31, then V_MFMA_F32_4X4X1F32_e64 reads
# agpr1 as its first source operand; its accumulator agpr120-123 lies outside the written range.
# Expected: `S_NOP 7`, `S_NOP 7`, `S_NOP 1` between the two MFMAs.
# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
$agpr120_agpr121_agpr122_agpr123 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr120_agpr121_agpr122_agpr123, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_vgprcd_e64 writes vgpr0-3, then V_MFMA_I32_4X4X4I8_e64 reads
# vgpr0 as its first source operand; its other operands are AGPRs.
# Expected: `S_NOP 4` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr0, $agpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_vgprcd_e64 writes vgpr0-3, then V_MFMA_F64_4X4X4F64_vgprcd_e64
# reads vgpr0_vgpr1 as its first source operand; its other sources are vgpr4_vgpr5.
# Expected: `S_NOP 4` between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dmfma4x4_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_vgpr_dmfma4x4_srca_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_vgprcd_e64 writes vgpr0-3, then a 16x16 double-precision
# MFMA (V_MFMA_F64_16X16X4F64_vgprcd_e64) reads vgpr0_vgpr1 as its first source operand.
# The consumer's second source (vgpr4_vgpr5) and accumulator/destination (vgpr4-11) do not
# overlap the producer's result, so only the first-source dependency is exercised.
# This body previously duplicated the dmfma4x4 test (the consumer was
# V_MFMA_F64_4X4X4F64_vgprcd_e64), leaving the 16x16 consumer named in the test untested.
# Expected: `S_NOP 4` between the two MFMAs.
# NOTE(review): the S_NOP count is carried over from the 4x4 variant on the assumption that
# the required wait for a first/second-source read depends only on the producing MFMA;
# re-run the test after applying this change to confirm.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dmfma16x16_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_vgpr_dmfma16x16_srca_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_I32_4X4X4I8_vgprcd_e64 writes vgpr0-3, then V_SMFMAC_F32_16X16X32_F16_e64
# reads vgpr0_vgpr1 as its first source operand; its remaining operands do not use vgpr0-3.
# Expected: `S_NOP 4` between the MFMA and the SMFMAC.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_SMFMAC
name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srca_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F64_4X4X4F64_vgprcd_e64 writes vgpr2_vgpr3, then
# V_MFMA_F64_16X16X4F64_vgprcd_e64 reads vgpr2_vgpr3 as its first source operand.
# Expected: `S_NOP 5` between the two MFMAs.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
body: |
bb.0:
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr2_vgpr3, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F64_16X16X4F64_vgprcd_e64 writes vgpr0-7, then a second
# V_MFMA_F64_16X16X4F64_vgprcd_e64 reads vgpr0_vgpr1 as its first source operand.
# Expected: `S_NOP 7` followed by `S_NOP 2` between the two MFMAs.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F64_4X4X4F64_vgprcd_e64 writes vgpr4_vgpr5, then
# V_MFMA_F32_4X4X1F32_vgprcd_e64 reads vgpr4 as its first source operand.
# Expected: `S_NOP 5` between the two MFMAs.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Scenario: V_MFMA_F64_16X16X4F64_vgprcd_e64 writes vgpr0-7, then
# V_MFMA_F32_4X4X1F32_vgprcd_e64 reads vgpr4 as its first source operand.
# NOTE(review): the consumer's second source (vgpr0) and third source (vgpr0-3) also lie
# inside the written vgpr0-7 range, so all three sources overlap the producer.
# Expected: `S_NOP 7` followed by `S_NOP 2` between the two MFMAs.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following
# V_MFMA_F64_4X4X4F64 reads $vgpr0_vgpr1 as its first source operand.
# Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_vgpr_dgemm_mfma_srca_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $agpr0-$agpr3 and a following
# V_MFMA_F32_4X4X1F32 reads $agpr0 as its second source operand (and
# $agpr0-$agpr3 as its third). Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $agpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following
# V_MFMA_F32_4X4X1F32 reads $vgpr0 as its second source operand.
# Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_4X4X4F64 defines $vgpr2_vgpr3 and a second one reads
# $vgpr2_vgpr3 as its second source operand. Expects S_NOP 5 between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
body: |
bb.0:
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_4X4X4F64 defines $vgpr2_vgpr3 and a following V_SMFMAC reads
# $vgpr2-$vgpr5 as its second source operand. Expects S_NOP 5 between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_SMFMAC
name: dgemm4x4_mfma_write_vgpr_smfmac_srcb_read_overlap
body: |
bb.0:
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A V_MFMA_F64_4X4X4F64 defines $vgpr2_vgpr3 and a following V_SMFMAC reads
# $vgpr2 as its third source operand. Expects S_NOP 5 between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_SMFMAC
name: dgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
body: |
bb.0:
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr2, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A V_MFMA_F64_16X16X4F64 defines $vgpr0-$vgpr7 and a second one reads
# $vgpr0_vgpr1 as its second source operand. Expects S_NOP 7 followed by
# S_NOP 2 between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr0_vgpr1, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_16X16X4F64 defines $vgpr0-$vgpr7 and a following V_SMFMAC
# reads $vgpr2-$vgpr5 as its second source operand. Expects S_NOP 7 followed
# by S_NOP 2 between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_SMFMAC
name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A V_MFMA_F64_16X16X4F64 defines $vgpr0-$vgpr7 and a following V_SMFMAC
# reads $vgpr2 as its third source operand. Expects S_NOP 7 followed by
# S_NOP 2 between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_SMFMAC
name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr2, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following V_SMFMAC reads
# $vgpr1 as its third source operand. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_SMFMAC
name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr1, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following
# BUFFER_STORE_DWORD_OFFSET uses $vgpr0. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_STORE_DWORD
name: xdl_smfma4x4_write_vgpr_vm_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following FLAT_STORE_DWORD
# uses $vgpr0_vgpr1 as its first operand. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: FLAT_STORE_DWORD
name: xdl_smfma4x4_write_vgpr_flat_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following DS_WRITE_B32
# uses $vgpr0 as its first operand. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_lds_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: DS_WRITE_B32
name: xdl_smfma4x4_write_vgpr_lds_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
DS_WRITE_B32 $vgpr0, $vgpr4, 0, 0, implicit $m0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following EXP_DONE uses
# $vgpr0 in three of its register operands. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_exp_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: EXP_DONE
name: xdl_smfma4x4_write_vgpr_exp_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
EXP_DONE 12, $vgpr4, $vgpr0, $vgpr0, $vgpr0, 0, 0, 15, implicit $exec
...
# A V_SMFMAC_F32_16X16X32_F16 defines $vgpr0-$vgpr3 and a following
# FLAT_STORE_DWORD uses $vgpr1 as its second operand. Expects S_NOP 6
# between them.
# GCN-LABEL: name: smfmac16x16_write_vgpr_flat_read
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 6
# GCN-NEXT: FLAT_STORE_DWORD
name: smfmac16x16_write_vgpr_flat_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A V_MFMA_I32_16X16X4I8 defines $vgpr0-$vgpr15 and a following
# FLAT_STORE_DWORD uses $vgpr1 as its second operand. Expects S_NOP 7
# followed by S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: FLAT_STORE_DWORD
name: xdl_smfma16x16_write_vgpr_flat_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A V_SMFMAC_I32_32X32X32_I8 defines $vgpr0-$vgpr15 and a following
# FLAT_STORE_DWORD uses $vgpr1 as its second operand. Expects S_NOP 7
# followed by S_NOP 2 between them.
# GCN-LABEL: name: smfmac32x32_write_vgpr_flat_read
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: FLAT_STORE_DWORD
name: smfmac32x32_write_vgpr_flat_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A V_MFMA_F32_32X32X4F16 defines $agpr0-$agpr31 and a following
# FLAT_STORE_DWORD uses $agpr1 as its second operand. Expects S_NOP 7,
# S_NOP 7 and S_NOP 2 between them.
# NOTE(review): the test name says "vgpr" but the overlapping registers here
# are AGPRs.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: FLAT_STORE_DWORD
name: xdl_smfma32x32_write_vgpr_flat_read
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr16_vgpr17, $agpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A V_MFMA_F64_4X4X4F64 defines $vgpr4_vgpr5 and a following FLAT_STORE_DWORD
# uses $vgpr5 (half of the defined pair) as its second operand. Expects
# S_NOP 7 followed by S_NOP 0 between them.
# GCN-LABEL: name: dmfma4x4_write_vgpr_flat_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma4x4_write_vgpr_flat_read_overlap
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A V_MFMA_F64_4X4X4F64 defines $vgpr4_vgpr5 and a following
# FLAT_STORE_DWORDX2 uses the whole pair $vgpr4_vgpr5 as its second operand.
# Expects S_NOP 7 followed by S_NOP 0 between them. The FLAT_STORE_DWORD
# check pattern matches the DWORDX2 opcode as a substring.
# GCN-LABEL: name: dmfma4x4_write_vgpr_flat_read_full
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma4x4_write_vgpr_flat_read_full
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A V_MFMA_F64_16X16X4F64 defines $vgpr2-$vgpr9 and a following
# FLAT_STORE_DWORD uses $vgpr4 as its second operand. Expects S_NOP 7,
# S_NOP 7 and S_NOP 1 between them.
# GCN-LABEL: name: dmfma16x16_write_vgpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma16x16_write_vgpr_flat_read
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following V_MOV_B32 reads
# $vgpr0. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MOV_B32
name: xdl_smfma4x4_write_vgpr_valu_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_16X16X4I8 defines $vgpr0-$vgpr15 and a following V_MOV_B32
# reads $vgpr0. Expects S_NOP 7 followed by S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_smfma16x16_write_vgpr_valu_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_32X32X4F16 defines $vgpr0-$vgpr31 and a following V_MOV_B32
# reads $vgpr0. Expects S_NOP 7, S_NOP 7 and S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_smfma32x32_write_vgpr_valu_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_4X4X4F64 defines $vgpr4_vgpr5 and a following V_MOV_B32 reads
# $vgpr5. Expects S_NOP 5 between them.
# GCN-LABEL: name: dmfma4x4_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MOV_B32
name: dmfma4x4_write_vgpr_valu_read
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr6 = V_MOV_B32_e32 $vgpr5, implicit $exec
...
# A V_MFMA_F64_16X16X4F64 defines $vgpr2-$vgpr9 and a following V_MOV_B32
# reads $vgpr4. Expects S_NOP 7 followed by S_NOP 2 between them.
# GCN-LABEL: name: dmfma16x16_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: dmfma16x16_write_vgpr_valu_read
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr12 = V_MOV_B32_e32 $vgpr4, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following
# V_ACCVGPR_WRITE_B32 reads $vgpr0. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_accv_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: xdl_smfma4x4_write_vgpr_accv_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_16X16X4I8 defines $vgpr0-$vgpr15 and a following
# V_ACCVGPR_WRITE_B32 reads $vgpr0. Expects S_NOP 7 followed by S_NOP 2
# between them.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_accv_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: xdl_smfma16x16_write_vgpr_accv_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_32X32X4F16 defines $vgpr0-$vgpr31 and a following
# V_ACCVGPR_WRITE_B32 reads $vgpr0. Expects S_NOP 7, S_NOP 7 and S_NOP 2
# between them.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_accv_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: xdl_smfma32x32_write_vgpr_accv_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following V_DOT4C_I32_I8
# reads $vgpr0 and $vgpr1. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_dot_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_DOT
name: xdl_smfma4x4_write_vgpr_dot_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# A V_MFMA_F64_4X4X4F64 defines $vgpr4_vgpr5 and a following V_DOT4C_I32_I8
# reads $vgpr5. Expects S_NOP 5 between them.
# GCN-LABEL: name: dmfma4x4_write_vgpr_dot_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_DOT
name: dmfma4x4_write_vgpr_dot_read
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr5, $vgpr1, implicit $exec
...
# A V_MFMA_F64_16X16X4F64 defines $vgpr2-$vgpr9 and a following
# V_DOT4C_I32_I8 reads $vgpr4. Expects S_NOP 7 followed by S_NOP 2 between
# them.
# GCN-LABEL: name: dmfma16x16_write_vgpr_dot_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name: dmfma16x16_write_vgpr_dot_read
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr4, $vgpr1, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following V_MOV_B32
# overwrites $vgpr1. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MOV_B32
name: xdl_smfma4x4_write_vgpr_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr0, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_16X16X4I8 defines $vgpr0-$vgpr15 and a following V_MOV_B32
# overwrites $vgpr1. Expects S_NOP 7 followed by S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_smfma16x16_write_vgpr_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_32X32X4F16 defines $vgpr0-$vgpr31 and a following V_MOV_B32
# overwrites $vgpr1. Expects S_NOP 7, S_NOP 7 and S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_smfma32x32_write_vgpr_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following V_FMA_F16
# overwrites $vgpr1. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_f16_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_FMA_F16_e64
name: xdl_smfma4x4_write_vgpr_valu_f16_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_16X16X4I8 defines $vgpr0-$vgpr15 and a following V_FMA_F16
# overwrites $vgpr1. Expects S_NOP 7 followed by S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_f16_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_FMA_F16_e64
name: xdl_smfma16x16_write_vgpr_valu_f16_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_32X32X4F16 defines $vgpr0-$vgpr31 and a following V_FMA_F16
# overwrites $vgpr1. Expects S_NOP 7, S_NOP 7 and S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_f16_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_FMA_F16_e64
name: xdl_smfma32x32_write_vgpr_valu_f16_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following V_MOV_B32_sdwa
# writes $vgpr1 (which it also carries as a tied implicit use). Expects
# S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_sdwa_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MOV_B32_sdwa
name: xdl_smfma4x4_write_vgpr_valu_sdwa_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0)
...
# A V_MFMA_I32_16X16X4I8 defines $vgpr0-$vgpr15 and a following
# V_MOV_B32_sdwa writes $vgpr1 (which it also carries as a tied implicit
# use). Expects S_NOP 7 followed by S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_sdwa_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32_sdwa
name: xdl_smfma16x16_write_vgpr_valu_sdwa_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0)
...
# A V_MFMA_F32_32X32X4F16 defines $vgpr0-$vgpr31 and a following
# V_MOV_B32_sdwa writes $vgpr1 (which it also carries as a tied implicit use)
# and reads $vgpr16. Expects S_NOP 7, S_NOP 7 and S_NOP 2 between them.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_sdwa_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32_sdwa
name: xdl_smfma32x32_write_vgpr_valu_sdwa_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0)
...
# A V_MFMA_F64_4X4X4F64 defines $vgpr4_vgpr5 and a following V_MOV_B32
# overwrites $vgpr4. Expects S_NOP 5 between them.
# GCN-LABEL: name: dmfma4x4_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MOV_B32
name: dmfma4x4_write_vgpr_valu_write
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_16X16X4F64 defines $vgpr2-$vgpr9 and a following V_MOV_B32
# overwrites $vgpr3. Expects S_NOP 7 followed by S_NOP 2 between them.
# GCN-LABEL: name: dmfma16x16_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: dmfma16x16_write_vgpr_valu_write
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following
# V_ACCVGPR_READ_B32 overwrites $vgpr1. Expects S_NOP 4 between them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_accv_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_ACCVGPR_READ_B32_e64
name: xdl_smfma4x4_write_vgpr_accv_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $mode, implicit $exec
...
# A V_MFMA_I32_4X4X4I8 defines $vgpr0-$vgpr3 and a following V_DOT8_I32_I4
# overwrites $vgpr1 (its sources are all $vgpr4). Expects S_NOP 4 between
# them.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_dot_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_DOT
name: xdl_smfma4x4_write_vgpr_dot_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_4X4X1F32 reads $vgpr0-$vgpr3 as its third source operand and
# a following V_MOV_B32 overwrites $vgpr1. Expects the V_MOV_B32 to directly
# follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
body: |
bb.0:
$vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_16X16X1F32 reads $vgpr0-$vgpr15 as its third source operand
# (defining $vgpr2-$vgpr17) and a following V_MOV_B32 overwrites $vgpr1.
# Expects the V_MOV_B32 to directly follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: smfma16x16_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma16x16_read_srcc_vgpr_valu_write
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_32X32X2F32 reads $vgpr0-$vgpr15 as its third source operand
# (defining $vgpr2-$vgpr17) and a following V_MOV_B32 overwrites $vgpr1.
# Expects the V_MOV_B32 to directly follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: smfma32x32_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma32x32_read_srcc_vgpr_valu_write
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr0, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_4X4X1F32 reads $vgpr8 as its first source operand and a
# following V_MOV_B32 overwrites $vgpr8. Expects the V_MOV_B32 to directly
# follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: smfma4x4_read_srca_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma4x4_read_srca_vgpr_valu_write
body: |
bb.0:
$vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr8 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_16X16X1F32 reads $vgpr18 as its first source operand and a
# following V_MOV_B32 overwrites $vgpr18. Expects the V_MOV_B32 to directly
# follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: smfma16x16_read_srca_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma16x16_read_srca_vgpr_valu_write
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr18 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_32X32X2F32 reads $vgpr18 as its first source operand and a
# following V_MOV_B32 overwrites $vgpr18. Expects the V_MOV_B32 to directly
# follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: smfma32x32_read_srca_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma32x32_read_srca_vgpr_valu_write
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr18 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_4X4X1F32 reads $vgpr9 as its second source operand and a
# following V_MOV_B32 overwrites $vgpr9. Expects the V_MOV_B32 to directly
# follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: smfma4x4_read_srcb_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma4x4_read_srcb_vgpr_valu_write
body: |
bb.0:
$vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr9 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_16X16X1F32 reads $vgpr19 as its second source operand and a
# following V_MOV_B32 overwrites $vgpr19. Expects the V_MOV_B32 to directly
# follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: smfma16x16_read_srcb_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma16x16_read_srcb_vgpr_valu_write
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr19 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_32X32X2F32 reads $vgpr19 as its second source operand and a
# following V_MOV_B32 overwrites $vgpr19. Expects the V_MOV_B32 to directly
# follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: smfma32x32_read_srcb_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma32x32_read_srcb_vgpr_valu_write
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr19 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_4X4X4F64 reads $vgpr0_vgpr1 for all three source operands and
# a following V_MOV_B32 overwrites $vgpr1. Expects the V_MOV_B32 to directly
# follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: dmfma4x4_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: dmfma4x4_read_srcc_vgpr_valu_write
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_16X16X4F64 reads $vgpr0-$vgpr7 as its third source operand
# (defining $vgpr2-$vgpr9) and a following V_MOV_B32 overwrites $vgpr1.
# Expects the V_MOV_B32 to directly follow the MFMA with no S_NOP inserted.
# GCN-LABEL: name: dmfma16x16_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: dmfma16x16_read_srcc_vgpr_valu_write
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_16X16X1F32 reads $agpr0-$agpr15 as its third source operand
# (defining $agpr2-$agpr17) and a following V_ACCVGPR_WRITE_B32 overwrites
# $agpr1. Expects the write to directly follow the MFMA with no S_NOP
# inserted.
# GCN-LABEL: name: smfma16x16_read_srcc_vgpr_accv_write
# GCN: V_MFMA
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: smfma16x16_read_srcc_vgpr_accv_write
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $agpr18, $agpr19, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# A V_MFMA_F32_4X4X1F32 is followed by a V_FMA_F64 that shares no registers
# with it. Expects the V_FMA_F64 to directly follow the MFMA with no S_NOP
# inserted.
# GCN-LABEL: name: sgemm_to_fma64
# GCN: V_MFMA
# GCN-NEXT: V_FMA_F64_e64
name: sgemm_to_fma64
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_4X4X4F64 (on $vgpr0_vgpr1) is followed by a V_FMA_F64 on
# $vgpr4_vgpr5, i.e. with no register overlap. Expects S_NOP 1 between them
# anyway.
# GCN-LABEL: name: dgemm_to_fma64
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_FMA_F64_e64
name: dgemm_to_fma64
body: |
bb.0:
$vgpr0_vgpr1 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
...
# A V_MFMA_F64_4X4X4F64 (on $vgpr0_vgpr1) is followed by a V_FMAC_F64 on
# $vgpr4_vgpr5, i.e. with no register overlap. Expects S_NOP 1 between them
# anyway.
# GCN-LABEL: name: dgemm_to_fmac64
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_FMAC_F64
name: dgemm_to_fmac64
body: |
bb.0:
$vgpr0_vgpr1 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4_vgpr5 = V_FMAC_F64_e32 $vgpr4_vgpr5, $vgpr4_vgpr5, $vgpr4_vgpr5, implicit $mode, implicit $exec
...
# A FLAT_STORE_DWORDX4 uses $agpr0-$agpr3 as its second operand and a
# following V_ACCVGPR_WRITE_B32 overwrites $agpr0. Expects S_NOP 1 between
# them.
# GCN-LABEL: name: flat_store_data_agpr_overwritten
# GCN: FLAT_STORE_DWORDX4
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: flat_store_data_agpr_overwritten
body: |
bb.0:
FLAT_STORE_DWORDX4 $vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
$agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# A V_DOT4C_I32_I8 defines $vgpr4 and a following V_ACCVGPR_WRITE_B32 reads
# $vgpr4. Expects S_NOP 2 between them.
# GCN-LABEL: name: dot_write_vgpr_accv_read
# GCN: V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: dot_write_vgpr_accv_read
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec
...
# A V_MOV_B32 defines $vgpr0 and a following V_DOT4C_I32_I8 reads $vgpr0.
# Expects the V_DOT to directly follow with no S_NOP inserted.
# GCN-LABEL: name: valu_write_vgpr_dot_read
# GCN: V_MOV_B32
# GCN-NEXT: V_DOT
name: valu_write_vgpr_dot_read
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# A V_ACCVGPR_READ_B32 defines $vgpr0 and a following V_DOT4C_I32_I8 reads
# $vgpr0. Expects the V_DOT to directly follow with no S_NOP inserted.
# GCN-LABEL: name: accv_write_vgpr_dot_read
# GCN: V_ACCVGPR_READ
# GCN-NEXT: V_DOT
name: accv_write_vgpr_dot_read
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# Two identical V_DOT4C_I32_I8 instructions back to back: the first defines
# $vgpr4 and the second reads $vgpr4 as its third source operand. Expects
# the second V_DOT to directly follow with no S_NOP inserted.
# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srcc
# GCN: V_DOT
# GCN-NEXT: V_DOT
name: dot_write_vgpr_same_dot_read_srcc
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# A V_DOT4C_I32_I8 defines $vgpr4 and a following V_DOT8_I32_I4 (a different
# DOT opcode) reads $vgpr4 as its third source operand. Expects S_NOP 2
# between them.
# GCN-LABEL: name: dot_write_vgpr_different_dot_read_srcc
# GCN: V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name: dot_write_vgpr_different_dot_read_srcc
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_DOT4C_I32_I8 defines $vgpr4 and a following V_DOT8_I32_I4 (a different
# DOT opcode) overwrites $vgpr4 without reading it. Expects S_NOP 2 between
# them.
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
# GCN: V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name: dot_write_vgpr_different_dot_write
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A V_DOT4C_I32_I8 defines $vgpr4 and a following V_MOV_B32 reads $vgpr4.
# Expects S_NOP 2 between them.
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
# GCN: V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32_e32
name: dot_write_vgpr_different_valu_read
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec
...
# V_DOT4C writes $vgpr4 and the following V_MOV_B32 overwrites $vgpr4 with an immediate.
# The checks expect "S_NOP 2" between them.
# GCN-LABEL: name: dot_write_vgpr_different_valu_write
# GCN: V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32_e32
name: dot_write_vgpr_different_valu_write
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
...
# V_DOT4C writes $vgpr4; a second V_DOT4C (same opcode) reads $vgpr4 as its
# first source operand. The checks expect "S_NOP 2" between them.
# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srca
# GCN: V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name: dot_write_vgpr_same_dot_read_srca
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr0 = V_DOT4C_I32_I8_e32 $vgpr4, $vgpr1, $vgpr0, implicit $mode, implicit $exec
...
# V_DOT4C writes $vgpr4; a second V_DOT4C (same opcode) reads $vgpr4 as its
# second source operand. The checks expect "S_NOP 2" between them.
# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srcb
# GCN: V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name: dot_write_vgpr_same_dot_read_srcb
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
$vgpr0 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr4, $vgpr0, implicit $mode, implicit $exec
...
# V_CMPX_EQ_I32 implicitly defines $exec (and $vcc); the following V_MFMA has an
# implicit use of $exec. The checks expect "S_NOP 3" between them.
# GCN-LABEL: name: vcmpx_write_exec_mfma
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: vcmpx_write_exec_mfma
body: |
bb.0:
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32_e64 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Two V_ACCVGPR_WRITE instructions define $agpr0 and $agpr1; the following
# V_MFMA_F64_4X4X4F64 reads $agpr0_agpr1 in all three source operands.
# The checks expect "S_NOP 1" after the second write.
# GCN-LABEL: name: valu_write_agpr_dgemm_mfma_read
# GCN: V_ACCVGPR_WRITE_B32_e64
# GCN: V_ACCVGPR_WRITE_B32_e64
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: valu_write_agpr_dgemm_mfma_read
body: |
bb.0:
$agpr0 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
$agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
$agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
...
# Two identical V_MFMA_F64_16X16X4F64 instructions whose destination and third
# source are exactly the same tuple ($agpr0..$agpr7).
# The checks expect no S_NOP between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_same_agpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_read_same_agpr_as_srcc
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $agpr10_agpr11, $agpr10_agpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $agpr10_agpr11, $agpr10_agpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Two identical V_MFMA_F64_4X4X4F64 instructions whose destination and third
# source are exactly the same pair ($agpr2_agpr3).
# The checks expect "S_NOP 3" between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc
body: |
bb.0:
$agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $agpr2..$agpr9; a second one reads $agpr0..$agpr7
# as its third source, so the tuples overlap only partially.
# The checks expect "S_NOP 7" followed by "S_NOP 0" between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $agpr2_agpr3; the following V_MFMA_F64_16X16X4F64
# reads $agpr0..$agpr7 as its third source, which contains that pair.
# The checks expect "S_NOP 3" between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $agpr2..$agpr9; the following V_MFMA_F32_4X4X1F32
# reads $agpr0..$agpr3 as its third source (partial overlap).
# The checks expect "S_NOP 7" followed by "S_NOP 0" between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr10, $vgpr11, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $agpr0..$agpr3; the following V_MFMA_F64_16X16X4F64
# reads $agpr0..$agpr7 as its third source and writes $agpr2..$agpr9.
# The checks expect "S_NOP 2" between them.
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr10, $vgpr11, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes $agpr0..$agpr15; the following V_MFMA_F64_16X16X4F64
# uses $agpr0..$agpr7 as both destination and third source.
# The checks expect "S_NOP 7" followed by "S_NOP 0" between them.
# NOTE(review): the test name says "write_sgpr" but every register involved is an
# AGPR or VGPR; the name looks like a typo for "write_agpr".
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr20_vgpr21, $vgpr20_vgpr21, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X4F16 writes $agpr0..$agpr31; the following V_MFMA_F64_16X16X4F64
# uses $agpr0..$agpr7 as both destination and third source.
# The checks expect "S_NOP 7", "S_NOP 7", "S_NOP 0" between them.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr120_vgpr121, $vgpr120_vgpr121, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $agpr0..$agpr3; the following V_MFMA_F64_4X4X4F64
# reads $agpr0_agpr1 as its first source operand.
# The checks expect "S_NOP 4" between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dmfma4x4_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_dmfma4x4_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $agpr0..$agpr3; the following 16x16 DGEMM
# (V_MFMA_F64_16X16X4F64) reads $agpr0_agpr1 as its first source operand.
# The reader's other operands use $vgpr10..$vgpr17, which the writer does not
# touch, so the first-source read is the only dependency between the two.
# The checks expect "S_NOP 4" between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dmfma16x16_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_dmfma16x16_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $agpr2_agpr3; the following V_MFMA_F64_16X16X4F64
# reads $agpr2_agpr3 as its first source operand.
# The checks expect "S_NOP 5" between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr2_agpr3, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $agpr0..$agpr7; a second 16x16 DGEMM reads
# $agpr0_agpr1 as its first source operand.
# The checks expect "S_NOP 7" followed by "S_NOP 2" between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $agpr4_agpr5; the following V_MFMA_F32_4X4X1F32
# reads $agpr4 as its first source operand.
# The checks expect "S_NOP 5" between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_agpr_sgemm_mfma_srca_read_overlap
body: |
bb.0:
$agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $agpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $agpr0..$agpr7; the following V_MFMA_F32_4X4X1F32
# reads $agpr4 as its first source operand.
# The checks expect "S_NOP 7" followed by "S_NOP 2" between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $agpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 (reading $vgpr4 and $vgpr0) writes $agpr0..$agpr3; the
# following V_MFMA_F64_4X4X4F64 reads $agpr0_agpr1 as its first source operand
# and writes $vgpr4_vgpr5. The checks expect "S_NOP 4" between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_dgemm_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $agpr2_agpr3; a second 4x4 DGEMM reads
# $agpr2_agpr3 as its second source operand.
# The checks expect "S_NOP 5" between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
body: |
bb.0:
$agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $agpr2_agpr3, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $agpr0..$agpr7; a second 16x16 DGEMM reads
# $agpr0_agpr1 as its second source operand.
# The checks expect "S_NOP 7" followed by "S_NOP 2" between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $agpr0_agpr1, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $agpr4_agpr5; the following FLAT_STORE_DWORD stores
# $agpr5, i.e. one half of the written pair.
# The checks expect "S_NOP 7" followed by "S_NOP 0" between them.
# GCN-LABEL: name: dmfma4x4_write_agpr_flat_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma4x4_write_agpr_flat_read_overlap
body: |
bb.0:
$agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# V_MFMA_F64_4X4X4F64 writes $agpr4_agpr5; the following FLAT_STORE_DWORDX2
# stores the whole written pair.
# The checks expect "S_NOP 7" followed by "S_NOP 0" between them.
# GCN-LABEL: name: dmfma4x4_write_agpr_flat_read_full
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma4x4_write_agpr_flat_read_full
body: |
bb.0:
$agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $agpr4_agpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# V_MFMA_F64_16X16X4F64 writes $agpr2..$agpr9; the following FLAT_STORE_DWORD
# stores $agpr4 from that tuple.
# The checks expect "S_NOP 7", "S_NOP 7", "S_NOP 1" between them.
# GCN-LABEL: name: dmfma16x16_write_agpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma16x16_write_agpr_flat_read
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# V_MFMA_F64_4X4X4F64 writes $agpr4_agpr5; the following V_ACCVGPR_READ reads $agpr5.
# The checks expect "S_NOP 5" between them.
# GCN-LABEL: name: dmfma4x4_write_agpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_ACCVGPR_READ_B32_e64
name: dmfma4x4_write_agpr_valu_read
body: |
bb.0:
$agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $agpr2..$agpr9; the following V_ACCVGPR_READ reads $agpr4.
# The checks expect "S_NOP 7" followed by "S_NOP 2" between them.
# GCN-LABEL: name: dmfma16x16_write_agpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_READ_B32_e64
name: dmfma16x16_write_agpr_valu_read
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $agpr4_agpr5; the following V_ACCVGPR_WRITE
# overwrites $agpr4. The checks expect "S_NOP 5" between them.
# GCN-LABEL: name: dmfma4x4_write_agpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: dmfma4x4_write_agpr_valu_write
body: |
bb.0:
$agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$agpr4 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $agpr2..$agpr9; the following V_ACCVGPR_WRITE
# overwrites $agpr3. The checks expect "S_NOP 7" followed by "S_NOP 2".
# GCN-LABEL: name: dmfma16x16_write_agpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: dmfma16x16_write_agpr_valu_write
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
$agpr3 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 reads $agpr0_agpr1 as its third source (it writes
# $agpr4_agpr5); the following V_ACCVGPR_WRITE overwrites $agpr1, a register the
# MFMA only reads. The checks expect no S_NOP between them.
# GCN-LABEL: name: dmfma4x4_read_srcc_agpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: dmfma4x4_read_srcc_agpr_valu_write
body: |
bb.0:
$agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 reads $agpr0..$agpr7 as its third source and writes
# $agpr2..$agpr9; the following V_ACCVGPR_WRITE overwrites $agpr1, which is in
# the source tuple but outside the destination tuple.
# The checks expect no S_NOP between them.
# GCN-LABEL: name: dmfma16x16_read_srcc_agpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: dmfma16x16_read_srcc_agpr_valu_write
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
$agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 followed by V_FMA_F64_e64. The FMA only touches
# $vgpr4_vgpr5, which the MFMA neither reads nor writes, so the pair shares no
# explicit register operand. The checks still expect "S_NOP 1" between them.
# GCN-LABEL: name: dgemm_accvgr_to_fma64
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_FMA_F64_e64
name: dgemm_accvgr_to_fma64
body: |
bb.0:
$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 (all operands $agpr0_agpr1) followed by V_FMAC_F64_e32
# operating only on $vgpr4_vgpr5, so the pair shares no explicit register
# operand. The checks still expect "S_NOP 1" between them.
# GCN-LABEL: name: dgemm_accvgr_to_fmac64
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_FMAC_F64
name: dgemm_accvgr_to_fmac64
body: |
bb.0:
$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4_vgpr5 = V_FMAC_F64_e32 $vgpr4_vgpr5, $vgpr4_vgpr5, $vgpr4_vgpr5, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X16F16 writes $agpr2..$agpr5; a second one reads $agpr0..$agpr3
# as its third source (partial overlap).
# The checks expect "S_NOP 4" between them.
# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 writes $agpr2..$agpr5; a second one reads $agpr0..$agpr3
# as its third source (partial overlap).
# The checks expect "S_NOP 4" between them.
# GCN-LABEL: name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X16F16 (vgprcd form) writes $vgpr2..$vgpr5; the following
# V_MFMA_F64_4X4X4F64 reads $vgpr2_vgpr3 as its third source.
# The checks expect "S_NOP 4" between them.
# NOTE(review): the test name says "write_agpr" but this case uses the vgprcd
# opcodes with VGPR destination and accumulator operands.
# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr8_vgpr9, $vgpr8_vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr6_vgpr7 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X16F16 writes $agpr2..$agpr5; the following V_SMFMAC uses
# $agpr0..$agpr3 as both destination and last (accumulator) operand, which
# partially overlaps the MFMA result.
# The checks expect "S_NOP 4" between them.
# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_SMFMAC
name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# V_SMFMAC_F32_16X16X32_F16 writes $agpr2..$agpr5; a second one uses
# $agpr0..$agpr3 as both destination and last (accumulator) operand.
# The checks expect "S_NOP 4" between them.
# GCN-LABEL: name: smfmac16x16_write_agpr_smfmac_read_overlap
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_SMFMAC
name: smfmac16x16_write_agpr_smfmac_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 writes $agpr0..$agpr3; the following V_MFMA_F32_4X4X1F32
# reads $agpr1 as its first source operand (and $agpr0..$agpr3 as third source).
# The checks expect "S_NOP 6" between them.
# NOTE(review): the test name says 16X16X16 while the writer opcode is 16X16X32I8.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 writes $agpr0..$agpr3; the following V_MFMA_F32_4X4X1F32
# reads $agpr1 as its second source operand (and $agpr0..$agpr3 as third source).
# The checks expect "S_NOP 6" between them.
# GCN-LABEL: name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr0, $agpr1, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 (vgprcd form) writes $vgpr0..$vgpr3; the following
# V_MFMA_F64_4X4X4F64 reads $vgpr0_vgpr1 as its first source operand.
# The checks expect "S_NOP 6" between them.
# NOTE(review): the test name says 16X16X16 and "dmfma16x16", while the opcodes
# used are 16X16X32I8 and the 4X4X4F64 DGEMM.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 (vgprcd form) writes $vgpr0..$vgpr3; the following
# V_MOV_B32 overwrites $vgpr1. The checks expect "S_NOP 6" between them.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 (vgprcd form) writes $vgpr0..$vgpr3; the following
# BUFFER_STORE_DWORD_OFFSET stores $vgpr0.
# The checks expect "S_NOP 6" between them.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: BUFFER_STORE_DWORD
name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 (vgprcd form) writes $vgpr0..$vgpr3; the following
# V_MOV_B32 reads $vgpr0. The checks expect "S_NOP 6" between them.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 (vgprcd form) writes $vgpr0..$vgpr3; the following
# V_DOT4C reads $vgpr0 and $vgpr1 as its first two sources.
# The checks expect "S_NOP 6" between them.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_DOT
name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# Two identical V_SMFMAC_F32_16X16X32_F16 instructions whose destination and
# last (accumulator) operand are exactly the same tuple ($agpr0..$agpr3).
# The checks expect no S_NOP between them.
# GCN-LABEL: name: smfmac16x16x32_write_agpr_mfma_read_same_agpr_as_srcc
# GCN: V_SMFMAC
# GCN-NEXT: V_SMFMAC
name: smfmac16x16x32_write_agpr_mfma_read_same_agpr_as_srcc
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Two identical V_SMFMAC_I32_32X32X32_I8 instructions whose destination and
# last (accumulator) operand are exactly the same tuple ($agpr0..$agpr15).
# The checks expect no S_NOP between them.
# GCN-LABEL: name: smfmac32x32x32_write_agpr_mfma_read_same_agpr_as_srcc
# GCN: V_SMFMAC
# GCN-NEXT: V_SMFMAC
name: smfmac32x32x32_write_agpr_mfma_read_same_agpr_as_srcc
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
...
# V_SMFMAC_F32_16X16X32_F16 writes $agpr0..$agpr3; a second one uses
# $agpr2..$agpr5 as destination and last (accumulator) operand (partial overlap).
# The checks expect "S_NOP 4" between them.
# GCN-LABEL: name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_SMFMAC
name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
$agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
...
# V_SMFMAC_F32_32X32X16_BF16 writes $agpr0..$agpr15; a second one uses
# $agpr2..$agpr17 as destination and last (accumulator) operand (partial overlap).
# The checks expect "S_NOP 7" followed by "S_NOP 0" between them.
# GCN-LABEL: name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_SMFMAC
name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_F32_32X32X16_BF16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_SMFMAC_F32_32X32X16_BF16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, implicit $mode, implicit $exec
...
# V_SMFMAC_F32_16X16X32_F16 writes $vgpr6..$vgpr9; a second one reads $vgpr6 as
# its third operand (the "idx" operand per the test name).
# The checks expect "S_NOP 6" between them.
# GCN-LABEL: name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_SMFMAC
name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx
body: |
bb.0:
$vgpr6_vgpr7_vgpr8_vgpr9 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr6_vgpr7_vgpr8_vgpr9, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 (vgprcd form) writes $vgpr2_vgpr3; the following
# V_SMFMAC_F32_16X16X32_BF16 uses $vgpr0..$vgpr3 as destination and last
# (accumulator) operand. The checks expect no S_NOP between them.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac16x16x32_read_overlap
# GCN: V_MFMA
# GCN-NEXT: V_SMFMAC
name: dgemm4x4_mfma_write_vgpr_smfmac16x16x32_read_overlap
body: |
bb.0:
$vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_BF16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 (vgprcd form) writes $vgpr2..$vgpr9; the following
# V_MFMA_I32_16X16X32I8 uses $vgpr0..$vgpr3 as destination and third source.
# The checks expect no S_NOP between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfmai8_read_overlap
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfmai8_read_overlap
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 (vgprcd form) writes $vgpr2..$vgpr9; the following
# V_MFMA_F32_16X16X8XF32 uses $vgpr0..$vgpr3 as destination and third source.
# The checks expect no S_NOP between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfmaxf32_read_overlap
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfmaxf32_read_overlap
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X8XF32_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 writes $agpr2..$agpr5; a second one reads $agpr0..$agpr3
# as its third source (partial overlap).
# The checks expect "S_NOP 1" between them.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_nonxdl_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm4x4_mfma_write_agpr_nonxdl_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 writes $agpr2..$agpr5; the following V_MFMA_I32_4X4X4I8
# reads $agpr0..$agpr3 as its third source (partial overlap).
# The checks expect no S_NOP between them.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_xdl_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm4x4_mfma_write_agpr_xdl_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $agpr2..$agpr5; the following V_MFMA_F32_4X4X1F32
# reads $agpr0..$agpr3 as its third source (partial overlap).
# The checks expect "S_NOP 2" between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 writes $agpr0..$agpr3; a second one reads $agpr1 as its
# first source operand (and $agpr0..$agpr3 as third source).
# The checks expect "S_NOP 3" between them.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 (vgprcd form) writes $vgpr0..$vgpr3; the following
# BUFFER_STORE_DWORD_OFFSET stores $vgpr0.
# The checks expect "S_NOP 3" between them.
# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: BUFFER_STORE_DWORD
name: nonxdl_smfma4x4_write_vgpr_vm_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 (vgprcd form) writes $vgpr0..$vgpr3; the following
# V_MOV_B32 reads $vgpr0. The checks expect "S_NOP 3" between them.
# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma4x4_write_vgpr_valu_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 (vgprcd form, sources $vgpr4, $vgpr0, $vgpr6..$vgpr9)
# writes $vgpr0..$vgpr3; the following V_MOV_B32 overwrites $vgpr1.
# The checks expect "S_NOP 3" between them.
# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma4x4_write_vgpr_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X1F32 (vgprcd form) writes $vgpr0..$vgpr15; the following
# BUFFER_STORE_DWORD_OFFSET stores $vgpr0.
# The checks expect "S_NOP 7" followed by "S_NOP 1" between them.
# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: BUFFER_STORE_DWORD
name: nonxdl_8pass_smfma16x16_write_vgpr_vm_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X1F32 (vgprcd form) writes $vgpr0..$vgpr15; the following
# V_MOV_B32 reads $vgpr0.
# The checks expect "S_NOP 7" followed by "S_NOP 1" between them.
# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32
name: nonxdl_8pass_smfma16x16_write_vgpr_valu_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X1F32 (vgprcd form) writes $vgpr0..$vgpr15; the following
# V_MOV_B32 overwrites $vgpr1.
# The checks expect "S_NOP 7" followed by "S_NOP 1" between them.
# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32
name: nonxdl_8pass_smfma16x16_write_vgpr_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X1F32 (vgprcd form) writes $vgpr0..$vgpr31; the following
# BUFFER_STORE_DWORD_OFFSET stores $vgpr0.
# The checks expect "S_NOP 7", "S_NOP 7", "S_NOP 1" between them.
# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: BUFFER_STORE_DWORD
name: nonxdl_smfma32x32_write_vgpr_vm_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# The 32x32x1 F32 MFMA writes the tuple $vgpr0..$vgpr31 and the V_MOV that
# follows reads $vgpr0 from that result. The checks expect S_NOP 7, S_NOP 7
# and S_NOP 1 to be inserted before the V_MOV.
# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma32x32_write_vgpr_valu_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# The 32x32x1 F32 MFMA writes the tuple $vgpr0..$vgpr31 and the V_MOV that
# follows overwrites $vgpr1, a register inside that destination tuple. The
# checks expect S_NOP 7, S_NOP 7 and S_NOP 1 to be inserted before the V_MOV.
# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma32x32_write_vgpr_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# The first MFMA writes $agpr2..$agpr5; the second MFMA uses
# $agpr0_agpr1_agpr2_agpr3 as both its accumulator input and its destination,
# so the two tuples overlap only partially (in $agpr2 and $agpr3). The checks
# expect a single S_NOP 4 between the two MFMAs.
# GCN-LABEL: name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X8XF32_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Same partial-overlap shape as the MFMA/MFMA case, but the producer is a
# V_SMFMAC writing $agpr2..$agpr5. The consuming MFMA uses
# $agpr0_agpr1_agpr2_agpr3 as accumulator input and destination, overlapping
# in $agpr2 and $agpr3. The checks expect a single S_NOP 4 between them.
# GCN-LABEL: name: smfmac16x16_mfma_write_agpr_mfma_read_overlap
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: smfmac16x16_mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X8XF32_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# The 4x4x4 I8 MFMA takes $vgpr0_vgpr1_vgpr2_vgpr3 as its third source operand
# and writes a disjoint tuple ($vgpr10..$vgpr13). The V_MOV that follows
# overwrites $vgpr1, one of the registers the MFMA reads. The checks expect a
# single S_NOP 0 between them.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MOV_B32
name: xdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# The 4x4x1 F32 MFMA takes $vgpr0_vgpr1_vgpr2_vgpr3 as its third source
# operand and the V_MOV that follows overwrites $vgpr1. The checks require the
# V_MOV to appear on the line directly after the MFMA, i.e. no S_NOP is
# expected for this opcode.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: nonxdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# The 16x16x8 XF32 MFMA takes $vgpr0_vgpr1_vgpr2_vgpr3 as its third source
# operand and the V_MOV that follows overwrites $vgpr1. The checks expect a
# single S_NOP 2 between them.
# GCN-LABEL: name: xdl_4pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_4pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_F32_16X16X8XF32_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# The 16x16x32 F16 V_SMFMAC uses $vgpr0_vgpr1_vgpr2_vgpr3 as both its
# destination and its last register operand, and the V_MOV that follows
# overwrites $vgpr1 inside that tuple. The checks expect a single S_NOP 6
# between them.
# GCN-LABEL: name: smfmac16x16_read_vgpr_srcc_valu_write
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name: smfmac16x16_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# The 16x16x4 I8 MFMA takes the tuple $vgpr0..$vgpr15 as its third source
# operand and writes a disjoint tuple ($vgpr100..$vgpr115). The V_MOV that
# follows overwrites $vgpr1, one of the registers the MFMA reads. The checks
# expect a single S_NOP 6 between them.
# GCN-LABEL: name: xdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name: xdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $agpr26, $agpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# The 16x16x1 F32 MFMA takes the tuple $vgpr0..$vgpr15 as its third source
# operand and the V_MOV that follows overwrites $vgpr1. The checks require the
# V_MOV to appear on the line directly after the MFMA, i.e. no S_NOP is
# expected for this opcode.
# GCN-LABEL: name: nonxdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: nonxdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $agpr26, $agpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# The 32x32x32 I8 V_SMFMAC uses the tuple $vgpr0..$vgpr15 as both its
# destination and its last register operand, and the V_MOV that follows
# overwrites $vgpr1 inside that tuple. The checks expect S_NOP 7 followed by
# S_NOP 2 between them.
# GCN-LABEL: name: smfmac32x32_read_vgpr_srcc_valu_write
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: smfmac32x32_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# The 32x32x4 F16 MFMA takes the tuple $vgpr0..$vgpr31 as its third source
# operand and writes a disjoint tuple ($vgpr40..$vgpr71). The V_MOV that
# follows overwrites $vgpr1, one of the registers the MFMA reads. The checks
# expect S_NOP 7 followed by S_NOP 6 between them.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name: xdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $agpr126_agpr127, $agpr128_agpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# The 32x32x1 F32 MFMA takes the tuple $vgpr0..$vgpr31 as its third source
# operand and the V_MOV that follows overwrites $vgpr1. The checks require the
# V_MOV to appear on the line directly after the MFMA, i.e. no S_NOP is
# expected for this opcode.
# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: nonxdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
body: |
bb.0:
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# A V_MOV writes $vgpr0, an F64 MFMA that only touches $agpr0_agpr1 sits in
# between, and the buffer store then uses $vgpr0 as an operand. The checks
# require the three instructions to stay on consecutive lines with no S_NOP
# inserted anywhere between the MFMA and the store.
# GCN-LABEL: name: dgemm_between_valu_write_buffer_store_no_snop
# GCN: V_MOV_B32_e32
# GCN-NEXT: V_MFMA_F64
# GCN-NOT: S_NOP
# GCN-NEXT: BUFFER_STORE_DWORD
name: dgemm_between_valu_write_buffer_store_no_snop
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
...