| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=postmisched,post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s |
| |
| # Bring all independent V_LSHL_ADD_U32_e64 instructions into the shadow |
| # of the WMMA so then hazard recognizer only need to insert 4 V_NOP_e32 |
| # instructions instead of 8. |
| |
| --- |
| name: test_wmma_scale_f32_16x16x128_f8f6f4_shadow_sched |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45 |
| |
| ; GCN-LABEL: name: test_wmma_scale_f32_16x16x128_f8f6f4_shadow_sched |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr46 = V_LSHL_ADD_U32_e64 killed $vgpr43, 1, $vgpr43, implicit $exec |
| ; GCN-NEXT: $vgpr47 = V_LSHL_ADD_U32_e64 killed $vgpr42, 1, $vgpr42, implicit $exec |
| ; GCN-NEXT: $vgpr48 = V_LSHL_ADD_U32_e64 killed $vgpr41, 1, $vgpr41, implicit $exec |
| ; GCN-NEXT: $vgpr49 = V_LSHL_ADD_U32_e64 killed $vgpr40, 1, $vgpr40, implicit $exec |
| ; GCN-NEXT: V_NOP_e32 implicit $exec |
| ; GCN-NEXT: V_NOP_e32 implicit $exec |
| ; GCN-NEXT: V_NOP_e32 implicit $exec |
| ; GCN-NEXT: V_NOP_e32 implicit $exec |
| ; GCN-NEXT: $vgpr4 = V_ADD_F32_e32 1065353216, killed $vgpr4, implicit $mode, implicit $exec |
| ; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 1065353216, killed $vgpr5, implicit $mode, implicit $exec |
| ; GCN-NEXT: $vgpr6 = V_ADD_F32_e32 1065353216, killed $vgpr6, implicit $mode, implicit $exec |
| ; GCN-NEXT: $vgpr7 = V_ADD_F32_e32 1065353216, killed $vgpr7, implicit $mode, implicit $exec |
| ; GCN-NEXT: $vgpr8 = V_ADD_F32_e32 1065353216, killed $vgpr8, implicit $mode, implicit $exec |
| ; GCN-NEXT: $vgpr9 = V_ADD_F32_e32 1065353216, killed $vgpr9, implicit $mode, implicit $exec |
| ; GCN-NEXT: $vgpr10 = V_ADD_F32_e32 1065353216, killed $vgpr10, implicit $mode, implicit $exec |
| ; GCN-NEXT: $vgpr11 = V_ADD_F32_e32 1065353216, killed $vgpr11, implicit $mode, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr8_vgpr9_vgpr10_vgpr11, 32, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORDX4 killed renamable $vgpr44_vgpr45, killed renamable $vgpr46_vgpr47_vgpr48_vgpr49, 64, 0, implicit $exec |
| ; GCN-NEXT: S_ENDPGM 0 |
| early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec |
| $vgpr4 = V_ADD_F32_e32 1065353216, $vgpr4, implicit $mode, implicit $exec |
| $vgpr5 = V_ADD_F32_e32 1065353216, $vgpr5, implicit $mode, implicit $exec |
| $vgpr6 = V_ADD_F32_e32 1065353216, $vgpr6, implicit $mode, implicit $exec |
| $vgpr7 = V_ADD_F32_e32 1065353216, $vgpr7, implicit $mode, implicit $exec |
| $vgpr8 = V_ADD_F32_e32 1065353216, $vgpr8, implicit $mode, implicit $exec |
| $vgpr9 = V_ADD_F32_e32 1065353216, $vgpr9, implicit $mode, implicit $exec |
| $vgpr10 = V_ADD_F32_e32 1065353216, $vgpr10, implicit $mode, implicit $exec |
| $vgpr11 = V_ADD_F32_e32 1065353216, $vgpr11, implicit $mode, implicit $exec |
| $vgpr46 = V_LSHL_ADD_U32_e64 $vgpr43, 1, $vgpr43, implicit $exec |
| $vgpr47 = V_LSHL_ADD_U32_e64 $vgpr42, 1, $vgpr42, implicit $exec |
| $vgpr48 = V_LSHL_ADD_U32_e64 $vgpr41, 1, $vgpr41, implicit $exec |
| $vgpr49 = V_LSHL_ADD_U32_e64 $vgpr40, 1, $vgpr40, implicit $exec |
| GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr8_vgpr9_vgpr10_vgpr11, 32, 0, implicit $exec |
| GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr46_vgpr47_vgpr48_vgpr49, 64, 0, implicit $exec |
| S_ENDPGM 0 |
| ... |