| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck %s |
| |
| # Test that si-fix-sgpr-copies preserves the Lo256 register class constraint |
| # when eliminating a VGPR-to-SGPR copy used as a WMMA scale operand. |
| # |
| # The scale_src0 and scale_src1 operands of V_WMMA_SCALE instructions require |
| # registers from VCSrc_b32_Lo256 (VS_32_Lo256), which only allows VGPRs 0-255. |
| # When si-fix-sgpr-copies eliminates a VGPR-to-SGPR copy by replacing uses of |
| # the SGPR with the VGPR source, it must constrain the VGPR to vgpr_32_lo256 |
| # to preserve this hardware encoding requirement. |
| |
| --- |
| # scale_src0 case: the SGPR copy %1 is used as the first scale operand of the |
| # WMMA (the register operand right after %4). The expected output uses the |
| # VGPR copy of $vgpr0 there directly, with its class narrowed from vgpr_32 to |
| # vgpr_32_lo256, and no sreg_32 copy remains. |
| # NOTE(review): $sgpr0 is declared live-in but no instruction in the body |
| # references it. |
| name: wmma_scale_copy_vgpr_to_sgpr |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $sgpr0 |
| ; CHECK-LABEL: name: wmma_scale_copy_vgpr_to_sgpr |
| ; CHECK: liveins: $vgpr0, $sgpr0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32_lo256 = COPY $vgpr0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF |
| ; CHECK-NEXT: early-clobber %6:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF]], [[DEF1]], 0, [[DEF2]], [[COPY]], [[DEF3]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input: %0 starts in the unconstrained vgpr_32 class. |
| %0:vgpr_32 = COPY $vgpr0 |
| ; VGPR-to-SGPR copy that the pass is expected to eliminate. |
| %1:sreg_32 = COPY %0 |
| %2:vreg_512_align2 = IMPLICIT_DEF |
| %3:vreg_512_align2 = IMPLICIT_DEF |
| %4:vreg_256_align2 = IMPLICIT_DEF |
| ; Second scale operand is already in the lo256 class and is not rewritten. |
| %5:vgpr_32_lo256 = IMPLICIT_DEF |
| ; %1 is the scale_src0 operand here; %5 is scale_src1. |
| %6:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr %2, %3, 0, %4, %1, %5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| # Also test that the scale_src1 (operand 6) constraint is preserved. |
| |
| --- |
| # scale_src1 case: same input as the scale_src0 test, except that the SGPR |
| # copy %1 is used as the second scale operand of the WMMA and the lo256 |
| # IMPLICIT_DEF %5 as the first. The expected output again uses the VGPR copy |
| # of $vgpr0 directly, narrowed to vgpr_32_lo256, with no sreg_32 copy left. |
| # NOTE(review): $sgpr0 is declared live-in but no instruction in the body |
| # references it. |
| name: wmma_scale_copy_vgpr_to_sgpr_src1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $sgpr0 |
| ; CHECK-LABEL: name: wmma_scale_copy_vgpr_to_sgpr_src1 |
| ; CHECK: liveins: $vgpr0, $sgpr0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32_lo256 = COPY $vgpr0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF |
| ; CHECK-NEXT: early-clobber %6:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF]], [[DEF1]], 0, [[DEF2]], [[DEF3]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input: %0 starts in the unconstrained vgpr_32 class. |
| %0:vgpr_32 = COPY $vgpr0 |
| ; VGPR-to-SGPR copy that the pass is expected to eliminate. |
| %1:sreg_32 = COPY %0 |
| %2:vreg_512_align2 = IMPLICIT_DEF |
| %3:vreg_512_align2 = IMPLICIT_DEF |
| %4:vreg_256_align2 = IMPLICIT_DEF |
| ; First scale operand is already in the lo256 class and is not rewritten. |
| %5:vgpr_32_lo256 = IMPLICIT_DEF |
| ; Operand order is swapped relative to the scale_src0 test: %5 is scale_src0, %1 is scale_src1. |
| %6:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr %2, %3, 0, %4, %5, %1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| S_ENDPGM 0 |
| ... |