| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -march=amdgcn -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s |
| # |
| |
| |
| --- |
| # The COPY of %1.sub0 into %2 can be coalesced based on subregister liveness. |
| # %2 is redefined in bb.1 and is still used in bb.2, but bb.2 reads only |
| # %1.sub2 from %1, so the check lines expect %2 to be rewritten as %1.sub0 |
| # and the COPY to disappear. |
| name: subrange_coalesce_liveout |
| tracksRegLiveness: true |
| body: | |
| ; GCN-LABEL: name: subrange_coalesce_liveout |
| ; GCN: bb.0: |
| ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; GCN: liveins: $vgpr0_vgpr1 |
| ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec |
| ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec |
| ; GCN: S_BRANCH %bb.1 |
| ; GCN: bb.1: |
| ; GCN: successors: %bb.2(0x80000000) |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec |
| ; GCN: S_BRANCH %bb.2 |
| ; GCN: bb.2: |
| ; GCN: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec |
| ; GCN: S_ENDPGM 0 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $vgpr0_vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec |
| ; The copy under test: %2 starts out as a copy of %1.sub0. |
| %2:vgpr_32 = COPY %1.sub0 |
| S_CBRANCH_EXECZ %bb.2, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| successors: %bb.2 |
| |
| ; %2 is redefined on this path only; bb.2 is also reachable directly from bb.0. |
| %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec |
| S_BRANCH %bb.2 |
| |
| bb.2: |
| ; Reads %2 and %1.sub2; %1.sub0 itself is not read here. |
| %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # An early-clobber def stops the coalescer from coalescing the COPY. |
| # The redefinition of %2 in bb.1 is marked early-clobber and reads %1.sub0 |
| # in the same instruction; the check lines expect the COPY of %1.sub0 to |
| # remain and %2 to stay a separate register. |
| name: subrange_coalesce_early_clobber |
| tracksRegLiveness: true |
| body: | |
| ; GCN-LABEL: name: subrange_coalesce_early_clobber |
| ; GCN: bb.0: |
| ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; GCN: liveins: $vgpr0_vgpr1 |
| ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec |
| ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0 |
| ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec |
| ; GCN: S_BRANCH %bb.1 |
| ; GCN: bb.1: |
| ; GCN: successors: %bb.2(0x80000000) |
| ; GCN: early-clobber [[COPY1]]:vgpr_32 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec |
| ; GCN: S_BRANCH %bb.2 |
| ; GCN: bb.2: |
| ; GCN: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[COPY1]], implicit $exec |
| ; GCN: S_ENDPGM 0 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $vgpr0_vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec |
| ; The copy under test: expected to survive the pass in this function. |
| %2:vgpr_32 = COPY %1.sub0 |
| S_CBRANCH_EXECZ %bb.2, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| successors: %bb.2 |
| |
| ; Early-clobber redefinition of %2 that also reads %1.sub0. |
| early-clobber %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub2, implicit $exec |
| S_BRANCH %bb.2 |
| |
| bb.2: |
| %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # A lane that does not conflict with the copied lane (sub1) is redefined |
| # after %2 has been redefined; the COPY is still coalescable. |
| # The check lines expect %2 to be rewritten as %1.sub0 and the COPY of |
| # %1.sub0 to disappear. |
| name: subrange_coalesce_unrelated_sub_redefined |
| tracksRegLiveness: true |
| body: | |
| ; GCN-LABEL: name: subrange_coalesce_unrelated_sub_redefined |
| ; GCN: bb.0: |
| ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; GCN: liveins: $vgpr0_vgpr1 |
| ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec |
| ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec |
| ; GCN: S_BRANCH %bb.1 |
| ; GCN: bb.1: |
| ; GCN: successors: %bb.2(0x80000000) |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub1:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec |
| ; GCN: S_BRANCH %bb.2 |
| ; GCN: bb.2: |
| ; GCN: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec |
| ; GCN: S_ENDPGM 0 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $vgpr0_vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec |
| ; The copy under test: %2 starts out as a copy of %1.sub0. |
| %2:vgpr_32 = COPY %1.sub0 |
| S_CBRANCH_EXECZ %bb.2, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| successors: %bb.2 |
| |
| %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec |
| ; %1.sub1 is redefined here, using the new value of %2 for both operands. |
| %1.sub1:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec |
| S_BRANCH %bb.2 |
| |
| bb.2: |
| ; Reads %1.sub1 and %2; %1.sub0 itself is not read here. |
| %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %2, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # A more complex example showing that lane conflicts can be resolved |
| # based on subranges. |
| # bb.1 branches back to itself, reads and rewrites %2 on every iteration, |
| # and redefines %1.sub2 from %2. The check lines expect %2 to be rewritten |
| # as %1.sub0 and the COPY of %1.sub0 to disappear. |
| name: subrange_coalesce_complex_pattern |
| tracksRegLiveness: true |
| body: | |
| ; GCN-LABEL: name: subrange_coalesce_complex_pattern |
| ; GCN: bb.0: |
| ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; GCN: liveins: $vgpr0_vgpr1 |
| ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec |
| ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec |
| ; GCN: S_BRANCH %bb.1 |
| ; GCN: bb.1: |
| ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec |
| ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub2:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec |
| ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec |
| ; GCN: S_BRANCH %bb.2 |
| ; GCN: bb.2: |
| ; GCN: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec |
| ; GCN: S_ENDPGM 0 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $vgpr0_vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec |
| ; The copy under test: %2 starts out as a copy of %1.sub0. |
| %2:vgpr_32 = COPY %1.sub0 |
| S_CBRANCH_EXECZ %bb.2, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| successors: %bb.1, %bb.2 |
| |
| ; %2 is both read and redefined here, so its value is carried around the loop. |
| %2:vgpr_32 = V_AND_B32_e64 %1.sub1, %2, implicit $exec |
| ; %1.sub2 is redefined from the new value of %2. |
| %1.sub2:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec |
| S_CBRANCH_EXECZ %bb.1, implicit $exec |
| S_BRANCH %bb.2 |
| |
| bb.2: |
| ; Reads only %1.sub1 and %1.sub2; %2 is not used after the loop. |
| %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %1.sub2, implicit $exec |
| S_ENDPGM 0 |
| |
| ... |