| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=si-fold-operands -o - %s | FileCheck --check-prefix=GCN %s |
| |
| # Constant-folding of scalar S_ADD_I32 / S_ADD_U32 / S_SUB_I32 / S_SUB_U32 in |
| # SIFoldOperands::tryConstantFoldOp: |
| # |
| # * (imm, imm) folds to S_MOV_B32 of the precomputed value |
| # * (x, 0) and (0, x) for ADD fold to a copy of x |
| # * (x, 0) for SUB folds to a copy of x; (0, x) does NOT fold (negation) |
| # * Folding is gated by allImplicitDefsAreDead, so when the SCC def is live |
| # the instruction must be left untouched. |
| |
| # In each imm-imm case below, one source is already an immediate and the other |
| # is materialised by an upstream S_MOV_B32 that tryFoldFoldableCopy folds into |
| # the SOP2; that triggers tryConstantFoldOp, which then collapses the |
| # (imm, imm) SOP2 into a single S_MOV_B32 via the evalBinaryInstruction handlers. |
| --- |
| # 55 + 12345 = 12400. Both operands are non-zero so that the expected |
| # S_MOV_B32 value can only come from actually evaluating the add, not from an |
| # add-zero identity. SCC is marked dead, so the fold is permitted. |
| name: s_add_i32_imm_imm_scc_dead |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: s_add_i32_imm_imm_scc_dead |
| ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12400 |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] |
| %0:sreg_32 = S_MOV_B32 12345 |
| %1:sreg_32 = S_ADD_I32 55, %0, implicit-def dead $scc |
| S_ENDPGM 0, implicit %1 |
| ... |
| |
| --- |
| # 50 + 49 = 99. %0 supplies the 49; with SCC dead the CHECK lines expect the |
| # S_ADD_U32 to be replaced by a single S_MOV_B32 of the sum. |
| name: s_add_u32_imm_imm_scc_dead |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: s_add_u32_imm_imm_scc_dead |
| ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 99 |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] |
| %0:sreg_32 = S_MOV_B32 49 |
| %1:sreg_32 = S_ADD_U32 50, %0, implicit-def dead $scc |
| S_ENDPGM 0, implicit %1 |
| ... |
| |
| --- |
| # 50 - 7 = 43. The immediate 50 is src0 and %0 (7) is src1, so the expected |
| # value also pins the operand order of the subtraction. |
| name: s_sub_i32_imm_imm_scc_dead |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: s_sub_i32_imm_imm_scc_dead |
| ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 43 |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] |
| %0:sreg_32 = S_MOV_B32 7 |
| %1:sreg_32 = S_SUB_I32 50, %0, implicit-def dead $scc |
| S_ENDPGM 0, implicit %1 |
| ... |
| |
| --- |
| # 99 - 7 = 92. Same shape as the S_SUB_I32 case, using the unsigned opcode; |
| # SCC is dead, so the CHECK lines expect a single S_MOV_B32 of the difference. |
| name: s_sub_u32_imm_imm_scc_dead |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: s_sub_u32_imm_imm_scc_dead |
| ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 92 |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] |
| %0:sreg_32 = S_MOV_B32 7 |
| %1:sreg_32 = S_SUB_U32 99, %0, implicit-def dead $scc |
| S_ENDPGM 0, implicit %1 |
| ... |
| |
| # In each (reg, 0) / (0, reg) case below, the zero is materialised by an |
| # upstream S_MOV_B32 0 and folded into the SOP2 by tryFoldFoldableCopy; that |
| # triggers tryConstantFoldOp, which then takes the new add/sub branch and |
| # collapses the SOP2 to a COPY of the non-zero operand (or, for `0 - x`, |
| # correctly leaves the SOP2 untouched). |
| --- |
| # x + 0: the zero (%1) is the second source. The CHECK lines expect the add to |
| # become a COPY of %0 and no S_MOV_B32 0 to remain in the output. |
| name: s_add_i32_reg_zero |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0 |
| ; GCN-LABEL: name: s_add_i32_reg_zero |
| ; GCN: liveins: $sgpr0 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]] |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] |
| %0:sreg_32 = COPY $sgpr0 |
| %1:sreg_32 = S_MOV_B32 0 |
| %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def dead $scc |
| S_ENDPGM 0, implicit %2 |
| ... |
| |
| --- |
| # 0 + x: the zero (%1) is the first source. The expected output is the same |
| # as for x + 0: the add becomes a COPY of %0. |
| name: s_add_i32_zero_reg |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0 |
| ; GCN-LABEL: name: s_add_i32_zero_reg |
| ; GCN: liveins: $sgpr0 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]] |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] |
| %0:sreg_32 = COPY $sgpr0 |
| %1:sreg_32 = S_MOV_B32 0 |
| %2:sreg_32 = S_ADD_I32 %1, %0, implicit-def dead $scc |
| S_ENDPGM 0, implicit %2 |
| ... |
| |
| --- |
| # x - 0: the zero (%1) is the subtrahend. The CHECK lines expect the subtract |
| # to become a COPY of %0. |
| name: s_sub_i32_reg_zero |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0 |
| ; GCN-LABEL: name: s_sub_i32_reg_zero |
| ; GCN: liveins: $sgpr0 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]] |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] |
| %0:sreg_32 = COPY $sgpr0 |
| %1:sreg_32 = S_MOV_B32 0 |
| %2:sreg_32 = S_SUB_I32 %0, %1, implicit-def dead $scc |
| S_ENDPGM 0, implicit %2 |
| ... |
| |
| # S_SUB_I32 0, %x is *negation*, not a copy. Must NOT fold to a copy. |
| --- |
| # 0 - x: the zero (%1) is the minuend. The CHECK lines expect the immediate 0 |
| # to be folded into src0 but the S_SUB_I32 itself to remain. |
| name: s_sub_i32_zero_reg_must_not_fold |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0 |
| ; GCN-LABEL: name: s_sub_i32_zero_reg_must_not_fold |
| ; GCN: liveins: $sgpr0 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| ; GCN-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 0, [[COPY]], implicit-def dead $scc |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[S_SUB_I32_]] |
| %0:sreg_32 = COPY $sgpr0 |
| %1:sreg_32 = S_MOV_B32 0 |
| %2:sreg_32 = S_SUB_I32 %1, %0, implicit-def dead $scc |
| S_ENDPGM 0, implicit %2 |
| ... |
| |
| # SCC live: tryFoldFoldableCopy still folds the upstream MOV's immediate into |
| # the SOP2's source, but tryConstantFoldOp's allImplicitDefsAreDead gate must |
| # reject the (imm, imm) -> S_MOV_B32 collapse because the SCC def is still |
| # read by a later instruction (here, the S_ENDPGM). |
| --- |
| # The S_ADD_I32 defines $scc without `dead` and S_ENDPGM reads it. The CHECK |
| # lines expect the 99 to be folded into src1 while the S_ADD_I32 is kept. |
| name: s_add_i32_imm_imm_scc_live_must_not_fold |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: s_add_i32_imm_imm_scc_live_must_not_fold |
| ; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 0, 99, implicit-def $scc |
| ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit $scc |
| %0:sreg_32 = S_MOV_B32 99 |
| %1:sreg_32 = S_ADD_I32 0, %0, implicit-def $scc |
| S_ENDPGM 0, implicit %1, implicit $scc |
| ... |