blob: b168264bf6a6d538c6f2baeef1fdf7e7b5898609 [file] [edit]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=si-fold-operands -o - %s | FileCheck --check-prefix=GCN %s
# Constant-folding of scalar S_ADD_I32 / S_ADD_U32 / S_SUB_I32 / S_SUB_U32 in
# SIFoldOperands::tryConstantFoldOp:
#
# * (imm, imm) folds to an S_MOV_B32 of the precomputed value.
# * (x, 0) and (0, x) for ADD fold to a copy of x.
# * (x, 0) for SUB folds to a copy of x; (0, x) does NOT fold (it is a negation).
# * Folding is gated by allImplicitDefsAreDead, so when the SCC def is live
#   the instruction must be left untouched.
#
# In each imm-imm case below, the immediate K is materialised by an upstream
# S_MOV_B32 and folded into the SOP2 by tryFoldFoldableCopy; that triggers
# tryConstantFoldOp, which then collapses the (imm, imm) SOP2 into a single
# S_MOV_B32 via the evalBinaryInstruction handlers for these opcodes.
---
# 5 + 12340 with SCC dead. Neither operand is zero, so the expected
# S_MOV_B32 12345 can only come from actually evaluating the (imm, imm) add,
# not from an "x + 0 -> x" identity or from returning one operand unchanged.
# 5 is an inline constant and 12340 a literal, so the SOP2 still carries at
# most one literal after the S_MOV_B32 is folded in.
name: s_add_i32_imm_imm_scc_dead
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_add_i32_imm_imm_scc_dead
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12345
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]]
%0:sreg_32 = S_MOV_B32 12340
%1:sreg_32 = S_ADD_I32 5, %0, implicit-def dead $scc
S_ENDPGM 0, implicit %1
...
---
# 50 + 49 with SCC dead: once the S_MOV_B32 49 is folded into the add, both
# sources are constants and the expected output is a single S_MOV_B32 99.
name: s_add_u32_imm_imm_scc_dead
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_add_u32_imm_imm_scc_dead
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 99
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]]
%0:sreg_32 = S_MOV_B32 49
%1:sreg_32 = S_ADD_U32 50, %0, implicit-def dead $scc
S_ENDPGM 0, implicit %1
...
---
# 50 - 7 with SCC dead: the subtrahend comes from S_MOV_B32 7; the expected
# output is a single S_MOV_B32 43 (operand order matters for SUB).
name: s_sub_i32_imm_imm_scc_dead
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_sub_i32_imm_imm_scc_dead
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 43
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]]
%0:sreg_32 = S_MOV_B32 7
%1:sreg_32 = S_SUB_I32 50, %0, implicit-def dead $scc
S_ENDPGM 0, implicit %1
...
---
# 99 - 7 with SCC dead: the subtrahend comes from S_MOV_B32 7; the expected
# output is a single S_MOV_B32 92.
name: s_sub_u32_imm_imm_scc_dead
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_sub_u32_imm_imm_scc_dead
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 92
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]]
%0:sreg_32 = S_MOV_B32 7
%1:sreg_32 = S_SUB_U32 99, %0, implicit-def dead $scc
S_ENDPGM 0, implicit %1
...
# In each (reg, 0) / (0, reg) case below, the zero is materialised by an
# upstream S_MOV_B32 0 and folded into the SOP2 by tryFoldFoldableCopy; that
# triggers tryConstantFoldOp, which then takes the add/sub identity branch and
# collapses the SOP2 to a COPY of the non-zero operand. For `0 - x` the S_SUB
# must stay in place: only the zero is folded in as an immediate operand.
---
# %0 + 0 with the zero in the second source position and SCC dead: the
# expected output replaces the add with a COPY of %0, and the S_MOV_B32 0
# no longer appears.
name: s_add_i32_reg_zero
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
; GCN-LABEL: name: s_add_i32_reg_zero
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = S_MOV_B32 0
%2:sreg_32 = S_ADD_I32 %0, %1, implicit-def dead $scc
S_ENDPGM 0, implicit %2
...
---
# 0 + %0 with the zero in the first source position and SCC dead: ADD is
# commutative, so the expected output is the same COPY of %0 as in the
# (reg, 0) case.
name: s_add_i32_zero_reg
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
; GCN-LABEL: name: s_add_i32_zero_reg
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = S_MOV_B32 0
%2:sreg_32 = S_ADD_I32 %1, %0, implicit-def dead $scc
S_ENDPGM 0, implicit %2
...
---
# %0 - 0 with SCC dead: subtracting zero is the identity, so the expected
# output replaces the S_SUB_I32 with a COPY of %0.
name: s_sub_i32_reg_zero
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
; GCN-LABEL: name: s_sub_i32_reg_zero
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = S_MOV_B32 0
%2:sreg_32 = S_SUB_I32 %0, %1, implicit-def dead $scc
S_ENDPGM 0, implicit %2
...
# S_SUB_I32 0, %x is *negation*, not a copy. Must NOT fold to a copy.
---
# 0 - %0 with SCC dead: the expected output keeps the S_SUB_I32, with the
# zero from S_MOV_B32 0 folded in as an immediate first source. Only the
# separate S_MOV_B32 disappears; the subtraction itself must survive.
name: s_sub_i32_zero_reg_must_not_fold
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
; GCN-LABEL: name: s_sub_i32_zero_reg_must_not_fold
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 0, [[COPY]], implicit-def dead $scc
; GCN-NEXT: S_ENDPGM 0, implicit [[S_SUB_I32_]]
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = S_MOV_B32 0
%2:sreg_32 = S_SUB_I32 %1, %0, implicit-def dead $scc
S_ENDPGM 0, implicit %2
...
# SCC live: tryFoldFoldableCopy still folds the upstream MOV's immediate into
# the SOP2's source, but tryConstantFoldOp's allImplicitDefsAreDead gate must
# reject the (imm, imm) -> S_MOV_B32 collapse because the add's SCC result
# (signed overflow for S_ADD_I32, not a carry-out) is still read.
---
# 0 + 99 where the add's $scc def is NOT marked dead and S_ENDPGM reads $scc:
# the expected output keeps the S_ADD_I32 (now with 99 as an immediate source)
# instead of replacing it with an S_MOV_B32, which would not define $scc.
name: s_add_i32_imm_imm_scc_live_must_not_fold
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_add_i32_imm_imm_scc_live_must_not_fold
; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 0, 99, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit $scc
%0:sreg_32 = S_MOV_B32 99
%1:sreg_32 = S_ADD_I32 0, %0, implicit-def $scc
S_ENDPGM 0, implicit %1, implicit $scc
...