| # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN |
| |
| --- |
| # old is undefined: only combine when masks are fully enabled and |
| # bound_ctrl:1 is set, otherwise the result of DPP VALU op can be undefined. |
| # GCN-LABEL: name: old_is_undef |
| # GCN: %2:vgpr_32 = IMPLICIT_DEF |
| # VOP2: |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec |
| # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec |
| # GCN: %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec |
| # GCN: %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec |
| # VOP1: |
| # GCN: %12:vgpr_32 = V_NOT_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| # GCN: %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec |
| # GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec |
| # GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec |
| name: old_is_undef |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| ; VOP2 |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| |
| %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec |
| |
| %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec |
| |
| %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec |
| |
| ; VOP1 |
| %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec |
| |
| %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec |
| %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec |
| |
| %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec |
| %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec |
| |
| %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec |
| ... |
| |
| # old is zero cases: |
| |
| # GCN-LABEL: name: old_is_0 |
| |
| # VOP2: |
| # case 1: old is zero, masks are fully enabled, bound_ctrl:1 is on: |
| # the DPP mov result would be either zero ({src lane disabled}|{src lane is |
| # out of range}) or active src lane result - can combine with old = undef. |
| # undef is preferred as it makes life easier for the regalloc. |
| # GCN: [[U1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp [[U1]], %0, %1, 1, 15, 15, 1, implicit $exec |
| |
| # case 2: old is zero, masks are fully enabled, bound_ctrl:1 is off: |
| # as the DPP mov old is zero this case is no different from case 1 - combine it |
| # setting bound_ctrl:1 on for the combined DPP VALU op to make old undefined |
| # GCN: [[U2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| # GCN: %6:vgpr_32 = V_ADD_U32_dpp [[U2]], %0, %1, 1, 15, 15, 1, implicit $exec |
| |
| # case 3: masks are partially disabled, bound_ctrl:1 is on: |
| # the DPP mov result would be either zero ({src lane disabled}|{src lane is |
| # out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or |
| # active src lane result - can combine with old = src1 of the VALU op. |
| # The VALU op should have the same masks as DPP mov as they select lanes |
| # with identity value. |
| # Special case: the bound_ctrl for the combined DPP VALU op isn't important |
| # here but let's make it off to keep the combiner's logic simpler. |
| # GCN: %8:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| |
| # case 4: masks are partially disabled, bound_ctrl:1 is off: |
| # the DPP mov result would be either zero ({src lane disabled}|{src lane is |
| # out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or |
| # active src lane result - can combine with old = src1 of the VALU op. |
| # The VALU op should have the same masks as DPP mov as they select |
| # lanes with identity value |
| # GCN: %10:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| |
| # VOP1: |
| # see case 1 |
| # GCN: [[U3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| # GCN: %12:vgpr_32 = V_NOT_B32_dpp [[U3]], %0, 1, 15, 15, 1, implicit $exec |
| # see case 2 |
| # GCN: [[U4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| # GCN: %14:vgpr_32 = V_NOT_B32_dpp [[U4]], %0, 1, 15, 15, 1, implicit $exec |
| # cases 3 and 4 are not applicable as there is no way to specify an unchanged |
| # result for the unary VALU op |
| # GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec |
| # GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec |
| |
| name: old_is_0 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| |
| ; VOP2 |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| |
| %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec |
| |
| %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec |
| |
| %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec |
| |
| ; VOP1 |
| %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec |
| |
| %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec |
| %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec |
| |
| %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec |
| %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec |
| |
| %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec |
| ... |
| |
| # old is nonzero identity cases: |
| |
| # old is nonzero identity, masks are fully enabled, bound_ctrl:1 is off: |
| # the DPP mov result would be either identity ({src lane disabled}|{out of |
| # range}) or src lane result - can combine with old = src1 of the VALU op |
| # The DPP VALU op should have the same masks (and bctrl) as DPP mov as they |
| # select lanes with identity value |
| |
| # GCN-LABEL: name: nonzero_old_is_identity_masks_enabled_bctl_off |
| # GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec |
| # GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec |
| # GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec |
| # GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec |
| |
| name: nonzero_old_is_identity_masks_enabled_bctl_off |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| |
| %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec |
| %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec |
| |
| %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec |
| %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec |
| %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec |
| |
| %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec |
| %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec |
| %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec |
| |
| %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec |
| %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec |
| %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec |
| ... |
| |
| # old is nonzero identity, masks are partially enabled, bound_ctrl:1 is off: |
| # the DPP mov result would be either identity ({src lane disabled}|{src lane is |
| # out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or |
| # active src lane result - can combine with old = src1 of the VALU op. |
| # The DPP VALU op should have the same masks (and bctrl) as DPP mov as they |
| # select lanes with identity value |
| |
| # GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl_off |
| # GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec |
| # GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec |
| |
| name: nonzero_old_is_identity_masks_partially_disabled_bctl_off |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| |
| %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec |
| |
| %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec |
| %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec |
| %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec |
| |
| %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec |
| %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec |
| %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec |
| |
| %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec |
| %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec |
| %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec |
| ... |
| |
| # old is nonzero identity, masks are partially enabled, bound_ctrl:1 is on: |
| # the DPP mov result may have 3 different values: |
| # 1. the active src lane result |
| # 2. 0 if the src lane is disabled|out of range |
| # 3. DPP mov's old value if the mov's dest VGPR write is disabled by masks |
| # can't combine |
| |
| # GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl0 |
| # GCN: %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec |
| # GCN: %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec |
| # GCN: %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec |
| # GCN: %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec |
| |
| name: nonzero_old_is_identity_masks_partially_disabled_bctl0 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| |
| %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec |
| %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec |
| |
| %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec |
| %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec |
| %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec |
| |
| %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec |
| %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec |
| %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec |
| |
| %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec |
| %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec |
| %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec |
| ... |
| |
| # when the DPP source isn't a src0 operand the operation should be commuted if possible |
| # GCN-LABEL: name: dpp_commute |
| # GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec |
| # GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec |
| # GCN: %16:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec |
| # GCN: %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec |
| name: dpp_commute |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| |
| %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec |
| |
| %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec |
| %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec |
| %7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec |
| |
| %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec |
| %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec |
| %10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec |
| |
| %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec |
| %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec |
| %13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec |
| |
| %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec |
| %16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec |
| |
| ; this cannot be combined: the DPP mov result (%18) is src1 and src0 is an immediate, which isn't commutable |
| %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec |
| %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec |
| ... |
| |
| --- |
| |
| # check for floating point modifiers |
| # GCN-LABEL: name: add_f32_e64 |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| # GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec |
| # GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec |
| # GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec |
| # GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec |
| |
| name: add_f32_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| ; this shouldn't be combined as omod is set |
| %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec |
| |
| ; this should be combined as all modifiers are default |
| %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec |
| |
| ; this should be combined as modifiers other than abs|neg are default |
| %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec |
| |
| ; this shouldn't be combined as modifiers aren't abs|neg |
| %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec |
| ... |
| |
| # check for e64 modifiers |
| # GCN-LABEL: name: add_u32_e64 |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec |
| # GCN: %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec |
| |
| name: add_u32_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| ; this should be combined as all modifiers are default |
| %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec |
| |
| ; this shouldn't be combined as clamp is set |
| %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: add_co_u32_e64 |
| # GCN: %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec |
| |
| name: add_co_u32_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| ; this shouldn't be combined as the carry-out is used |
| %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec |
| |
| S_NOP 0, implicit %5 |
| ... |
| |
| # tests on sequences of dpp consumers |
| # GCN-LABEL: name: dpp_seq |
| # GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec |
| # GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec |
| # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # broken sequence: |
| # GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| |
| name: dpp_seq |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec |
| %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec |
| %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec |
| |
| %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec |
| ; this breaks the sequence: %7 is src1 and src0 is an immediate (presumably not commutable), so the DPP mov defining %7 is expected to stay |
| %9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec |
| ... |
| |
| # tests on sequences of dpp consumers followed by control flow |
| # GCN-LABEL: name: dpp_seq_cf |
| # GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec |
| # GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec |
| # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| |
| name: dpp_seq_cf |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec |
| %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec |
| %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec |
| |
| %7:sreg_64 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec |
| %8:sreg_64 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| successors: %bb.2 |
| |
| bb.2: |
| SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: old_in_diff_bb |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec |
| |
| name: old_in_diff_bb |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| successors: %bb.1 |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec |
| ... |
| |
| # old reg def is in diff BB but bound_ctrl:1 - can combine |
| # The old operand of the combined op is matched by regex; accept any vreg number. |
| # GCN-LABEL: name: old_in_diff_bb_bctrl_zero |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]+}}, %0, %1, 1, 15, 15, 1, implicit $exec |
| |
| name: old_in_diff_bb_bctrl_zero |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| successors: %bb.1 |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| ... |
| |
| # EXEC mask changed between def and use - cannot combine |
| # GCN-LABEL: name: exec_changed |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| |
| name: exec_changed |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| %5:sreg_64 = COPY $exec, implicit-def $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| ... |
| |
| # test if $old definition is correctly tracked through subreg manipulation pseudos |
| |
| # GCN-LABEL: name: mul_old_subreg |
| # GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec |
| |
| name: mul_old_subreg |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4 |
| %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec |
| %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: add_old_subreg |
| # GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec |
| |
| name: add_old_subreg |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted |
| %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: add_old_subreg_undef |
| # GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec |
| |
| name: add_old_subreg_undef |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef |
| %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec |
| ... |
| |
| # Test instruction which does not have modifiers in VOP1 form but does in DPP form. |
| # GCN-LABEL: name: dpp_vop1 |
| # GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: dpp_vop1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %1:vgpr_32 = IMPLICIT_DEF |
| %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec |
| ... |
| |
| # Test instruction which does not have modifiers in VOP2 form but does in DPP form. |
| # GCN-LABEL: name: dpp_min |
| # GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: dpp_min |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %1:vgpr_32 = IMPLICIT_DEF |
| %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec |
| ... |
| |
| # Test an undef old operand |
| # GCN-LABEL: name: dpp_undef_old |
| # GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: dpp_undef_old |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec |
| ... |
| |
| # Do not combine a dpp mov which writes a physreg. |
| # GCN-LABEL: name: phys_dpp_mov_dst |
| # GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec |
| # GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec |
| name: phys_dpp_mov_dst |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec |
| ... |
| |
| # Do not combine a dpp mov which reads a physreg as its old operand. |
| # GCN-LABEL: name: phys_dpp_mov_old_src |
| # GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec |
| # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec |
| name: phys_dpp_mov_old_src |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec |
| ... |
| |
| # Do not combine a dpp mov which reads a physreg as its src operand. |
| # GCN-LABEL: name: phys_dpp_mov_src |
| # GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec |
| # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec |
| name: phys_dpp_mov_src |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec |
| %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: dpp_reg_sequence_both_combined |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %9:vgpr_32 = IMPLICIT_DEF |
| # GCN: %8:vgpr_32 = IMPLICIT_DEF |
| # GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_both_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: dpp_reg_sequence_first_combined |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %8:vgpr_32 = IMPLICIT_DEF |
| # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec |
| # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1 |
| # GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_first_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: dpp_reg_sequence_second_combined |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec |
| # GCN: %8:vgpr_32 = IMPLICIT_DEF |
| # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1 |
| # GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_second_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: dpp_reg_sequence_none_combined |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec |
| # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec |
| # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 |
| # GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_none_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # GCN-LABEL: name: dpp_reg_sequence_exec_changed |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 |
| # GCN: S_BRANCH %bb.1 |
| # GCN: bb.1: |
| # GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_exec_changed |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # The REG_SEQUENCE built from the two V_MOV_B32_dpp results is not read by the |
| # adds directly: a second REG_SEQUENCE re-assembles it from its sub0/sub1 |
| # sub-registers, and the add/addc pair reads that second sequence. The checks |
| # below expect every instruction to survive unchanged (no *_dpp add is formed). |
| # NOTE(review): presumably the pass does not look through a REG_SEQUENCE whose |
| # inputs are themselves sub-register reads; confirm against the pass. |
| # The expected output numbers virtual registers differently from the input |
| # (input %8 is checked as %2, input %2 as %3, and so on). |
| # GCN-LABEL: name: dpp_reg_sequence_subreg |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 |
| # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1 |
| # GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec |
| # GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_subreg |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # A 64-bit V_MOV_B64_DPP_PSEUDO whose two register operands are IMPLICIT_DEF |
| # feeds an add/addc pair through its sub0 and sub1 halves. The checks below |
| # expect both adds to be rewritten into their *_dpp forms, reading the matching |
| # sub-register of each mov operand directly and carrying over the mov's |
| # trailing 1, 15, 15, 1 immediates. |
| # GCN-LABEL: name: dpp64_add64_impdef |
| # GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec |
| # GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp64_add64_impdef |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %0:vreg_64 = IMPLICIT_DEF |
| %1:vreg_64 = IMPLICIT_DEF |
| %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec |
| %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec |
| %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # Same shape as a 64-bit DPP mov feeding an add/addc pair, except that the |
| # mov's register operands are undef uses with no defining instruction. The |
| # checks below expect both adds to become *_dpp forms and the undef flag to be |
| # kept on the sub0/sub1 operands that replace the mov result. |
| # GCN-LABEL: name: dpp64_add64_undef |
| # GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec |
| # GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp64_add64_undef |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec |
| %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec |
| %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # Only the sub0 user of the 64-bit DPP mov is expected to be combined: the |
| # checks below want V_ADD_CO_U32_e32 turned into V_ADD_CO_U32_dpp, while the |
| # V_ADDC_U32_e64 that reads sub1 stays in its e64 form. For that remaining user |
| # the checks expect a 32-bit V_MOV_B32_dpp producing sub1 and a REG_SEQUENCE |
| # whose sub0 input is undef. |
| # GCN-LABEL: name: dpp64_add64_first_combined |
| # GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec |
| # GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1 |
| # GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec |
| # GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec |
| name: dpp64_add64_first_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %3:vgpr_32, implicit-def $vcc, implicit $exec |
| %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %2.sub1, undef $vcc, 0, implicit $exec |
| ... |
| |
| # Negative test: the V_MOV_B32_dpp result is the first source of a |
| # V_CNDMASK_B32_e64 that also reads a separate sreg_64_xexec value (%4) as its |
| # last register operand. The check below expects the V_CNDMASK_B32_e64 to be |
| # left exactly as written, still reading the mov result %3. |
| # GCN-LABEL: name: dont_combine_cndmask_with_src2 |
| # GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec |
| name: dont_combine_cndmask_with_src2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:sreg_64_xexec = IMPLICIT_DEF |
| %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec |
| ... |
| |
| --- |
| |
| # Make sure flags aren't dropped |
| # The input V_ADD_F32_e64 carries the nofpexcept and nnan instruction flags; |
| # the check below expects the combined V_ADD_F32_dpp to carry both as well. |
| # The two flags appear in a different order in the expected output than in the |
| # input; only their presence is being tested. |
| # GCN-LABEL: name: flags_add_f32_e64 |
| # GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: flags_add_f32_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec |
| S_ENDPGM 0, implicit %4 |
| |
| ... |
| |
| # Negative test: the V_MOV_B32_dpp result %2 is used as BOTH register sources |
| # of the same V_MAX_F32_e64. The check below expects the V_MAX_F32_e64 to be |
| # left unchanged, still reading %2 twice. |
| # GCN-LABEL: name: dont_combine_more_than_one_operand |
| # GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec |
| name: dont_combine_more_than_one_operand |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec |
| ... |
| |
| # Negative test, REG_SEQUENCE variant of using a DPP mov result twice in one |
| # instruction: each add reads the same sub-register of the sequence (%4.sub0 or |
| # %4.sub1) for both of its register sources. The checks below expect both adds |
| # to stay in their e32 form. |
| # GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence |
| # GCN: %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec |
| # GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dont_combine_more_than_one_operand_dpp_reg_sequence |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec |
| %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # execMayBeModifiedBeforeAnyUse used to assert if the queried |
| # V_MOV_B32_dpp was the last instruction in the block. |
| # Regression test for that case: bb.1 ends with the V_MOV_B32_dpp (no |
| # terminator follows it) and the mov's result is only read by the PHI at the |
| # top of bb.1, reached again through the loop bb.1 -> bb.2 -> bb.1. The checks |
| # inside the body expect the function to come out unchanged. |
| --- |
| name: mov_dpp_last_block_inst |
| tracksRegLiveness: true |
| body: | |
| ; GCN-LABEL: name: mov_dpp_last_block_inst |
| ; GCN: bb.0: |
| ; GCN-NEXT: successors: %bb.1(0x80000000) |
| ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr8 |
| ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: bb.1: |
| ; GCN-NEXT: successors: %bb.2(0x80000000) |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %5, %bb.2 |
| ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[PHI]], 323, 15, 15, 0, implicit $exec |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: bb.2: |
| ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF2]], implicit $exec |
| ; GCN-NEXT: V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec |
| ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc |
| ; GCN-NEXT: S_BRANCH %bb.3 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: bb.3: |
| ; GCN-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8 |
| |
| %0:sgpr_32 = COPY $sgpr8 |
| %1:vgpr_32 = IMPLICIT_DEF |
| %2:sreg_32 = IMPLICIT_DEF |
| %3:sreg_64_xexec = IMPLICIT_DEF |
| |
| bb.1: |
| %4:vgpr_32 = PHI %1, %bb.0, %5, %bb.2 |
| %5:vgpr_32 = V_MOV_B32_dpp %1, %4, 323, 15, 15, 0, implicit $exec |
| |
| bb.2: |
| %6:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %3, implicit $exec |
| V_CMP_NE_U32_e32 1, %6, implicit-def $vcc, implicit $exec |
| S_CBRANCH_VCCNZ %bb.1, implicit $vcc |
| S_BRANCH %bb.3 |
| |
| bb.3: |
| S_ENDPGM 0 |
| |
| ... |