| # RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN |
| |
| --- |
| |
| # DPP mov folded into VOP3 users. The V_SUBBREV_U32_e64 and |
| # V_CVT_PK_U8_F32_e64 users are expected to become *_e64_dpp, while the two |
| # V_MED3_F32_e64 users must stay uncombined (reasons are noted in the body). |
| # The label directive has to be spelled in upper case, otherwise FileCheck |
| # does not treat the line as a directive at all. |
| # GCN-LABEL: name: vop3 |
| # GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec |
| # GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec |
| # GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec |
| # GCN: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec |
| name: vop3 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr2 |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec |
| |
| %5:sreg_32_xm0_xexec = IMPLICIT_DEF |
| %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec |
| |
| %8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec |
| |
| ; should not be combined because src2 literal is illegal |
| %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec |
| |
| ; should not be combined because src1 imm is illegal |
| %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec |
| ... |
| |
| # Regression test for src_modifiers on base u16 opcode |
| # Both V_ADD_NC_U16_e64 users are expected to fold their DPP mov and keep |
| # the modifier operands they had before (0 / 0 and 4 / 8). |
| # The label directive is spelled in upper case so FileCheck honours it. |
| # GCN-LABEL: name: vop3_u16 |
| # GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 4, %5, 8, %5, 0, 0, 1, 15, 15, 1, implicit $exec |
| name: vop3_u16 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr2 |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec |
| %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec |
| %7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec |
| ... |
| |
| # VOP3P users of a DPP mov. The first two V_DOT2_F32_F16 users are expected |
| # to keep their separate V_MOV_B32_dpp (reasons are noted in the body); the |
| # third V_DOT2_F32_F16 and the three V_FMA_MIX* users are expected to fold |
| # into the corresponding *_dpp instructions. |
| name: vop3p |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| |
| ; GCN-LABEL: name: vop3p |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 |
| ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec |
| ; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec |
| ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec |
| ; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec |
| ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec |
| ; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec |
| ; GCN: [[V_FMA_MIXLO_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXLO_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 0, [[COPY2]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec |
| ; GCN: [[V_FMA_MIXHI_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXHI_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, [[COPY]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr2 |
| %3:vgpr_32 = IMPLICIT_DEF |
| |
| ; this should not be combined because op_sel is not zero |
| %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec |
| |
| ; this should not be combined because op_sel_hi is not all set |
| %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec |
| |
| %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec |
| |
| %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec |
| |
| %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec |
| |
| %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec |
| %15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec |
| |
| ... |
| |
| # The DPP mov feeds src0 of V_FMAC_F32_e64; the check expects a single |
| # V_FMAC_F32_e64_dpp that keeps every modifier operand of the original. |
| # NOTE(review): the result is written as %6 below but checked as %5, |
| # presumably because vregs are renumbered when the MIR is printed - confirm. |
| # GCN-LABEL: name: fmac_e64 |
| # GCN: %5:vgpr_32 = V_FMAC_F32_e64_dpp %3, 2, %0, 2, %1, 2, %2, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: fmac_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr2 |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec |
| %6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec |
| ... |
| |
| # when the DPP source isn't a src0 operand the operation should be commuted if possible |
| # Every e64 user below takes the DPP mov result as src1. The checks expect |
| # the operands swapped and the instruction shrunk to the plain *_dpp opcode; |
| # for V_SUB_U32_e64 the expected opcode is V_SUBREV_U32_dpp. |
| # GCN-LABEL: name: dpp_commute_shrink |
| # GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec |
| # GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec |
| # GCN: %16:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| name: dpp_commute_shrink |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| |
| %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec |
| |
| %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec |
| %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec |
| %7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec |
| |
| %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec |
| %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec |
| %10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec |
| |
| %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec |
| %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec |
| %13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec |
| |
| %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec |
| %16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec |
| |
| ... |
| |
| # do not combine, dpp arg used twice |
| # Each V_FMA_F32_e64 below reads the same DPP mov result in two source |
| # slots (src1+src2, src0+src2, src0+src1); all three are expected to be |
| # printed unchanged. |
| # The label directive is spelled in upper case so FileCheck honours it. |
| # GCN-LABEL: name: dpp_arg_twice |
| # GCN: %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec |
| # GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec |
| # GCN: %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec |
| name: dpp_arg_twice |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec |
| |
| %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec |
| |
| %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec |
| |
| ... |
| |
| # when the dpp source isn't a src0 operand the operation should be commuted if possible |
| # The first four users take the DPP mov result as src1 and are expected to |
| # be commuted into *_e64_dpp instructions (V_SUB_U32_e64 becomes |
| # V_SUBREV_U32_e64_dpp). The last user has the immediate 5 as src0 and is |
| # expected to stay uncombined. |
| # The label directive is spelled in upper case so FileCheck honours it. |
| # GCN-LABEL: name: dpp_commute_e64 |
| # GCN: %4:vgpr_32 = V_MUL_U32_U24_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec |
| # GCN: %7:vgpr_32 = V_FMA_F32_e64_dpp %5, 2, %0, 1, %1, 2, %1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec |
| # GCN: %10:vgpr_32 = V_SUBREV_U32_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec |
| # GCN: %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64_dpp %1, %0, %1, 0, 1, 14, 15, 0, implicit $exec |
| # GCN: %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec |
| name: dpp_commute_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| |
| %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec |
| |
| %5:vgpr_32 = IMPLICIT_DEF |
| %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec |
| %7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec |
| |
| %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec |
| %10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec |
| |
| %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec |
| %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec |
| |
| ; this cannot be combined because immediate as src0 isn't commutable |
| %15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec |
| %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec |
| ... |
| |
| --- |
| |
| # check for floating point modifiers |
| # Four V_ADD_F32_e64 users, each fed by its own DPP mov. The checks expect |
| # the e64_dpp form for the first and the last user and the shrunk |
| # V_ADD_F32_dpp form for the two in between (reasons are noted per case). |
| # GCN-LABEL: name: add_f32_e64 |
| # GCN: %4:vgpr_32 = V_ADD_F32_e64_dpp %2, 0, %1, 0, %0, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec |
| # GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec |
| # GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec |
| # GCN: %10:vgpr_32 = V_ADD_F32_e64_dpp %2, 4, %1, 8, %0, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec |
| |
| name: add_f32_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| ; this should be combined as e64 |
| %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec |
| |
| ; this should be combined and shrunk as all modifiers are default |
| %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec |
| |
| ; this should be combined and shrunk as modifiers other than abs|neg are default |
| %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec |
| |
| ; this should be combined as e64 |
| %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec |
| ... |
| |
| # check for e64 modifiers |
| # The trailing immediate of V_ADD_U32_e64 is 0 for the first user and 1 for |
| # the second; the checks expect V_ADD_U32_dpp and V_ADD_U32_e64_dpp |
| # respectively. |
| # GCN-LABEL: name: add_u32_e64 |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec |
| # GCN: %6:vgpr_32 = V_ADD_U32_e64_dpp %2, %0, %1, 1, 1, 15, 15, 1, implicit $exec |
| |
| name: add_u32_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| ; this should be combined and shrunk as all modifiers are default |
| %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec |
| |
| ; this should be combined as _e64 |
| %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec |
| ... |
| |
| # tests on sequences of dpp consumers |
| # %3 has three users and all of them are expected to become *_dpp. %7 also |
| # feeds a V_SUB_U32_e32 that has the immediate 5 as src0, so its |
| # V_MOV_B32_dpp is expected to remain in the output. |
| # GCN-LABEL: name: dpp_seq |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # broken sequence: |
| # GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| |
| name: dpp_seq |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec |
| %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec |
| |
| %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec |
| ; this breaks the sequence |
| %9:vgpr_32 = V_SUB_U32_e32 5, %7, implicit $exec |
| ... |
| |
| # tests on sequences of dpp consumers followed by control flow |
| # The same three users of %3 as in dpp_seq, but bb.0 ends with |
| # V_CMP_EQ_U32_e64 / SI_IF / S_BRANCH. The checks still expect all three |
| # users as *_dpp instructions. |
| # GCN-LABEL: name: dpp_seq_cf |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec |
| |
| name: dpp_seq_cf |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec |
| %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec |
| |
| %7:sreg_32 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec |
| %8:sreg_32 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| successors: %bb.2 |
| |
| bb.2: |
| SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec |
| ... |
| |
| # The old operand of the DPP mov (%2) is defined in bb.0, while the mov and |
| # its user live in bb.1. The check expects a combined V_ADD_U32_dpp whose |
| # old operand is %0. |
| # GCN-LABEL: name: old_in_diff_bb |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec |
| |
| name: old_in_diff_bb |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| successors: %bb.1 |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec |
| ... |
| |
| # old reg def is in diff BB but bound_ctrl:1 - can combine |
| # Same two-block layout as old_in_diff_bb, with DPP controls 15, 15, 1 on |
| # the mov. The old operand of the result is matched with a regex rather |
| # than a fixed vreg number. |
| # GCN-LABEL: name: old_in_diff_bb_bctrl_zero |
| # GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]}}, %0, %1, 1, 15, 15, 1, implicit $exec |
| |
| name: old_in_diff_bb_bctrl_zero |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| successors: %bb.1 |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| ... |
| |
| # EXEC mask changed between def and use - cannot combine |
| # %3 has one user before and one user after the COPY that redefines $exec; |
| # the check expects the V_MOV_B32_dpp to remain in the output. |
| # GCN-LABEL: name: exec_changed |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| |
| name: exec_changed |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| %5:sreg_64 = COPY $exec, implicit-def $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec |
| ... |
| |
| # test if $old definition is correctly tracked through subreg manipulation pseudos |
| # Here the old operand is %5.sub0, which comes from %4, a REG_SEQUENCE |
| # whose sub0 is the V_MOV_B32_e32 1. The check expects V_MUL_I32_I24_dpp |
| # with %0.sub1 as its old operand. |
| |
| # GCN-LABEL: name: mul_old_subreg |
| # GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec |
| |
| name: mul_old_subreg |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4 |
| %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec |
| %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec |
| ... |
| |
| # The old operand is %3.sub1, the subregister that INSERT_SUBREG fills |
| # from the V_MOV_B32_e32 0. The check expects V_ADD_U32_dpp with %0.sub1 as |
| # its old operand. |
| # GCN-LABEL: name: add_old_subreg |
| # GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec |
| |
| name: add_old_subreg |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted |
| %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec |
| ... |
| |
| # The old operand is %3.sub1, which the REG_SEQUENCE below never defines. |
| # The check expects V_ADD_U32_dpp with that old operand marked undef. |
| # GCN-LABEL: name: add_old_subreg_undef |
| # GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec |
| |
| name: add_old_subreg_undef |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vreg_64 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef |
| %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec |
| ... |
| |
| # Test instruction which does not have modifiers in VOP1 form but does in DPP form. |
| # The check expects V_CEIL_F32_dpp with an additional 0 operand in front |
| # of the source register. |
| # GCN-LABEL: name: dpp_vop1 |
| # GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: dpp_vop1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %1:vgpr_32 = IMPLICIT_DEF |
| %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec |
| ... |
| |
| # Test instruction which does not have modifiers in VOP2 form but does in DPP form. |
| # The check expects V_MIN_F32_dpp with an additional 0 operand in front |
| # of each of the two source registers. |
| # GCN-LABEL: name: dpp_min |
| # GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: dpp_min |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %1:vgpr_32 = IMPLICIT_DEF |
| %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec |
| ... |
| |
| # Test an undef old operand |
| # The check expects the undef flag of the mov's old operand to show up on |
| # the old operand of the resulting V_CEIL_F32_dpp. |
| # GCN-LABEL: name: dpp_undef_old |
| # GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: dpp_undef_old |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec |
| ... |
| |
| # Do not combine a dpp mov which writes a physreg. |
| # The mov defines $vgpr0 directly; both instructions are expected in the |
| # output with the same opcodes (only the vreg numbers differ). |
| # GCN-LABEL: name: phys_dpp_mov_dst |
| # GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec |
| # GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec |
| name: phys_dpp_mov_dst |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec |
| ... |
| |
| # Do not combine a dpp mov which reads a physreg. |
| # In this variant the physical register ($vgpr0) is the old operand of the |
| # mov; both instructions are expected to stay. |
| # GCN-LABEL: name: phys_dpp_mov_old_src |
| # GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec |
| # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec |
| name: phys_dpp_mov_old_src |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec |
| %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec |
| ... |
| |
| # Do not combine a dpp mov which reads a physreg. |
| # In this variant the physical register ($vgpr0) is the source operand of |
| # the mov (not the old operand); both instructions are expected to stay. |
| # GCN-LABEL: name: phys_dpp_mov_src |
| # GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec |
| # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec |
| name: phys_dpp_mov_src |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec |
| %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec |
| ... |
| |
| # Two DPP movs (both with controls 15, 15, 1) are joined by a REG_SEQUENCE |
| # whose sub0 / sub1 feed V_ADD_U32_e32 and V_ADDC_U32_e32. The checks |
| # expect both users as *_dpp instructions that read %1.sub0 / %1.sub1 |
| # directly and take freshly created IMPLICIT_DEF registers as old operands. |
| # GCN-LABEL: name: dpp_reg_sequence_both_combined |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %9:vgpr_32 = IMPLICIT_DEF |
| # GCN: %8:vgpr_32 = IMPLICIT_DEF |
| # GCN: %6:vgpr_32 = V_ADD_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_both_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # Like dpp_reg_sequence_both_combined, except that the sub1 mov uses DPP |
| # controls 1, 1, 1. The checks expect only the sub0 user to become |
| # V_ADD_U32_dpp; the sub1 mov, the REG_SEQUENCE (now with an undef sub0 |
| # input) and V_ADDC_U32_e32 are expected to remain. |
| # GCN-LABEL: name: dpp_reg_sequence_first_combined |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %8:vgpr_32 = IMPLICIT_DEF |
| # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec |
| # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1 |
| # GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_first_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # Mirror image of dpp_reg_sequence_first_combined: here the sub0 mov uses |
| # DPP controls 1, 1, 1. The checks expect only the sub1 user to become |
| # V_ADDC_U32_dpp; the sub0 mov, the REG_SEQUENCE (now with an undef sub1 |
| # input) and V_ADD_U32_e32 are expected to remain. |
| # GCN-LABEL: name: dpp_reg_sequence_second_combined |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec |
| # GCN: %8:vgpr_32 = IMPLICIT_DEF |
| # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1 |
| # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_second_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # Both movs feeding the REG_SEQUENCE use DPP controls 1, 1, 1. The checks |
| # expect every instruction to remain, i.e. neither user is combined. |
| # GCN-LABEL: name: dpp_reg_sequence_none_combined |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec |
| # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec |
| # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 |
| # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_none_combined |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # The movs and the REG_SEQUENCE are in bb.0, the two users in bb.1. The |
| # checks expect every instruction to remain uncombined. |
| # NOTE(review): despite the name, no explicit write to $exec is visible in |
| # this function; the users merely sit in a different basic block - confirm |
| # that this is the intended trigger. |
| # GCN-LABEL: name: dpp_reg_sequence_exec_changed |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 |
| # GCN: S_BRANCH %bb.1 |
| # GCN: bb.1: |
| # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec |
| # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_exec_changed |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # The users read a second REG_SEQUENCE (%5) that is built from subregisters |
| # of the first one (%4) rather than from the movs directly. The checks |
| # expect every instruction to remain uncombined. |
| # GCN-LABEL: name: dpp_reg_sequence_subreg |
| # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1 |
| # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3 |
| # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 |
| # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1 |
| # GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec |
| # GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp_reg_sequence_subreg |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1 |
| %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec |
| %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # Two V_FMA_F32_e64 users of a REG_SEQUENCE of DPP movs, each reading a |
| # REG_SEQUENCE subregister as src2. The checks expect every instruction to |
| # remain uncombined (reasons are noted in the body). |
| # GCN-LABEL: name: dpp_reg_sequence_src2_reject |
| #GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| #GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| #GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| #GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| #GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec |
| #GCN: %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec |
| name: dpp_reg_sequence_src2_reject |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| ; use of dpp arg as src2, reject |
| %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec |
| ; cannot commute src0 and src2, and %4.sub0 already rejected, reject |
| %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec |
| ... |
| |
| # A single V_FMA_F32_e64 reads %4.sub0 as src0 and %4.sub1 as src2. The |
| # checks expect the sub0 mov to be folded (V_FMA_F32_e64_dpp reading |
| # %1.sub0), while the sub1 mov and the REG_SEQUENCE (with an undef sub0 |
| # input) remain and still supply src2. |
| # GCN-LABEL: name: dpp_reg_sequence_src2 |
| #GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| #GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1 |
| #GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| #GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: dpp_reg_sequence_src2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec |
| %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec |
| ... |
| |
| # A 64-bit V_MOV_B64_DPP_PSEUDO whose sub0 / sub1 feed V_ADD_U32_e32 and |
| # V_ADDC_U32_e32. The checks expect both users as 32-bit *_dpp |
| # instructions that read the matching subregisters of the pseudo's old |
| # (%1) and source (%0) operands. |
| # GCN-LABEL: name: dpp64_add64_impdef |
| # GCN: %3:vgpr_32 = V_ADD_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec |
| # GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp64_add64_impdef |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %0:vreg_64 = IMPLICIT_DEF |
| %1:vreg_64 = IMPLICIT_DEF |
| %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec |
| %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| # Same shape as dpp64_add64_impdef, but the old and source operands of the |
| # 64-bit pseudo are undef. The checks expect both users as 32-bit *_dpp |
| # instructions whose subregister operands keep the undef flag. |
| # GCN-LABEL: name: dpp64_add64_undef |
| # GCN: %3:vgpr_32 = V_ADD_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec |
| # GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dpp64_add64_undef |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec |
| %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |
| |
| |
| # Two V_CNDMASK_B32_e64 users whose last source is an sreg_32_xm0_xexec |
| # register. The checks expect the first one (both modifier operands 0) to |
| # stay as V_CNDMASK_B32_e64 and the second one (src0 modifier operand 4) to |
| # become V_CNDMASK_B32_e64_dpp. |
| # GCN-LABEL: name: cndmask_with_src2 |
| # GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec |
| # GCN: %8:vgpr_32 = V_CNDMASK_B32_e64_dpp %2, 4, %1, 0, %1, %7, 1, 15, 15, 1, implicit $exec |
| name: cndmask_with_src2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec |
| %4:sreg_32_xm0_xexec = IMPLICIT_DEF |
| %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec |
| |
| ; src2 is legal for _e64 |
| %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec |
| %7:sreg_32_xm0_xexec = IMPLICIT_DEF |
| %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec |
| ... |
| |
| --- |
| |
| # Make sure flags aren't dropped |
| # The nnan and nofpexcept flags on the V_ADD_F32_e64 user are expected to |
| # appear on the resulting V_ADD_F32_dpp. |
| # GCN-LABEL: name: flags_add_f32_e64 |
| # GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec |
| name: flags_add_f32_e64 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = IMPLICIT_DEF |
| |
| %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec |
| %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec |
| S_ENDPGM 0, implicit %4 |
| |
| ... |
| |
| # The DPP mov result (%2) is both src0 and src1 of V_MAX_F32_e64; the |
| # check expects the instruction to stay uncombined. |
| # GCN-LABEL: name: dont_combine_more_than_one_operand |
| # GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec |
| name: dont_combine_more_than_one_operand |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec |
| ... |
| |
| # REG_SEQUENCE variant of dont_combine_more_than_one_operand: each user |
| # reads the same REG_SEQUENCE subregister in both source slots, and the |
| # checks expect both users to stay uncombined. |
| # GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence |
| # GCN: %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec |
| # GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec |
| name: dont_combine_more_than_one_operand_dpp_reg_sequence |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 |
| %0:vreg_64 = COPY $vgpr0_vgpr1 |
| %1:vreg_64 = COPY $vgpr2_vgpr3 |
| %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec |
| %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec |
| %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 |
| %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec |
| %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ... |