[AMDGPU] Automate creation of byte_sel dags. NFCI. (#140155)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 47ee0a7..84a6aea 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1293,6 +1293,7 @@ def ByteSel : NamedIntOperand<"byte_sel"> { let Validator = "isUInt<2>"; } +def ByteSel0 : DefaultOperand<ByteSel, 0>; let PrintMethod = "printBitOp3" in def BitOp3 : NamedIntOperand<"bitop3">; @@ -1971,7 +1972,8 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, + bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> { dag src0 = !if(!ge(NumSrcArgs, 1), !if (HasModifiers, (ins Src0Mod:$src0_modifiers, Src0RC:$src0), @@ -1987,20 +1989,29 @@ (ins Src2Mod:$src2_modifiers, Src2RC:$src2), (ins Src2RC:$src2)), (ins)); - dag clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins)); + // If there is vdst_in after clamp with HasFP8DstByteSel we cannot use + // Clamp0 with default value, all default operands must be at the end. + dag clamp = !if(HasClamp, !if(HasFP8DstByteSel, (ins Clamp:$clamp), + (ins Clamp0:$clamp)), + (ins)); dag omod = !if(HasOMod, (ins omod0:$omod), (ins)); + dag bytesel = !if(HasFP8ByteSel, + !con(!if(HasFP8DstByteSel, (ins VGPR_32:$vdst_in), (ins)), + (ins ByteSel0:$byte_sel)), + (ins)); - dag ret = !con(src0, src1, src2, clamp, omod); + dag ret = !con(src0, src1, src2, clamp, omod, bytesel); } class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel, + bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> { // getInst64 handles clamp and omod. 
implicit mutex between vop3p and omod dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, - Src0Mod, Src1Mod, Src2Mod>.ret; + Src0Mod, Src1Mod, Src2Mod, HasFP8ByteSel, HasFP8DstByteSel>.ret; dag opsel = (ins op_sel0:$op_sel); dag ret = !con(base, !if(HasOpSel, opsel, (ins))); } @@ -2612,7 +2623,8 @@ field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, - HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; + HasOMod, Src0Mod, Src1Mod, Src2Mod, + HasFP8ByteSel, HasFP8DstByteSel>.ret; field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, HasClamp, HasOpSel, HasNeg, Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret; @@ -2630,7 +2642,8 @@ Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, + HasFP8ByteSel, HasFP8DstByteSel>.ret; defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg, Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index f885de3..7fdd951 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -674,17 +674,6 @@ let HasClamp = 0; let HasOMod = 0; let HasModifiers = 0; - - defvar bytesel = (ins ByteSel:$byte_sel); - let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, - HasClamp, HasModifiers, HasSrc2Mods, - HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret, - bytesel); - let InsVOP3Base = !con(getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP, - NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, - HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP, - Src2ModVOP3DPP, HasOpSel>.ret, - bytesel); } let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index a7b90b9..0252c4f 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -593,6 +593,7 @@ let HasExtVOP3DPP = 1; let HasOpSel = 1; let HasFP8DstByteSel = 1; + let HasFP8ByteSel = 0; // It works as a dst-bytesel, but does not have a byte_sel operand. let AsmVOP3OpSel = !subst(", $src2_modifiers", "", getAsmVOP3OpSel<3, HasClamp, HasOMod, HasSrc0FloatMods, HasSrc1FloatMods, @@ -607,16 +608,6 @@ VOP3_Profile<VOPProfile<[i32, SrcVT, i32, untyped]>> { let HasFP8DstByteSel = 1; let HasClamp = 0; - defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel); - let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, - HasClamp, HasModifiers, HasSrc2Mods, - HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret, - bytesel); - let InsVOP3Base = !con( - getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, - Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret, - bytesel); } def IsPow2Plus1: PatLeaf<(i32 imm), [{