[GlobalISel] convergent intrinsics
Introduced the convergent equivalents of the existing G_INTRINSIC opcodes:
- G_INTRINSIC_CONVERGENT
- G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS
Out of the targets that currently have some support for GlobalISel, the patch
assumes that the convergent intrinsics are only relevant to SPIRV and AMDGPU.
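
For example, a convergent intrinsic such as llvm.amdgcn.readfirstlane, which
previously translated to a plain G_INTRINSIC, now uses the new opcode (excerpt
from the updated MIR tests in this patch):

  %4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
  %5:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %4(s32)
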
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D154766
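
A minimal sketch of the call-site migration (illustrative only; `B` is a
MachineIRBuilder, and the intrinsic and type names are taken from the AMDGPU
call sites updated below):

  // Old form: the side-effect flag was passed explicitly, and convergence
  // was modeled conservatively by the opcode itself.
  B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, /*HasSideEffects=*/false);

  // New form: the flag-less overload looks up the intrinsic's IR attributes
  // (memory effects and `convergent`) and picks one of the four opcodes.
  B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32});
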
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index 636afd6..ee374ef 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -856,13 +856,25 @@
throw routines do not return, so no code that must be on an executable path must
be placed after throwing.
-G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+G_INTRINSIC, G_INTRINSIC_CONVERGENT
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Call an intrinsic
+Call an intrinsic that has no side-effects.
-The _W_SIDE_EFFECTS version is considered to have unknown side-effects and
-as such cannot be reordered across other side-effecting instructions.
+The _CONVERGENT variant corresponds to an LLVM IR intrinsic marked `convergent`.
+
+.. note::
+
+ Unlike SelectionDAG, there is no _VOID variant. Both of these are permitted
+ to have zero, one, or multiple results.
+
+G_INTRINSIC_W_SIDE_EFFECTS, G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Call an intrinsic that is considered to have unknown side-effects and as such
+cannot be reordered across other side-effecting instructions.
+
+The _CONVERGENT variant corresponds to an LLVM IR intrinsic marked `convergent`.
.. note::
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 9d73402..20dd73c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -366,14 +366,33 @@
}
bool is(Intrinsic::ID ID) const { return getIntrinsicID() == ID; }
+
bool hasSideEffects() const {
- return getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
+ switch (getOpcode()) {
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool isConvergent() const {
+ switch (getOpcode()) {
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
+ return true;
+ default:
+ return false;
+ }
}
static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return true;
default:
return false;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 3016211..3571bc6 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1108,20 +1108,25 @@
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src,
const SrcOp &Op, unsigned Index);
- /// Build and insert either a G_INTRINSIC (if \p HasSideEffects is false) or
- /// G_INTRINSIC_W_SIDE_EFFECTS instruction. Its first operand will be the
- /// result register definition unless \p Reg is NoReg (== 0). The second
- /// operand will be the intrinsic's ID.
+ /// Build and insert a G_INTRINSIC instruction.
///
- /// Callers are expected to add the required definitions and uses afterwards.
+ /// There are four different opcodes based on combinations of whether the
+ /// intrinsic has side effects and whether it is convergent. These properties
+ /// can be specified as explicit parameters, or else they are retrieved from
+ /// the IR attributes of the intrinsic's definition.
+ ///
+ /// The parameter \p Res provides the Registers or DstOps that will be defined by
+ /// this instruction.
///
/// \pre setBasicBlock or setMI must have been called.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<Register> Res,
- bool HasSideEffects);
+ bool HasSideEffects, bool isConvergent);
+ MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<Register> Res);
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Res,
- bool HasSideEffects);
+ bool HasSideEffects, bool isConvergent);
+ MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Res);
/// Build and insert \p Res = G_FPTRUNC \p Op
///
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 186bea7..67430a1 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -426,6 +426,12 @@
/// Generic intrinsic use (with side effects).
HANDLE_TARGET_OPCODE(G_INTRINSIC_W_SIDE_EFFECTS)
+/// Generic convergent intrinsic use (without side effects).
+HANDLE_TARGET_OPCODE(G_INTRINSIC_CONVERGENT)
+
+/// Generic convergent intrinsic use (with side effects).
+HANDLE_TARGET_OPCODE(G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS)
+
/// Generic extension allowing rubbish in high bits.
HANDLE_TARGET_OPCODE(G_ANYEXT)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 00d56d1..680d322 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1228,10 +1228,6 @@
let OutOperandList = (outs);
let InOperandList = (ins unknown:$intrin, variable_ops);
let hasSideEffects = false;
-
- // Conservatively assume this is convergent. If there turnes out to
- // be a need, there should be separate convergent intrinsic opcodes.
- let isConvergent = 1;
}
// Intrinsic with side effects.
@@ -1241,9 +1237,23 @@
let hasSideEffects = true;
let mayLoad = true;
let mayStore = true;
+}
- // Conservatively assume this is convergent. If there turnes out to
- // be a need, there should be separate convergent intrinsic opcodes.
+// Convergent intrinsic without side effects.
+def G_INTRINSIC_CONVERGENT : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = false;
+ let isConvergent = true;
+}
+
+// Convergent intrinsic with side effects.
+def G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = true;
+ let mayLoad = true;
+ let mayStore = true;
let isConvergent = true;
}
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 41a9539..2fad089 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -39,6 +39,10 @@
// SelectionDAG has one setcc for all compares. This differentiates
// for G_ICMP and G_FCMP.
Instruction IfFloatingPoint = ?;
+
+ // SelectionDAG does not differentiate between convergent and non-convergent
+ // intrinsics. This specifies an alternate opcode for a convergent intrinsic.
+ Instruction IfConvergent = ?;
}
// These are defined in the same order as the G_* instructions.
@@ -106,10 +110,17 @@
def : GINodeEquiv<G_FLDEXP, fldexp>;
def : GINodeEquiv<G_FCANONICALIZE, fcanonicalize>;
def : GINodeEquiv<G_IS_FPCLASS, is_fpclass>;
-def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain>;
+
+def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain> {
+ let IfConvergent = G_INTRINSIC_CONVERGENT;
+}
+
// ISD::INTRINSIC_VOID can also be handled with G_INTRINSIC_W_SIDE_EFFECTS.
-def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
-def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
+let IfConvergent = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS in {
+ def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
+ def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
+}
+
def : GINodeEquiv<G_BR, br>;
def : GINodeEquiv<G_BSWAP, bswap>;
def : GINodeEquiv<G_BITREVERSE, bitreverse>;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 363ffbf..2c726ac 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -48,6 +48,8 @@
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
default:
return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
}
@@ -726,6 +728,8 @@
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
default: {
unsigned NumBits =
TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 9a67a8d..8f9d917 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2533,8 +2533,7 @@
// Ignore the callsite attributes. Backend code is most likely not expecting
// an intrinsic to sometimes have side effects and sometimes not.
- MachineInstrBuilder MIB =
- MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+ MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
@@ -2885,7 +2884,7 @@
}
}
- MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
return true;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
index 8cfb1b7..45b403b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
@@ -76,6 +76,9 @@
setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS, 0,
+ {{1, Legal}});
setLegalizeScalarToDifferentSizeStrategy(
TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f0da0d8..e816da8 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -119,8 +119,7 @@
MIRBuilder.setInstrAndDebugLoc(MI);
- if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
- MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
+ if (isa<GIntrinsic>(MI))
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 962b54e..90358e7 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -775,30 +775,55 @@
return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
}
-MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
- ArrayRef<Register> ResultRegs,
- bool HasSideEffects) {
- auto MIB =
- buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
- : TargetOpcode::G_INTRINSIC);
+static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
+ if (HasSideEffects && IsConvergent)
+ return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS;
+ if (HasSideEffects)
+ return TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
+ if (IsConvergent)
+ return TargetOpcode::G_INTRINSIC_CONVERGENT;
+ return TargetOpcode::G_INTRINSIC;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Register> ResultRegs,
+ bool HasSideEffects, bool isConvergent) {
+ auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
for (unsigned ResultReg : ResultRegs)
MIB.addDef(ResultReg);
MIB.addIntrinsicID(ID);
return MIB;
}
+MachineInstrBuilder
+MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Register> ResultRegs) {
+ auto Attrs = Intrinsic::getAttributes(getContext(), ID);
+ bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent);
+ return buildIntrinsic(ID, ResultRegs, HasSideEffects, isConvergent);
+}
+
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
ArrayRef<DstOp> Results,
- bool HasSideEffects) {
- auto MIB =
- buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
- : TargetOpcode::G_INTRINSIC);
+ bool HasSideEffects,
+ bool isConvergent) {
+ auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
for (DstOp Result : Results)
Result.addDefToMIB(*getMRI(), MIB);
MIB.addIntrinsicID(ID);
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<DstOp> Results) {
+ auto Attrs = Intrinsic::getAttributes(getContext(), ID);
+ bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent);
+ return buildIntrinsic(ID, Results, HasSideEffects, isConvergent);
+}
+
MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 936e1c2..af235f5 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeCalc.h"
@@ -223,7 +224,11 @@
bool verifyAllRegOpsScalar(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
+
+ bool verifyGIntrinsicSideEffects(const MachineInstr *MI);
+ bool verifyGIntrinsicConvergence(const MachineInstr *MI);
void verifyPreISelGenericInstruction(const MachineInstr *MI);
+
void visitMachineInstrBefore(const MachineInstr *MI);
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
void visitMachineBundleAfter(const MachineInstr *MI);
@@ -955,6 +960,55 @@
return true;
}
+bool MachineVerifier::verifyGIntrinsicSideEffects(const MachineInstr *MI) {
+ auto Opcode = MI->getOpcode();
+ bool NoSideEffects = Opcode == TargetOpcode::G_INTRINSIC ||
+ Opcode == TargetOpcode::G_INTRINSIC_CONVERGENT;
+ unsigned IntrID = cast<GIntrinsic>(MI)->getIntrinsicID();
+ if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
+ AttributeList Attrs = Intrinsic::getAttributes(
+ MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
+ bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ if (NoSideEffects && DeclHasSideEffects) {
+ report(Twine(TII->getName(Opcode),
+ " used with intrinsic that accesses memory"),
+ MI);
+ return false;
+ }
+ if (!NoSideEffects && !DeclHasSideEffects) {
+ report(Twine(TII->getName(Opcode), " used with readnone intrinsic"), MI);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool MachineVerifier::verifyGIntrinsicConvergence(const MachineInstr *MI) {
+ auto Opcode = MI->getOpcode();
+ bool NotConvergent = Opcode == TargetOpcode::G_INTRINSIC ||
+ Opcode == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
+ unsigned IntrID = cast<GIntrinsic>(MI)->getIntrinsicID();
+ if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
+ AttributeList Attrs = Intrinsic::getAttributes(
+ MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
+ bool DeclIsConvergent = Attrs.hasFnAttr(Attribute::Convergent);
+ if (NotConvergent && DeclIsConvergent) {
+ report(Twine(TII->getName(Opcode), " used with a convergent intrinsic"),
+ MI);
+ return false;
+ }
+ if (!NotConvergent && !DeclIsConvergent) {
+ report(
+ Twine(TII->getName(Opcode), " used with a non-convergent intrinsic"),
+ MI);
+ return false;
+ }
+ }
+
+ return true;
+}
+
void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (isFunctionSelected)
report("Unexpected generic instruction in a Selected function", MI);
@@ -1493,7 +1547,9 @@
break;
}
case TargetOpcode::G_INTRINSIC:
- case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
// TODO: Should verify number of def and use operands, but the current
// interface requires passing in IR types for mangling.
const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs());
@@ -1502,21 +1558,10 @@
break;
}
- bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
- unsigned IntrID = IntrIDOp.getIntrinsicID();
- if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
- AttributeList Attrs = Intrinsic::getAttributes(
- MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
- bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
- if (NoSideEffects && DeclHasSideEffects) {
- report("G_INTRINSIC used with intrinsic that accesses memory", MI);
- break;
- }
- if (!NoSideEffects && !DeclHasSideEffects) {
- report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI);
- break;
- }
- }
+ if (!verifyGIntrinsicSideEffects(MI))
+ break;
+ if (!verifyGIntrinsicConvergence(MI))
+ break;
break;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 47d5ce5..8a68af4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1497,8 +1497,7 @@
llvm_unreachable("unexpected vector shape");
MachineInstrBuilder UADD;
for (LLT HTy : HAddTys) {
- UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
- .addUse(HSum);
+ UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
HSum = UADD.getReg(0);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 9ba5ea8..8c036f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -82,9 +82,10 @@
ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
}
- auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
- {MRI.getType(ExtReg)}, false)
- .addReg(ExtReg);
+ auto ToSGPR = MIRBuilder
+ .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
+ {MRI.getType(ExtReg)})
+ .addReg(ExtReg);
ExtReg = ToSGPR.getReg(0);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index ea2cb5d..d4f3820 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -42,7 +42,8 @@
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
return true;
- case AMDGPU::G_INTRINSIC: {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:
@@ -67,8 +68,7 @@
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
- return MI.getNumOperands() >
- (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
+ return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}
@@ -86,13 +86,15 @@
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR:
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
case AMDGPU::G_BITCAST:
case AMDGPU::G_ANYEXT:
case AMDGPU::G_BUILD_VECTOR:
case AMDGPU::G_BUILD_VECTOR_TRUNC:
case AMDGPU::G_PHI:
return false;
- case AMDGPU::G_INTRINSIC: {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_interp_p1:
@@ -228,7 +230,8 @@
case AMDGPU::G_FCANONICALIZE:
case AMDGPU::G_AMDGPU_RCP_IFLAG:
return true;
- case AMDGPU::G_INTRINSIC: {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:
@@ -327,7 +330,8 @@
case AMDGPU::G_FPTRUNC:
NegateOperand(MatchInfo->getOperand(1));
break;
- case AMDGPU::G_INTRINSIC: {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 059a4fe6..04f7ef4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3431,8 +3431,10 @@
case TargetOpcode::G_INSERT:
return selectG_INSERT(I);
case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
return selectG_INTRINSIC(I);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
case TargetOpcode::G_ICMP:
if (selectG_ICMP(I))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 7b3dced..f278f6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2346,10 +2346,10 @@
auto Const0 = B.buildConstant(S32, FractBits - 32);
auto Const1 = B.buildConstant(S32, ExpBits);
- auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
- .addUse(Hi)
- .addUse(Const0.getReg(0))
- .addUse(Const1.getReg(0));
+ auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32})
+ .addUse(Hi)
+ .addUse(Const0.getReg(0))
+ .addUse(Const1.getReg(0));
return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
}
@@ -2437,8 +2437,7 @@
auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
- auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32},
- /*HasSideEffects=*/false)
+ auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32})
.addUse(Unmerge.getReg(1));
auto LS2 = B.buildSub(S32, LS, One);
ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
@@ -2671,15 +2670,16 @@
auto OneOver2Pi = B.buildFConstant(Ty, 0.5 * numbers::inv_pi);
if (ST.hasTrigReducedRange()) {
auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags);
- TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
- .addUse(MulVal.getReg(0))
- .setMIFlags(Flags).getReg(0);
+ TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty})
+ .addUse(MulVal.getReg(0))
+ .setMIFlags(Flags)
+ .getReg(0);
} else
TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0);
Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ?
Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
- B.buildIntrinsic(TrigIntrin, ArrayRef<Register>(DstReg), false)
+ B.buildIntrinsic(TrigIntrin, ArrayRef<Register>(DstReg))
.addUse(TrigVal)
.setMIFlags(Flags);
MI.eraseFromParent();
@@ -2772,7 +2772,7 @@
// functions that use local objects. However, if these dead functions are
// not eliminated, we don't want a compile time error. Just emit a warning
// and a trap, since there should be no callable path here.
- B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
B.buildUndef(DstReg);
MI.eraseFromParent();
return true;
@@ -2798,8 +2798,7 @@
// Adjust alignment for that dynamic shared memory array.
MFI->setDynLDSAlign(MF.getFunction(), *cast<GlobalVariable>(GV));
LLT S32 = LLT::scalar(32);
- auto Sz =
- B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
+ auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32});
B.buildIntToPtr(DstReg, Sz);
MI.eraseFromParent();
return true;
@@ -3074,9 +3073,9 @@
const LLT F32 = LLT::scalar(32);
// Nothing in half is a denormal when promoted to f32.
auto Ext = B.buildFPExt(F32, Src, Flags);
- auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32}, false)
- .addUse(Ext.getReg(0))
- .setMIFlags(Flags);
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32})
+ .addUse(Ext.getReg(0))
+ .setMIFlags(Flags);
B.buildFPTrunc(Dst, Log2, Flags);
MI.eraseFromParent();
return true;
@@ -3086,14 +3085,14 @@
auto [ScaledInput, IsLtSmallestNormal] = getScaledLogInput(B, Src, Flags);
if (!ScaledInput) {
- B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)}, false)
+ B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)})
.addUse(Src)
.setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
- auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
.addUse(ScaledInput)
.setMIFlags(Flags);
@@ -3153,9 +3152,8 @@
if (ScaledInput)
X = ScaledInput;
- auto Y = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
- .addUse(X)
- .setMIFlags(Flags);
+ auto Y =
+ B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}).addUse(X).setMIFlags(Flags);
Register R;
if (ST.hasFastFMAF32()) {
@@ -3232,7 +3230,7 @@
LLT Ty = B.getMRI()->getType(Dst);
auto Log2Operand = Ty == LLT::scalar(16)
? B.buildFLog2(Ty, Src, Flags)
- : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
.addUse(Src)
.setMIFlags(Flags);
auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
@@ -3255,9 +3253,9 @@
if (Ty == F16) {
// Nothing in half is a denormal when promoted to f32.
auto Ext = B.buildFPExt(F32, Src, Flags);
- auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32}, false)
- .addUse(Ext.getReg(0))
- .setMIFlags(Flags);
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32})
+ .addUse(Ext.getReg(0))
+ .setMIFlags(Flags);
B.buildFPTrunc(Dst, Log2, Flags);
MI.eraseFromParent();
return true;
@@ -3267,9 +3265,9 @@
if (allowApproxFunc(B.getMF(), Flags) ||
!needsDenormHandlingF32(B.getMF(), Src, Flags)) {
- B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
- .addUse(Src)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
+ .addUse(Src)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
@@ -3287,7 +3285,7 @@
auto AddOffset = B.buildSelect(F32, NeedsScaling, SixtyFour, Zero, Flags);
auto AddInput = B.buildFAdd(F32, Src, AddOffset, Flags);
- auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
+ auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
.addUse(AddInput.getReg(0))
.setMIFlags(Flags);
@@ -3307,9 +3305,9 @@
auto Mul = B.buildFMul(Ty, Src, K, Flags);
if (Ty == LLT::scalar(32)) {
- B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
- .addUse(Mul.getReg(0))
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
+ .addUse(Mul.getReg(0))
+ .setMIFlags(Flags);
} else {
B.buildFExp2(Dst, Mul.getReg(0), Flags);
}
@@ -3429,7 +3427,7 @@
auto A = B.buildFAdd(Ty, PHSubE, PL, Flags);
auto IntE = B.buildFPTOSI(LLT::scalar(32), E);
- auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
+ auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
.addUse(A.getReg(0))
.setMIFlags(Flags);
auto R = B.buildFLdexp(Ty, Exp2, IntE, Flags);
@@ -3471,20 +3469,20 @@
if (Ty == S32) {
auto Log = B.buildFLog2(S32, Src0, Flags);
- auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
- .addUse(Log.getReg(0))
- .addUse(Src1)
- .setMIFlags(Flags);
+ auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32})
+ .addUse(Log.getReg(0))
+ .addUse(Src1)
+ .setMIFlags(Flags);
B.buildFExp2(Dst, Mul, Flags);
} else if (Ty == S16) {
// There's no f16 fmul_legacy, so we need to convert for it.
auto Log = B.buildFLog2(S16, Src0, Flags);
auto Ext0 = B.buildFPExt(S32, Log, Flags);
auto Ext1 = B.buildFPExt(S32, Src1, Flags);
- auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
- .addUse(Ext0.getReg(0))
- .addUse(Ext1.getReg(0))
- .setMIFlags(Flags);
+ auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32})
+ .addUse(Ext0.getReg(0))
+ .addUse(Ext1.getReg(0))
+ .setMIFlags(Flags);
B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags);
} else
@@ -3526,9 +3524,9 @@
//
// Convert floor(x) to (x - fract(x))
- auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64}, false)
- .addUse(OrigSrc)
- .setMIFlags(Flags);
+ auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64})
+ .addUse(OrigSrc)
+ .setMIFlags(Flags);
// Give source modifier matching some assistance before obscuring a foldable
// pattern.
@@ -4477,9 +4475,9 @@
// 1 / x -> RCP(x)
if (CLHS->isExactlyValue(1.0)) {
- B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
- .addUse(RHS)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res)
+ .addUse(RHS)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -4490,9 +4488,9 @@
// -1 / x -> RCP( FNEG(x) )
if (CLHS->isExactlyValue(-1.0)) {
auto FNeg = B.buildFNeg(ResTy, RHS, Flags);
- B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
- .addUse(FNeg.getReg(0))
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res)
+ .addUse(FNeg.getReg(0))
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -4506,9 +4504,9 @@
return false;
// x / y -> x * (1.0 / y)
- auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
- .addUse(RHS)
- .setMIFlags(Flags);
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy})
+ .addUse(RHS)
+ .setMIFlags(Flags);
B.buildFMul(Res, LHS, RCP, Flags);
MI.eraseFromParent();
@@ -4534,9 +4532,9 @@
auto NegY = B.buildFNeg(ResTy, Y);
auto One = B.buildFConstant(ResTy, 1.0);
- auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
- .addUse(Y)
- .setMIFlags(Flags);
+ auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy})
+ .addUse(Y)
+ .setMIFlags(Flags);
auto Tmp0 = B.buildFMA(ResTy, NegY, R, One);
R = B.buildFMA(ResTy, Tmp0, R, R);
@@ -4570,18 +4568,18 @@
auto LHSExt = B.buildFPExt(S32, LHS, Flags);
auto RHSExt = B.buildFPExt(S32, RHS, Flags);
- auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
- .addUse(RHSExt.getReg(0))
- .setMIFlags(Flags);
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
+ .addUse(RHSExt.getReg(0))
+ .setMIFlags(Flags);
auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
auto RDst = B.buildFPTrunc(S16, QUOT, Flags);
- B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
- .addUse(RDst.getReg(0))
- .addUse(RHS)
- .addUse(LHS)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res)
+ .addUse(RDst.getReg(0))
+ .addUse(RHS)
+ .addUse(LHS)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -4636,21 +4634,21 @@
auto One = B.buildFConstant(S32, 1.0f);
auto DenominatorScaled =
- B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
- .addUse(LHS)
- .addUse(RHS)
- .addImm(0)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(0)
+ .setMIFlags(Flags);
auto NumeratorScaled =
- B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
- .addUse(LHS)
- .addUse(RHS)
- .addImm(1)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(1)
+ .setMIFlags(Flags);
- auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
- .addUse(DenominatorScaled.getReg(0))
- .setMIFlags(Flags);
+ auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
+ .addUse(DenominatorScaled.getReg(0))
+ .setMIFlags(Flags);
auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
@@ -4670,18 +4668,18 @@
if (Mode.FP32Denormals != DenormalMode::getIEEE())
toggleSPDenormMode(false, B, ST, Mode);
- auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
- .addUse(Fma4.getReg(0))
- .addUse(Fma1.getReg(0))
- .addUse(Fma3.getReg(0))
- .addUse(NumeratorScaled.getReg(1))
- .setMIFlags(Flags);
+ auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32})
+ .addUse(Fma4.getReg(0))
+ .addUse(Fma1.getReg(0))
+ .addUse(Fma3.getReg(0))
+ .addUse(NumeratorScaled.getReg(1))
+ .setMIFlags(Flags);
- B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
- .addUse(Fmas.getReg(0))
- .addUse(RHS)
- .addUse(LHS)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res)
+ .addUse(Fmas.getReg(0))
+ .addUse(RHS)
+ .addUse(LHS)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -4704,27 +4702,27 @@
auto One = B.buildFConstant(S64, 1.0);
- auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
- .addUse(LHS)
- .addUse(RHS)
- .addImm(0)
- .setMIFlags(Flags);
+ auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(0)
+ .setMIFlags(Flags);
auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
- auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}, false)
- .addUse(DivScale0.getReg(0))
- .setMIFlags(Flags);
+ auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64})
+ .addUse(DivScale0.getReg(0))
+ .setMIFlags(Flags);
auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags);
auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags);
auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags);
- auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
- .addUse(LHS)
- .addUse(RHS)
- .addImm(1)
- .setMIFlags(Flags);
+ auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(1)
+ .setMIFlags(Flags);
auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
@@ -4751,14 +4749,14 @@
Scale = DivScale1.getReg(1);
}
- auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}, false)
- .addUse(Fma4.getReg(0))
- .addUse(Fma3.getReg(0))
- .addUse(Mul.getReg(0))
- .addUse(Scale)
- .setMIFlags(Flags);
+ auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64})
+ .addUse(Fma4.getReg(0))
+ .addUse(Fma3.getReg(0))
+ .addUse(Mul.getReg(0))
+ .addUse(Scale)
+ .setMIFlags(Flags);
- B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef(Res), false)
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef(Res))
.addUse(Fmas.getReg(0))
.addUse(RHS)
.addUse(LHS)
@@ -4779,10 +4777,10 @@
LLT Ty = MRI.getType(Res0);
LLT InstrExpTy = Ty == LLT::scalar(16) ? LLT::scalar(16) : LLT::scalar(32);
- auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty}, false)
+ auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty})
.addUse(Val)
.setMIFlags(Flags);
- auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy}, false)
+ auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy})
.addUse(Val)
.setMIFlags(Flags);
@@ -4826,9 +4824,9 @@
auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
- auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
- .addUse(Mul0.getReg(0))
- .setMIFlags(Flags);
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
+ .addUse(Mul0.getReg(0))
+ .setMIFlags(Flags);
auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
@@ -4881,8 +4879,8 @@
auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
auto SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags);
- auto SqrtY = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}, false)
- .addReg(SqrtX.getReg(0));
+ auto SqrtY =
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}).addReg(SqrtX.getReg(0));
auto Half = B.buildFConstant(F64, 0.5);
auto SqrtH0 = B.buildFMul(F64, SqrtY, Half);
@@ -4948,9 +4946,9 @@
else
return false;
- auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty}, false)
- .addUse(Src)
- .setMIFlags(Flags);
+ auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty})
+ .addUse(Src)
+ .setMIFlags(Flags);
// We don't need to concern ourselves with the snan handling difference, since
// the rsq quieted (or not) so use the one which will directly select.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 3b523d8..d99149d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -288,7 +288,7 @@
// rcp(sqrt(x))
if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
- B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
.addUse(SqrtSrcMI->getOperand(0).getReg())
.setMIFlags(MI.getFlags());
};
@@ -298,7 +298,7 @@
// sqrt(rcp(x))
if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
- B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
.addUse(RcpSrcMI->getOperand(0).getReg())
.setMIFlags(MI.getFlags());
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index c9a888e..1cb8fb8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -633,8 +633,10 @@
return AltMappings;
}
case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT:
return getInstrAlternativeMappingsIntrinsic(MI, MRI);
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
default:
break;
@@ -923,8 +925,7 @@
// The ballot becomes a no-op during instruction selection.
CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,
- {LLT::scalar(Subtarget.isWave32() ? 32 : 64)},
- false)
+ {LLT::scalar(Subtarget.isWave32() ? 32 : 64)})
.addReg(CondReg)
.getReg(0);
MRI.setRegClass(CondReg, WaveRC);
@@ -1452,7 +1453,7 @@
const LLT S32 = LLT::scalar(32);
- unsigned FirstOpnd = MI.getOpcode() == AMDGPU::G_INTRINSIC ? 2 : 1;
+ unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
Register SrcReg = MI.getOperand(FirstOpnd).getReg();
Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg();
Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();
@@ -2949,7 +2950,8 @@
applyMappingSBufferLoad(OpdMapper);
return;
}
- case AMDGPU::G_INTRINSIC: {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::amdgcn_readlane: {
substituteSimpleCopyRegs(OpdMapper, 2);
@@ -3035,7 +3037,8 @@
executeInWaterfallLoop(MI, MRI, { N });
return;
}
- case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
@@ -4198,7 +4201,8 @@
OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
break;
}
- case AMDGPU::G_INTRINSIC: {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
default:
return getInvalidInstructionMapping();
@@ -4560,7 +4564,8 @@
}
break;
}
- case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_s_getreg:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 5509b40..b78aa9a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11328,6 +11328,7 @@
return false;
return true;
case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT:
switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
case Intrinsic::amdgcn_fmul_legacy:
case Intrinsic::amdgcn_fmad_ftz:
@@ -13761,7 +13762,8 @@
const MachineRegisterInfo &MRI, unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
- case AMDGPU::G_INTRINSIC: {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
case Intrinsic::amdgcn_workitem_id_x:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 0);
@@ -13835,7 +13837,6 @@
AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
if (MaybeAlign RetAlign = Attrs.getRetAlignment())
return *RetAlign;
- return Align(1);
}
return Align(1);
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index c53f164..b4452d0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -976,7 +976,7 @@
// Use Intrinsic::spv_extractelt so dynamic vs static extraction is
// handled later: extr = spv_extractelt LoadedVector, IndexRegister.
MachineInstrBuilder ExtractInst = MIRBuilder.buildIntrinsic(
- Intrinsic::spv_extractelt, ArrayRef<Register>{Extracted}, true);
+ Intrinsic::spv_extractelt, ArrayRef<Register>{Extracted}, true, false);
ExtractInst.addUse(LoadedVector).addUse(IndexRegister);
// If the index is dynamic, need check if it's < 3, and then use a select.
@@ -1644,8 +1644,8 @@
Register Reg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
MRI->setType(Reg, LLType);
GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF());
- auto GEPInst = MIRBuilder.buildIntrinsic(Intrinsic::spv_gep,
- ArrayRef<Register>{Reg}, true);
+ auto GEPInst = MIRBuilder.buildIntrinsic(
+ Intrinsic::spv_gep, ArrayRef<Register>{Reg}, true, false);
GEPInst
.addImm(GepMI->getOperand(2).getImm()) // In bound.
.addUse(ArrayMI->getOperand(0).getReg()) // Alloca.
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 7f00bb4..bd3b9b0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -276,6 +276,7 @@
return selectOpUndef(ResVReg, ResType, I);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return selectIntrinsic(ResVReg, ResType, I);
case TargetOpcode::G_BITREVERSE:
return selectBitreverse(ResVReg, ResType, I);
@@ -591,15 +592,16 @@
bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
- unsigned OpOffset =
- I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
+ unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
Register Ptr = I.getOperand(1 + OpOffset).getReg();
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.addUse(Ptr);
if (!I.getNumMemOperands()) {
- assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ||
+ I.getOpcode() ==
+ TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS);
addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
} else {
addMemoryOperands(*I.memoperands_begin(), MIB);
@@ -608,8 +610,7 @@
}
bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
- unsigned OpOffset =
- I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
+ unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
Register StoreVal = I.getOperand(0 + OpOffset).getReg();
Register Ptr = I.getOperand(1 + OpOffset).getReg();
MachineBasicBlock &BB = *I.getParent();
@@ -617,7 +618,9 @@
.addUse(Ptr)
.addUse(StoreVal);
if (!I.getNumMemOperands()) {
- assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ||
+ I.getOpcode() ==
+ TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS);
addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
} else {
addMemoryOperands(*I.memoperands_begin(), MIB);
@@ -719,7 +722,7 @@
Register MemSemEqReg;
Register MemSemNeqReg;
Register Ptr = I.getOperand(2).getReg();
- if (I.getOpcode() != TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) {
+ if (!isa<GIntrinsic>(I)) {
assert(I.hasOneMemOperand());
const MachineMemOperand *MemOp = *I.memoperands_begin();
unsigned Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 3ef90f9..e5094e8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -219,7 +219,7 @@
ConstReg = ConstInstr->getOperand(1).getReg();
return MRI->getVRegDef(ConstReg);
}
- return ConstInstr;
+ return MRI->getVRegDef(ConstReg);
}
uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) {
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
index 6a0b5bb..288c809 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
@@ -10,7 +10,7 @@
; CHECK-NOT: DIVERGENT: {{.*}}llvm.amdgcn.readfirstlane
%6:_(p1) = G_IMPLICIT_DEF
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
- %5:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %4(s32)
+ %5:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %4(s32)
G_STORE %5(s32), %6(p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
S_ENDPGM 0
...
@@ -29,7 +29,7 @@
%9:_(s64) = G_CONSTANT i64 8
%10:_(p4) = G_PTR_ADD %7, %9(s64)
%11:_(p1) = G_LOAD %10(p4) :: (dereferenceable invariant load (p1), addrspace 4)
- %12:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %8(s32), %13(s32), 33
+ %12:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %8(s32), %13(s32), 33
G_STORE %12(s64), %11(p1) :: (volatile store (s64) , addrspace 1)
S_ENDPGM 0
@@ -52,7 +52,7 @@
%11:_(s32) = G_EXTRACT_VECTOR_ELT %8(<2 x s32>), %12(s32)
%13:_(s64) = G_CONSTANT i64 4
%14:_(p4) = G_PTR_ADD %7, %13(s64)
- %15:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %9(s32), %11(s32), 33
+ %15:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %9(s32), %11(s32), 33
G_STORE %15(s64), %16(p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
S_ENDPGM 0
@@ -70,7 +70,7 @@
%6:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
%7:_(s32) = G_LOAD %6(p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
%8:_(s1) = G_TRUNC %7(s32)
- %9:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %8(s1)
+ %9:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %8(s1)
G_STORE %9(s64), %10(p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
S_ENDPGM 0
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
index 779017e..b6cca69 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
@@ -4,16 +4,16 @@
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
# CHECK: DIVERGENT: G_BR %bb.2
# CHECK-LABEL: BLOCK bb.1
# CHECK-LABEL: BLOCK bb.2
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.1, %{{[0-9]*}}:_(s32), %bb.0
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_PHI %{{[0-9]*}}:_(s1), %bb.1, %{{[0-9]*}}:_(s1), %bb.0
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
# CHECK: DIVERGENT: G_BR %bb.4
# CHECK-LABEL: BLOCK bb.3
@@ -44,7 +44,7 @@
%14:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
%16:_(s1) = G_ICMP intpred(slt), %14(s32), %15
%18:_(s1) = G_XOR %16, %17
- %19:_(s1), %20:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %16(s1)
+ %19:_(s1), %20:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %16(s1)
G_BRCOND %19(s1), %bb.2
G_BR %bb.3
@@ -60,8 +60,8 @@
%25:_(s32) = G_PHI %22(s32), %bb.2, %33(s32), %bb.1
%26:_(s1) = G_PHI %24(s1), %bb.2, %18(s1), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s64)
- %27:_(s1), %28:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %26(s1)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s64)
+ %27:_(s1), %28:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %26(s1)
G_BRCOND %27(s1), %bb.4
G_BR %bb.5
@@ -72,7 +72,7 @@
bb.5:
%31:_(s32) = G_PHI %25(s32), %bb.3, %29(s32), %bb.4
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s64)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s64)
G_STORE %31(s32), %32(p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
S_ENDPGM 0
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-divergence.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-divergence.mir
index 7bff87c..1e83151 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-divergence.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-divergence.mir
@@ -27,7 +27,7 @@
%11:_(s64) = G_PHI %12(s64), %bb.2, %15(s64), %bb.1
%18:_(s1) = G_CONSTANT i1 false
- %12:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
+ %12:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %12(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.3
@@ -35,7 +35,7 @@
bb.3:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%14:_(s64) = G_PHI %12(s64), %bb.2
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
S_ENDPGM 0
...
@@ -82,7 +82,7 @@
successors: %bb.5, %bb.4
%15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
- %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
+ %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.5
@@ -90,7 +90,7 @@
bb.5:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%18:_(s64) = G_PHI %16(s64), %bb.4
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_BR %bb.3
bb.6:
@@ -140,7 +140,7 @@
successors: %bb.5, %bb.4
%15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
- %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
+ %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.5
@@ -148,7 +148,7 @@
bb.5:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%18:_(s64) = G_PHI %16(s64), %bb.4
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_BR %bb.3
bb.6:
@@ -191,7 +191,7 @@
%15:_(s64) = G_PHI %25(s64), %bb.2, %16(s64), %bb.3
%24:_(s1) = G_CONSTANT i1 false
- %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
+ %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
@@ -201,7 +201,7 @@
successors: %bb.5, %bb.2
%18:_(s64) = G_PHI %16(s64), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_BRCOND %13(s1), %bb.2
G_BR %bb.5
@@ -241,7 +241,7 @@
bb.2:
%15:_(s64) = G_PHI %16(s64), %bb.4, %19(s64), %bb.1
%24:_(s1) = G_CONSTANT i1 true
- %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
+ %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
bb.3:
successors: %bb.4, %bb.3
@@ -259,7 +259,7 @@
bb.5:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%18:_(s64) = G_PHI %16(s64), %bb.4
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
S_ENDPGM 0
...
@@ -291,7 +291,7 @@
%10:_(s64) = G_PHI %11(s64), %bb.2, %19(s64), %bb.1
%24:_(s1) = G_CONSTANT i1 false
- %11:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
+ %11:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %11(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.3
@@ -300,7 +300,7 @@
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
; CHECK-NOT: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%13:_(s64) = G_PHI %11(s64), %bb.2
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
%14:_(p4) = COPY %3(p4)
%15:_(s64) = G_CONSTANT i64 40
%16:_(p4) = G_PTR_ADD %14, %15(s64)
@@ -354,7 +354,7 @@
%15:_(s64) = G_PHI %23(s64), %bb.2, %16(s64), %bb.3
%25:_(s1) = G_CONSTANT i1 false
- %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
+ %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
@@ -362,7 +362,7 @@
bb.4:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%18:_(s64) = G_PHI %16(s64), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
bb.5:
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir
index b7e0d54..d1f53aa 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir
@@ -43,18 +43,18 @@
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
- ; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break)
+ ; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break)
%19:_(s32) = G_PHI %18(s32), %bb.7, %25(s32), %bb.4
%20:_(s32) = G_PHI %6(s32), %bb.7, %25(s32), %bb.4
%21:_(s1) = G_PHI %34(s1), %bb.7, %33(s1), %bb.4
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
- %22:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %21(s1), %0(s32)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
+ %22:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %21(s1), %0(s32)
SI_LOOP %22(s32), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
bb.6:
%24:_(s32) = G_PHI %22(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %24(s32)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %24(s32)
SI_RETURN
bb.7:
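
[Note: the uniformity-analysis test updates above are a pure opcode rename; analysis code itself can stay opcode-agnostic by going through the GIntrinsic accessors this patch adds in GenericMachineInstrs.h. A minimal sketch of such a query (the helper name is illustrative, not part of the patch):

    #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    // Illustrative helper: true if MI is any of the four generic intrinsic
    // opcodes and was emitted from a `convergent` IR intrinsic.
    static bool isConvergentIntrinsic(const MachineInstr &MI) {
      if (const auto *GI = dyn_cast<GIntrinsic>(&MI))
        return GI->isConvergent();
      return false;
    }

This avoids hardcoding the opcode list at each call site, which is what made the blanket test renames below mechanical.]
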
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index bacb53a..3f157ff 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -273,6 +273,12 @@
# DEBUG-NEXT: G_INTRINSIC_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_INTRINSIC_CONVERGENT (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_ANYEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
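
[Note: the two new opcodes appear in the AArch64 coverage dump with no legalization rules, which is expected; no target defines generic rules for them yet. Assuming the GenericOpcodes.td definitions mark the new opcodes with `isConvergent = 1` (that part of the patch is not shown in this hunk), generic code can also query the property straight off the MCInstrDesc without inspecting the intrinsic ID:

    // Sketch, assuming the new opcodes set the MCInstrDesc Convergent flag.
    static bool mayNotSinkAcrossControlFlow(const llvm::MachineInstr &MI) {
      return MI.isConvergent(); // Reads the flag from the opcode's MCInstrDesc.
    }
]
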
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
index 4817ebb..01fafd7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
@@ -222,9 +222,9 @@
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s44) = G_SSHLSAT [[TRUNC]], [[C]](s44)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s44)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%1:_(s32) = COPY $sgpr0
@@ -236,9 +236,9 @@
%7:_(s44) = G_SSHLSAT %6, %5(s44)
%8:_(s64) = G_ANYEXT %7(s44)
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64)
- %11:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
+ %11:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
$sgpr0 = COPY %11(s32)
- %12:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
+ %12:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
$sgpr1 = COPY %12(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
@@ -261,9 +261,9 @@
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s55) = G_SSHLSAT [[TRUNC]], [[C]](s55)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s55)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%1:_(s32) = COPY $vgpr0
@@ -276,9 +276,9 @@
%8:_(s55) = G_SSHLSAT %6, %7(s55)
%9:_(s64) = G_ANYEXT %8(s55)
%10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64)
- %12:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
+ %12:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
$vgpr0 = COPY %12(s32)
- %13:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %11(s32)
+ %13:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %11(s32)
$vgpr1 = COPY %13(s32)
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir
index f4a1ddf..c5759ef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir
@@ -37,7 +37,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:_(s32) = COPY $sgpr0
@@ -49,7 +49,7 @@
%5:_(s32) = G_SSHLSAT %3, %4(s32)
%7:_(s32) = G_SSHLSAT %5, %6(s32)
%9:_(s32) = G_SSHLSAT %7, %8(s32)
- %10:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
+ %10:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
$sgpr0 = COPY %10(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir
index f59cde5..9b7b653 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir
@@ -19,7 +19,7 @@
; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]]
%0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
+ %1:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
S_ENDPGM 0, implicit %1
...
@@ -42,7 +42,7 @@
; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 65535, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]]
%0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535
+ %1:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535
S_ENDPGM 0, implicit %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 61aa320..e29b676 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -23,7 +23,7 @@
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
%3:vgpr(s16) = G_FPTRUNC %1
- %4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
+ %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
S_ENDPGM 0, implicit %4
...
@@ -49,7 +49,7 @@
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
%3:vgpr(s16) = G_FPTRUNC %1
- %4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
+ %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
S_ENDPGM 0, implicit %4
...
@@ -71,7 +71,7 @@
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
- %4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
+ %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
S_ENDPGM 0, implicit %4
...
@@ -93,7 +93,7 @@
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
- %4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
+ %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
S_ENDPGM 0, implicit %4
...
@@ -119,7 +119,7 @@
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s64) = G_FPEXT %0
%3:vgpr(s64) = G_FPEXT %1
- %4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
+ %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
S_ENDPGM 0, implicit %4
...
@@ -145,6 +145,6 @@
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s64) = G_FPEXT %0
%3:vgpr(s64) = G_FPEXT %1
- %4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
+ %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
S_ENDPGM 0, implicit %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index a5f7d0e..478f12e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -23,7 +23,7 @@
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
%3:vgpr(s16) = G_FPTRUNC %1
- %4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
+ %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
S_ENDPGM 0, implicit %4
...
@@ -49,7 +49,7 @@
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
%3:vgpr(s16) = G_FPTRUNC %1
- %4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
+ %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
S_ENDPGM 0, implicit %4
...
@@ -71,7 +71,7 @@
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
- %4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
+ %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
S_ENDPGM 0, implicit %4
...
@@ -93,7 +93,7 @@
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
- %4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
+ %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
S_ENDPGM 0, implicit %4
...
@@ -119,7 +119,7 @@
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s64) = G_FPEXT %0
%3:vgpr(s64) = G_FPEXT %1
- %4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
+ %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
S_ENDPGM 0, implicit %4
...
@@ -145,6 +145,6 @@
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s64) = G_FPEXT %0
%3:vgpr(s64) = G_FPEXT %1
- %4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
+ %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
S_ENDPGM 0, implicit %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir
index d5c27a3..b5af1ee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir
@@ -2,7 +2,7 @@
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2> %t | FileCheck -check-prefix=GCN %s
# RUN: FileCheck -check-prefix=ERR %s < %t
-# ERR: remark: <unknown>:0:0: cannot select: %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0:sgpr(s32) (in function: readfirstlane_s)
+# ERR: remark: <unknown>:0:0: cannot select: %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0:sgpr(s32) (in function: readfirstlane_s)
---
name: readfirstlane_v
@@ -20,7 +20,7 @@
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_READFIRSTLANE_B32_]]
%0:vgpr(s32) = COPY $vgpr0
- %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
+ %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
S_ENDPGM 0, implicit %1
...
@@ -39,7 +39,7 @@
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 [[COPY]]
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]]
%0:vgpr(s32) = G_CONSTANT i32 123
- %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
+ %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
S_ENDPGM 0, implicit %1
...
@@ -57,9 +57,9 @@
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GCN-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+ ; GCN-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; GCN-NEXT: S_ENDPGM 0, implicit [[INT]](s32)
%0:sgpr(s32) = COPY $sgpr0
- %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
+ %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
S_ENDPGM 0, implicit %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.barrier.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.barrier.mir
index 428c78a..ea60f04 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.barrier.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.barrier.mir
@@ -13,7 +13,7 @@
bb.0:
; GCN-LABEL: name: s_barrier
; GCN: S_BARRIER
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.barrier)
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.barrier)
...
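
[Note: on the builder side, callers that used to pick between two opcodes now pass an extra flag. A rough usage sketch, assuming the post-patch MachineIRBuilder::buildIntrinsic overload that takes HasSideEffects and isConvergent booleans (signature as of this revision; later revisions may change it):

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    #include "llvm/IR/IntrinsicsAMDGPU.h"

    using namespace llvm;

    static void emitBarrier(MachineIRBuilder &B) {
      // s.barrier is convergent and has side effects, so this produces
      // G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS, matching the test above.
      B.buildIntrinsic(Intrinsic::amdgcn_s_barrier, ArrayRef<Register>(),
                       /*HasSideEffects=*/true, /*isConvergent=*/true);
    }
]
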
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
index 40aec00..e98f298 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
@@ -51,8 +51,8 @@
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
ret i32 %vgpr
}
@@ -66,10 +66,10 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
ret i64 %vgpr
}
@@ -83,10 +83,10 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
ret <2 x i32> %vgpr
}
@@ -99,10 +99,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0
%value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1
@@ -116,8 +116,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
ret ptr addrspace(3) %vgpr
}
@@ -131,10 +131,10 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
ret ptr addrspace(1) %vgpr
}
@@ -146,8 +146,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
ret <2 x i16> %vgpr
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
index 5169c9b..875b725 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
@@ -82,10 +82,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
main_body:
%tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0
@@ -97,8 +97,8 @@
; CHECK-LABEL: name: non_void_ret
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
ret i32 0
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
index 86bad7b..9359bde 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
@@ -34,15 +34,15 @@
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2
; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]]
; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64)
- ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INTRINSIC_CONVERGENT]](s64)
+ ; CHECK-NEXT: G_BRCOND [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.atomicrmw.end:
; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32), %bb.2
- ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64)
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INTRINSIC_CONVERGENT]](s64), %bb.2
+ ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst
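
[Note: the IRTranslator output above follows directly from a two-flag mapping: side effects select the _W_SIDE_EFFECTS pair, and the IR-level `convergent` attribute selects the _CONVERGENT pair. A sketch of that mapping (names illustrative; the actual logic lives in MachineIRBuilder/IRTranslator):

    #include "llvm/CodeGen/TargetOpcodes.h"

    // Illustrative mapping from (HasSideEffects, IsConvergent) to the four
    // generic intrinsic opcodes exercised by the CHECK lines above.
    static unsigned intrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
      if (HasSideEffects)
        return IsConvergent
                   ? llvm::TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS
                   : llvm::TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
      return IsConvergent ? llvm::TargetOpcode::G_INTRINSIC_CONVERGENT
                          : llvm::TargetOpcode::G_INTRINSIC;
    }

Here @llvm.amdgcn.if.break is convergent without side effects and @llvm.amdgcn.loop / @llvm.amdgcn.end.cf are convergent with side effects, which is exactly the split the updated CHECK lines encode.]
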
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
index 10931c0..062d2e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -97,8 +97,8 @@
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
- ; CHECK-NEXT: G_BRCOND [[INT]](s1), %bb.2
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:_(s1), [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS1:%[0-9]+]]:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
+ ; CHECK-NEXT: G_BRCOND [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS]](s1), %bb.2
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.bb1:
@@ -108,7 +108,7 @@
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.bb2:
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64)
+ ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS1]](s64)
; CHECK-NEXT: SI_RETURN
bb:
br i1 %arg, label %bb2, label %bb1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir
index b7e52ca..1e87f8b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir
@@ -2,12 +2,12 @@
# Make sure incorrect usage of control flow intrinsics fails to select in case some transform separated the intrinsic from its branch.
-# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
-# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
-# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
-# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
-# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
-# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
---
@@ -19,7 +19,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
bb.1:
G_BRCOND %3, %bb.1
@@ -34,7 +34,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s32) = G_SELECT %3, %0, %1
S_ENDPGM 0, implicit %5
@@ -48,7 +48,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s32) = G_SELECT %3, %0, %1
G_BRCOND %3, %bb.1
@@ -67,7 +67,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 false
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
@@ -93,7 +93,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_OR %3, %5
G_BRCOND %6, %bb.2
@@ -118,7 +118,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
S_NOP 0, implicit %6
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
index 9716bb3..bfc398d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
@@ -2,7 +2,7 @@
# Make sure there's no crash if there is somehow no successor block.
-# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
---
name: brcond_si_if_no_succ_block
@@ -16,6 +16,6 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
G_BRCOND %3, %bb.1
...
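
[Note: the -invalid and .xfail tests pin down the failure mode when a transform separates @llvm.amdgcn.if from its G_BRCOND; only the opcode spelling changes here. A hypothetical check in the spirit of what the AMDGPU brcond lowering requires (helper name and exact conditions are illustrative, not the in-tree code):

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    using namespace llvm;

    // The s1 result of @llvm.amdgcn.if must feed a G_BRCOND in the same
    // block; otherwise legalization fails with the remarks above.
    static bool hasAdjacentBrcondUser(Register CondReg,
                                      const MachineInstr &Intrin,
                                      const MachineRegisterInfo &MRI) {
      if (!MRI.hasOneNonDBGUse(CondReg))
        return false;
      const MachineInstr &Use = *MRI.use_instr_nodbg_begin(CondReg);
      return Use.getOpcode() == TargetOpcode::G_BRCOND &&
             Use.getParent() == Intrin.getParent();
    }
]
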
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
index 9be5e14..137631a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
@@ -150,7 +150,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
G_BRCOND %3, %bb.1
bb.1:
@@ -189,7 +189,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2
G_BRCOND %3, %bb.1
bb.1:
@@ -244,7 +244,7 @@
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
- %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ %3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
G_BRCOND %3, %bb.2
G_BR %bb.1
@@ -303,7 +303,7 @@
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
- %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ %3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
G_BRCOND %3, %bb.1
G_BR %bb.2
@@ -360,7 +360,7 @@
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
- %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ %3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
G_BRCOND %3, %bb.1
bb.2:
@@ -405,7 +405,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s32) = COPY $vgpr2
G_BRCOND %3, %bb.1
@@ -466,7 +466,7 @@
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
- %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ %3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
S_NOP 0
S_NOP 0
G_BRCOND %3, %bb.2
@@ -521,7 +521,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
@@ -588,7 +588,7 @@
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
@@ -653,7 +653,7 @@
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
- %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ %3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%4:_(s1) = G_CONSTANT i1 true
%5:_(s1) = G_XOR %3, %4
G_BRCOND %5, %bb.1
@@ -711,7 +711,7 @@
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
- %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ %3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%4:_(s1) = G_CONSTANT i1 true
%5:_(s1) = G_XOR %3, %4
G_BRCOND %5, %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
index 533cb3a..f18a576 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
@@ -304,7 +304,7 @@
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:sgpr(s32) = COPY $sgpr2
@@ -313,7 +313,7 @@
%5:sgpr(s32) = G_CONSTANT i32 17
%6:sgpr(s32) = G_SMIN %4, %5
%8:vgpr(s32) = COPY %6(s32)
- %7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
+ %7:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
$sgpr0 = COPY %7(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
index cb8f10d..ef05a52 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
@@ -304,7 +304,7 @@
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:sgpr(s32) = COPY $sgpr2
@@ -313,7 +313,7 @@
%5:sgpr(s32) = G_CONSTANT i32 17
%6:sgpr(s32) = G_UMIN %4, %5
%8:vgpr(s32) = COPY %6(s32)
- %7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
+ %7:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
$sgpr0 = COPY %7(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
index 489f3cc..cac289b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
@@ -79,8 +79,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@@ -135,8 +135,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir
index d56f869..4ea70c9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir
@@ -15,11 +15,11 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %1
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1
S_ENDPGM 0, implicit %2
...
@@ -36,11 +36,11 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %1
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1
S_ENDPGM 0, implicit %2
...
@@ -57,11 +57,11 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
- %3:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %2
+ %3:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %2
S_ENDPGM 0, implicit %3
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir
index 0ea6281..4ca402c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir
@@ -13,9 +13,9 @@
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
%0:_(p3) = COPY $sgpr0
- %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
...
@@ -31,8 +31,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0
%0:_(p3) = COPY $vgpr0
- %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir
index 14f0343..294dc2e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir
@@ -17,9 +17,9 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir
index 9411579..3c19a36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir
@@ -13,9 +13,9 @@
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
%0:_(p3) = COPY $sgpr0
- %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
...
@@ -31,8 +31,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0
%0:_(p3) = COPY $vgpr0
- %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir
index 731e077..7c2de20 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir
@@ -16,10 +16,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
...
---
@@ -37,10 +37,10 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
...
---
@@ -56,10 +56,10 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
...
---
@@ -76,8 +76,8 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir
index 638841a..daa28184 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir
@@ -14,9 +14,9 @@
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](s32)
%0:_(s32) = COPY $sgpr0
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
...
---
@@ -32,8 +32,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $vgpr0
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
+ G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir
index 60d276d..89de7f5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir
@@ -17,9 +17,9 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), %0, %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir
index cbd9b4a..ee0c878 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir
@@ -15,8 +15,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
%0:_(s32) = COPY $sgpr0
- %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir
index d273cad..61278c1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir
@@ -15,8 +15,8 @@
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s32)
%0:_(s32) = COPY $sgpr0
- %1:_(s1), %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
+ %1:_(s1), %2:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.64.mir
index e20ac81..818d08c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.64.mir
@@ -12,8 +12,8 @@
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s64)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s64)
%0:_(s64) = COPY $sgpr0_sgpr1
- %1:_(s1), %2:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
+ %1:_(s1), %2:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
index 96757cd..0266baf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
@@ -16,10 +16,10 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
...
---
@@ -35,10 +35,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
...
---
@@ -54,10 +54,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
...
---
@@ -72,8 +72,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY1]](s32), 1
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY1]](s32), 1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
index d41bedc..c73975c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
@@ -16,10 +16,10 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
...
---
@@ -35,10 +35,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
...
---
@@ -54,10 +54,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
...
---
@@ -72,8 +72,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY1]](s32), 32
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY1]](s32), 32
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
- %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
+ %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
...
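The regbankselect updates above are mechanical: each call to a cross-lane AMDGPU intrinsic (llvm.amdgcn.else, llvm.amdgcn.fcmp, llvm.amdgcn.icmp) keeps its operands and register banks unchanged and only trades the old opcode for its _CONVERGENT flavour. The four intrinsic opcodes factor into two independent properties, side effects and convergence. A minimal C++ sketch of that mapping (an illustrative helper, not code from this patch):

    // Illustrative only: pick the generic opcode for an intrinsic call
    // from its two orthogonal properties.
    static unsigned selectIntrinsicOpcode(bool HasSideEffects,
                                          bool IsConvergent) {
      if (HasSideEffects)
        return IsConvergent
                   ? TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS
                   : TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
      return IsConvergent ? TargetOpcode::G_INTRINSIC_CONVERGENT
                          : TargetOpcode::G_INTRINSIC;
    }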
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
index 603d7e4..8c4ce1c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
@@ -138,8 +138,8 @@
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
- ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
- ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
+ ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -199,8 +199,8 @@
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
- ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
+ ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -268,8 +268,8 @@
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
- ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
- ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
+ ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -330,8 +330,8 @@
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
- ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
+ ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
index 8dab27b..9833cd1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
@@ -159,8 +159,8 @@
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
- ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
- ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
+ ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -224,8 +224,8 @@
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
- ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
+ ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -288,8 +288,8 @@
; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -345,8 +345,8 @@
; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -429,8 +429,8 @@
; FAST-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]]
; FAST-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]]
; FAST-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]]
- ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
- ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
+ ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -506,8 +506,8 @@
; GREEDY-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]]
; GREEDY-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]]
; GREEDY-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]]
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
- ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
+ ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir
index 1798151..27764ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir
@@ -16,7 +16,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
; GREEDY-LABEL: name: mfma_f32_32x32x4bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
@@ -24,12 +24,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -47,7 +47,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: mfma_f32_16x16x4bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
@@ -55,12 +55,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -78,7 +78,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: mfma_f32_4x4x4bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
@@ -86,12 +86,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -109,7 +109,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
; GREEDY-LABEL: name: mfma_f32_32x32x8bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
@@ -117,12 +117,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -140,7 +140,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: mfma_f32_16x16x16bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
@@ -148,12 +148,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -171,7 +171,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>)
; GREEDY-LABEL: name: mfma_f64_16x16x4f64_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
@@ -179,12 +179,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- %3:_(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), %0, %1, %2, 0, 0, 0
+ %3:_(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3
...
@@ -202,7 +202,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>)
; GREEDY-LABEL: name: mfma_f64_4x4x4f64_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1
@@ -210,11 +210,11 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<2 x s32>) = COPY $agpr0_agpr1
- %3:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), %0, %1, %2, 0, 0, 0
+ %3:_(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1 = COPY %3
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir
index a14b746..f20b28f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir
@@ -16,7 +16,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: mfma_i32_16x16x32_i8_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
@@ -24,12 +24,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -47,7 +47,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: mfma_i32_32x32x16_i8_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
@@ -55,12 +55,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -78,7 +78,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: mfma_f32_16x16x8_xf32_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
@@ -86,12 +86,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -109,7 +109,7 @@
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: mfma_f32_32x32x4_xf32_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
@@ -117,12 +117,12 @@
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -141,7 +141,7 @@
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: smfmac_f32_16x16x32_f16_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
@@ -150,13 +150,13 @@
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(s32) = COPY $vgpr20
- %4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), %0, %1, %2, %3, 0, 0
+ %4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
...
@@ -175,7 +175,7 @@
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: smfmac_f32_32x32x16_f16_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
@@ -184,13 +184,13 @@
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(s32) = COPY $vgpr20
- %4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), %0, %1, %2, %3, 0, 0
+ %4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
...
@@ -209,7 +209,7 @@
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: smfmac_f32_16x16x32_bf16_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
@@ -218,13 +218,13 @@
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(s32) = COPY $vgpr20
- %4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), %0, %1, %2, %3, 0, 0
+ %4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
...
@@ -243,7 +243,7 @@
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: smfmac_f32_32x32x16_bf16_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
@@ -252,13 +252,13 @@
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(s32) = COPY $vgpr20
- %4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), %0, %1, %2, %3, 0, 0
+ %4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
...
@@ -277,7 +277,7 @@
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: smfmac_i32_16x16x64_i8_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
@@ -286,13 +286,13 @@
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(s32) = COPY $vgpr20
- %4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), %0, %1, %2, %3, 0, 0
+ %4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
...
@@ -311,7 +311,7 @@
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
+ ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: smfmac_i32_32x32x32_i8_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
@@ -320,12 +320,12 @@
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
- ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
+ ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(s32) = COPY $vgpr20
- %4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), %0, %1, %2, %3, 0, 0
+ %4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
...
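Consumers of the resulting MIR do not need to switch over all four opcodes by hand: the GIntrinsic wrapper extended by this patch exposes both properties as queries. A hedged usage sketch (the sinking helper itself is hypothetical; only the GIntrinsic accessors come from the patch):

    #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
    using namespace llvm;

    // Conservatively refuse to move code across convergent or
    // side-effecting intrinsic calls.
    static bool isSafeToMoveAcross(const MachineInstr &MI) {
      if (const auto *Intr = dyn_cast<GIntrinsic>(&MI))
        return !Intr->hasSideEffects() && !Intr->isConvergent();
      return !MI.mayLoadOrStore() && !MI.isCall();
    }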
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir
index 2995fda7..87db617 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir
@@ -16,12 +16,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -42,12 +42,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -65,12 +65,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -91,12 +91,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -114,12 +114,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -140,12 +140,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -163,12 +163,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -189,12 +189,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -212,12 +212,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -238,12 +238,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -261,12 +261,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -287,12 +287,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -310,12 +310,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -336,12 +336,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -359,12 +359,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -385,12 +385,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -408,12 +408,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -434,12 +434,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -457,12 +457,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -483,12 +483,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -506,12 +506,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -532,12 +532,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -555,12 +555,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -581,12 +581,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -604,12 +604,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -630,12 +630,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -653,12 +653,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -679,12 +679,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -702,12 +702,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -728,12 +728,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -751,12 +751,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -777,12 +777,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
- %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@@ -800,12 +800,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -826,12 +826,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -849,12 +849,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -875,12 +875,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -898,12 +898,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -924,12 +924,12 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@@ -947,12 +947,12 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@@ -973,11 +973,11 @@
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
+ %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
index a0ebe9f..8c7bdb86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
@@ -81,8 +81,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -128,8 +128,8 @@
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -187,8 +187,8 @@
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll
index fc2a63c..19793f7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll
@@ -81,8 +81,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -128,8 +128,8 @@
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -187,8 +187,8 @@
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir
index fb355e4..a34981c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir
@@ -14,9 +14,9 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0
- %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
...
---
@@ -30,7 +30,7 @@
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
%0:_(s32) = COPY $vgpr0
- %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir
index 87886e0..71f8ded 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir
@@ -15,10 +15,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@@ -33,10 +33,10 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@@ -52,10 +52,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@@ -72,10 +72,10 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@@ -93,11 +93,11 @@
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s32)
%0:_(s32) = COPY $agpr0
%1:_(s32) = COPY $agpr1
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
S_ENDPGM 0, implicit %2
...
@@ -114,10 +114,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $agpr0
%1:_(s32) = COPY $sgpr0
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@@ -135,10 +135,10 @@
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $agpr0
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@@ -155,8 +155,8 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $agpr0
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
index 9ba410a..2c84b7c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
@@ -16,8 +16,8 @@
; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32))
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32)
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
- ; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+ ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret i32 %val
@@ -37,11 +37,11 @@
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4)
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>)
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
- ; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+ ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
- ; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
- ; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
+ ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <2 x i32> %val
@@ -61,14 +61,14 @@
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
- ; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+ ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
- ; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
- ; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
+ ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
- ; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
- ; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
+ ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
%val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <3 x i32> %val
@@ -88,29 +88,29 @@
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4)
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>)
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
- ; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+ ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
- ; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
- ; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
+ ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
- ; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
- ; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
+ ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32)
- ; GFX7-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
- ; GFX7-NEXT: $sgpr3 = COPY [[INT3]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
+ ; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32)
- ; GFX7-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
- ; GFX7-NEXT: $sgpr4 = COPY [[INT4]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
+ ; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32)
; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32)
- ; GFX7-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
- ; GFX7-NEXT: $sgpr5 = COPY [[INT5]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
+ ; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32)
; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32)
- ; GFX7-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
- ; GFX7-NEXT: $sgpr6 = COPY [[INT6]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
+ ; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32)
; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32)
- ; GFX7-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
- ; GFX7-NEXT: $sgpr7 = COPY [[INT7]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
+ ; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
%val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x i32> %val
@@ -130,53 +130,53 @@
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4)
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>)
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
- ; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+ ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
- ; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
- ; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
+ ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
- ; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
- ; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
+ ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32)
- ; GFX7-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
- ; GFX7-NEXT: $sgpr3 = COPY [[INT3]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
+ ; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32)
- ; GFX7-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
- ; GFX7-NEXT: $sgpr4 = COPY [[INT4]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
+ ; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32)
; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32)
- ; GFX7-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
- ; GFX7-NEXT: $sgpr5 = COPY [[INT5]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
+ ; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32)
; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32)
- ; GFX7-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
- ; GFX7-NEXT: $sgpr6 = COPY [[INT6]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
+ ; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32)
; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32)
- ; GFX7-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
- ; GFX7-NEXT: $sgpr7 = COPY [[INT7]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
+ ; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32)
; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32)
- ; GFX7-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32)
- ; GFX7-NEXT: $sgpr8 = COPY [[INT8]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32)
+ ; GFX7-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32)
; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32)
- ; GFX7-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32)
- ; GFX7-NEXT: $sgpr9 = COPY [[INT9]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32)
+ ; GFX7-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32)
; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32)
- ; GFX7-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32)
- ; GFX7-NEXT: $sgpr10 = COPY [[INT10]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32)
+ ; GFX7-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32)
; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32)
- ; GFX7-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32)
- ; GFX7-NEXT: $sgpr11 = COPY [[INT11]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32)
+ ; GFX7-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32)
; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32)
- ; GFX7-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32)
- ; GFX7-NEXT: $sgpr12 = COPY [[INT12]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32)
+ ; GFX7-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32)
; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32)
- ; GFX7-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32)
- ; GFX7-NEXT: $sgpr13 = COPY [[INT13]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32)
+ ; GFX7-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32)
; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32)
- ; GFX7-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32)
- ; GFX7-NEXT: $sgpr14 = COPY [[INT14]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32)
+ ; GFX7-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32)
; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32)
- ; GFX7-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32)
- ; GFX7-NEXT: $sgpr15 = COPY [[INT15]](s32)
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32)
+ ; GFX7-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
%val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <16 x i32> %val
@@ -887,8 +887,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -939,8 +939,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -993,8 +993,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1045,8 +1045,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1096,8 +1096,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1149,8 +1149,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1214,8 +1214,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1277,8 +1277,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1339,8 +1339,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1401,8 +1401,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1463,8 +1463,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@@ -1524,8 +1524,8 @@
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
index 06f8f3e..f664e62 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
@@ -79,8 +79,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -125,8 +125,8 @@
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -183,8 +183,8 @@
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
index 7aeb43a..0f72586 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
@@ -81,8 +81,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -127,8 +127,8 @@
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -185,8 +185,8 @@
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll
index 596a8ee..b835b3a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll
@@ -79,8 +79,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -125,8 +125,8 @@
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -183,8 +183,8 @@
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll
index 42d007e..0cefc37 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll
@@ -81,8 +81,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -127,8 +127,8 @@
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@@ -185,8 +185,8 @@
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir
index 064a0b9..b613688 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir
@@ -16,10 +16,10 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
...
@@ -37,10 +37,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
...
@@ -58,10 +58,10 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
...
@@ -78,9 +78,9 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
+ %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir
index f57af08..fb9c298 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir
@@ -17,11 +17,11 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %2
+ %3:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2
...
---
@@ -37,11 +37,11 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[ICMP]](s1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[ICMP]](s1)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
- %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %2
+ %3:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2
...
---
@@ -57,8 +57,8 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
- %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %1
+ %2:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir
index 5145c5c..a1b2e3c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir
@@ -16,11 +16,11 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
- %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+ %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
---
@@ -36,11 +36,11 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
- %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+ %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
---
@@ -57,11 +57,11 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = COPY $vgpr1
- %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+ %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
---
@@ -79,11 +79,11 @@
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
- %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+ %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
---
@@ -100,9 +100,9 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
- %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+ %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir
index c0e7feb..10ac58a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir
@@ -14,9 +14,9 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0
- %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), %0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0
...
---
@@ -30,7 +30,7 @@
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[COPY]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY]](s32)
%0:_(s32) = COPY $vgpr0
- %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), %0
+ %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir
index f0c8578..23383f2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir
@@ -33,8 +33,8 @@
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@@ -101,8 +101,8 @@
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; CHECK-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
- ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
index ea5706f..818e23b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
@@ -13,7 +13,7 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s
; GFX6ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.gws.sema.release.all
-; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store (s32) into custom "GWSResource") (in function: gws_sema_release_all_offset0)
+; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store (s32) into custom "GWSResource") (in function: gws_sema_release_all_offset0)
; GCN-LABEL: {{^}}gws_sema_release_all_offset0:
; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}
diff --git a/llvm/unittests/CodeGen/GlobalISel/MachineIRBuilderTest.cpp b/llvm/unittests/CodeGen/GlobalISel/MachineIRBuilderTest.cpp
index 3afcaa1..7b4a06f 100644
--- a/llvm/unittests/CodeGen/GlobalISel/MachineIRBuilderTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/MachineIRBuilderTest.cpp
@@ -165,14 +165,12 @@
collectCopies(Copies, MF);
// Make sure DstOp version works. sqrt is just a placeholder intrinsic.
- B.buildIntrinsic(Intrinsic::sqrt, {S64}, false)
- .addUse(Copies[0]);
+ B.buildIntrinsic(Intrinsic::sqrt, {S64}).addUse(Copies[0]);
// Make sure register version works
SmallVector<Register, 1> Results;
Results.push_back(MRI->createGenericVirtualRegister(S64));
- B.buildIntrinsic(Intrinsic::sqrt, Results, false)
- .addUse(Copies[1]);
+ B.buildIntrinsic(Intrinsic::sqrt, Results).addUse(Copies[1]);
auto CheckStr = R"(
; CHECK: [[COPY0:%[0-9]+]]:_(s64) = COPY $x0
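
A note on the unit-test change above: `buildIntrinsic` is now called without
the trailing `HasSideEffects` boolean, so the builder is presumably left to
choose among the four G_INTRINSIC* opcodes from the intrinsic's own
properties. A minimal sketch of the two-argument form the test exercises —
the surrounding function is illustrative context, not part of the patch:

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  #include "llvm/IR/Intrinsics.h"

  using namespace llvm;

  // Sketch only: build a call to a placeholder no-side-effect intrinsic,
  // as in the unit test above. With the updated API there is no
  // HasSideEffects argument; {S64} defines the single result register.
  static MachineInstrBuilder buildPlaceholderSqrt(MachineIRBuilder &B,
                                                  Register Src) {
    LLT S64 = LLT::scalar(64);
    return B.buildIntrinsic(Intrinsic::sqrt, {S64}).addUse(Src);
  }
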
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 3bdcfec..e229e64 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -498,6 +498,10 @@
return &Target.getInstruction(Equiv.getValueAsDef("IfFloatingPoint"));
}
+ if (!Equiv.isValueUnset("IfConvergent") &&
+ N->getIntrinsicInfo(CGP)->isConvergent)
+ return &Target.getInstruction(Equiv.getValueAsDef("IfConvergent"));
+
for (const TreePredicateCall &Call : N->getPredicateCalls()) {
const TreePredicateFn &Predicate = Call.Fn;
if (!Equiv.isValueUnset("IfSignExtend") &&
@@ -863,7 +867,10 @@
// Match the used operands (i.e. the children of the operator).
bool IsIntrinsic =
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||
- SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS";
+ SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS" ||
+ SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_CONVERGENT" ||
+ SrcGIOrNull->TheDef->getName() ==
+ "G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS";
const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP);
if (IsIntrinsic && !II)
return failedImport("Expected IntInit containing intrinsic ID)");
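
For reference, the net effect of the GINodeEquiv handling above is a
four-way opcode choice for imported intrinsic patterns. The helper below is
purely illustrative (it is a hypothetical function, not part of the patch or
of the emitter), but it states the mapping that the new IfConvergent lookup
implements:

  #include "llvm/CodeGen/TargetOpcodes.h"

  // Hypothetical helper, for illustration only: which generic opcode an
  // intrinsic maps to, given whether it has unmodeled side effects and
  // whether it is marked convergent.
  static unsigned pickIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
    using namespace llvm;
    if (HasSideEffects)
      return IsConvergent
                 ? TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS
                 : TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
    return IsConvergent ? TargetOpcode::G_INTRINSIC_CONVERGENT
                        : TargetOpcode::G_INTRINSIC;
  }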