// blob: 1beaaafb159e30ae96c83d9aae948b40d853ef2a [file] [log] [blame]
//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel AMX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AMX instructions
// AMX_TILE_COMMON - tile configuration, tile load and tile store instructions.
//   Suffix  - text appended to every record name defined here.
//   HasEGPR - predicate listed together with HasAMXTILE and In64BitMode.
// No VEX/EVEX class is attached inside the multiclass; the instantiating defm
// is expected to add it.
multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
  let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
    // Tile configuration load/store.  Both are marked hasSideEffects.
    let hasSideEffects = 1 in {
      // The configuration load additionally lists TMM0-TMM7 in Defs.
      let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
      def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
                               "ldtilecfg\t$src",
                               [(int_x86_ldtilecfg addr:$src)]>,
                             T8, PS;

      def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
                               "sttilecfg\t$src",
                               [(int_x86_sttilecfg addr:$src)]>,
                             T8, PD;
    }

    // Tile loads (no selection pattern).
    let mayLoad = 1 in {
      def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                               (ins sibmem:$src),
                               "tileloadd\t{$src, $dst|$dst, $src}", []>,
                             T8, XD;

      def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                                 (ins sibmem:$src),
                                 "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
                               T8, PD;
    }

    // Tile store (no selection pattern).
    let mayStore = 1 in {
      def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
                                (ins sibmem:$dst, TILE:$src),
                                "tilestored\t{$src, $dst|$dst, $src}", []>,
                              T8, XS;
    }
  } // Predicates
}
// Instantiations of AMX_TILE_COMMON plus the remaining AMX-TILE instructions
// and pseudos.  Every record in this scope uses SchedRW = [WriteSystem].
let SchedRW = [WriteSystem] in {
  // Unsuffixed records get NoEGPR + VEX; "_EVEX" records get HasEGPR + EVEX.
  defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
  defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;

  let Predicates = [HasAMXTILE, In64BitMode] in {
    // tilerelease lists TMM0-TMM7 in Defs.
    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in {
      def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
                          "tilerelease", [(int_x86_tilerelease)]>,
                        VEX, T8, PS;
    }

    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
                     "tilezero\t$dst", []>,
                   VEX, T8, XD;

    // Pseudo instructions for RA.
    let isPseudo = true in {
      let mayLoad = 1, hasSideEffects = 1,
          Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
      def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;

      let mayLoad = 1 in {
        def PTILELOADDV : PseudoI<(outs TILE:$dst),
                                  (ins GR16:$src1, GR16:$src2,
                                       opaquemem:$src3),
                                  []>;

        def PTILELOADDT1V : PseudoI<(outs TILE:$dst),
                                    (ins GR16:$src1, GR16:$src2,
                                         opaquemem:$src3),
                                    []>;
      }

      let mayStore = 1 in {
        def PTILESTOREDV : PseudoI<(outs),
                                   (ins GR16:$src1, GR16:$src2,
                                        opaquemem:$src3, TILE:$src4),
                                   []>;
      }

      // Matches int_x86_tilezero_internal; flagged rematerializable, as cheap
      // as a move, foldable as a load, and expanded by a custom inserter.
      let isReMaterializable = 1, isAsCheapAsAMove = 1,
          canFoldAsLoad = 1, usesCustomInserter = 1 in {
        def PTILEZEROV : PseudoI<(outs TILE:$dst),
                                 (ins GR16:$src1, GR16:$src2),
                                 [(set TILE:$dst,
                                    (int_x86_tilezero_internal GR16:$src1,
                                      GR16:$src2))]>;
      }
    }

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      let mayLoad = 1 in {
        def PTILELOADD : PseudoI<(outs),
                                 (ins u8imm:$src1, sibmem:$src2), []>;

        def PTILELOADDT1 : PseudoI<(outs),
                                   (ins u8imm:$src1, sibmem:$src2), []>;
      }

      let mayStore = 1 in {
        def PTILESTORED : PseudoI<(outs),
                                  (ins i8mem:$dst, u8imm:$src), []>;
      }

      def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
                              [(int_x86_tilezero timm:$src)]>;
    }
  } // Predicates
} // SchedRW
// AMX-INT8: TDPBSSD / TDPBSUD / TDPBUSD / TDPBUUD and their pseudos.
let Predicates = [HasAMXINT8, In64BitMode], SchedRW = [WriteSystem] in {
  // Real instructions; $src1 is tied to $dst.  They share opcode 0x5e and
  // differ in the prefix class (XD, XS, PD, none).
  let Constraints = "$src1 = $dst" in {
    def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                    (ins TILE:$src1, TILE:$src2, TILE:$src3),
                    "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                  VEX, VVVV, T8, XD;

    def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                    (ins TILE:$src1, TILE:$src2, TILE:$src3),
                    "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                  VEX, VVVV, T8, XS;

    def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                    (ins TILE:$src1, TILE:$src2, TILE:$src3),
                    "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                  VEX, VVVV, T8, PD;

    def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                    (ins TILE:$src1, TILE:$src2, TILE:$src3),
                    "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                  VEX, VVVV, T8;
  }

  // Pseudo instructions for RA.  $src4 is tied to $dst; each pattern matches
  // the corresponding *_internal intrinsic.
  let isPseudo = true, Constraints = "$src4 = $dst" in {
    def PTDPBSSDV : PseudoI<(outs TILE:$dst),
                            (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                 TILE:$src4, TILE:$src5, TILE:$src6),
                            [(set TILE:$dst,
                               (int_x86_tdpbssd_internal GR16:$src1,
                                 GR16:$src2, GR16:$src3, TILE:$src4,
                                 TILE:$src5, TILE:$src6))]>;

    def PTDPBSUDV : PseudoI<(outs TILE:$dst),
                            (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                 TILE:$src4, TILE:$src5, TILE:$src6),
                            [(set TILE:$dst,
                               (int_x86_tdpbsud_internal GR16:$src1,
                                 GR16:$src2, GR16:$src3, TILE:$src4,
                                 TILE:$src5, TILE:$src6))]>;

    def PTDPBUSDV : PseudoI<(outs TILE:$dst),
                            (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                 TILE:$src4, TILE:$src5, TILE:$src6),
                            [(set TILE:$dst,
                               (int_x86_tdpbusd_internal GR16:$src1,
                                 GR16:$src2, GR16:$src3, TILE:$src4,
                                 TILE:$src5, TILE:$src6))]>;

    def PTDPBUUDV : PseudoI<(outs TILE:$dst),
                            (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                 TILE:$src4, TILE:$src5, TILE:$src6),
                            [(set TILE:$dst,
                               (int_x86_tdpbuud_internal GR16:$src1,
                                 GR16:$src2, GR16:$src3, TILE:$src4,
                                 TILE:$src5, TILE:$src6))]>;
  }

  let usesCustomInserter = 1 in {
    // Pseudo instructions, using immediates instead of tile registers.
    // To be translated to the actual instructions in X86ISelLowering.cpp
    def PTDPBSSD : PseudoI<(outs),
                           (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                           [(int_x86_tdpbssd timm:$src1, timm:$src2,
                              timm:$src3)]>;

    def PTDPBSUD : PseudoI<(outs),
                           (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                           [(int_x86_tdpbsud timm:$src1, timm:$src2,
                              timm:$src3)]>;

    def PTDPBUSD : PseudoI<(outs),
                           (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                           [(int_x86_tdpbusd timm:$src1, timm:$src2,
                              timm:$src3)]>;

    def PTDPBUUD : PseudoI<(outs),
                           (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                           [(int_x86_tdpbuud timm:$src1, timm:$src2,
                              timm:$src3)]>;
  }
} // HasAMXINT8, In64BitMode; SchedRW = [WriteSystem]
// AMX-BF16: TDPBF16PS and its pseudos.
let Predicates = [HasAMXBF16, In64BitMode], SchedRW = [WriteSystem] in {
  // Real instruction; $src1 is tied to $dst.
  let Constraints = "$src1 = $dst" in {
    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      []>,
                    VEX, VVVV, T8, XS;
  }

  // Pseudo instruction for RA.  $src4 is tied to $dst; the pattern matches
  // int_x86_tdpbf16ps_internal.
  let isPseudo = true, Constraints = "$src4 = $dst" in {
    def PTDPBF16PSV : PseudoI<(outs TILE:$dst),
                              (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                   TILE:$src4, TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                 (int_x86_tdpbf16ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;
  }

  let usesCustomInserter = 1 in {
    // Pseudo instructions, using immediates instead of tile registers.
    // To be translated to the actual instructions in X86ISelLowering.cpp
    def PTDPBF16PS : PseudoI<(outs),
                             (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbf16ps timm:$src1, timm:$src2,
                                timm:$src3)]>;
  }
} // HasAMXBF16, In64BitMode; SchedRW = [WriteSystem]
// AMX-FP16: TDPFP16PS and its pseudos.
let Predicates = [HasAMXFP16, In64BitMode], SchedRW = [WriteSystem] in {
  // Real instruction; $src1 is tied to $dst (the asm string prints $src1).
  let Constraints = "$src1 = $dst" in
  def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                    (ins TILE:$src1, TILE:$src2, TILE:$src3),
                    "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                    []>,
                  VEX, VVVV, T8, XD;

  // Pseudo instruction for RA.  $src4 is tied to $dst; the pattern matches
  // int_x86_tdpfp16ps_internal.
  let isPseudo = true, Constraints = "$src4 = $dst" in
  def PTDPFP16PSV : PseudoI<(outs TILE:$dst),
                            (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                 TILE:$src4, TILE:$src5, TILE:$src6),
                            [(set TILE:$dst,
                               (int_x86_tdpfp16ps_internal GR16:$src1,
                                 GR16:$src2, GR16:$src3, TILE:$src4,
                                 TILE:$src5, TILE:$src6))]>;

  // Immediate-operand pseudo matching int_x86_tdpfp16ps; expanded through a
  // custom inserter.
  let usesCustomInserter = 1 in
  def PTDPFP16PS : PseudoI<(outs),
                           (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                           [(int_x86_tdpfp16ps timm:$src1, timm:$src2,
                              timm:$src3)]>;
} // HasAMXFP16, In64BitMode; SchedRW = [WriteSystem]
// AMX-COMPLEX: TCMMIMFP16PS / TCMMRLFP16PS and their pseudos.
let Predicates = [HasAMXCOMPLEX, In64BitMode], SchedRW = [WriteSystem] in {
  // Real instructions; $src1 is tied to $dst (the asm strings print $src1).
  let Constraints = "$src1 = $dst" in {
    def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
                         "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                         []>,
                       T8, PD, VEX, VVVV;

    def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
                         "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                         []>,
                       VEX, VVVV, WIG, T8;
  } // Constraints = "$src1 = $dst"

  // Register-operand pseudos; $src4 is tied to $dst and each pattern matches
  // the corresponding *_internal intrinsic.
  let Constraints = "$src4 = $dst" in {
    def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst),
                                 (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                      TILE:$src4, TILE:$src5, TILE:$src6),
                                 [(set TILE:$dst,
                                    (int_x86_tcmmimfp16ps_internal GR16:$src1,
                                      GR16:$src2, GR16:$src3, TILE:$src4,
                                      TILE:$src5, TILE:$src6))]>;

    def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst),
                                 (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                      TILE:$src4, TILE:$src5, TILE:$src6),
                                 [(set TILE:$dst,
                                    (int_x86_tcmmrlfp16ps_internal GR16:$src1,
                                      GR16:$src2, GR16:$src3, TILE:$src4,
                                      TILE:$src5, TILE:$src6))]>;
  } // Constraints = "$src4 = $dst"

  // Immediate-operand pseudos matching the public intrinsics; expanded
  // through a custom inserter.
  let usesCustomInserter = 1 in {
    def PTCMMIMFP16PS : PseudoI<(outs),
                                (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                [(int_x86_tcmmimfp16ps timm:$src1, timm:$src2,
                                   timm:$src3)]>;

    def PTCMMRLFP16PS : PseudoI<(outs),
                                (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                [(int_x86_tcmmrlfp16ps timm:$src1, timm:$src2,
                                   timm:$src3)]>;
  }
} // HasAMXCOMPLEX, In64BitMode; SchedRW = [WriteSystem]
// AMX-FP8
// TDPBF8PS / TDPBHF8PS / TDPHBF8PS / TDPHF8PS and their pseudos.  Every record
// below is predicated on HasAMXFP8 and In64BitMode and uses WriteSystem.
let Predicates = [HasAMXFP8, In64BitMode] in {
let SchedRW = [WriteSystem] in {
// Shared shape of the four real instructions: three TILE inputs, one TILE
// output, VEX + VVVV.  The opcode map and prefix are added by each def.
// NOTE(review): the class is declared inside the enclosing let scopes, and
// the defs that derive from it sit outside the Constraints scope -- confirm
// that the "$src1 = $dst" tie reaches them through the class.
let Constraints = "$src1 = $dst" in {
class AMX_FP8_BASE<bits<8> Opcode, string Opstr> :
I<Opcode, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
!strconcat(Opstr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, VEX, VVVV;
}
// Real instructions: same opcode (0xfd) and map (T_MAP5), distinguished only
// by the prefix class.
def TDPBF8PS : AMX_FP8_BASE<0xfd, "tdpbf8ps">, T_MAP5, PS;
def TDPBHF8PS : AMX_FP8_BASE<0xfd, "tdpbhf8ps">, T_MAP5, XD;
def TDPHBF8PS : AMX_FP8_BASE<0xfd, "tdphbf8ps">, T_MAP5, XS;
def TDPHF8PS : AMX_FP8_BASE<0xfd, "tdphf8ps">, T_MAP5, PD;
let usesCustomInserter = 1 in {
// Pseudo instructions, using immediates instead of tile registers.
// To be translated to the actual instructions in X86ISelLowering.cpp
def PTDPBF8PS : PseudoI<(outs),
(ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
[(int_x86_tdpbf8ps timm:$src1, timm:$src2,
timm:$src3)]>;
def PTDPBHF8PS : PseudoI<(outs),
(ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
[(int_x86_tdpbhf8ps timm:$src1, timm:$src2,
timm:$src3)]>;
def PTDPHBF8PS : PseudoI<(outs),
(ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
[(int_x86_tdphbf8ps timm:$src1, timm:$src2,
timm:$src3)]>;
def PTDPHF8PS : PseudoI<(outs),
(ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
[(int_x86_tdphf8ps timm:$src1, timm:$src2,
timm:$src3)]>;
}
// Register-operand pseudos: $src4 is tied to $dst and each pattern matches the
// corresponding *_internal intrinsic.
let Constraints = "$src4 = $dst" in {
def PTDPBF8PSV : PseudoI<(outs TILE:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3,
TILE:$src4, TILE:$src5, TILE:$src6),
[(set TILE:$dst,
(int_x86_tdpbf8ps_internal GR16:$src1,
GR16:$src2, GR16:$src3, TILE:$src4,
TILE:$src5, TILE:$src6))]>;
def PTDPBHF8PSV : PseudoI<(outs TILE:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3,
TILE:$src4, TILE:$src5, TILE:$src6),
[(set TILE:$dst,
(int_x86_tdpbhf8ps_internal GR16:$src1,
GR16:$src2, GR16:$src3, TILE:$src4,
TILE:$src5, TILE:$src6))]>;
def PTDPHBF8PSV : PseudoI<(outs TILE:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3,
TILE:$src4, TILE:$src5, TILE:$src6),
[(set TILE:$dst,
(int_x86_tdphbf8ps_internal GR16:$src1,
GR16:$src2, GR16:$src3, TILE:$src4,
TILE:$src5, TILE:$src6))]>;
def PTDPHF8PSV : PseudoI<(outs TILE:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3,
TILE:$src4, TILE:$src5, TILE:$src6),
[(set TILE:$dst,
(int_x86_tdphf8ps_internal GR16:$src1,
GR16:$src2, GR16:$src3, TILE:$src4,
TILE:$src5, TILE:$src6))]>;
}
}
}
// Pattern-less pseudos that store / load a TILEPair operand through an
// opaquemem operand.
let Predicates = [HasAMXTILE, In64BitMode],
    isPseudo = true,
    SchedRW = [WriteSystem] in {
  let mayStore = 1 in {
    def PTILEPAIRSTORE : PseudoI<(outs),
                                 (ins opaquemem:$src1, TILEPair:$src2),
                                 []>;
  }

  let mayLoad = 1 in {
    def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst),
                                (ins opaquemem:$src),
                                []>;
  }
}
// T2RPNTLVW_Base - the four t2rpntlvw{z0,z1}[rs][t1] tile-pair instructions.
//   op1    - opcode of the Z0 / Z1 forms.
//   op2    - opcode of the Z0...T1 / Z1...T1 forms.
//   rs     - text inserted into the record name; its lower-cased form is
//            inserted into the mnemonic.
//   suffix - text appended to the record name.
// Z0 forms carry the PS prefix class and Z1 forms carry PD; the opcode map and
// the VEX/EVEX class are left to the instantiating defm.
multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
  def Z0#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst),
                       (ins sibmem:$src),
                       !strconcat("t2rpntlvwz0", !tolower(rs),
                                  "\t{$src, $dst|$dst, $src}"),
                       []>, PS;

  def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst),
                          (ins sibmem:$src),
                          !strconcat("t2rpntlvwz0", !tolower(rs),
                                     "t1\t{$src, $dst|$dst, $src}"),
                          []>, PS;

  def Z1#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst),
                       (ins sibmem:$src),
                       !strconcat("t2rpntlvwz1", !tolower(rs),
                                  "\t{$src, $dst|$dst, $src}"),
                       []>, PD;

  def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst),
                          (ins sibmem:$src),
                          !strconcat("t2rpntlvwz1", !tolower(rs),
                                     "t1\t{$src, $dst|$dst, $src}"),
                          []>, PD;
}
// Instantiations of T2RPNTLVW_Base.  The "RS" variants additionally require
// HasAMXMOVRS and use T_MAP5 with opcodes 0xf8/0xf9; the "_EVEX" variants
// additionally require HasEGPR and are tagged EVEX, NoCD8.
let SchedRW = [WriteSystem] in {
  let Predicates = [HasAMXTRANSPOSE, In64BitMode] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;

  let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;

  let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;

  let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">,
                   T_MAP5, EVEX, NoCD8;
}
// AMX-TRANSPOSE: TTRANSPOSED plus the pseudos for it and for the T2RPNTLVW
// tile-pair loads.
let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
  def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
                      "ttransposed\t{$src, $dst|$dst, $src}", []>,
                    VEX, T8, XS;

  // Pattern-less register-operand pseudos producing a TILEPair.
  let isPseudo = true in {
    def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                     opaquemem:$src4),
                                []>;

    def PT2RPNTLVWZ0T1V : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       opaquemem:$src4),
                                  []>;

    def PT2RPNTLVWZ1V : PseudoI<(outs TILEPair:$dst),
                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                     opaquemem:$src4),
                                []>;

    def PT2RPNTLVWZ1T1V : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       opaquemem:$src4),
                                  []>;
  }

  // Register-operand pseudo matching int_x86_ttransposed_internal.
  def PTTRANSPOSEDV : PseudoI<(outs TILE:$dst),
                              (ins GR16:$src1, GR16:$src2, TILE:$src),
                              [(set TILE:$dst,
                                 (int_x86_ttransposed_internal GR16:$src1,
                                   GR16:$src2, TILE:$src))]>;

  // Immediate-operand pseudos; expanded through a custom inserter.
  let usesCustomInserter = 1 in {
    def PT2RPNTLVWZ0 : PseudoI<(outs),
                               (ins u8imm:$dst, sibmem:$src1), []>;

    def PT2RPNTLVWZ0T1 : PseudoI<(outs),
                                 (ins u8imm:$dst, sibmem:$src1), []>;

    def PT2RPNTLVWZ1 : PseudoI<(outs),
                               (ins u8imm:$dst, sibmem:$src1), []>;

    def PT2RPNTLVWZ1T1 : PseudoI<(outs),
                                 (ins u8imm:$dst, sibmem:$src1), []>;

    def PTTRANSPOSED : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
                               [(int_x86_ttransposed timm:$dst, timm:$src)]>;
  }
} // HasAMXTRANSPOSE, In64BitMode; SchedRW = [WriteSystem]
// AMX-BF16 + AMX-TRANSPOSE: TTDPBF16PS and its pseudos.
let Predicates = [HasAMXBF16, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  // Real instruction; $src1 is tied to $dst.
  let Constraints = "$src1 = $dst" in {
    def TTDPBF16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
                       "ttdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       []>,
                     VEX, VVVV, T8,XS;
  }

  // Register-operand pseudo; $src4 is tied to $dst and the pattern matches
  // int_x86_ttdpbf16ps_internal.
  let Constraints = "$src4 = $dst" in {
    def PTTDPBF16PSV : PseudoI<(outs TILE:$dst),
                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                    TILE:$src4, TILE:$src5, TILE:$src6),
                               [(set TILE:$dst,
                                  (int_x86_ttdpbf16ps_internal GR16:$src1,
                                    GR16:$src2, GR16:$src3, TILE:$src4,
                                    TILE:$src5, TILE:$src6))]>;
  }

  // Immediate-operand pseudo matching int_x86_ttdpbf16ps; expanded through a
  // custom inserter.
  let usesCustomInserter = 1 in {
    def PTTDPBF16PS : PseudoI<(outs),
                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                              [(int_x86_ttdpbf16ps timm:$src1, timm:$src2,
                                 timm:$src3)]>;
  }
}
// AMX-FP16 + AMX-TRANSPOSE: TTDPFP16PS and its pseudos.
let Predicates = [HasAMXFP16, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  // Real instruction; $src1 is tied to $dst.
  let Constraints = "$src1 = $dst" in {
    def TTDPFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
                       "ttdpfp16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       []>,
                     VEX, VVVV, T8,XD;
  }

  // Register-operand pseudo; $src4 is tied to $dst and the pattern matches
  // int_x86_ttdpfp16ps_internal.
  let Constraints = "$src4 = $dst" in {
    def PTTDPFP16PSV : PseudoI<(outs TILE:$dst),
                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                    TILE:$src4, TILE:$src5, TILE:$src6),
                               [(set TILE:$dst,
                                  (int_x86_ttdpfp16ps_internal GR16:$src1,
                                    GR16:$src2, GR16:$src3, TILE:$src4,
                                    TILE:$src5, TILE:$src6))]>;
  }

  // Immediate-operand pseudo matching int_x86_ttdpfp16ps; expanded through a
  // custom inserter.
  let usesCustomInserter = 1 in {
    def PTTDPFP16PS : PseudoI<(outs),
                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                              [(int_x86_ttdpfp16ps timm:$src1, timm:$src2,
                                 timm:$src3)]>;
  }
}
// AMX-COMPLEX + AMX-TRANSPOSE: TTCMMIMFP16PS, TTCMMRLFP16PS,
// TCONJTCMMIMFP16PS, TCONJTFP16 and their pseudos.
let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  // Three-source real instructions; $src1 is tied to $dst (the asm strings
  // print $src1).  All use opcode 0x6b and differ in the prefix class.
  let Constraints = "$src1 = $dst" in {
    def TTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
                          "ttcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                          []>,
                        VEX, VVVV, T8,XD;

    def TTCMMRLFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
                          "ttcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                          []>,
                        VEX, VVVV, T8,XS;

    def TCONJTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                              (ins TILE:$src1, TILE:$src2, TILE:$src3),
                              "tconjtcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                              []>,
                            VEX, VVVV, WIG, T8,PS;
  }

  // Single-source real instruction (no tied operand).
  def TCONJTFP16 : I<0x6b, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
                     "tconjtfp16\t{$src, $dst|$dst, $src}", []>,
                   VEX, T8,PD;

  // Register-operand pseudos; $src4 is tied to $dst and each pattern matches
  // the corresponding *_internal intrinsic.
  let Constraints = "$src4 = $dst" in {
    def PTTCMMIMFP16PSV : PseudoI<(outs TILE:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       TILE:$src4, TILE:$src5, TILE:$src6),
                                  [(set TILE:$dst,
                                     (int_x86_ttcmmimfp16ps_internal
                                       GR16:$src1, GR16:$src2, GR16:$src3,
                                       TILE:$src4, TILE:$src5, TILE:$src6))]>;

    def PTTCMMRLFP16PSV : PseudoI<(outs TILE:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       TILE:$src4, TILE:$src5, TILE:$src6),
                                  [(set TILE:$dst,
                                     (int_x86_ttcmmrlfp16ps_internal
                                       GR16:$src1, GR16:$src2, GR16:$src3,
                                       TILE:$src4, TILE:$src5, TILE:$src6))]>;

    def PTCONJTCMMIMFP16PSV : PseudoI<(outs TILE:$dst),
                                      (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                           TILE:$src4, TILE:$src5, TILE:$src6),
                                      [(set TILE:$dst,
                                         (int_x86_tconjtcmmimfp16ps_internal
                                           GR16:$src1, GR16:$src2, GR16:$src3,
                                           TILE:$src4, TILE:$src5,
                                           TILE:$src6))]>;
  }

  // Register-operand pseudo matching int_x86_tconjtfp16_internal.
  def PTCONJTFP16V : PseudoI<(outs TILE:$dst),
                             (ins GR16:$src1, GR16:$src2, TILE:$src3),
                             [(set TILE:$dst,
                                (int_x86_tconjtfp16_internal GR16:$src1,
                                  GR16:$src2, TILE:$src3))]>;

  // Immediate-operand pseudos matching the public intrinsics; expanded
  // through a custom inserter.
  let usesCustomInserter = 1 in {
    def PTTCMMIMFP16PS : PseudoI<(outs),
                                 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                 [(int_x86_ttcmmimfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;

    def PTTCMMRLFP16PS : PseudoI<(outs),
                                 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                 [(int_x86_ttcmmrlfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;

    def PTCONJTCMMIMFP16PS : PseudoI<(outs),
                                     (ins u8imm:$src1, u8imm:$src2,
                                          u8imm:$src3),
                                     [(int_x86_tconjtcmmimfp16ps timm:$src1,
                                        timm:$src2, timm:$src3)]>;

    def PTCONJTFP16 : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
                              [(int_x86_tconjtfp16 timm:$dst, timm:$src)]>;
  }
}
// AMX-MOVRS + AMX-TRANSPOSE: pattern-less pseudos for the "RS" T2RPNTLVW
// tile-pair loads.
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  // Register-operand pseudos producing a TILEPair.
  let isPseudo = true in {
    def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       opaquemem:$src4),
                                  []>;

    def PT2RPNTLVWZ0RST1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                         opaquemem:$src4),
                                    []>;

    def PT2RPNTLVWZ1RSV : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       opaquemem:$src4),
                                  []>;

    def PT2RPNTLVWZ1RST1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                         opaquemem:$src4),
                                    []>;
  }

  // Immediate-operand pseudos; expanded through a custom inserter.
  let usesCustomInserter = 1 in {
    def PT2RPNTLVWZ0RS : PseudoI<(outs),
                                 (ins u8imm:$dst, sibmem:$src1), []>;

    def PT2RPNTLVWZ0RST1 : PseudoI<(outs),
                                   (ins u8imm:$dst, sibmem:$src1), []>;

    def PT2RPNTLVWZ1RS : PseudoI<(outs),
                                 (ins u8imm:$dst, sibmem:$src1), []>;

    def PT2RPNTLVWZ1RST1 : PseudoI<(outs),
                                   (ins u8imm:$dst, sibmem:$src1), []>;
  }
} // HasAMXMOVRS, HasAMXTRANSPOSE
// TILELOADDRS_Base - the tileloaddrs / tileloaddrst1 instruction pair.
//   suffix - text used as (or appended to) the record name inside the defm.
// Both records use opcode 0x4a in map T8; they differ in the prefix class
// (XD vs PD).  The VEX/EVEX class is left to the instantiating defm.
multiclass TILELOADDRS_Base<string suffix> {
  def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
                 (ins sibmem:$src1),
                 "tileloaddrs\t{$src1, $dst|$dst, $src1}",
                 []>,
               T8, XD;

  def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
                    (ins sibmem:$src1),
                    "tileloaddrst1\t{$src1, $dst|$dst, $src1}",
                    []>,
                  T8, PD;
}
// Instantiations of TILELOADDRS_Base: unsuffixed records are tagged VEX;
// "_EVEX" records additionally require HasEGPR and are tagged EVEX, NoCD8.
let SchedRW = [WriteSystem] in {
  let Predicates = [HasAMXMOVRS, In64BitMode] in
  defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;

  let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode] in
  defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;
}
// AMX-MOVRS: pattern-less pseudos for the tileloaddrs forms.  All of them are
// marked mayLoad.
let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
  let mayLoad = 1 in {
    // Register-operand pseudos producing a TILE.
    let isPseudo = true in {
      def PTILELOADDRSV : PseudoI<(outs TILE:$dst),
                                  (ins GR16:$src1, GR16:$src2,
                                       opaquemem:$src3),
                                  []>;

      def PTILELOADDRST1V : PseudoI<(outs TILE:$dst),
                                    (ins GR16:$src1, GR16:$src2,
                                         opaquemem:$src3),
                                    []>;
    }

    // Immediate-operand pseudos; expanded through a custom inserter.
    let usesCustomInserter = 1 in {
      def PTILELOADDRS : PseudoI<(outs),
                                 (ins u8imm:$src1, sibmem:$src2), []>;

      def PTILELOADDRST1 : PseudoI<(outs),
                                   (ins u8imm:$src1, sibmem:$src2), []>;
    }
  }
} // HasAMXMOVRS, In64BitMode
// m_tcvtrowd2ps - the two tcvtrowd2ps encodings, both producing VR512 from a
// TILE:
//   rri - second source is an 8-bit immediate (Ii8, opcode 0x7, map TA).
//   rre - second source is a GR32 (opcode 0x4A, map T8, VVVV).
multiclass m_tcvtrowd2ps {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode],
      SchedRW = [WriteSystem] in {
    def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
                  (ins TILE:$src1, i32u8imm:$src2),
                  "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>,
              TA,XS, EVEX, EVEX_V512;

    def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
                (ins TILE:$src1, GR32:$src2),
                "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                []>,
              T8,XS, EVEX, VVVV, EVEX_V512;
  } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
}

defm TCVTROWD2PS : m_tcvtrowd2ps;
// AMX-AVX512 pseudos: immediate-operand forms of tcvtrowd2ps, followed by the
// register-operand ("V") forms of tcvtrowd2ps, tcvtrowps2bf16{h,l} and
// tcvtrowps2ph{h,l}.  "rri" forms take an immediate last operand, "rre" forms
// take a GR32.
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode],
    SchedRW = [WriteSystem] in {
  // Immediate-operand pseudos matching int_x86_tcvtrowd2ps; expanded through
  // a custom inserter.
  let usesCustomInserter = 1 in {
    def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst),
                                  (ins u8imm:$src1, i32u8imm:$src2),
                                  [(set VR512:$dst,
                                     (int_x86_tcvtrowd2ps timm:$src1,
                                       imm:$src2))]>;

    def PTCVTROWD2PSrre : PseudoI<(outs VR512:$dst),
                                  (ins u8imm:$src1, GR32:$src2),
                                  [(set VR512:$dst,
                                     (int_x86_tcvtrowd2ps timm:$src1,
                                       GR32:$src2))]>;
  }

  // Register-operand pseudos; each pattern matches the corresponding
  // *_internal intrinsic.
  def PTCVTROWD2PSrriV : PseudoI<(outs VR512:$dst),
                                 (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                      i32u8imm:$src4),
                                 [(set VR512:$dst,
                                    (int_x86_tcvtrowd2ps_internal GR16:$src1,
                                      GR16:$src2, TILE:$src3, imm:$src4))]>;

  def PTCVTROWD2PSrreV : PseudoI<(outs VR512:$dst),
                                 (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                      GR32:$src4),
                                 [(set VR512:$dst,
                                    (int_x86_tcvtrowd2ps_internal GR16:$src1,
                                      GR16:$src2, TILE:$src3, GR32:$src4))]>;

  def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                          i32u8imm:$src4),
                                     [(set VR512:$dst,
                                        (int_x86_tcvtrowps2bf16h_internal
                                          GR16:$src1, GR16:$src2, TILE:$src3,
                                          imm:$src4))]>;

  def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                          GR32:$src4),
                                     [(set VR512:$dst,
                                        (int_x86_tcvtrowps2bf16h_internal
                                          GR16:$src1, GR16:$src2, TILE:$src3,
                                          GR32:$src4))]>;

  def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                          i32u8imm:$src4),
                                     [(set VR512:$dst,
                                        (int_x86_tcvtrowps2bf16l_internal
                                          GR16:$src1, GR16:$src2, TILE:$src3,
                                          imm:$src4))]>;

  def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                          GR32:$src4),
                                     [(set VR512:$dst,
                                        (int_x86_tcvtrowps2bf16l_internal
                                          GR16:$src1, GR16:$src2, TILE:$src3,
                                          GR32:$src4))]>;

  def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
                                   (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                        i32u8imm:$src4),
                                   [(set VR512:$dst,
                                      (int_x86_tcvtrowps2phh_internal
                                        GR16:$src1, GR16:$src2, TILE:$src3,
                                        imm:$src4))]>;

  def PTCVTROWPS2PHHrreV : PseudoI<(outs VR512:$dst),
                                   (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                        GR32:$src4),
                                   [(set VR512:$dst,
                                      (int_x86_tcvtrowps2phh_internal
                                        GR16:$src1, GR16:$src2, TILE:$src3,
                                        GR32:$src4))]>;

  def PTCVTROWPS2PHLrriV : PseudoI<(outs VR512:$dst),
                                   (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                        i32u8imm:$src4),
                                   [(set VR512:$dst,
                                      (int_x86_tcvtrowps2phl_internal
                                        GR16:$src1, GR16:$src2, TILE:$src3,
                                        imm:$src4))]>;

  def PTCVTROWPS2PHLrreV : PseudoI<(outs VR512:$dst),
                                   (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                        GR32:$src4),
                                   [(set VR512:$dst,
                                      (int_x86_tcvtrowps2phl_internal
                                        GR16:$src1, GR16:$src2, TILE:$src3,
                                        GR32:$src4))]>;
}
// AMXAVX512_BASE - real instructions and immediate-operand pseudos for one
// tile-row conversion mnemonic.
//   Opcode1 / P1 - opcode and OpPrefix of the "rre" form (GR32 second source).
//   Opcode2 / P2 - opcode and OpPrefix of the "rri" form (imm8 second source).
//   Opstr        - mnemonic; also used to look up the "int_x86_<Opstr>"
//                  intrinsic for the pseudo patterns.
// The pseudos are named "P" # NAME # "rre" / "rri" and use a custom inserter.
multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,
                          Prefix P1, Prefix P2> {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode],
      SchedRW = [WriteSystem] in {
    let OpPrefix = P1 in {
      def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst),
                  (ins TILE:$src1, GR32:$src2),
                  Opstr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>,
                EVEX, VVVV, EVEX_V512, T8;
    }

    let OpPrefix = P2 in {
      def rri : Ii8<Opcode2, MRMSrcReg, (outs VR512:$dst),
                    (ins TILE:$src1, i32u8imm:$src2),
                    Opstr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>,
                EVEX, EVEX_V512, TA;
    }

    let usesCustomInserter = 1 in {
      def "P"#NAME#"rre" : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, GR32:$src2),
                                   [(set VR512:$dst,
                                      (!cast<Intrinsic>("int_x86_" # Opstr)
                                        timm:$src1, GR32:$src2))]>;

      def "P"#NAME#"rri" : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, i32u8imm:$src2),
                                   [(set VR512:$dst,
                                      (!cast<Intrinsic>("int_x86_" # Opstr)
                                        timm:$src1, imm:$src2))]>;
    }
  }
}
// Instantiations of AMXAVX512_BASE.  Arguments, in order: opcode of the rre
// form, opcode of the rri form, mnemonic, OpPrefix of rre, OpPrefix of rri.
// The "H" variants use rri opcode 0x07 and the "L" variants use 0x77; only
// TCVTROWPS2PHL passes different prefixes for its two forms.
defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;
// m_tilemovrow - the two tilemovrow encodings, both producing VR512 from a
// TILE:
//   rri - second source is a u8imm (Ii8, opcode 0x7, map TA).
//   rre - second source is a GR32 (opcode 0x4A, map T8, VVVV).
multiclass m_tilemovrow {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode],
      SchedRW = [WriteSystem] in {
    def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
                  (ins TILE:$src1, u8imm:$src2),
                  "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>,
              TA,PD, EVEX, EVEX_V512;

    def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
                (ins TILE:$src1, GR32:$src2),
                "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                []>,
              T8,PD, EVEX, VVVV, EVEX_V512;
  } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
}

defm TILEMOVROW : m_tilemovrow;
// Pseudos for tilemovrow: immediate-operand forms (custom inserter) followed
// by register-operand ("V") forms.
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode],
    SchedRW = [WriteSystem] in {
  // Immediate-operand pseudos matching int_x86_tilemovrow.
  let usesCustomInserter = 1 in {
    def PTILEMOVROWrri : PseudoI<(outs VR512:$dst),
                                 (ins u8imm:$src1, i32u8imm:$src2),
                                 [(set VR512:$dst,
                                    (int_x86_tilemovrow timm:$src1,
                                      imm:$src2))]>;

    def PTILEMOVROWrre : PseudoI<(outs VR512:$dst),
                                 (ins u8imm:$src1, GR32:$src2),
                                 [(set VR512:$dst,
                                    (int_x86_tilemovrow timm:$src1,
                                      GR32:$src2))]>;
  }

  // Register-operand pseudos matching int_x86_tilemovrow_internal.
  def PTILEMOVROWrriV : PseudoI<(outs VR512:$dst),
                                (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                     i32u8imm:$src4),
                                [(set VR512:$dst,
                                   (int_x86_tilemovrow_internal GR16:$src1,
                                     GR16:$src2, TILE:$src3, imm:$src4))]>;

  def PTILEMOVROWrreV : PseudoI<(outs VR512:$dst),
                                (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                     GR32:$src4),
                                [(set VR512:$dst,
                                   (int_x86_tilemovrow_internal GR16:$src1,
                                     GR16:$src2, TILE:$src3, GR32:$src4))]>;
}
// AMX-TF32: TMMULTF32PS and its pseudos.
let Predicates = [HasAMXTF32, In64BitMode], SchedRW = [WriteSystem] in {
  // Real instruction; $src1 is tied to $dst.
  let Constraints = "$src1 = $dst" in
  def TMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
                     (ins TILE:$src1, TILE:$src2, TILE:$src3),
                     "tmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                     []>,
                   VEX, VVVV, T8, PD;

  // Register-operand pseudo; $src4 is tied to $dst and the pattern matches
  // int_x86_tmmultf32ps_internal.
  let Constraints = "$src4 = $dst" in
  def PTMMULTF32PSV : PseudoI<(outs TILE:$dst),
                              (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                   TILE:$src4, TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                 (int_x86_tmmultf32ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;

  // Immediate-operand pseudo matching int_x86_tmmultf32ps; expanded through a
  // custom inserter.
  let usesCustomInserter = 1 in
  def PTMMULTF32PS : PseudoI<(outs),
                             (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                             [(int_x86_tmmultf32ps timm:$src1, timm:$src2,
                                timm:$src3)]>;
} // HasAMXTF32, In64BitMode; SchedRW = [WriteSystem]
// AMX-TF32 + AMX-TRANSPOSE: TTMMULTF32PS and its pseudos.
let Predicates = [HasAMXTF32, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  // Real instruction; $src1 is tied to $dst.
  let Constraints = "$src1 = $dst" in
  def TTMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "ttmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      []>,
                    VEX, VVVV, T8, PS;

  // Register-operand pseudo; $src4 is tied to $dst and the pattern matches
  // int_x86_ttmmultf32ps_internal.
  let Constraints = "$src4 = $dst" in
  def PTTMMULTF32PSV : PseudoI<(outs TILE:$dst),
                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                    TILE:$src4, TILE:$src5, TILE:$src6),
                               [(set TILE:$dst,
                                  (int_x86_ttmmultf32ps_internal GR16:$src1,
                                    GR16:$src2, GR16:$src3, TILE:$src4,
                                    TILE:$src5, TILE:$src6))]>;

  // Immediate-operand pseudo matching int_x86_ttmmultf32ps; expanded through
  // a custom inserter.
  let usesCustomInserter = 1 in
  def PTTMMULTF32PS : PseudoI<(outs),
                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                              [(int_x86_ttmmultf32ps timm:$src1, timm:$src2,
                                 timm:$src3)]>;
} // HasAMXTF32, HasAMXTRANSPOSE; SchedRW = [WriteSystem]