blob: e2bcd18ce66079ba3a49021bdd9fdbadc16d776d [file] [log] [blame]
//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,
Domain d, X86FoldableSchedWrite sched,
bit Is2Addr = 1> {
let isCommutable = 1 in {
def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
Sched<[sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, RegisterClass RC,
ValueType VT, string asm, Operand memopr,
ComplexPattern mem_cpat, Domain d,
X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, ValueType vt,
X86MemOperand x86memop, PatFrag mem_frag,
Domain d, X86FoldableSchedWrite sched,
bit Is2Addr = 1> {
let isCommutable = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
Sched<[sched]>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
X86FoldableSchedWrite sched,
list<dag> pat_rr, list<dag> pat_rm,
bit Is2Addr = 1> {
let isCommutable = 1, hasSideEffects = 0 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rr, d>,
Sched<[sched]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rm, d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
[(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
}
//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//
// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
let Predicates = [NoAVX512] in
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllZerosV))]>;
}
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
let Predicates = [HasAVX1Only, OptForMinSize] in {
def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
let Predicates = [HasAVX2] in
def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and just mentioned, we
// don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
multiclass sse12_move_rr<SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string base_opc,
string asm_opr, Domain d, string Name> {
let isCommutable = 1 in
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(base_opc, asm_opr),
[(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
Sched<[SchedWriteFShuffle.XMM]>;
// For the disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(base_opc, asm_opr), []>,
Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string OpcodeStr,
Domain d, string Name, Predicate pred> {
// AVX
let Predicates = [UseAVX, OptForSize] in
defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
"V"#Name>,
VEX_4V, VEX_LIG, VEX_WIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], d>,
VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
let Predicates = [pred, NoSSE41_Or_OptForSize] in
defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
"\t{$src2, $dst|$dst, $src2}", d, Name>;
}
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], d>,
Sched<[WriteFStore]>;
def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>("V"#NAME#"rr_REV")
VR128:$dst, VR128:$src1, VR128:$src2), 0>;
def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
(!cast<Instruction>(NAME#"rr_REV")
VR128:$dst, VR128:$src2), 0>;
}
// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_pat, string OpcodeStr, Domain d> {
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))], d>,
VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))], d>,
Sched<[WriteFLoad]>;
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
SSEPackedSingle, "MOVSS", UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
SSEPackedDouble, "MOVSD", UseSSE2>, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
SSEPackedSingle>, XS;
defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd",
SSEPackedDouble>, XD;
}
// Patterns
let Predicates = [UseAVX] in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
}
let Predicates = [UseAVX, OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
(SUBREG_TO_REG (i32 0),
(v4f32 (VMOVSSrr (v4f32 (V_SET0)),
(v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
(SUBREG_TO_REG (i32 0),
(v4i32 (VMOVSSrr (v4i32 (V_SET0)),
(v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
(SUBREG_TO_REG (i32 0),
(v2f64 (VMOVSDrr (v2f64 (V_SET0)),
(v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
(SUBREG_TO_REG (i32 0),
(v2i64 (VMOVSDrr (v2i64 (V_SET0)),
(v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
sub_xmm)>;
}
let Predicates = [UseSSE1] in {
let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}
// MOVSSrm already zeros the high parts of the register.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
// Extract and store.
def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
}
let Predicates = [UseSSE2] in {
// MOVSDrm already zeros the high parts of the register.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
}
// Aliases to help the assembler pick two byte VEX encodings by swapping the
// operands relative to the normal instructions to use VEX.R instead of VEX.B.
def : InstAlias<"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(VMOVSSrr_REV VR128L:$dst, VR128:$src1, VR128H:$src2), 0>;
def : InstAlias<"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(VMOVSDrr_REV VR128L:$dst, VR128:$src1, VR128H:$src2), 0>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d,
X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (ld_frag addr:$src))], d>,
Sched<[sched.RM]>;
}
let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
PD, VEX, VEX_WIG;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
PD, VEX, VEX_L, VEX_WIG;
}
let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
PS;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
PS;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
PD;
}
let Predicates = [HasAVX, NoVLX] in {
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
VEX, VEX_WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
VEX, VEX_WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v4f32 VR128:$src), addr:$dst)]>,
VEX, VEX_WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)]>,
VEX, VEX_WIG;
} // SchedRW
let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
VEX, VEX_L, VEX_WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
VEX, VEX_L, VEX_WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v8f32 VR256:$src), addr:$dst)]>,
VEX, VEX_L, VEX_WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>,
VEX, VEX_L, VEX_WIG;
} // SchedRW
} // Predicate
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
isMoveReg = 1 in {
let SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">;
def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">;
def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">;
def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movupd\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">;
} // SchedRW
let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movaps\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">;
def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movapd\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">;
def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movups\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">;
def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movupd\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">;
} // SchedRW
} // Predicate
// Aliases to help the assembler pick two byte VEX encodings by swapping the
// operands relative to the normal instructions to use VEX.R instead of VEX.B.
def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
(VMOVAPSrr_REV VR128L:$dst, VR128H:$src), 0>;
def : InstAlias<"vmovapd\t{$src, $dst|$dst, $src}",
(VMOVAPDrr_REV VR128L:$dst, VR128H:$src), 0>;
def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}",
(VMOVUPSrr_REV VR128L:$dst, VR128H:$src), 0>;
def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
(VMOVUPDrr_REV VR128L:$dst, VR128H:$src), 0>;
def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
(VMOVAPSYrr_REV VR256L:$dst, VR256H:$src), 0>;
def : InstAlias<"vmovapd\t{$src, $dst|$dst, $src}",
(VMOVAPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}",
(VMOVUPSYrr_REV VR256L:$dst, VR256H:$src), 0>;
def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
(VMOVUPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
(VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
(VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
(VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
(VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
(VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
(VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
(VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
(VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MOVAPSrr">;
def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MOVAPDrr">;
def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MOVUPSrr">;
def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movupd\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MOVUPDrr">;
}
// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
(MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
(MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
(MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
(MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
let Predicates = [HasAVX, NoVLX] in {
// 256-bit load/store need to use floating point load/store in case we don't
// have AVX2. Execution domain fixing will convert to integer if AVX2 is
// available and changing the domain is beneficial.
def : Pat<(alignedloadv4i64 addr:$src),
(VMOVAPSYrm addr:$src)>;
def : Pat<(alignedloadv8i32 addr:$src),
(VMOVAPSYrm addr:$src)>;
def : Pat<(alignedloadv16i16 addr:$src),
(VMOVAPSYrm addr:$src)>;
def : Pat<(alignedloadv32i8 addr:$src),
(VMOVAPSYrm addr:$src)>;
def : Pat<(loadv4i64 addr:$src),
(VMOVUPSYrm addr:$src)>;
def : Pat<(loadv8i32 addr:$src),
(VMOVUPSYrm addr:$src)>;
def : Pat<(loadv16i16 addr:$src),
(VMOVUPSYrm addr:$src)>;
def : Pat<(loadv32i8 addr:$src),
(VMOVUPSYrm addr:$src)>;
def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v4i64 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v8i32 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v16i16 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v32i8 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
}
// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
def : Pat<(alignedloadv2i64 addr:$src),
(MOVAPSrm addr:$src)>;
def : Pat<(alignedloadv4i32 addr:$src),
(MOVAPSrm addr:$src)>;
def : Pat<(alignedloadv8i16 addr:$src),
(MOVAPSrm addr:$src)>;
def : Pat<(alignedloadv16i8 addr:$src),
(MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
(MOVUPSrm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),
(MOVUPSrm addr:$src)>;
def : Pat<(loadv8i16 addr:$src),
(MOVUPSrm addr:$src)>;
def : Pat<(loadv16i8 addr:$src),
(MOVUPSrm addr:$src)>;
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
string base_opc, string asm_opr> {
// No pattern as they need be special cased between high and low.
let hasSideEffects = 0, mayLoad = 1 in
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
[], SSEPackedSingle>, PS,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
SSEPackedDouble>, PD,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
string base_opc> {
let Predicates = [UseAVX] in
defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
"\t{$src2, $dst|$dst, $src2}">;
}
defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)]>,
VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>,
VEX, VEX_WIG;
}// UseAVX
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)]>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
} // SchedRW
let Predicates = [UseSSE1] in {
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)),
(iPTR 0))), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
// This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
// end up with a movsd or blend instead of shufp.
// No need for aligned load, we're only loading 64-bits.
def : Pat<(X86Shufp (loadv4f32 addr:$src2), VR128:$src1, (i8 -28)),
(MOVLPSrm VR128:$src1, addr:$src2)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//
defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;
let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src))),
(iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
} // UseAVX
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src))),
(iPTR 0))), addr:$dst)]>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)]>;
} // SchedRW
let Predicates = [UseAVX] in {
// Also handle an i64 load because that may get selected as a faster way to
// load the data.
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(store (f64 (extractelt
(v2f64 (X86VPermilpi VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
}
let Predicates = [UseSSE1] in {
// This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
// end up with a movsd or blend instead of shufp.
// No need for aligned load, we're only loading 64-bits.
def : Pat<(X86Movlhps VR128:$src1, (loadv4f32 addr:$src2)),
(MOVHPSrm VR128:$src1, addr:$src2)>;
}
let Predicates = [UseSSE2] in {
// MOVHPD patterns
// Also handle an i64 load because that may get selected as a faster way to
// load the data.
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(store (f64 (extractelt
(v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
let Predicates = [UseAVX] in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
let isCommutable = 1 in
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
NotMemoryFoldable;
}
let Constraints = "$src1 = $dst" in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
Sched<[SchedWriteFShuffle.XMM]>;
let isCommutable = 1 in
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, X86FoldableSchedWrite sched> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))]>,
Sched<[sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
Sched<[sched.Folded]>;
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
string asm, Domain d, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
[(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))], d>,
Sched<[sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
[(set RC:$dst, (DstTy (sint_to_fp
(SrcTy (ld_frag addr:$src)))))], d>,
Sched<[sched.Folded]>;
}
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm,
X86FoldableSchedWrite sched> {
let hasSideEffects = 0, Predicates = [UseAVX] in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[sched]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}
let Predicates = [UseAVX] in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
WriteCvtSS2I>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
WriteCvtSS2I>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}",
WriteCvtSD2I>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}",
WriteCvtSD2I>,
XD, VEX, VEX_W, VEX_LIG;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTTSS2SIrr GR32:$dst, FR32:$src), 0, "att">;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SIrr GR32:$dst, FR64:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0, "att">;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}",
WriteCvtI2SS>, XS, VEX_4V, VEX_LIG;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}",
WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}",
WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}",
WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG;
let Predicates = [UseAVX] in {
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0, "att">;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp GR32:$src)),
(VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
(VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
(VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
WriteCvtSS2I>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
WriteCvtSS2I>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}",
WriteCvtSD2I>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}",
WriteCvtSD2I>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
WriteCvtI2SS>, XS;
defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
WriteCvtI2SS>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
WriteCvtI2SD>, XD;
defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
WriteCvtI2SD>, XD, REX_W;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
(CVTTSS2SIrr GR32:$dst, FR32:$src), 0, "att">;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
(CVTTSS2SIrm GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
(CVTTSD2SIrr GR32:$dst, FR64:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
(CVTTSD2SIrm GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
(CVTTSS2SI64rr GR64:$dst, FR32:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
(CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
(CVTTSD2SI64rr GR64:$dst, FR64:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
(CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
(CVTSI2SSrm FR64:$dst, i32mem:$src), 0, "att">;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
(CVTSI2SDrm FR64:$dst, i32mem:$src), 0, "att">;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
ValueType DstVT, ValueType SrcVT, SDNode OpNode,
Operand memop, ComplexPattern mem_cpat, string asm,
X86FoldableSchedWrite sched> {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
Sched<[sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
Sched<[sched.Folded]>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, X86MemOperand x86memop,
string asm, X86FoldableSchedWrite sched,
bit Is2Addr = 1> {
let hasSideEffects = 0 in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[]>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
WriteCvtSD2I>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
let isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in {
defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2ss{l}", WriteCvtI2SS, 0>, XS, VEX_4V;
defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2ss{q}", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_W;
defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2sd{l}", WriteCvtI2SD, 0>, XD, VEX_4V;
defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2sd{q}", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_W;
}
let Constraints = "$src1 = $dst" in {
defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2ss{l}", WriteCvtI2SS>, XS;
defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2ss{q}", WriteCvtI2SS>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2sd{l}", WriteCvtI2SD>, XD;
defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2sd{q}", WriteCvtI2SD>, XD, REX_W;
}
} // isCodeGenOnly = 1
/// SSE 1 Only
// Aliases for intrinsics
let isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in {
defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
WriteCvtSS2I>, XS, VEX;
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>,
XS, VEX, VEX_W;
defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSS2I>, XD, VEX;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSS2I>,
XD, VEX, VEX_W;
}
defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
WriteCvtSS2I>, XS;
defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSD2I>, XD;
defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSD2I>, XD, REX_W;
} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG;
}
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, REX_W;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PSY>,
PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, Requires<[UseSSE2]>;
let Predicates = [UseAVX] in {
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
(VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
(VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;
}
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
(CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
(CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
(CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
(CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
(CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
(CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
(CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
(CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;
/// SSE 2 Only
// Convert scalar double to scalar single
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V, VEX_LIG, VEX_WIG,
Sched<[WriteCvtSD2SS]>;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XD, VEX_4V, VEX_LIG, VEX_WIG,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
def : Pat<(f32 (fpround FR64:$src)),
(VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fpround FR64:$src))]>,
Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
XD, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSD2SS.Folded]>;
let isCodeGenOnly = 1 in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, Requires<[UseSSE2]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
let hasSideEffects = 0 in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG,
Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
Requires<[UseAVX, OptForSize]>;
}
def : Pat<(f64 (fpextend FR32:$src)),
(VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
Requires<[UseAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fpextend FR32:$src))]>,
XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))]>,
XS, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSS2SD.Folded]>;
// extload f32 -> f64. This matches load+fpextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
// combine.
// Since these loads aren't folded into the fpextend, we have to match it
// explicitly here.
def : Pat<(fpextend (loadf32 addr:$src)),
(CVTSS2SDrm addr:$src)>, Requires<[UseSSE2, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG,
Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
// vmovs{s,d} instructions
let Predicates = [UseAVX] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
(VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector
(f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseAVX]
let Predicates = [UseSSE2] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
(CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector
(f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE2]
let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE1]
let Predicates = [HasAVX, NoVLX] in {
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
Sched<[WriteCvtPS2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
Sched<[WriteCvtPS2ILd]>;
// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX, NoVLX] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTPD2DQrr VR128:$dst, VR128:$src), 0>;
def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
Sched<[WriteCvtPD2ILd]>, VEX_WIG;
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTPD2DQrm VR128:$dst, f128mem:$src), 0, "intel">;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">;
}
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
Sched<[WriteCvtPD2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
Sched<[WriteCvtPD2I]>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
let Predicates = [HasAVX, NoVLX] in {
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>,
VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>,
VEX, VEX_L,
Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>;
def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
(VCVTTPS2DQrm addr:$src)>;
def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
(VCVTTPS2DQYrr VR256:$src)>;
def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
(VCVTTPS2DQYrm addr:$src)>;
}
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
Sched<[WriteCvtPS2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>,
Sched<[WriteCvtPS2ILd]>;
let Predicates = [UseSSE2] in {
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(CVTTPS2DQrr VR128:$src)>;
def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
(CVTTPS2DQrm addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>,
VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (loadv2f64 addr:$src))))]>,
VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQrm VR128:$dst, f128mem:$src), 0, "intel">;
// YMM only
let Predicates = [HasAVX, NoVLX] in {
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
}
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">;
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
(VCVTTPD2DQYrm addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
(VCVTPD2DQrr VR128:$src)>;
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
(VCVTPD2DQrm addr:$src)>;
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
(VCVTTPD2DQrr VR128:$src)>;
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
(VCVTTPD2DQrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>,
Sched<[WriteCvtPD2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>,
Sched<[WriteCvtPD2ILd]>;
let Predicates = [UseSSE2] in {
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
(CVTPD2DQrr VR128:$src)>;
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
(CVTPD2DQrm addr:$src)>;
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
(CVTTPD2DQrr VR128:$src)>;
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
(CVTTPD2DQrm addr:$src)>;
} // Predicates = [UseSSE2]
// Convert packed single to packed double
let Predicates = [HasAVX, NoVLX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>,
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
PS, Sched<[WriteCvtPS2PD]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
PS, Sched<[WriteCvtPS2PD.Folded]>;
}
// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX, NoVLX] in {
let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
VEX, Sched<[WriteCvtI2PD]>, VEX_WIG;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v4f64 (sint_to_fp (v4i32 VR128:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
}
let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
Sched<[WriteCvtI2PD]>;
// AVX register conversion intrinsics
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(VCVTDQ2PDrm addr:$src)>;
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
(VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]
// SSE2 register conversion intrinsics
let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(CVTDQ2PDrm addr:$src)>;
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
(CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]
// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrr VR128:$dst, VR128:$src), 0>;
let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrm VR128:$dst, f128mem:$src), 0, "intel">;
// YMM only
let Predicates = [HasAVX, NoVLX] in {
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround VR256:$src))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
}
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrm VR128:$dst, f256mem:$src), 0, "intel">;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
Sched<[WriteCvtPD2PS]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
Sched<[WriteCvtPD2PS.Folded]>;
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
let Predicates = [HasAVX, NoVLX] in {
// Match fpround and fpextend for 128/256-bit conversions
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86vfpround (v2f64 VR128:$src)))))),
(VCVTPD2PSrr VR128:$src)>;
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
(VCVTPD2PSrm addr:$src)>;
}
let Predicates = [UseSSE2] in {
// Match fpround and fpextend for 128 conversions
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86vfpround (v2f64 VR128:$src)))))),
(CVTPD2PSrr VR128:$src)>;
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
(CVTPD2PSrm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
Operand CC, SDNode OpNode, ValueType VT,
PatFrag ld_frag, string asm, string asm_alt,
X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>,
Sched<[sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$cc), asm_alt, []>,
Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>,
Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PD.Scl>,
XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SchedWriteFCmpSizes.PS.Scl>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SchedWriteFCmpSizes.PD.Scl>, XD;
}
multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
Intrinsic Int, string asm, X86FoldableSchedWrite sched,
ComplexPattern mem_cpat> {
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))]>,
Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
mem_cpat:$src, imm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let isCodeGenOnly = 1 in {
// Aliases to match intrinsics which expect XMM operand(s).
let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
}
}
// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, X86MemOperand x86memop,
PatFrag ld_frag, string OpcodeStr,
X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in {
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
Sched<[sched]>;
let mayLoad = 1 in
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, Operand memop,
ComplexPattern mem_cpat, string OpcodeStr,
X86FoldableSchedWrite sched> {
def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
Sched<[sched]>;
let mayLoad = 1 in
def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
mem_cpat:$src2))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
"ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
let Pattern = []<dag> in {
defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
"comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
"comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
}
let isCodeGenOnly = 1 in {
defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_WIG;
defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_WIG;
defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_WIG;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", WriteFCom>, PS;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
"ucomisd", WriteFCom>, PD;
let Pattern = []<dag> in {
defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
"comiss", WriteFCom>, PS;
defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
"comisd", WriteFCom>, PD;
}
let isCodeGenOnly = 1 in {
defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss", WriteFCom>, PS;
defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd", WriteFCom>, PD;
defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss", WriteFCom>, PS;
defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd", WriteFCom>, PD;
}
} // Defs = [EFLAGS]
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Operand CC, ValueType VT, string asm,
string asm_alt, X86FoldableSchedWrite sched,
Domain d, PatFrag ld_frag> {
let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>,
Sched<[sched]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
asm_alt, [], d>, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
asm_alt, [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
}
def CommutableCMPCC : PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue() & 0x7;
return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
}]>;
// Patterns to select compares with loads in first operand.
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1,
CommutableCMPCC:$cc)),
(VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1,
CommutableCMPCC:$cc)),
(VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
(VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
(VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
CommutableCMPCC:$cc)),
(VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
CommutableCMPCC:$cc)),
(VCMPSSrm FR32:$src1, addr:$src2,