| //===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file describes the X86 SSE instruction set, defining the instructions |
| // and the instruction properties needed for code generation, machine code |
| // emission, and analysis. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 Instruction Classes |
| //===----------------------------------------------------------------------===// |
| |
| /// sse12_fp_scalar - SSE 1 & 2 scalar instruction class |
| multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, |
| RegisterClass RC, X86MemOperand x86memop, |
| Domain d, X86FoldableSchedWrite sched, |
| bit Is2Addr = 1> { |
| let isCodeGenOnly = 1 in { |
| let isCommutable = 1 in { |
| def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), |
| !if(Is2Addr, |
| !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>, |
| Sched<[sched]>; |
| } |
| def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), |
| !if(Is2Addr, |
| !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } |
| } |
| |
| /// sse12_fp_scalar_int - SSE 1 & 2 scalar instruction intrinsics class |
| multiclass sse12_fp_scalar_int<bits<8> opc, |
| SDPatternOperator OpNode, RegisterClass RC, |
| ValueType VT, string asm, Operand memopr, |
| PatFrags mem_frags, Domain d, |
| X86FoldableSchedWrite sched, bit Is2Addr = 1> { |
| let hasSideEffects = 0 in { |
| def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), |
| !if(Is2Addr, |
| !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>, |
| Sched<[sched]>; |
| let mayLoad = 1 in |
| def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), |
| !if(Is2Addr, |
| !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } |
| } |
| |
| /// sse12_fp_packed - SSE 1 & 2 packed instruction class |
| multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, |
| RegisterClass RC, ValueType vt, |
| X86MemOperand x86memop, PatFrag mem_frag, |
| Domain d, X86FoldableSchedWrite sched, |
| bit Is2Addr = 1> { |
| let isCommutable = 1 in |
| def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), |
| !if(Is2Addr, |
| !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>, |
| Sched<[sched]>; |
| let mayLoad = 1 in |
| def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), |
| !if(Is2Addr, |
| !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], |
| d>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } |
| |
| /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed logical instruction class |
| multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, |
| string OpcodeStr, X86MemOperand x86memop, |
| X86FoldableSchedWrite sched, |
| list<dag> pat_rr, list<dag> pat_rm, |
| bit Is2Addr = 1> { |
| let isCommutable = 1, hasSideEffects = 0 in |
| def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), |
| !if(Is2Addr, |
| !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| pat_rr, d>, |
| Sched<[sched]>; |
| let hasSideEffects = 0, mayLoad = 1 in |
| def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), |
| !if(Is2Addr, |
| !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| pat_rm, d>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } |
| |
| |
| // Alias instructions that map fld0 to xorps for SSE or vxorps for AVX. |
| // These are expanded by ExpandPostRAPseudos. |
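| // For illustration: the expansion is typically a self-xor of the destination, |
| // e.g. "xorps %xmm0, %xmm0" (or "vxorps" when AVX is available), so no |
| // constant-pool load is emitted unless folding one is profitable. |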
| let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |
| isPseudo = 1, SchedRW = [WriteZero] in { |
| def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", |
| [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>; |
| def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", |
| [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>; |
| def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "", |
| [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // AVX & SSE - Zero/One Vectors |
| //===----------------------------------------------------------------------===// |
| |
| // Alias instruction that maps a zero vector to pxor / xorp* for SSE. |
| // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then |
| // swizzled by ExecutionDomainFix to pxor. |
| // We set canFoldAsLoad because this can be converted to a constant-pool |
| // load of an all-zeros value if folding it would be beneficial. |
| let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |
| isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in { |
| def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", |
| [(set VR128:$dst, (v4f32 immAllZerosV))]>; |
| } |
| |
| let Predicates = [NoAVX512] in { |
| def : Pat<(v16i8 immAllZerosV), (V_SET0)>; |
| def : Pat<(v8i16 immAllZerosV), (V_SET0)>; |
| def : Pat<(v4i32 immAllZerosV), (V_SET0)>; |
| def : Pat<(v2i64 immAllZerosV), (V_SET0)>; |
| def : Pat<(v2f64 immAllZerosV), (V_SET0)>; |
| } |
| |
| |
| // The same as above, but for AVX. The 256-bit AVX1 ISA doesn't support PI, |
| // and doesn't need it, because on Sandy Bridge the register is set to zero |
| // at the rename stage without using any execution unit, so SET0PSY |
| // and SET0PDY can be used for vector int instructions without penalty. |
| let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |
| isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in { |
| def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", |
| [(set VR256:$dst, (v8i32 immAllZerosV))]>; |
| } |
| |
| let Predicates = [NoAVX512] in { |
| def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>; |
| def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>; |
| def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>; |
| def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>; |
| def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; |
| } |
| |
| // We set canFoldAsLoad because this can be converted to a constant-pool |
| // load of an all-ones value if folding it would be beneficial. |
| let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |
| isPseudo = 1, SchedRW = [WriteZero] in { |
| def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", |
| [(set VR128:$dst, (v4i32 immAllOnesV))]>; |
| let Predicates = [HasAVX1Only, OptForMinSize] in { |
| def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "", |
| [(set VR256:$dst, (v8i32 immAllOnesV))]>; |
| } |
| let Predicates = [HasAVX2] in |
| def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "", |
| [(set VR256:$dst, (v8i32 immAllOnesV))]>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Move FP Scalar Instructions |
| // |
| // Move Instructions. Register-to-register movss/movsd is not used for FR32/64 |
| // register copies because it's a partial register update; register-to-register |
| // movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires |
| // that the insert be implementable in terms of a copy, and, as just mentioned, |
| // we don't use movss/movsd for copies. |
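| // In other words, register-to-register movss/movsd only merges the low element |
| // into the destination and leaves the upper elements untouched, so using it as |
| // a plain copy would create a false dependency on the old destination value. |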
| //===----------------------------------------------------------------------===// |
| |
| multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc, |
| string asm_opr, Domain d, string Name> { |
| let isCommutable = 1 in |
| def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), |
| (ins VR128:$src1, VR128:$src2), |
| !strconcat(base_opc, asm_opr), |
| [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>, |
| Sched<[SchedWriteFShuffle.XMM]>; |
| |
| // For the disassembler |
| let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in |
| def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), |
| (ins VR128:$src1, VR128:$src2), |
| !strconcat(base_opc, asm_opr), []>, |
| Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>; |
| } |
| |
| multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, |
| X86MemOperand x86memop, string OpcodeStr, |
| Domain d, string Name, Predicate pred> { |
| // AVX |
| let Predicates = [UseAVX, OptForSize] in |
| defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr, |
| "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d, |
| "V"#Name>, |
| VEX_4V, VEX_LIG, VEX_WIG; |
| |
| def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), |
| !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), |
| [(store RC:$src, addr:$dst)], d>, |
| VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG; |
| // SSE1 & 2 |
| let Constraints = "$src1 = $dst" in { |
| let Predicates = [pred, NoSSE41_Or_OptForSize] in |
| defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr, |
| "\t{$src2, $dst|$dst, $src2}", d, Name>; |
| } |
| |
| def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), |
| !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), |
| [(store RC:$src, addr:$dst)], d>, |
| Sched<[WriteFStore]>; |
| |
| def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| (!cast<Instruction>("V"#NAME#"rr_REV") |
| VR128:$dst, VR128:$src1, VR128:$src2), 0>; |
| def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}", |
| (!cast<Instruction>(NAME#"rr_REV") |
| VR128:$dst, VR128:$src2), 0>; |
| } |
| |
| // Loading from memory automatically zeroes the upper bits. |
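| // For example, "movss (%rax), %xmm0" writes the loaded f32 to bits 31:0 of |
| // xmm0 and clears bits 127:32, which is what the X86vzload32 fragment below |
| // models (and similarly for movsd / X86vzload64). |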
| multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop, |
| PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr, |
| Domain d> { |
| def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), |
| !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), |
| [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>, |
| VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG; |
| def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), |
| !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), |
| [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>, |
| Sched<[WriteFLoad]>; |
| |
| // _alt version uses FR32/FR64 register class. |
| let isCodeGenOnly = 1 in { |
| def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), |
| !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), |
| [(set RC:$dst, (mem_pat addr:$src))], d>, |
| VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG; |
| def NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), |
| !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), |
| [(set RC:$dst, (mem_pat addr:$src))], d>, |
| Sched<[WriteFLoad]>; |
| } |
| } |
| |
| defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss", |
| SSEPackedSingle, "MOVSS", UseSSE1>, XS; |
| defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd", |
| SSEPackedDouble, "MOVSD", UseSSE2>, XD; |
| |
| let canFoldAsLoad = 1, isReMaterializable = 1 in { |
| defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss", |
| SSEPackedSingle>, XS; |
| defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd", |
| SSEPackedDouble>, XD; |
| } |
| |
| // Patterns |
| let Predicates = [UseAVX] in { |
| def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), |
| (VMOVSSrm addr:$src)>; |
| def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), |
| (VMOVSDrm addr:$src)>; |
| |
| // Represent the same patterns as above, but in the form they appear for |
| // 256-bit types. |
| def : Pat<(v8f32 (X86vzload32 addr:$src)), |
| (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; |
| def : Pat<(v4f64 (X86vzload64 addr:$src)), |
| (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; |
| } |
| |
| let Predicates = [UseAVX, OptForSize] in { |
| // Move a scalar to XMM zero-extended: zero a VR128, then do a |
| // MOVSS to the lower bits. |
| def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), |
| (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>; |
| def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), |
| (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>; |
| |
| // Move low f32 and clear high bits. |
| def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), |
| (SUBREG_TO_REG (i32 0), |
| (v4f32 (VMOVSSrr (v4f32 (V_SET0)), |
| (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>; |
| def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), |
| (SUBREG_TO_REG (i32 0), |
| (v4i32 (VMOVSSrr (v4i32 (V_SET0)), |
| (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>; |
| } |
| |
| let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in { |
| // Move a scalar to XMM zero-extended: zero a VR128, then do a |
| // MOVSS to the lower bits. |
| def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), |
| (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>; |
| def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), |
| (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>; |
| } |
| |
| let Predicates = [UseSSE2] in |
| def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), |
| (MOVSDrm addr:$src)>; |
| |
| let Predicates = [UseSSE1] in |
| def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), |
| (MOVSSrm addr:$src)>; |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Move Aligned/Unaligned FP Instructions |
| //===----------------------------------------------------------------------===// |
| |
| multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, |
| X86MemOperand x86memop, PatFrag ld_frag, |
| string asm, Domain d, |
| X86SchedWriteMoveLS sched> { |
| let hasSideEffects = 0, isMoveReg = 1 in |
| def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), |
| !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, |
| Sched<[sched.RR]>; |
| let canFoldAsLoad = 1, isReMaterializable = 1 in |
| def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), |
| !strconcat(asm, "\t{$src, $dst|$dst, $src}"), |
| [(set RC:$dst, (ld_frag addr:$src))], d>, |
| Sched<[sched.RM]>; |
| } |
| |
| let Predicates = [HasAVX, NoVLX] in { |
| defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", |
| SSEPackedSingle, SchedWriteFMoveLS.XMM>, |
| PS, VEX, VEX_WIG; |
| defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", |
| SSEPackedDouble, SchedWriteFMoveLS.XMM>, |
| PD, VEX, VEX_WIG; |
| defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", |
| SSEPackedSingle, SchedWriteFMoveLS.XMM>, |
| PS, VEX, VEX_WIG; |
| defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", |
| SSEPackedDouble, SchedWriteFMoveLS.XMM>, |
| PD, VEX, VEX_WIG; |
| |
| defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps", |
| SSEPackedSingle, SchedWriteFMoveLS.YMM>, |
| PS, VEX, VEX_L, VEX_WIG; |
| defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd", |
| SSEPackedDouble, SchedWriteFMoveLS.YMM>, |
| PD, VEX, VEX_L, VEX_WIG; |
| defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", |
| SSEPackedSingle, SchedWriteFMoveLS.YMM>, |
| PS, VEX, VEX_L, VEX_WIG; |
| defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", |
| SSEPackedDouble, SchedWriteFMoveLS.YMM>, |
| PD, VEX, VEX_L, VEX_WIG; |
| } |
| |
| let Predicates = [UseSSE1] in { |
| defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", |
| SSEPackedSingle, SchedWriteFMoveLS.XMM>, |
| PS; |
| defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", |
| SSEPackedSingle, SchedWriteFMoveLS.XMM>, |
| PS; |
| } |
| let Predicates = [UseSSE2] in { |
| defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", |
| SSEPackedDouble, SchedWriteFMoveLS.XMM>, |
| PD; |
| defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", |
| SSEPackedDouble, SchedWriteFMoveLS.XMM>, |
| PD; |
| } |
| |
| let Predicates = [HasAVX, NoVLX] in { |
| let SchedRW = [SchedWriteFMoveLS.XMM.MR] in { |
| def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), |
| "movaps\t{$src, $dst|$dst, $src}", |
| [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, |
| VEX, VEX_WIG; |
| def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), |
| "movapd\t{$src, $dst|$dst, $src}", |
| [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, |
| VEX, VEX_WIG; |
| def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), |
| "movups\t{$src, $dst|$dst, $src}", |
| [(store (v4f32 VR128:$src), addr:$dst)]>, |
| VEX, VEX_WIG; |
| def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), |
| "movupd\t{$src, $dst|$dst, $src}", |
| [(store (v2f64 VR128:$src), addr:$dst)]>, |
| VEX, VEX_WIG; |
| } // SchedRW |
| |
| let SchedRW = [SchedWriteFMoveLS.YMM.MR] in { |
| def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), |
| "movaps\t{$src, $dst|$dst, $src}", |
| [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, |
| VEX, VEX_L, VEX_WIG; |
| def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), |
| "movapd\t{$src, $dst|$dst, $src}", |
| [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, |
| VEX, VEX_L, VEX_WIG; |
| def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), |
| "movups\t{$src, $dst|$dst, $src}", |
| [(store (v8f32 VR256:$src), addr:$dst)]>, |
| VEX, VEX_L, VEX_WIG; |
| def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), |
| "movupd\t{$src, $dst|$dst, $src}", |
| [(store (v4f64 VR256:$src), addr:$dst)]>, |
| VEX, VEX_L, VEX_WIG; |
| } // SchedRW |
| } // Predicate |
| |
| // For the disassembler |
| let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, |
| isMoveReg = 1 in { |
| let SchedRW = [SchedWriteFMoveLS.XMM.RR] in { |
| def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), |
| (ins VR128:$src), |
| "movaps\t{$src, $dst|$dst, $src}", []>, |
| VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">; |
| def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), |
| (ins VR128:$src), |
| "movapd\t{$src, $dst|$dst, $src}", []>, |
| VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">; |
| def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), |
| (ins VR128:$src), |
| "movups\t{$src, $dst|$dst, $src}", []>, |
| VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">; |
| def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), |
| (ins VR128:$src), |
| "movupd\t{$src, $dst|$dst, $src}", []>, |
| VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">; |
| } // SchedRW |
| |
| let SchedRW = [SchedWriteFMoveLS.YMM.RR] in { |
| def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), |
| (ins VR256:$src), |
| "movaps\t{$src, $dst|$dst, $src}", []>, |
| VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">; |
| def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), |
| (ins VR256:$src), |
| "movapd\t{$src, $dst|$dst, $src}", []>, |
| VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">; |
| def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), |
| (ins VR256:$src), |
| "movups\t{$src, $dst|$dst, $src}", []>, |
| VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">; |
| def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), |
| (ins VR256:$src), |
| "movupd\t{$src, $dst|$dst, $src}", []>, |
| VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">; |
| } // SchedRW |
| } // Predicate |
| |
| // Reversed version with ".s" suffix for GAS compatibility. |
| def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}", |
| (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>; |
| def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}", |
| (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>; |
| def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}", |
| (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>; |
| def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}", |
| (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>; |
| def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}", |
| (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>; |
| def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}", |
| (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>; |
| def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}", |
| (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>; |
| def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}", |
| (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>; |
| |
| let SchedRW = [SchedWriteFMoveLS.XMM.MR] in { |
| def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), |
| "movaps\t{$src, $dst|$dst, $src}", |
| [(alignedstore (v4f32 VR128:$src), addr:$dst)]>; |
| def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), |
| "movapd\t{$src, $dst|$dst, $src}", |
| [(alignedstore (v2f64 VR128:$src), addr:$dst)]>; |
| def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), |
| "movups\t{$src, $dst|$dst, $src}", |
| [(store (v4f32 VR128:$src), addr:$dst)]>; |
| def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), |
| "movupd\t{$src, $dst|$dst, $src}", |
| [(store (v2f64 VR128:$src), addr:$dst)]>; |
| } // SchedRW |
| |
| // For the disassembler |
| let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, |
| isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in { |
| def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), |
| "movaps\t{$src, $dst|$dst, $src}", []>, |
| FoldGenData<"MOVAPSrr">; |
| def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), |
| "movapd\t{$src, $dst|$dst, $src}", []>, |
| FoldGenData<"MOVAPDrr">; |
| def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), |
| "movups\t{$src, $dst|$dst, $src}", []>, |
| FoldGenData<"MOVUPSrr">; |
| def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), |
| "movupd\t{$src, $dst|$dst, $src}", []>, |
| FoldGenData<"MOVUPDrr">; |
| } |
| |
| // Reversed version with ".s" suffix for GAS compatibility. |
| def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}", |
| (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>; |
| def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}", |
| (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>; |
| def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}", |
| (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>; |
| def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}", |
| (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>; |
| |
| let Predicates = [HasAVX, NoVLX] in { |
| // 256-bit loads/stores need to use floating point load/store in case we don't |
| // have AVX2. Execution domain fixing will convert to integer if AVX2 is |
| // available and changing the domain is beneficial. |
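| // For example, an aligned v8i32 load is matched to VMOVAPSYrm below; if AVX2 |
| // is available, the execution domain fix may later rewrite it to the integer |
| // form (VMOVDQAYrm) when staying in the integer domain is cheaper. |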
| def : Pat<(alignedloadv4i64 addr:$src), |
| (VMOVAPSYrm addr:$src)>; |
| def : Pat<(alignedloadv8i32 addr:$src), |
| (VMOVAPSYrm addr:$src)>; |
| def : Pat<(alignedloadv16i16 addr:$src), |
| (VMOVAPSYrm addr:$src)>; |
| def : Pat<(alignedloadv32i8 addr:$src), |
| (VMOVAPSYrm addr:$src)>; |
| def : Pat<(loadv4i64 addr:$src), |
| (VMOVUPSYrm addr:$src)>; |
| def : Pat<(loadv8i32 addr:$src), |
| (VMOVUPSYrm addr:$src)>; |
| def : Pat<(loadv16i16 addr:$src), |
| (VMOVUPSYrm addr:$src)>; |
| def : Pat<(loadv32i8 addr:$src), |
| (VMOVUPSYrm addr:$src)>; |
| |
| def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst), |
| (VMOVAPSYmr addr:$dst, VR256:$src)>; |
| def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst), |
| (VMOVAPSYmr addr:$dst, VR256:$src)>; |
| def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst), |
| (VMOVAPSYmr addr:$dst, VR256:$src)>; |
| def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst), |
| (VMOVAPSYmr addr:$dst, VR256:$src)>; |
| def : Pat<(store (v4i64 VR256:$src), addr:$dst), |
| (VMOVUPSYmr addr:$dst, VR256:$src)>; |
| def : Pat<(store (v8i32 VR256:$src), addr:$dst), |
| (VMOVUPSYmr addr:$dst, VR256:$src)>; |
| def : Pat<(store (v16i16 VR256:$src), addr:$dst), |
| (VMOVUPSYmr addr:$dst, VR256:$src)>; |
| def : Pat<(store (v32i8 VR256:$src), addr:$dst), |
| (VMOVUPSYmr addr:$dst, VR256:$src)>; |
| } |
| |
| // Use movaps / movups for SSE integer load / store (one byte shorter). |
| // The instructions selected below are then converted to MOVDQA/MOVDQU |
| // during the SSE domain pass. |
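| // For example, an aligned v4i32 load selects MOVAPSrm (encoded 0F 28), one |
| // byte shorter than MOVDQA's 66 0F 6F encoding; the domain pass can still |
| // turn it into MOVDQArm when the integer domain is preferable. |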
| let Predicates = [UseSSE1] in { |
| def : Pat<(alignedloadv2i64 addr:$src), |
| (MOVAPSrm addr:$src)>; |
| def : Pat<(alignedloadv4i32 addr:$src), |
| (MOVAPSrm addr:$src)>; |
| def : Pat<(alignedloadv8i16 addr:$src), |
| (MOVAPSrm addr:$src)>; |
| def : Pat<(alignedloadv16i8 addr:$src), |
| (MOVAPSrm addr:$src)>; |
| def : Pat<(loadv2i64 addr:$src), |
| (MOVUPSrm addr:$src)>; |
| def : Pat<(loadv4i32 addr:$src), |
| (MOVUPSrm addr:$src)>; |
| def : Pat<(loadv8i16 addr:$src), |
| (MOVUPSrm addr:$src)>; |
| def : Pat<(loadv16i8 addr:$src), |
| (MOVUPSrm addr:$src)>; |
| |
| def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), |
| (MOVAPSmr addr:$dst, VR128:$src)>; |
| def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), |
| (MOVAPSmr addr:$dst, VR128:$src)>; |
| def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), |
| (MOVAPSmr addr:$dst, VR128:$src)>; |
| def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), |
| (MOVAPSmr addr:$dst, VR128:$src)>; |
| def : Pat<(store (v2i64 VR128:$src), addr:$dst), |
| (MOVUPSmr addr:$dst, VR128:$src)>; |
| def : Pat<(store (v4i32 VR128:$src), addr:$dst), |
| (MOVUPSmr addr:$dst, VR128:$src)>; |
| def : Pat<(store (v8i16 VR128:$src), addr:$dst), |
| (MOVUPSmr addr:$dst, VR128:$src)>; |
| def : Pat<(store (v16i8 VR128:$src), addr:$dst), |
| (MOVUPSmr addr:$dst, VR128:$src)>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Move Low packed FP Instructions |
| //===----------------------------------------------------------------------===// |
| |
| multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode, |
| string base_opc, string asm_opr> { |
| // No patterns, as they need to be special-cased between high and low. |
| let hasSideEffects = 0, mayLoad = 1 in |
| def PSrm : PI<opc, MRMSrcMem, |
| (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), |
| !strconcat(base_opc, "s", asm_opr), |
| [], SSEPackedSingle>, PS, |
| Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; |
| |
| def PDrm : PI<opc, MRMSrcMem, |
| (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), |
| !strconcat(base_opc, "d", asm_opr), |
| [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, |
| (scalar_to_vector (loadf64 addr:$src2)))))], |
| SSEPackedDouble>, PD, |
| Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; |
| } |
| |
| multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode, |
| string base_opc> { |
| let Predicates = [UseAVX] in |
| defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc, |
| "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, |
| VEX_4V, VEX_WIG; |
| |
| let Constraints = "$src1 = $dst" in |
| defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc, |
| "\t{$src2, $dst|$dst, $src2}">; |
| } |
| |
| defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">; |
| |
| let SchedRW = [WriteFStore] in { |
| let Predicates = [UseAVX] in { |
| let mayStore = 1, hasSideEffects = 0 in |
| def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), |
| "movlps\t{$src, $dst|$dst, $src}", |
| []>, |
| VEX, VEX_WIG; |
| def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), |
| "movlpd\t{$src, $dst|$dst, $src}", |
| [(store (f64 (extractelt (v2f64 VR128:$src), |
| (iPTR 0))), addr:$dst)]>, |
| VEX, VEX_WIG; |
| }// UseAVX |
| let mayStore = 1, hasSideEffects = 0 in |
| def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), |
| "movlps\t{$src, $dst|$dst, $src}", |
| []>; |
| def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), |
| "movlpd\t{$src, $dst|$dst, $src}", |
| [(store (f64 (extractelt (v2f64 VR128:$src), |
| (iPTR 0))), addr:$dst)]>; |
| } // SchedRW |
| |
| let Predicates = [UseSSE1] in { |
| // This pattern helps select MOVLPS on SSE1-only targets. With SSE2 we'll |
| // end up with a movsd or blend instead of shufp. |
| // No need for an aligned load, we're only loading 64 bits. |
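| // The (i8 -28) immediate is 0xe4, the identity shuffle selector, so the result |
| // keeps the loaded low 64 bits and $src1's high 64 bits, which is exactly the |
| // MOVLPS semantics. |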
| def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1, |
| (i8 -28)), |
| (MOVLPSrm VR128:$src1, addr:$src2)>; |
| def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)), |
| (MOVLPSrm VR128:$src1, addr:$src2)>; |
| |
| def : Pat<(v4f32 (X86vzload64 addr:$src)), |
| (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>; |
| def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst), |
| (MOVLPSmr addr:$dst, VR128:$src)>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Move Hi packed FP Instructions |
| //===----------------------------------------------------------------------===// |
| |
| defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">; |
| |
| let SchedRW = [WriteFStore] in { |
| // v2f64 extract element 1 is always custom lowered to unpack high to low |
| // and extract element 0, so the non-store version isn't too horrible. |
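| // For example, (extractelt (v2f64 X), 1) is lowered to |
| // (extractelt (X86Unpckh X, X), 0), which the MOVHPD store patterns below fold |
| // into a single movhpd to memory. |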
| let Predicates = [UseAVX] in { |
| let mayStore = 1, hasSideEffects = 0 in |
| def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), |
| "movhps\t{$src, $dst|$dst, $src}", |
| []>, VEX, VEX_WIG; |
| def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), |
| "movhpd\t{$src, $dst|$dst, $src}", |
| [(store (f64 (extractelt |
| (v2f64 (X86Unpckh VR128:$src, VR128:$src)), |
| (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG; |
| } // UseAVX |
| let mayStore = 1, hasSideEffects = 0 in |
| def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), |
| "movhps\t{$src, $dst|$dst, $src}", |
| []>; |
| def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), |
| "movhpd\t{$src, $dst|$dst, $src}", |
| [(store (f64 (extractelt |
| (v2f64 (X86Unpckh VR128:$src, VR128:$src)), |
| (iPTR 0))), addr:$dst)]>; |
| } // SchedRW |
| |
| let Predicates = [UseAVX] in { |
| // MOVHPD patterns |
| def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))), |
| (VMOVHPDrm VR128:$src1, addr:$src2)>; |
| |
| def : Pat<(store (f64 (extractelt |
| (v2f64 (X86VPermilpi VR128:$src, (i8 1))), |
| (iPTR 0))), addr:$dst), |
| (VMOVHPDmr addr:$dst, VR128:$src)>; |
| |
| // MOVLPD patterns |
| def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))), |
| (VMOVLPDrm VR128:$src1, addr:$src2)>; |
| } |
| |
| let Predicates = [UseSSE1] in { |
| // This pattern helps select MOVHPS on SSE1-only targets. With SSE2 we'll |
| // end up with a movsd or blend instead of shufp. |
| // No need for an aligned load, we're only loading 64 bits. |
| def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))), |
| (MOVHPSrm VR128:$src1, addr:$src2)>; |
| def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))), |
| (MOVHPSrm VR128:$src1, addr:$src2)>; |
| |
| def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)), |
| addr:$dst), |
| (MOVHPSmr addr:$dst, VR128:$src)>; |
| } |
| |
| let Predicates = [UseSSE2] in { |
| // MOVHPD patterns |
| def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))), |
| (MOVHPDrm VR128:$src1, addr:$src2)>; |
| |
| def : Pat<(store (f64 (extractelt |
| (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))), |
| (iPTR 0))), addr:$dst), |
| (MOVHPDmr addr:$dst, VR128:$src)>; |
| |
| // MOVLPD patterns |
| def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))), |
| (MOVLPDrm VR128:$src1, addr:$src2)>; |
| } |
| |
| let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in { |
| // Use MOVLPD to load into the low bits from a full vector unless we can use |
| // BLENDPD. |
| def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))), |
| (MOVLPDrm VR128:$src1, addr:$src2)>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions |
| //===----------------------------------------------------------------------===// |
| |
| let Predicates = [UseAVX] in { |
| def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), |
| (ins VR128:$src1, VR128:$src2), |
| "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| [(set VR128:$dst, |
| (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>, |
| VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG; |
| let isCommutable = 1 in |
| def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), |
| (ins VR128:$src1, VR128:$src2), |
| "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| [(set VR128:$dst, |
| (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>, |
| VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG, |
| NotMemoryFoldable; |
| } |
| let Constraints = "$src1 = $dst" in { |
| def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), |
| (ins VR128:$src1, VR128:$src2), |
| "movlhps\t{$src2, $dst|$dst, $src2}", |
| [(set VR128:$dst, |
| (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>, |
| Sched<[SchedWriteFShuffle.XMM]>; |
| let isCommutable = 1 in |
| def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), |
| (ins VR128:$src1, VR128:$src2), |
| "movhlps\t{$src2, $dst|$dst, $src2}", |
| [(set VR128:$dst, |
| (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>, |
| Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Conversion Instructions |
| //===----------------------------------------------------------------------===// |
| |
| multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, |
| SDPatternOperator OpNode, X86MemOperand x86memop, PatFrag ld_frag, |
| string asm, string mem, X86FoldableSchedWrite sched, |
| Domain d, |
| SchedRead Int2Fpu = ReadDefault> { |
| let ExeDomain = d in { |
| def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), |
| !strconcat(asm,"\t{$src, $dst|$dst, $src}"), |
| [(set DstRC:$dst, (OpNode SrcRC:$src))]>, |
| Sched<[sched, Int2Fpu]>; |
| def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), |
| mem#"\t{$src, $dst|$dst, $src}", |
| [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, |
| Sched<[sched.Folded]>; |
| } |
| } |
| |
| multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop, |
| ValueType DstTy, ValueType SrcTy, PatFrag ld_frag, |
| string asm, Domain d, X86FoldableSchedWrite sched> { |
| let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { |
| def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm, |
| [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>, |
| Sched<[sched]>; |
| let mayLoad = 1 in |
| def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm, |
| [(set RC:$dst, (DstTy (any_sint_to_fp |
| (SrcTy (ld_frag addr:$src)))))], d>, |
| Sched<[sched.Folded]>; |
| } |
| } |
| |
| multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, |
| X86MemOperand x86memop, string asm, string mem, |
| X86FoldableSchedWrite sched, Domain d> { |
| let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in { |
| def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), |
| !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, |
| Sched<[sched, ReadDefault, ReadInt2Fpu]>; |
| let mayLoad = 1 in |
| def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), |
| (ins DstRC:$src1, x86memop:$src), |
| asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } // hasSideEffects = 0 |
| } |
| |
| let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { |
| defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32, |
| "cvttss2si", "cvttss2si", |
| WriteCvtSS2I, SSEPackedSingle>, |
| XS, VEX, VEX_LIG; |
| defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32, |
| "cvttss2si", "cvttss2si", |
| WriteCvtSS2I, SSEPackedSingle>, |
| XS, VEX, VEX_W, VEX_LIG; |
| defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64, |
| "cvttsd2si", "cvttsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, |
| XD, VEX, VEX_LIG; |
| defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64, |
| "cvttsd2si", "cvttsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, |
| XD, VEX, VEX_W, VEX_LIG; |
| |
| defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32, |
| "cvtss2si", "cvtss2si", |
| WriteCvtSS2I, SSEPackedSingle>, |
| XS, VEX, VEX_LIG; |
| defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32, |
| "cvtss2si", "cvtss2si", |
| WriteCvtSS2I, SSEPackedSingle>, |
| XS, VEX, VEX_W, VEX_LIG; |
| defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64, |
| "cvtsd2si", "cvtsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, |
| XD, VEX, VEX_LIG; |
| defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64, |
| "cvtsd2si", "cvtsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, |
| XD, VEX, VEX_W, VEX_LIG; |
| } |
| |
| // The assembler can recognize rr 64-bit instructions by seeing a rxx |
| // register, but the same isn't true when only using memory operands, so we |
| // provide separate "l" and "q" assembly forms to make the operand size |
| // explicit where appropriate. |
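| // For example, "cvtsi2ssq (%rax), %xmm0" selects the 64-bit memory form |
| // explicitly, whereas a bare "cvtsi2ss (%rax), %xmm0" would leave the operand |
| // size ambiguous to the assembler. |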
| let isCodeGenOnly = 1 in { |
| defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l", |
| WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V, |
| VEX_LIG, SIMD_EXC; |
| defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q", |
| WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V, |
| VEX_W, VEX_LIG, SIMD_EXC; |
| defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l", |
| WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V, |
| VEX_LIG; |
| defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q", |
| WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V, |
| VEX_W, VEX_LIG, SIMD_EXC; |
| } // isCodeGenOnly = 1 |
| |
| let Predicates = [UseAVX] in { |
| def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))), |
| (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; |
| def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))), |
| (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; |
| def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))), |
| (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; |
| def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))), |
| (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; |
| |
| def : Pat<(f32 (any_sint_to_fp GR32:$src)), |
| (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>; |
| def : Pat<(f32 (any_sint_to_fp GR64:$src)), |
| (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>; |
| def : Pat<(f64 (any_sint_to_fp GR32:$src)), |
| (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; |
| def : Pat<(f64 (any_sint_to_fp GR64:$src)), |
| (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>; |
| |
| def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>; |
| def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>; |
| |
| def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>; |
| def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>; |
| } |
| |
| let isCodeGenOnly = 1 in { |
| defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32, |
| "cvttss2si", "cvttss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC; |
| defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32, |
| "cvttss2si", "cvttss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC; |
| defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64, |
| "cvttsd2si", "cvttsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC; |
| defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64, |
| "cvttsd2si", "cvttsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC; |
| |
| defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32, |
| "cvtss2si", "cvtss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC; |
| defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32, |
| "cvtss2si", "cvtss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC; |
| defm CVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64, |
| "cvtsd2si", "cvtsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC; |
| defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64, |
| "cvtsd2si", "cvtsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC; |
| |
| defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32, |
| "cvtsi2ss", "cvtsi2ss{l}", |
| WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC; |
| defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64, |
| "cvtsi2ss", "cvtsi2ss{q}", |
| WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC; |
| defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32, |
| "cvtsi2sd", "cvtsi2sd{l}", |
| WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD; |
| defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64, |
| "cvtsi2sd", "cvtsi2sd{q}", |
| WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; |
| } // isCodeGenOnly = 1 |
| |
| let Predicates = [UseSSE1] in { |
| def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>; |
| def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>; |
| } |
| |
| let Predicates = [UseSSE2] in { |
| def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>; |
| def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>; |
| } |
| |
| // Conversion Instruction Intrinsics - Match intrinsics which expect MM |
| // and/or XMM operand(s). |
| |
| multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, |
| ValueType DstVT, ValueType SrcVT, SDNode OpNode, |
| Operand memop, PatFrags mem_frags, string asm, |
| X86FoldableSchedWrite sched, Domain d> { |
| let ExeDomain = d in { |
| def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), |
| !strconcat(asm, "\t{$src, $dst|$dst, $src}"), |
| [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>, |
| Sched<[sched]>; |
| def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), |
| !strconcat(asm, "\t{$src, $dst|$dst, $src}"), |
| [(set DstRC:$dst, (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>, |
| Sched<[sched.Folded]>; |
| } |
| } |
| |
| multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, |
| RegisterClass DstRC, X86MemOperand x86memop, |
| string asm, string mem, X86FoldableSchedWrite sched, |
| Domain d, bit Is2Addr = 1> { |
| let hasSideEffects = 0, ExeDomain = d in { |
| def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), |
| !if(Is2Addr, |
| !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| []>, Sched<[sched, ReadDefault, ReadInt2Fpu]>; |
| let mayLoad = 1 in |
| def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), |
| (ins DstRC:$src1, x86memop:$src2), |
| !if(Is2Addr, |
| asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}", |
| asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"), |
| []>, Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } |
| } |
| |
| let Uses = [MXCSR], mayRaiseFPException = 1 in { |
| let Predicates = [UseAVX] in { |
| defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, |
| X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG; |
| defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, |
| X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG; |
| } |
| defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, |
| sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, |
| SSEPackedDouble>, XD; |
| defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, |
| sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, |
| SSEPackedDouble>, XD, REX_W; |
| } |
| |
| let Predicates = [UseAVX] in { |
| defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, |
| i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>, |
| XS, VEX_4V, VEX_LIG, SIMD_EXC; |
| defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, |
| i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>, |
| XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; |
| defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, |
| i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>, |
| XD, VEX_4V, VEX_LIG; |
| defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, |
| i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>, |
| XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; |
| } |
| let Constraints = "$src1 = $dst" in { |
| defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, |
| i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>, |
| XS, SIMD_EXC; |
| defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, |
| i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>, |
| XS, REX_W, SIMD_EXC; |
| defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, |
| i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>, |
| XD; |
| defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, |
| i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>, |
| XD, REX_W, SIMD_EXC; |
| } |
| |
| def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">; |
| def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">; |
| def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">; |
| def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">; |
| |
| def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", |
| (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">; |
| def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", |
| (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">; |
| |
| def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}", |
| (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">; |
| def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}", |
| (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">; |
| def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}", |
| (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">; |
| def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}", |
| (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">; |
| |
| def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", |
| (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">; |
| def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", |
| (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">; |
| |
| /// SSE 1 Only |
| |
| // Aliases for intrinsics |
| let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { |
| defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, |
| ssmem, sse_load_f32, "cvttss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; |
| defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, |
| X86cvtts2Int, ssmem, sse_load_f32, |
| "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, |
| XS, VEX, VEX_LIG, VEX_W; |
| defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, |
| sdmem, sse_load_f64, "cvttsd2si", |
| WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG; |
| defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, |
| X86cvtts2Int, sdmem, sse_load_f64, |
| "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>, |
| XD, VEX, VEX_LIG, VEX_W; |
| } |
| let Uses = [MXCSR], mayRaiseFPException = 1 in { |
| defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, |
| ssmem, sse_load_f32, "cvttss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS; |
| defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, |
| X86cvtts2Int, ssmem, sse_load_f32, |
| "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, |
| XS, REX_W; |
| defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, |
| sdmem, sse_load_f64, "cvttsd2si", |
| WriteCvtSD2I, SSEPackedDouble>, XD; |
| defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, |
| X86cvtts2Int, sdmem, sse_load_f64, |
| "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>, |
| XD, REX_W; |
| } |
| |
| def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", |
| (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", |
| (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">; |
| def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", |
| (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", |
| (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">; |
| def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", |
| (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", |
| (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">; |
| def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", |
| (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", |
| (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; |
| |
| def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", |
| (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", |
| (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">; |
| def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", |
| (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", |
| (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">; |
| def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", |
| (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", |
| (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">; |
| def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", |
| (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", |
| (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; |
| |
| let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { |
| defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, |
| ssmem, sse_load_f32, "cvtss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; |
| defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, |
| ssmem, sse_load_f32, "cvtss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG; |
| } |
| let Uses = [MXCSR], mayRaiseFPException = 1 in { |
| defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, |
| ssmem, sse_load_f32, "cvtss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS; |
| defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, |
| ssmem, sse_load_f32, "cvtss2si", |
| WriteCvtSS2I, SSEPackedSingle>, XS, REX_W; |
| |
| defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load, |
| "vcvtdq2ps\t{$src, $dst|$dst, $src}", |
| SSEPackedSingle, WriteCvtI2PS>, |
| PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG; |
| defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load, |
| "vcvtdq2ps\t{$src, $dst|$dst, $src}", |
| SSEPackedSingle, WriteCvtI2PSY>, |
| PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG; |
| |
| defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop, |
| "cvtdq2ps\t{$src, $dst|$dst, $src}", |
| SSEPackedSingle, WriteCvtI2PS>, |
| PS, Requires<[UseSSE2]>; |
| } |
| |
| // AVX aliases |
| def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", |
| (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", |
| (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">; |
| def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", |
| (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", |
| (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">; |
| def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", |
| (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", |
| (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">; |
| def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", |
| (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", |
| (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">; |
| |
| // SSE aliases |
| def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", |
| (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", |
| (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">; |
| def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", |
| (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", |
| (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">; |
| def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", |
| (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", |
| (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">; |
| def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", |
| (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", |
| (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">; |
| |
| /// SSE 2 Only |
| |
| // Convert scalar double to scalar single |
| let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX], |
| ExeDomain = SSEPackedSingle in { |
| def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), |
| (ins FR32:$src1, FR64:$src2), |
| "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, |
| VEX_4V, VEX_LIG, VEX_WIG, |
| Sched<[WriteCvtSD2SS]>, SIMD_EXC; |
| let mayLoad = 1 in |
| def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), |
| (ins FR32:$src1, f64mem:$src2), |
| "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, |
| XD, VEX_4V, VEX_LIG, VEX_WIG, |
| Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; |
| } |
| |
| def : Pat<(f32 (any_fpround FR64:$src)), |
| (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>, |
| Requires<[UseAVX]>; |
| |
| let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { |
| def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), |
| "cvtsd2ss\t{$src, $dst|$dst, $src}", |
| [(set FR32:$dst, (any_fpround FR64:$src))]>, |
| Sched<[WriteCvtSD2SS]>, SIMD_EXC; |
| def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), |
| "cvtsd2ss\t{$src, $dst|$dst, $src}", |
| [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>, |
| XD, Requires<[UseSSE2, OptForSize]>, |
| Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC; |
| } |
| |
| let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in { |
| def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, |
| (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), |
| "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| [(set VR128:$dst, |
| (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, |
| XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>, |
| Sched<[WriteCvtSD2SS]>; |
| def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem, |
| (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), |
| "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| [(set VR128:$dst, |
| (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>, |
| XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>, |
| Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; |
| let Constraints = "$src1 = $dst" in { |
| def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, |
| (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), |
| "cvtsd2ss\t{$src2, $dst|$dst, $src2}", |
| [(set VR128:$dst, |
| (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, |
| XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>; |
| def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, |
| (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), |
| "cvtsd2ss\t{$src2, $dst|$dst, $src2}", |
| [(set VR128:$dst, |
| (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>, |
| XD, Requires<[UseSSE2]>, |
| Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; |
| } |
| } |
| |
| // Convert scalar single to scalar double |
| // SSE2 instructions with XS prefix |
| let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in { |
| def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), |
| (ins FR64:$src1, FR32:$src2), |
| "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, |
| XS, VEX_4V, VEX_LIG, VEX_WIG, |
| Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC; |
| let mayLoad = 1 in |
| def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), |
| (ins FR64:$src1, f32mem:$src2), |
| "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, |
| XS, VEX_4V, VEX_LIG, VEX_WIG, |
| Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, |
| Requires<[UseAVX, OptForSize]>, SIMD_EXC; |
| } // isCodeGenOnly = 1, hasSideEffects = 0 |
| |
| def : Pat<(f64 (any_fpextend FR32:$src)), |
| (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>; |
| def : Pat<(any_fpextend (loadf32 addr:$src)), |
| (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>; |
| |
| let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { |
| def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), |
| "cvtss2sd\t{$src, $dst|$dst, $src}", |
| [(set FR64:$dst, (any_fpextend FR32:$src))]>, |
| XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC; |
| def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), |
| "cvtss2sd\t{$src, $dst|$dst, $src}", |
| [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>, |
| XS, Requires<[UseSSE2, OptForSize]>, |
| Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC; |
| } // isCodeGenOnly = 1 |
| |
| let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, |
| ExeDomain = SSEPackedSingle in { |
| def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, |
| (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), |
| "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| []>, XS, VEX_4V, VEX_LIG, VEX_WIG, |
| Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>; |
| let mayLoad = 1 in |
| def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem, |
| (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), |
| "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>, |
| Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; |
| let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix |
| def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg, |
| (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), |
| "cvtss2sd\t{$src2, $dst|$dst, $src2}", |
| []>, XS, Requires<[UseSSE2]>, |
| Sched<[WriteCvtSS2SD]>; |
| let mayLoad = 1 in |
| def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem, |
| (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), |
| "cvtss2sd\t{$src2, $dst|$dst, $src2}", |
| []>, XS, Requires<[UseSSE2]>, |
| Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; |
| } |
| } // hasSideEffects = 0 |
| |
| // Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and |
| // (v)cvtss2sd intrinsic sequences from clang, which would otherwise be |
| // selected with unnecessary vmovs{s,d} instructions. |
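| // A sketch of the IR shape this covers (an assumed example; the Pat<> |
| // records below are authoritative): |
| //   %e = extractelement <2 x double> %src, i32 0 |
| //   %t = fptrunc double %e to float |
| //   %r = insertelement <4 x float> %dst, float %t, i32 0 |
| // selects directly to (V)CVTSD2SSrr_Int instead of a conversion followed by |
| // a separate vmovss to merge the low element. |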
| let Predicates = [UseAVX] in { |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector |
| (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), |
| (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector |
| (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), |
| (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>; |
| |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), |
| (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>; |
| |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), |
| (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>; |
| |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), |
| (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>; |
| |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), |
| (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), |
| (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), |
| (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), |
| (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), |
| (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>; |
| } // Predicates = [UseAVX] |
| |
| let Predicates = [UseSSE2] in { |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector |
| (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), |
| (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector |
| (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), |
| (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), |
| (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), |
| (CVTSI642SDrm_Int VR128:$dst, addr:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), |
| (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>; |
| |
| def : Pat<(v2f64 (X86Movsd |
| (v2f64 VR128:$dst), |
| (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), |
| (CVTSI2SDrm_Int VR128:$dst, addr:$src)>; |
| } // Predicates = [UseSSE2] |
| |
| let Predicates = [UseSSE1] in { |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), |
| (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>; |
| |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), |
| (CVTSI642SSrm_Int VR128:$dst, addr:$src)>; |
| |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), |
| (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>; |
| |
| def : Pat<(v4f32 (X86Movss |
| (v4f32 VR128:$dst), |
| (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), |
| (CVTSI2SSrm_Int VR128:$dst, addr:$src)>; |
| } // Predicates = [UseSSE1] |
| |
| let Predicates = [HasAVX, NoVLX] in { |
| // Convert packed single fp to doubleword |
| def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvtps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, |
| VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC; |
| def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "cvtps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>, |
| VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC; |
| def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), |
| "cvtps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR256:$dst, |
| (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC; |
| def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), |
| "cvtps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR256:$dst, |
| (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC; |
| } |
| def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvtps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, |
| Sched<[WriteCvtPS2I]>, SIMD_EXC; |
| def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "cvtps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>, |
| Sched<[WriteCvtPS2ILd]>, SIMD_EXC; |
| |
| |
| // Convert Packed Double FP to Packed DW Integers |
| let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { |
| // The assembler can recognize rr 256-bit instructions by seeing a ymm |
| // register, but the same isn't true when using memory operands instead. |
| // Provide other assembly rr and rm forms to address this explicitly. |
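| // For example, "vcvtpd2dq (%rax), %xmm0" is ambiguous on its own, since the |
| // memory operand could be 128 or 256 bits wide; the {x}/{y} suffixes below |
| // (and the vcvtpd2dqx/vcvtpd2dqy aliases further down) make it explicit. |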
| def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "vcvtpd2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, |
| VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; |
| |
| // XMM only |
| def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX, |
| Sched<[WriteCvtPD2ILd]>, VEX_WIG; |
| |
| // YMM only |
| def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), |
| "vcvtpd2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; |
| def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), |
| "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; |
| } |
| |
| def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", |
| (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}", |
| (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">; |
| |
| def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "cvtpd2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>, |
| Sched<[WriteCvtPD2ILd]>, SIMD_EXC; |
| def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvtpd2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, |
| Sched<[WriteCvtPD2I]>, SIMD_EXC; |
| |
| // Convert with truncation packed single/double fp to doubleword |
| // SSE2 packed instructions with XS prefix |
| let Uses = [MXCSR], mayRaiseFPException = 1 in { |
| let Predicates = [HasAVX, NoVLX] in { |
| def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvttps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>, |
| VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; |
| def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "cvttps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>, |
| VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; |
| def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), |
| "cvttps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR256:$dst, |
| (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; |
| def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), |
| "cvttps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR256:$dst, |
| (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>, |
|                           VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG; |
| } |
| |
| def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvttps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>, |
| Sched<[WriteCvtPS2I]>; |
| def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "cvttps2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>, |
| Sched<[WriteCvtPS2ILd]>; |
| } |
| |
| // The assembler can recognize rr 256-bit instructions by seeing a ymm |
| // register, but the same isn't true when using memory operands instead. |
| // Provide other assembly rr and rm forms to address this explicitly. |
| let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { |
| // XMM only |
| def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvttpd2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>, |
| VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; |
| def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "cvttpd2dq{x}\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>, |
| VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG; |
| |
| // YMM only |
| def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), |
| "cvttpd2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; |
| def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), |
| "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; |
| } // Predicates = [HasAVX, NoVLX] |
| |
| def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", |
| (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}", |
| (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">; |
| |
| let Predicates = [HasAVX, NoVLX] in { |
| def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))), |
| (VCVTTPD2DQYrr VR256:$src)>; |
| def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))), |
| (VCVTTPD2DQYrm addr:$src)>; |
| } |
| |
| def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvttpd2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>, |
| Sched<[WriteCvtPD2I]>, SIMD_EXC; |
| def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), |
| "cvttpd2dq\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>, |
| Sched<[WriteCvtPD2ILd]>, SIMD_EXC; |
| |
| // Convert packed single to packed double |
| let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { |
| // SSE2 instructions without OpSize prefix |
| def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "vcvtps2pd\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>, |
| PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG; |
| def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), |
| "vcvtps2pd\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, |
| PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG; |
| def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), |
| "vcvtps2pd\t{$src, $dst|$dst, $src}", |
| [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>, |
| PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG; |
| def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), |
| "vcvtps2pd\t{$src, $dst|$dst, $src}", |
| [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>, |
| PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG; |
| } |
| |
| let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in { |
| def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvtps2pd\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>, |
| PS, Sched<[WriteCvtPS2PD]>; |
| def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), |
| "cvtps2pd\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, |
| PS, Sched<[WriteCvtPS2PD.Folded]>; |
| } |
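| // Note that the 128-bit cvtps2pd memory forms use f64mem: only the low two |
| // packed singles (64 bits) are read and widened, modeled above with |
| // extloadv2f32. |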
| |
| // Convert Packed DW Integers to Packed Double FP |
| let Predicates = [HasAVX, NoVLX] in { |
| let hasSideEffects = 0, mayLoad = 1 in |
| def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), |
| "vcvtdq2pd\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v2f64 (X86any_VSintToFP |
| (bc_v4i32 |
| (v2i64 (scalar_to_vector |
| (loadi64 addr:$src)))))))]>, |
| VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG; |
| def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "vcvtdq2pd\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>, |
| VEX, Sched<[WriteCvtI2PD]>, VEX_WIG; |
| def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), |
| "vcvtdq2pd\t{$src, $dst|$dst, $src}", |
| [(set VR256:$dst, |
| (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>, |
|                         VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>, VEX_WIG; |
| def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), |
| "vcvtdq2pd\t{$src, $dst|$dst, $src}", |
| [(set VR256:$dst, |
| (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG; |
| } |
| |
| let hasSideEffects = 0, mayLoad = 1 in |
| def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), |
| "cvtdq2pd\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v2f64 (X86any_VSintToFP |
| (bc_v4i32 |
| (v2i64 (scalar_to_vector |
| (loadi64 addr:$src)))))))]>, |
| Sched<[WriteCvtI2PDLd]>; |
| def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvtdq2pd\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, |
| (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>, |
| Sched<[WriteCvtI2PD]>; |
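| // The rm forms above take i64mem because cvtdq2pd reads only two i32 |
| // elements; the pattern models this as a 64-bit scalar load bitcast to |
| // v4i32 before the int-to-fp conversion. |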
| |
| // AVX pattern: fold a zero-extending 64-bit vector load into vcvtdq2pd |
| let Predicates = [HasAVX, NoVLX] in { |
| def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), |
| (VCVTDQ2PDrm addr:$src)>; |
| } // Predicates = [HasAVX, NoVLX] |
| |
| // SSE2 pattern: fold a zero-extending 64-bit vector load into cvtdq2pd |
| let Predicates = [UseSSE2] in { |
| def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), |
| (CVTDQ2PDrm addr:$src)>; |
| } // Predicates = [UseSSE2] |
| |
| // Convert packed double to packed single |
| // The assembler can recognize rr 256-bit instructions by seeing a ymm |
| // register, but the same isn't true when using memory operands instead. |
| // Provide other assembly rr and rm forms to address this explicitly. |
| let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { |
| // XMM only |
| def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvtpd2ps\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>, |
| VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG; |
| def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "cvtpd2ps{x}\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>, |
| VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG; |
| |
| def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), |
| "cvtpd2ps\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG; |
| def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), |
| "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>, |
| VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG; |
| } // Predicates = [HasAVX, NoVLX] |
| |
| def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", |
| (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">; |
| def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}", |
| (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">; |
| |
| def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), |
| "cvtpd2ps\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>, |
| Sched<[WriteCvtPD2PS]>, SIMD_EXC; |
| def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), |
| "cvtpd2ps\t{$src, $dst|$dst, $src}", |
| [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>, |
| Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC; |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Compare Instructions |
| //===----------------------------------------------------------------------===// |
| |
| // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions |
| multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, |
| Operand memop, SDNode OpNode, ValueType VT, |
| PatFrag ld_frag, string asm, |
| X86FoldableSchedWrite sched, |
| PatFrags mem_frags> { |
| def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), |
| (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm, |
| [(set VR128:$dst, (OpNode (VT VR128:$src1), |
| VR128:$src2, timm:$cc))]>, |
| Sched<[sched]>, SIMD_EXC; |
| let mayLoad = 1 in |
| def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), |
| (ins VR128:$src1, memop:$src2, u8imm:$cc), asm, |
| [(set VR128:$dst, (OpNode (VT VR128:$src1), |
| (mem_frags addr:$src2), timm:$cc))]>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; |
| |
| let isCodeGenOnly = 1 in { |
| let isCommutable = 1 in |
| def rr : SIi8<0xC2, MRMSrcReg, |
| (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, |
| [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>, |
| Sched<[sched]>, SIMD_EXC; |
| def rm : SIi8<0xC2, MRMSrcMem, |
| (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, |
| [(set RC:$dst, (OpNode RC:$src1, |
| (ld_frag addr:$src2), timm:$cc))]>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; |
| } |
| } |
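| // For reference, each defm below expands this multiclass under its own name: |
| // e.g. VCMPSS produces VCMPSSrr_Int/VCMPSSrm_Int (VR128 intrinsic forms) and |
| // the isCodeGenOnly VCMPSSrr/VCMPSSrm (FR32 forms) used by the load-commuting |
| // patterns later in this file. |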
| |
| let ExeDomain = SSEPackedSingle in |
| defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32, |
| "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", |
| SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, |
| XS, VEX_4V, VEX_LIG, VEX_WIG; |
| let ExeDomain = SSEPackedDouble in |
| defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64, |
| "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", |
| SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, |
| XD, VEX_4V, VEX_LIG, VEX_WIG; |
| |
| let Constraints = "$src1 = $dst" in { |
| let ExeDomain = SSEPackedSingle in |
| defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32, |
| "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", |
| SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS; |
| let ExeDomain = SSEPackedDouble in |
| defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64, |
| "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", |
| SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD; |
| } |
| |
| // sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS |
| multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDPatternOperator OpNode, |
| ValueType vt, X86MemOperand x86memop, |
| PatFrag ld_frag, string OpcodeStr, Domain d, |
| X86FoldableSchedWrite sched = WriteFComX> { |
| let ExeDomain = d in { |
| def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), |
| !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), |
| [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>, |
| Sched<[sched]>, SIMD_EXC; |
| let mayLoad = 1 in |
| def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), |
| !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), |
| [(set EFLAGS, (OpNode (vt RC:$src1), |
| (ld_frag addr:$src2)))]>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; |
| } |
| } |
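| // For reference on the semantics being modeled: (u)comiss/(u)comisd compare |
| // the low scalar elements and set ZF/PF/CF (clearing OF/SF/AF); an unordered |
| // result sets ZF=PF=CF=1. The ucomi forms raise #I only for signaling NaNs, |
| // while the comi forms raise it for any NaN. |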
| |
| // sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp |
| multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode, |
| ValueType vt, Operand memop, |
| PatFrags mem_frags, string OpcodeStr, |
| Domain d, |
| X86FoldableSchedWrite sched = WriteFComX> { |
| let ExeDomain = d in { |
| def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), |
| !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), |
| [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>, |
| Sched<[sched]>, SIMD_EXC; |
| let mayLoad = 1 in |
| def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2), |
| !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), |
| [(set EFLAGS, (OpNode (vt RC:$src1), |
| (mem_frags addr:$src2)))]>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; |
| } |
| } |
| |
| let Defs = [EFLAGS] in { |
| defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, |
| "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; |
| defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, |
| "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; |
| defm VCOMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32, |
| "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; |
| defm VCOMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64, |
| "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; |
| |
| let isCodeGenOnly = 1 in { |
| defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, |
| sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; |
| defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, |
| sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; |
| |
| defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, |
| sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; |
| defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, |
| sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; |
| } |
| defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, |
| "ucomiss", SSEPackedSingle>, PS; |
| defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, |
| "ucomisd", SSEPackedDouble>, PD; |
| defm COMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32, |
| "comiss", SSEPackedSingle>, PS; |
| defm COMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64, |
| "comisd", SSEPackedDouble>, PD; |
| |
| let isCodeGenOnly = 1 in { |
| defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, |
| sse_load_f32, "ucomiss", SSEPackedSingle>, PS; |
| defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, |
| sse_load_f64, "ucomisd", SSEPackedDouble>, PD; |
| |
| defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, |
| sse_load_f32, "comiss", SSEPackedSingle>, PS; |
| defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, |
| sse_load_f64, "comisd", SSEPackedDouble>, PD; |
| } |
| } // Defs = [EFLAGS] |
| |
| // sse12_cmp_packed - sse 1 & 2 compare packed instructions |
| multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, |
| ValueType VT, string asm, |
| X86FoldableSchedWrite sched, |
| Domain d, PatFrag ld_frag> { |
| let isCommutable = 1 in |
| def rri : PIi8<0xC2, MRMSrcReg, |
| (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, |
| [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, |
| Sched<[sched]>, SIMD_EXC; |
| def rmi : PIi8<0xC2, MRMSrcMem, |
| (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, |
| [(set RC:$dst, |
| (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; |
| } |
| |
| defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, |
| "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", |
| SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; |
| defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, |
| "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", |
| SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; |
| defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32, |
| "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", |
| SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG; |
| defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64, |
| "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", |
| SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG; |
| let Constraints = "$src1 = $dst" in { |
| defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, |
| "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", |
| SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS; |
| defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, |
| "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", |
| SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; |
| } |
| |
| def CommutableCMPCC : PatLeaf<(timm), [{ |
| uint64_t Imm = N->getZExtValue() & 0x7; |
| return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); |
| }]>; |
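| // These are the operand-order-independent predicates of the 3-bit SSE |
| // encoding: 0x00 (EQ), 0x03 (UNORD), 0x04 (NEQ) and 0x07 (ORD). For them the |
| // compare operands may be swapped, which lets a load in the first operand be |
| // folded by the patterns below. |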
| |
| // Patterns to select compares with a load in the first operand. |
| let Predicates = [HasAVX] in { |
| def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1, |
| CommutableCMPCC:$cc)), |
| (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>; |
| |
| def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1, |
| CommutableCMPCC:$cc)), |
| (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>; |
| |
| def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1, |
| CommutableCMPCC:$cc)), |
| (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; |
| |
| def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1, |
| CommutableCMPCC:$cc)), |
| (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; |
| |
| def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, |
| CommutableCMPCC:$cc)), |
| (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>; |
| |
| def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, |
| CommutableCMPCC:$cc)), |
| (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>; |
| } |
| |
| let Predicates = [UseSSE2] in { |
| def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1, |
| CommutableCMPCC:$cc)), |
| (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; |
| |
| def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, |
| CommutableCMPCC:$cc)), |
| (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>; |
| } |
| |
| let Predicates = [UseSSE1] in { |
| def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1, |
| CommutableCMPCC:$cc)), |
| (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; |
| |
| def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, |
| CommutableCMPCC:$cc)), |
| (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Shuffle Instructions |
| //===----------------------------------------------------------------------===// |
| |
| /// sse12_shuffle - sse 1 & 2 fp shuffle instructions |
| multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, |
| ValueType vt, string asm, PatFrag mem_frag, |
| X86FoldableSchedWrite sched, Domain d, |
| bit IsCommutable = 0> { |
| def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), |
| (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm, |
| [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), |
| (i8 timm:$src3))))], d>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| let isCommutable = IsCommutable in |
| def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), |
| (ins RC:$src1, RC:$src2, u8imm:$src3), asm, |
| [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, |
| (i8 timm:$src3))))], d>, |
| Sched<[sched]>; |
| } |
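| // For reference on the immediate being modeled: shufps uses two bits per |
| // destination lane, the low two lanes select from $src1 and the high two |
| // from $src2, so e.g. an immediate of 0x4E yields |
| // { src1[2], src1[3], src2[0], src2[1] }. shufpd uses one bit per lane. |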
| |
| let Predicates = [HasAVX, NoVLX] in { |
| defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, |
| "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", |
| loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, |
| PS, VEX_4V, VEX_WIG; |
| defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, |
| "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", |
| loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>, |
| PS, VEX_4V, VEX_L, VEX_WIG; |
| defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, |
| "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", |
| loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, |
| PD, VEX_4V, VEX_WIG; |
| defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, |
| "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", |
| loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>, |
| PD, VEX_4V, VEX_L, VEX_WIG; |
| } |
| let Constraints = "$src1 = $dst" in { |
| defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32, |
| "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", |
| memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; |
| defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64, |
| "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", |
| memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Unpack FP Instructions |
| //===----------------------------------------------------------------------===// |
| |
| /// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave |
| multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, |
| PatFrag mem_frag, RegisterClass RC, |
| X86MemOperand x86memop, string asm, |
| X86FoldableSchedWrite sched, Domain d, |
| bit IsCommutable = 0> { |
| let isCommutable = IsCommutable in |
| def rr : PI<opc, MRMSrcReg, |
| (outs RC:$dst), (ins RC:$src1, RC:$src2), |
| asm, [(set RC:$dst, |
| (vt (OpNode RC:$src1, RC:$src2)))], d>, |
| Sched<[sched]>; |
| def rm : PI<opc, MRMSrcMem, |
| (outs RC:$dst), (ins RC:$src1, x86memop:$src2), |
| asm, [(set RC:$dst, |
| (vt (OpNode RC:$src1, |
| (mem_frag addr:$src2))))], d>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } |
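| // For reference: unpcklps interleaves the low halves of its sources, giving |
| // { src1[0], src2[0], src1[1], src2[1] }, and unpckhps does the same for the |
| // high halves; the pd forms interleave 64-bit elements. |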
| |
| let Predicates = [HasAVX, NoVLX] in { |
| defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load, |
| VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; |
| defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load, |
| VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG; |
| defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load, |
| VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; |
| defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load, |
| VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; |
| |
| defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load, |
| VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; |
| defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load, |
| VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; |
| defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load, |
| VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; |
| defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load, |
| VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; |
| }// Predicates = [HasAVX, NoVLX] |
| |
| let Constraints = "$src1 = $dst" in { |
| defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop, |
| VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", |
| SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; |
| defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop, |
| VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", |
| SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; |
| defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop, |
| VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", |
| SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; |
| defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop, |
| VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", |
| SchedWriteFShuffle.XMM, SSEPackedDouble>, PD; |
| } // Constraints = "$src1 = $dst" |
| |
| let Predicates = [HasAVX1Only] in { |
| def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))), |
| (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; |
| def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), |
| (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; |
| def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))), |
| (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; |
| def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), |
| (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; |
| |
| def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))), |
| (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; |
| def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), |
| (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; |
| def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))), |
| (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; |
| def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), |
| (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; |
| } |
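| // These integer-typed patterns exist because AVX1 has no 256-bit integer |
| // unpacks (vpunpck* on ymm requires AVX2), so under HasAVX1Only the FP-domain |
| // unpack instructions are reused for v8i32/v4i64. |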
| |
| let Predicates = [UseSSE2] in { |
| // Use MOVHPD if the load isn't aligned enough for UNPCKLPD. |
| def : Pat<(v2f64 (X86Unpckl VR128:$src1, |
| (v2f64 (simple_load addr:$src2)))), |
| (MOVHPDrm VR128:$src1, addr:$src2)>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SSE 1 & 2 - Extract Floating-Point Sign mask |
| //===----------------------------------------------------------------------===// |
| |
| /// sse12_extr_sign_mask - sse 1 & 2 packed fp sign mask extraction |
| multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt, |
| string asm, Domain d> { |
| def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src), |
| !strconcat(asm, "\t{$src, $dst|$dst, $src}"), |
| [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>, |
| Sched<[WriteFMOVMSK]>; |
| } |
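| // For reference: movmskps/movmskpd copy the sign bit of each packed element |
| // into the low bits of the GPR destination and zero the rest, so the 128-bit |
| // ps form produces a 4-bit mask and the pd form a 2-bit mask. |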
| |
| let Predicates = [HasAVX] in { |
| defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", |
| SSEPackedSingle>, PS, VEX, VEX_WIG; |
| defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", |
| SSEPackedDouble>, PD, VEX, VEX_WIG; |
| defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps", |
| SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG; |
| defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd", |
| SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG; |
| |
|   // Also support integer VTs to avoid an int->fp bitcast in the DAG. |
| def : Pat<(X86movmsk (v4i32 VR128:$src)), |
| (VMOVMSKPSrr VR128:$src)>; |
| def : Pat<(X86movmsk (v2i64 VR128:$src)), |
| (VMOVMSKPDrr VR128:$src)>; |
| def : Pat<(X86movmsk (v8i32 VR256:$src)), |
| (VMOVMSKPSYrr VR256:$src)>; |
| def : Pat<(X86movmsk (v4i64 VR256:$src)), |
| (VMOVMSKPDYrr VR256:$src)>; |
| } |
| |
| defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", |
| SSEPackedSingle>, PS; |
| defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", |
| SSEPackedDouble>, PD; |
| |
| let Predicates = [UseSSE2] in { |
|   // Also support integer VTs to avoid an int->fp bitcast in the DAG. |
| def : Pat<(X86movmsk (v4i32 VR128:$src)), |
| (MOVMSKPSrr VR128:$src)>; |
| def : Pat<(X86movmsk (v2i64 VR128:$src)), |
| (MOVMSKPDrr VR128:$src)>; |
| } |
| |
| //===---------------------------------------------------------------------===// |
| // SSE2 - Packed Integer Logical Instructions |
| //===---------------------------------------------------------------------===// |
| |
| let ExeDomain = SSEPackedInt in { // SSE integer instructions |
| |
| /// PDI_binop_rm - Simple SSE2 binary operator. |
| multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, |
| ValueType OpVT, RegisterClass RC, PatFrag memop_frag, |
| X86MemOperand x86memop, X86FoldableSchedWrite sched, |
| bit IsCommutable, bit Is2Addr> { |
| let isCommutable = IsCommutable in |
| def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), |
| (ins RC:$src1, RC:$src2), |
| !if(Is2Addr, |
| !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, |
| Sched<[sched]>; |
| def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), |
| (ins RC:$src1, x86memop:$src2), |
| !if(Is2Addr, |
| !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), |
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), |
| [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } |
| } // ExeDomain = SSEPackedInt |
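| // PDI_binop_rm is instantiated below (via PDI_binop_all) for the packed |
| // integer logical ops such as pand, pandn, por and pxor, giving each the |
| // usual rr and load-folding rm forms. |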
| |
| multiclass PDI_binop_all<bits |