| //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file describes the X86 AVX512 instruction set, defining the |
| // instructions, and properties of the instructions which are needed for code |
| // generation, machine code emission, and analysis. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // Group template arguments that can be derived from the vector type (NumElts x |
| // EltVT). These are things like the register class for the writemask, the memory operands, the load fragments, etc. |
| // The idea is to pass one of these as the template argument rather than the |
| // individual arguments. |
| // The template is also used for scalar types, in this case numelts is 1 and VT is derived as described at VTName. |
| class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, |
| string suffix = ""> { |
| RegisterClass RC = rc; |
| ValueType EltVT = eltvt; |
| int NumElts = numelts; |
| |
| // Corresponding mask register class, looked up by name as VK<NumElts>. |
| RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts); |
| |
| // Corresponding mask register pair class, VK<NumElts>Pair; left unset (?) when NumElts > 16. |
| RegisterOperand KRPC = !if (!gt(NumElts, 16), ?, |
| !cast<RegisterOperand>("VK" # NumElts # "Pair")); |
| |
| // Corresponding write-mask register class, VK<NumElts>WM. |
| RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM"); |
| |
| // The mask VT, v<NumElts>i1. |
| ValueType KVT = !cast<ValueType>("v" # NumElts # "i1"); |
| |
| // Suffix used in the instruction mnemonic. |
| string Suffix = suffix; |
| |
| // VTName is a string name for vector VT. For vector types it will be |
| // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32. |
| // It is a little bit more complex for scalar types, where NumElts = 1. |
| // In this case the element count is picked from EltVT.Size: 8 for 16 bits, 4 for 32 bits, 2 for 64 bits (e.g. v8f16, v4f32 or v2f64). |
| string VTName = "v" # !if (!eq (NumElts, 1), |
| !if (!eq (EltVT.Size, 16), 8, |
| !if (!eq (EltVT.Size, 32), 4, |
| !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT; |
| |
| // The vector VT named by VTName. |
| ValueType VT = !cast<ValueType>(VTName); |
| |
| string EltTypeName = !cast<string>(EltVT); |
| // Size of the element type in bits, e.g. 32 for v16i32. EltSizeName is the string form (EltTypeName with "i" and "f" removed), EltSize the integer form. |
| string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName)); |
| int EltSize = EltVT.Size; |
| |
| // "i" for integer types and "f" for floating-point types (EltTypeName with EltSizeName removed). |
| string TypeVariantName = !subst(EltSizeName, "", EltTypeName); |
| |
| // Size of the vector VT in bits, e.g. 512 for v16i32. Note this is taken from VT, not from RC. |
| int Size = VT.Size; |
| |
| // The corresponding memory operands: MemOp covers the whole vector (e.g. i512mem for a 512-bit integer VT), ScalarMemOp a single element (EltVT # "mem"). |
| X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem"); |
| X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem"); |
| // FP scalar memory operand for intrinsics - shmem/ssmem/sdmem; left unset (?) for any other element type. |
| Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"), |
| !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"), |
| !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))); |
| |
| // Load patterns: full-vector load, aligned full-vector load, scalar element load, broadcast load and the sse_load_* fragments for f16/f32/f64. |
| PatFrag LdFrag = !cast<PatFrag>("load" # VTName); |
| |
| PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName); |
| |
| PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT); |
| PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName); |
| |
| PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"), |
| !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"), |
| !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))); |
| |
| // The string to specify embedded broadcast in assembly, e.g. {1to16}. |
| string BroadcastStr = "{1to" # NumElts # "}"; |
| |
| // 8-bit compressed displacement tuple/subvector format. This is only |
| // defined for NumElts <= 8: CD8VT<NumElts> is looked up only when NumElts >> 4 is 0, otherwise the field is left unset (?). |
| CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0), |
| !cast<CD8VForm>("CD8VT" # NumElts), ?); |
| |
| SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm, |
| !if (!eq (Size, 256), sub_ymm, ?)); |
| |
| Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle, |
| !if (!eq (EltTypeName, "f64"), SSEPackedDouble, |
| !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME? f16 currently shares the f32 domain. |
| SSEPackedInt))); |
| |
| RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, |
| !if (!eq (EltTypeName, "f16"), FR16X, |
| FR64X)); |
| |
| dag ImmAllZerosV = (VT immAllZerosV); |
| |
| string ZSuffix = !if (!eq (Size, 128), "Z128", |
| !if (!eq (Size, 256), "Z256", "Z")); |
| } |
| |
| def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; |
| def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">; |
| def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">; |
| def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">; |
| def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">; |
| def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">; |
| def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">; |
| |
| // "x" in the info names below means RC is an "X" register class: VR256X for the 256-bit infos (e.g. v32i8x_info), VR128X for the 128-bit ones. |
| def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">; |
| def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">; |
| def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">; |
| def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">; |
| def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">; |
| def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">; |
| def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">; |
| |
| def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">; |
| def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">; |
| def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">; |
| def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">; |
| def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">; |
| def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">; |
| def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">; |
| |
| // We map scalar types to the smallest (128-bit) vector type |
| // with the appropriate element type. This allows the same masking logic to be used. NumElts is 1 for all of these. |
| def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">; |
| def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">; |
| def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">; |
| def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">; |
| def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">; |
| |
| // Bundles three X86VectorVTInfo records, exposed as info512, info256 and |
| // info128, so that they can be passed around as a single template argument. |
| class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256, |
| X86VectorVTInfo i128> { |
| X86VectorVTInfo info512 = i512; |
| X86VectorVTInfo info256 = i256; |
| X86VectorVTInfo info128 = i128; |
| } |
| |
| // One AVX512VLVectorVTInfo per element type; the arguments are the 512-bit, |
| // 256-bit and 128-bit infos of that element type, in that order. |
| def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, |
| v16i8x_info>; |
| def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, |
| v8i16x_info>; |
| def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, |
| v4i32x_info>; |
| def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, |
| v2i64x_info>; |
| def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info, |
| v8f16x_info>; |
| def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, |
| v4f32x_info>; |
| def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, |
| v2f64x_info>; |
| |
| // Type info for a mask type: its mask register class (KRC), its write-mask |
| // register class (KRCWM) and its value type (KVT). |
| class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm, |
| ValueType _vt> { |
| RegisterClass KRC = _krc; |
| RegisterClass KRCWM = _krcwm; |
| ValueType KVT = _vt; |
| } |
| |
| // One X86KVectorVTInfo per mask width: VK<N>, VK<N>WM and v<N>i1. |
| def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>; |
| def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>; |
| def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>; |
| def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>; |
| def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>; |
| def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>; |
| def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>; |
| |
| // Used for matching masked operations: a vselect that is only accepted when isProfitableToFormMaskedOp(N) returns true. |
| // This is intended to ensure the operation part only has a single use. |
| def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2), |
| (vselect node:$mask, node:$src1, node:$src2), [{ |
| return isProfitableToFormMaskedOp(N); |
| }]>; |
| |
| // X86selects counterpart of vselect_mask: matches X86selects only when |
| // isProfitableToFormMaskedOp(N) returns true. |
| def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2), |
| (X86selects node:$mask, node:$src1, node:$src2), [{ |
| return isProfitableToFormMaskedOp(N); |
| }]>; |
| |
| // This multiclass generates the masking variants from the non-masking |
| // variant. It only provides the assembly pieces for the masking variants. |
| // It assumes custom ISel patterns for masking which can be provided as |
| // template arguments. Three records are defined: NAME (unmasked), NAME#k (merge-masking, EVEX_K) and NAME#kz (zero-masking, EVEX_KZ). |
| multiclass AVX512_maskable_custom<bits<8> O, Format F, |
| dag Outs, |
| dag Ins, dag MaskingIns, dag ZeroMaskingIns, |
| string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| list<dag> Pattern, |
| list<dag> MaskingPattern, |
| list<dag> ZeroMaskingPattern, |
| string MaskingConstraint = "", |
| bit IsCommutable = 0, |
| bit IsKCommutable = 0, |
| bit IsKZCommutable = IsCommutable, |
| string ClobberConstraint = ""> { |
| let isCommutable = IsCommutable, Constraints = ClobberConstraint in |
| def NAME: AVX512<O, F, Outs, Ins, |
| OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# |
| "$dst, "#IntelSrcAsm#"}", |
| Pattern>; |
| |
| // Prefer over VMOV*rrk Pat<> |
| let isCommutable = IsKCommutable in |
| def NAME#k: AVX512<O, F, Outs, MaskingIns, |
| OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# |
| "$dst {${mask}}, "#IntelSrcAsm#"}", |
| MaskingPattern>, |
| EVEX_K { |
| // In case of the 3src subclass this is overridden with a let. Otherwise the clobber and masking constraints are joined with ", ", skipping whichever one is empty. |
| string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint, |
| !if(!eq(MaskingConstraint, ""), ClobberConstraint, |
| !strconcat(ClobberConstraint, ", ", MaskingConstraint))); |
| } |
| |
| // Zero mask does not add any restrictions to commute operands transformation. |
| // So, it is Ok to use IsCommutable instead of IsKCommutable; this is why IsKZCommutable defaults to IsCommutable. |
| let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<> |
| Constraints = ClobberConstraint in |
| def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns, |
| OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"# |
| "$dst {${mask}} {z}, "#IntelSrcAsm#"}", |
| ZeroMaskingPattern>, |
| EVEX_KZ; |
| } |
| |
| |
| // Common base class of AVX512_maskable and AVX512_maskable_3src. Wraps RHS and MaskingRHS in (set _.RC:$dst, ...) and builds the zero-masking pattern as Select(mask, RHS, all-zeros vector). |
| multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, |
| dag Ins, dag MaskingIns, dag ZeroMaskingIns, |
| string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, dag MaskingRHS, |
| SDPatternOperator Select = vselect_mask, |
| string MaskingConstraint = "", |
| bit IsCommutable = 0, |
| bit IsKCommutable = 0, |
| bit IsKZCommutable = IsCommutable, |
| string ClobberConstraint = ""> : |
| AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr, |
| AttSrcAsm, IntelSrcAsm, |
| [(set _.RC:$dst, RHS)], |
| [(set _.RC:$dst, MaskingRHS)], |
| [(set _.RC:$dst, |
| (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))], |
| MaskingConstraint, IsCommutable, |
| IsKCommutable, IsKZCommutable, ClobberConstraint>; |
| |
| // This multiclass generates the unconditional/non-masking, the masking and |
| // the zero-masking variant of the vector instruction. In the masking case, the |
| // preserved vector elements come from a new dummy input operand ($src0) tied to $dst. |
| // This version uses a separate dag for non-masking (RHS) and masking (MaskRHS); both masked patterns always use vselect_mask. |
| multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag Ins, string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, dag MaskRHS, |
| string ClobberConstraint = "", |
| bit IsCommutable = 0, bit IsKCommutable = 0, |
| bit IsKZCommutable = IsCommutable> : |
| AVX512_maskable_custom<O, F, Outs, Ins, |
| !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), |
| !con((ins _.KRCWM:$mask), Ins), |
| OpcodeStr, AttSrcAsm, IntelSrcAsm, |
| [(set _.RC:$dst, RHS)], |
| [(set _.RC:$dst, |
| (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))], |
| [(set _.RC:$dst, |
| (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))], |
| "$src0 = $dst", IsCommutable, IsKCommutable, |
| IsKZCommutable, ClobberConstraint>; |
| |
| // This multiclass generates the unconditional/non-masking, the masking and |
| // the zero-masking variant of the vector instruction. In the masking case, the |
| // preserved vector elements come from a new dummy input operand ($src0) tied to $dst. The same RHS dag is used for all three patterns. |
| multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag Ins, string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, |
| bit IsCommutable = 0, bit IsKCommutable = 0, |
| bit IsKZCommutable = IsCommutable, |
| SDPatternOperator Select = vselect_mask, |
| string ClobberConstraint = ""> : |
| AVX512_maskable_common<O, F, _, Outs, Ins, |
| !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), |
| !con((ins _.KRCWM:$mask), Ins), |
| OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, |
| (Select _.KRCWM:$mask, RHS, _.RC:$src0), |
| Select, "$src0 = $dst", IsCommutable, IsKCommutable, |
| IsKZCommutable, ClobberConstraint>; |
| |
| // This multiclass generates the unconditional/non-masking, the masking and |
| // the zero-masking variant of the scalar instruction. It is AVX512_maskable with X86selects_mask as the select operator and all three commutable flags set to 0. |
| multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag Ins, string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS> : |
| AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm, |
| RHS, 0, 0, 0, X86selects_mask>; |
| |
| // Similar to AVX512_maskable but in this case one of the source operands |
| // ($src1) is already tied to $dst so we just use that for the preserved |
| // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude |
| // $src1. When MaskOnly is set, the unmasked RHS is replaced by null_frag. An empty masking constraint is passed down. |
| multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag NonTiedIns, string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, |
| bit IsCommutable = 0, |
| bit IsKCommutable = 0, |
| SDPatternOperator Select = vselect_mask, |
| bit MaskOnly = 0> : |
| AVX512_maskable_common<O, F, _, Outs, |
| !con((ins _.RC:$src1), NonTiedIns), |
| !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), |
| !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), |
| OpcodeStr, AttSrcAsm, IntelSrcAsm, |
| !if(MaskOnly, (null_frag), RHS), |
| (Select _.KRCWM:$mask, RHS, _.RC:$src1), |
| Select, "", IsCommutable, IsKCommutable>; |
| |
| // Similar to AVX512_maskable_3src but in this case the input VT for the tied |
| // operand differs from the output VT. This requires a bitconvert on |
| // the preserved vector going into the vselect. The operands and the mask use InVT; the result patterns use OutVT. |
| // NOTE: The unmasked pattern is disabled (null_frag is passed as RHS). |
| multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT, |
| X86VectorVTInfo InVT, |
| dag Outs, dag NonTiedIns, string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, bit IsCommutable = 0> : |
| AVX512_maskable_common<O, F, OutVT, Outs, |
| !con((ins InVT.RC:$src1), NonTiedIns), |
| !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), |
| !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), |
| OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag), |
| (vselect_mask InVT.KRCWM:$mask, RHS, |
| (bitconvert InVT.RC:$src1)), |
| vselect_mask, "", IsCommutable>; |
| |
| // Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged |
| // and selects with X86selects_mask. |
| multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag NonTiedIns, string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, |
| bit IsCommutable = 0, |
| bit IsKCommutable = 0, |
| bit MaskOnly = 0> : |
| AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm, |
| IntelSrcAsm, RHS, IsCommutable, IsKCommutable, |
| X86selects_mask, MaskOnly>; |
| |
| // Defines the unmasked, masking and zero-masking records with an ISel pattern |
| // on the unmasked form only; the masked forms get empty pattern lists. The |
| // masking form takes $src0 tied to $dst. |
| multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag Ins, |
| string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| list<dag> Pattern> : |
| AVX512_maskable_custom<O, F, Outs, Ins, |
| !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), |
| !con((ins _.KRCWM:$mask), Ins), |
| OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], |
| "$src0 = $dst">; |
| |
| // Three-source counterpart of AVX512_maskable_in_asm: $src1 is prepended to |
| // NonTiedIns in all three forms, the masked forms get empty pattern lists and |
| // no masking constraint is added here. |
| multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag NonTiedIns, |
| string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| list<dag> Pattern> : |
| AVX512_maskable_custom<O, F, Outs, |
| !con((ins _.RC:$src1), NonTiedIns), |
| !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), |
| !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), |
| OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], |
| "">; |
| |
| // Instruction with mask that puts result in mask register, |
| // like "compare" and "vptest". Only NAME and NAME#k (EVEX_K) are defined; there is no zero-masking form, and both share IsCommutable. |
| multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F, |
| dag Outs, |
| dag Ins, dag MaskingIns, |
| string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| list<dag> Pattern, |
| list<dag> MaskingPattern, |
| bit IsCommutable = 0> { |
| let isCommutable = IsCommutable in { |
| def NAME: AVX512<O, F, Outs, Ins, |
| OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# |
| "$dst, "#IntelSrcAsm#"}", |
| Pattern>; |
| |
| def NAME#k: AVX512<O, F, Outs, MaskingIns, |
| OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# |
| "$dst {${mask}}, "#IntelSrcAsm#"}", |
| MaskingPattern>, EVEX_K; |
| } |
| } |
| |
| // Wraps RHS and MaskingRHS in (set _.KRC:$dst, ...) patterns and forwards |
| // them to AVX512_maskable_custom_cmp. |
| multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, |
| dag Ins, dag MaskingIns, |
| string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, dag MaskingRHS, |
| bit IsCommutable = 0> : |
| AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr, |
| AttSrcAsm, IntelSrcAsm, |
| [(set _.KRC:$dst, RHS)], |
| [(set _.KRC:$dst, MaskingRHS)], IsCommutable>; |
| |
| // Mask-producing instruction with an optional input mask: the masked form |
| // prepends $mask to Ins and matches (and $mask, RHS_su). RHS is used for the |
| // unmasked pattern only. |
| multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag Ins, string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, dag RHS_su, bit IsCommutable = 0> : |
| AVX512_maskable_common_cmp<O, F, _, Outs, Ins, |
| !con((ins _.KRCWM:$mask), Ins), |
| OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, |
| (and _.KRCWM:$mask, RHS_su), IsCommutable>; |
| |
| // Used by conversion instructions. The caller supplies all three ins dags and all three RHS dags; the masking form is constrained with "$src0 = $dst". |
| multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, |
| dag Ins, dag MaskingIns, dag ZeroMaskingIns, |
| string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> : |
| AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr, |
| AttSrcAsm, IntelSrcAsm, |
| [(set _.RC:$dst, RHS)], |
| [(set _.RC:$dst, MaskingRHS)], |
| [(set _.RC:$dst, ZeroMaskingRHS)], |
| "$src0 = $dst">; |
| |
| // Three-source variant with separate dags for the unmasked (RHS) and masked |
| // (MaskingRHS) patterns. $src1 is prepended to NonTiedIns in all three forms; |
| // the masking pattern preserves $src1 and the zero-masking pattern selects |
| // against the all-zeros vector. No masking constraint is added here. |
| multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _, |
| dag Outs, dag NonTiedIns, string OpcodeStr, |
| string AttSrcAsm, string IntelSrcAsm, |
| dag RHS, dag MaskingRHS, bit IsCommutable, |
| bit IsKCommutable> : |
| AVX512_maskable_custom<O, F, Outs, |
| !con((ins _.RC:$src1), NonTiedIns), |
| !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), |
| !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), |
| OpcodeStr, AttSrcAsm, IntelSrcAsm, |
| [(set _.RC:$dst, RHS)], |
| [(set _.RC:$dst, |
| (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))], |
| [(set _.RC:$dst, |
| (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))], |
| "", IsCommutable, IsKCommutable>; |
| |
| // Alias instruction that maps zero vector to pxor / xorp* for AVX-512. |
| // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then |
| // swizzled by ExecutionDomainFix to pxor. |
| // We set canFoldAsLoad because this can be converted to a constant-pool |
| // load of an all-zeros value if folding it would be beneficial. AVX512_512_SETALLONES is the all-ones pseudo defined under the same flags; both match the v16i32 form. |
| let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |
| isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in { |
| def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", |
| [(set VR512:$dst, (v16i32 immAllZerosV))]>; |
| def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", |
| [(set VR512:$dst, (v16i32 immAllOnesV))]>; |
| } |
| |
| // Select AVX512_512_SET0 for an all-zeros vector of each remaining non-FP16 |
| // 512-bit type. The patterns are emitted in the order of the list. |
| let Predicates = [HasAVX512] in { |
|   foreach ZeroVT = [v64i8, v32i16, v8i64, v16f32, v8f64] in |
|     def : Pat<(ZeroVT immAllZerosV), (AVX512_512_SET0)>; |
| } |
| |
| // Alias instructions that allow VPTERNLOG to be used with a mask to create |
| // a mix of all ones and all zeros elements. This is done this way to force |
| // the same register to be used as input for all three sources. The _32 form selects v16i32 elements under a VK16WM mask, the _64 form v8i64 elements under a VK8WM mask. |
| let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in { |
| def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst), |
| (ins VK16WM:$mask), "", |
| [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask), |
| (v16i32 immAllOnesV), |
| (v16i32 immAllZerosV)))]>; |
| def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst), |
| (ins VK8WM:$mask), "", |
| [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask), |
| (v8i64 immAllOnesV), |
| (v8i64 immAllZerosV)))]>; |
| } |
| |
| // 128-bit and 256-bit all-zeros pseudos on the VR128X / VR256X register |
| // classes, matching the v4i32 and v8i32 zero vectors respectively. |
| let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |
| isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in { |
| def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", |
| [(set VR128X:$dst, (v4i32 immAllZerosV))]>; |
| def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", |
| [(set VR256X:$dst, (v8i32 immAllZerosV))]>; |
| } |
| |
| // Select the 128-bit / 256-bit SET0 pseudo for an all-zeros vector of each |
| // remaining non-FP16 type of that width. All 128-bit patterns are emitted |
| // first, then all 256-bit ones, each in list order. |
| let Predicates = [HasAVX512] in { |
|   foreach ZeroVT = [v8i16, v16i8, v2i64, v4f32, v2f64] in |
|     def : Pat<(ZeroVT immAllZerosV), (AVX512_128_SET0)>; |
|   foreach ZeroVT = [v32i8, v16i16, v4i64, v8f32, v4f64] in |
|     def : Pat<(ZeroVT immAllZerosV), (AVX512_256_SET0)>; |
| } |
| |
| // With HasFP16, all-zeros f16 vectors select the SET0 pseudo of the matching |
| // width (128, 256 or 512 bits). |
| let Predicates = [HasFP16] in { |
| def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>; |
| def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>; |
| def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>; |
| } |
| |
| // Alias instructions that map fld0 to xorps for sse or vxorps for avx. |
| // This is expanded by ExpandPostRAPseudos. One pseudo per zero immediate: fp32imm0 on FR32X, fp64imm0 on FR64X and fp128imm0 on VR128X. |
| let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |
| isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in { |
| def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "", |
| [(set FR32X:$dst, fp32imm0)]>; |
| def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "", |
| [(set FR64X:$dst, fp64imm0)]>; |
| def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", |
| [(set VR128X:$dst, fp128imm0)]>; |
| } |
| |
| // Scalar f16 zero pseudo (fp16imm0 on FR16X); only available with HasFP16. |
| let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |
| isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in { |
| def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "", |
| [(set FR16X:$dst, fp16imm0)]>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // AVX-512 - VECTOR INSERT |
| // |
| |
| // Supports two different pattern operators for mask and unmasked ops. Allows |
| // null_frag to be passed for one. Defines a register form (rr) and a load-folding form (rm), each with its masked variants via AVX512_maskable_split; the mnemonic is "vinsert" # element type # "x" # From.NumElts. |
| multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, |
| X86VectorVTInfo To, |
| SDPatternOperator vinsert_insert, |
| SDPatternOperator vinsert_for_mask, |
| X86FoldableSchedWrite sched> { |
| let hasSideEffects = 0, ExeDomain = To.ExeDomain in { |
| defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst), |
| (ins To.RC:$src1, From.RC:$src2, u8imm:$src3), |
| "vinsert" # From.EltTypeName # "x" # From.NumElts, |
| "$src3, $src2, $src1", "$src1, $src2, $src3", |
| (vinsert_insert:$src3 (To.VT To.RC:$src1), |
| (From.VT From.RC:$src2), |
| (iPTR imm)), |
| (vinsert_for_mask:$src3 (To.VT To.RC:$src1), |
| (From.VT From.RC:$src2), |
| (iPTR imm))>, |
| AVX512AIi8Base, EVEX_4V, Sched<[sched]>; |
| let mayLoad = 1 in |
| defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst), |
| (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3), |
| "vinsert" # From.EltTypeName # "x" # From.NumElts, |
| "$src3, $src2, $src1", "$src1, $src2, $src3", |
| (vinsert_insert:$src3 (To.VT To.RC:$src1), |
| (From.VT (From.LdFrag addr:$src2)), |
| (iPTR imm)), |
| (vinsert_for_mask:$src3 (To.VT To.RC:$src1), |
| (From.VT (From.LdFrag addr:$src2)), |
| (iPTR imm))>, AVX512AIi8Base, EVEX_4V, |
| EVEX_CD8<From.EltSize, From.CD8TupleForm>, |
| Sched<[sched.Folded, sched.ReadAfterFold]>; |
| } |
| } |
| |
| // Passes the same pattern operator for masked and unmasked ops, i.e. vinsert_for_size_split with vinsert_insert in both operator slots. |
| multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, |
| X86VectorVTInfo To, |
| SDPatternOperator vinsert_insert, |
| X86FoldableSchedWrite sched> : |
| vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>; |
| |
| // Unmasked selection patterns that map a subvector insert of From.VT into |
| // To.VT onto the already-defined instruction named InstrStr: one pattern for |
| // the register form (InstrStr#"rr") and one for the load-folding form |
| // (InstrStr#"rm"). The immediate operand is produced from the matched insert |
| // node ($ins) by INSERT_get_vinsert_imm. Both patterns are guarded by the |
| // predicate list p. |
| multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From, |
| X86VectorVTInfo To, PatFrag vinsert_insert, |
| SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> { |
| let Predicates = p in { |
| def : Pat<(vinsert_insert:$ins |
| (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)), |
| (To.VT (!cast<Instruction>(InstrStr#"rr") |
| To.RC:$src1, From.RC:$src2, |
| (INSERT_get_vinsert_imm To.RC:$ins)))>; |
| |
| def : Pat<(vinsert_insert:$ins |
| (To.VT To.RC:$src1), |
| (From.VT (From.LdFrag addr:$src2)), |
| (iPTR imm)), |
| (To.VT (!cast<Instruction>(InstrStr#"rm") |
| To.RC:$src1, addr:$src2, |
| (INSERT_get_vinsert_imm To.RC:$ins)))>; |
| } |
| } |
| |
| multiclass vinsert_for_type<ValueType EltVT32, int Opcode128, |
| ValueType EltVT64, int Opcode256, |
| X86FoldableSchedWrite sched> { |
| |
| let Predicates = [HasVLX] in |
| defm NAME # "32x4Z256" : vinsert_for_size<Opcode128, |
| X86VectorVTInfo< 4, EltVT32, VR128X>, |
| X86VectorVTInfo< 8, EltVT32, VR256X>, |
| vinsert128_insert, sched>, EVEX_V256; |
| |
| defm NAME # "32x4Z" : vinsert_for_size<Opcode128, |
| X86VectorVTInfo< 4, EltVT32, VR128X>, |
| X86VectorVTInfo<16, EltVT32, VR512>, |
| vinsert128_insert, sched>, EVEX_V512; |
| |
| defm NAME # "64x4Z" : vinsert_for_size<Opcode256, |
| X86VectorVTInfo< 4, EltVT64, VR256X>, |
| X86VectorVTInfo< 8, EltVT64, VR512>, |
| vinsert256_insert, sched>, VEX_W, EVEX_V512; |
| |
| // Even with DQI we'd like to only use these instructions for masking, so null_frag is passed as the unmasked pattern operator. |
| let Predicates = [HasVLX, HasDQI] in |
| defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128, |
| X86VectorVTInfo< 2, EltVT64, VR128X>, |
| X86VectorVTInfo< 4, EltVT64, VR256X>, |
| null_frag, vinsert128_insert, sched>, |
| VEX_W1X, EVEX_V256; |
| |
| // Even with DQI we'd like to only use these instructions for masking, so null_frag is passed as the unmasked pattern operator here as well. |
| let Predicates = [HasDQI] in { |
| defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128, |
| X86VectorVTInfo< 2, EltVT64, VR128X>, |
| X86VectorVTInfo< 8, EltVT64, VR512>, |
| null_frag, vinsert128_insert, sched>, |
| VEX_W, EVEX_V512; |
| |
| defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256, |
| X86VectorVTInfo< 8, EltVT32, VR256X>, |
| X86VectorVTInfo<16, EltVT32, VR512>, |
| null_frag, vinsert256_insert, sched>, |
| EVEX_V512; |
| } |
| } |
| |
| // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI? The FP family uses WriteFShuffle256 and the integer family WriteShuffle256. |
| defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>; |
| defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>; |
| |
| // Codegen pattern with the alternative types: inserts of 64-bit-element subvectors reuse the 32x4 instructions, and inserts of 32-bit-element 256-bit subvectors reuse the 64x4 instructions. |
| // Even with AVX512DQ we'll still use these for unmasked operations. |
| defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; |
| defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; |
| |
| defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; |
| defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; |
| |
| defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info, |
| vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; |
| defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info, |
| vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; |
| |
| // Codegen pattern with the alternative types insert VEC128 into VEC256 (i16, i8 and, with HasFP16, f16 elements). |
| defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; |
| defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; |
| defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>; |
| // Codegen pattern with the alternative types insert VEC128 into VEC512 (i16, i8 and, with HasFP16, f16 elements). |
| defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; |
| defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; |
| defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info, |
| vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>; |
| // Codegen pattern with the alternative types insert VEC256 into VEC512 (i16, i8 and, with HasFP16, f16 elements). |
| defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, |
| vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; |
| defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, |
| vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; |
| defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info, |
| vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>; |
| |
| |
| // Masked selection patterns for a subvector insert whose result is bitcast |
| // to a different vector type (Cast) before the masking select. The insert is |
| // done on From.VT / To.VT, the select and the mask use Cast, and the matched |
| // instruction is the masked form of InstrStr: |
| //   rrk  / rmk  - merge-masking, preserved elements come from $src0 |
| //   rrkz / rmkz - zero-masking, select against Cast.ImmAllZerosV |
| // The immediate operand is produced from the matched insert node ($ins) by |
| // INSERT_get_vinsert_imm. All four patterns are guarded by the predicate |
| // list p. |
| multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From, |
| X86VectorVTInfo To, X86VectorVTInfo Cast, |
| PatFrag vinsert_insert, |
| SDNodeXForm INSERT_get_vinsert_imm, |
| list<Predicate> p> { |
| let Predicates = p in { |
| def : Pat<(Cast.VT |
| (vselect_mask Cast.KRCWM:$mask, |
| (bitconvert |
| (vinsert_insert:$ins (To.VT To.RC:$src1), |
| (From.VT From.RC:$src2), |
| (iPTR imm))), |
| Cast.RC:$src0)), |
| (!cast<Instruction>(InstrStr#"rrk") |
| Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, |
| (INSERT_get_vinsert_imm To.RC:$ins))>; |
| // The load is matched directly through From.LdFrag, exactly as in the rmkz |
| // pattern below. From.LdFrag already produces From.VT, so wrapping it in a |
| // bitconvert to From.VT would require a same-type bitcast node, which is |
| // folded away before selection and would keep this pattern from matching. |
| def : Pat<(Cast.VT |
| (vselect_mask Cast.KRCWM:$mask, |
| (bitconvert |
| (vinsert_insert:$ins (To.VT To.RC:$src1), |
| (From.VT (From.LdFrag addr:$src2)), |
| (iPTR imm))), |
| Cast.RC:$src0)), |
| (!cast<Instruction>(InstrStr#"rmk") |
| Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, |
| (INSERT_get_vinsert_imm To.RC:$ins))>; |
| |
| def : Pat<(Cast.VT |
| (vselect_mask Cast.KRCWM:$mask, |
| (bitconvert |
| (vinsert_insert:$ins (To.VT To.RC:$src1), |
| (From.VT From.RC:$src2), |
| (iPTR imm))), |
| Cast.ImmAllZerosV)), |
| (!cast<Instruction>(InstrStr#"rrkz") |
| Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, |
| (INSERT_get_vinsert_imm To.RC:$ins))>; |
| def : Pat<(Cast.VT |
| (vselect_mask Cast.KRCWM:$mask, |
| (bitconvert |
| (vinsert_insert:$ins (To.VT To.RC:$src1), |
| (From.VT (From.LdFrag addr:$src2)), |
| (iPTR imm))), |
| Cast.ImmAllZerosV)), |
| (!cast<Instruction>(InstrStr#"rmkz") |
| Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, |
| (INSERT_get_vinsert_imm To.RC:$ins))>; |
| } |
| } |
| |
| // 256-bit FP casts: a v2f64 insert masked as v8f32 uses the 32x4 instruction, |
| // and a v4f32 insert masked as v4f64 uses the 64x2 instruction (needs HasDQI). |
| defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, |
| v8f32x_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasVLX]>; |
| defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info, |
| v4f64x_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; |
| |
// 256-bit integer instantiations (128-bit subvector insert). The instruction
// follows the Cast (mask) element type: i32 masks use VINSERTI32x4Z256 under
// HasVLX, i64 masks use VINSERTI64x2Z256 under HasDQI + HasVLX.
// The i64-masked entries select the integer (VINSERTI*) instruction rather
// than the floating-point VINSERTF64x2Z256, in line with the 512-bit integer
// instantiations of this multiclass, which use VINSERTI64x2Z.
| defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, |
| v8i32x_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasVLX]>; |
| defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, |
| v8i32x_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasVLX]>; |
| defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, |
| v8i32x_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasVLX]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info, |
| v4i64x_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info, |
| v4i64x_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info, |
| v4i64x_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; |
| |
// 512-bit instantiations for a 128-bit subvector insert. The instruction
// follows the Cast (mask) element type: 32-bit element masks use the 32x4
// forms under HasAVX512, 64-bit element masks use the 64x2 forms under HasDQI.
| defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info, |
| v16f32_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasAVX512]>; |
| defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info, |
| v8f64_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasDQI]>; |
| |
// Integer variants of the same 128-bit-into-512-bit masked inserts.
| defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info, |
| v16i32_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasAVX512]>; |
| defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info, |
| v16i32_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasAVX512]>; |
| defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info, |
| v16i32_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasAVX512]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info, |
| v8i64_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasDQI]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info, |
| v8i64_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasDQI]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info, |
| v8i64_info, vinsert128_insert, |
| INSERT_get_vinsert128_imm, [HasDQI]>; |
| |
// 512-bit instantiations for a 256-bit subvector insert. The instruction
// follows the Cast (mask) element type: 32-bit element masks use the 32x8
// forms under HasDQI, 64-bit element masks use the 64x4 forms under HasAVX512.
| defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info, |
| v16f32_info, vinsert256_insert, |
| INSERT_get_vinsert256_imm, [HasDQI]>; |
| defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info, |
| v8f64_info, vinsert256_insert, |
| INSERT_get_vinsert256_imm, [HasAVX512]>; |
| |
// Integer variants of the same 256-bit-into-512-bit masked inserts.
| defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info, |
| v16i32_info, vinsert256_insert, |
| INSERT_get_vinsert256_imm, [HasDQI]>; |
| defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info, |
| v16i32_info, vinsert256_insert, |
| INSERT_get_vinsert256_imm, [HasDQI]>; |
| defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info, |
| v16i32_info, vinsert256_insert, |
| INSERT_get_vinsert256_imm, [HasDQI]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info, |
| v8i64_info, vinsert256_insert, |
| INSERT_get_vinsert256_imm, [HasAVX512]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info, |
| v8i64_info, vinsert256_insert, |
| INSERT_get_vinsert256_imm, [HasAVX512]>; |
| defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info, |
| v8i64_info, vinsert256_insert, |
| INSERT_get_vinsert256_imm, [HasAVX512]>; |
| |
| // vinsertps - insert f32 to XMM |
// Both forms use opcode 0x21 on VR128X operands with an 8-bit immediate
// ($src3) and select X86insertps; they share the SSEPackedSingle domain.
| let ExeDomain = SSEPackedSingle in { |
// isCommutable is applied to the register-register form only (the let has no
// braces, so it covers just the next def).
| let isCommutable = 1 in |
| def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), |
| (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), |
| "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", |
| [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, |
| EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; |
// Memory form: the second source is an f32 load wrapped in scalar_to_vector
// to form the v4f32 operand of X86insertps.
| def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), |
| (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), |
| "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", |
| [(set VR128X:$dst, (X86insertps VR128X:$src1, |
| (v4f32 (scalar_to_vector (loadf32 addr:$src2))), |
| timm:$src3))]>, |
| EVEX_4V, EVEX_CD8<32, CD8VT1>, |
| Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // AVX-512 VECTOR EXTRACT |
| //--- |
| |
| // Supports two different pattern operators for mask and unmasked ops. Allows |
| // null_frag to be passed for one. |
//   Opcode            - opcode shared by the rr, mr and mrk forms.
//   From              - type info of the source vector ($src1).
//   To                - type info of the extracted subvector; also supplies
//                       the mnemonic pieces (EltTypeName, NumElts), the
//                       memory operand and the write-mask class.
//   vextract_extract  - operator used for the unmasked rr pattern and for the
//                       mr store pattern.
//   vextract_for_mask - operator used for the masked rr patterns.
//   SchedRR / SchedMR - scheduling classes for the register / memory forms.
| multiclass vextract_for_size_split<int Opcode, |
| X86VectorVTInfo From, X86VectorVTInfo To, |
| SDPatternOperator vextract_extract, |
| SDPatternOperator vextract_for_mask, |
| SchedWrite SchedRR, SchedWrite SchedMR> { |
| |
| let hasSideEffects = 0, ExeDomain = To.ExeDomain in { |
// Register destination; the mnemonic is "vextract" + element type name +
// "x" + element count of To.
| defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst), |
| (ins From.RC:$src1, u8imm:$idx), |
| "vextract" # To.EltTypeName # "x" # To.NumElts, |
| "$idx, $src1", "$src1, $idx", |
| (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)), |
| (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>, |
| AVX512AIi8Base, EVEX, Sched<[SchedRR]>; |
| |
// Unmasked store of the extracted subvector to memory.
| def mr : AVX512AIi8<Opcode, MRMDestMem, (outs), |
| (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx), |
| "vextract" # To.EltTypeName # "x" # To.NumElts # |
| "\t{$idx, $src1, $dst|$dst, $src1, $idx}", |
| [(store (To.VT (vextract_extract:$idx |
| (From.VT From.RC:$src1), (iPTR imm))), |
| addr:$dst)]>, EVEX, |
| Sched<[SchedMR]>; |
| |
// Masked store form: declared without a selection pattern (empty list), so
// mayStore is set explicitly; it is also marked NotMemoryFoldable.
| let mayStore = 1, hasSideEffects = 0 in |
| def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs), |
| (ins To.MemOp:$dst, To.KRCWM:$mask, |
| From.RC:$src1, u8imm:$idx), |
| "vextract" # To.EltTypeName # "x" # To.NumElts # |
| "\t{$idx, $src1, $dst {${mask}}|" |
| "$dst {${mask}}, $src1, $idx}", []>, |
| EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable; |
| } |
| } |
| |
// Convenience form of vextract_for_size_split for the case where a single
// operator serves both roles; all other arguments are forwarded unchanged.
| // Forwards vextract_extract as both the unmasked and the masked operator. |
| multiclass vextract_for_size<int Opcode, |
| X86VectorVTInfo From, X86VectorVTInfo To, |
| SDPatternOperator vextract_extract, |
| SchedWrite SchedRR, SchedWrite SchedMR> |
| : vextract_for_size_split<Opcode, From, To, |
| vextract_extract, vextract_extract, |
| SchedRR, SchedMR>; |
| |
| // Codegen pattern for the alternative types |
//   InstrStr                 - instruction name prefix; "rr" / "mr" appended.
//   From                     - type info of the source vector ($src1).
//   To                       - type info of the extracted subvector.
//   vextract_extract         - PatFrag matching the extract, bound to $ext.
//   EXTRACT_get_vextract_imm - SDNodeXForm applied to $ext for the immediate.
//   p                        - predicates applied to both patterns.
| multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From, |
| X86VectorVTInfo To, PatFrag vextract_extract, |
| SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> { |
| let Predicates = p in { |
// Extract to a register -> "rr".
| def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), |
| (To.VT (!cast<Instruction>(InstrStr#"rr") |
| From.RC:$src1, |
| (EXTRACT_get_vextract_imm To.RC:$ext)))>; |
// Extract whose result is stored directly -> "mr".
| def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1), |
| (iPTR imm))), addr:$dst), |
| (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1, |
| (EXTRACT_get_vextract_imm To.RC:$ext))>; |
| } |
| } |
| |
// Defines the family of subvector-extract instructions for one element kind.
//   EltVT32 / EltVT64     - the 32-bit and 64-bit element value types.
//   Opcode128 / Opcode256 - opcodes used with the 128-bit and 256-bit
//                           extract fragments respectively.
//   SchedRR / SchedMR     - scheduling classes forwarded to every variant.
| multiclass vextract_for_type<ValueType EltVT32, int Opcode128, |
| ValueType EltVT64, int Opcode256, |
| SchedWrite SchedRR, SchedWrite SchedMR> { |
// Variants that use one operator for both masked and unmasked selection.
| let Predicates = [HasAVX512] in { |
| defm NAME # "32x4Z" : vextract_for_size<Opcode128, |
| X86VectorVTInfo<16, EltVT32, VR512>, |
| X86VectorVTInfo< 4, EltVT32, VR128X>, |
| vextract128_extract, SchedRR, SchedMR>, |
| EVEX_V512, EVEX_CD8<32, CD8VT4>; |
| defm NAME # "64x4Z" : vextract_for_size<Opcode256, |
| X86VectorVTInfo< 8, EltVT64, VR512>, |
| X86VectorVTInfo< 4, EltVT64, VR256X>, |
| vextract256_extract, SchedRR, SchedMR>, |
| VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; |
| } |
| let Predicates = [HasVLX] in |
| defm NAME # "32x4Z256" : vextract_for_size<Opcode128, |
| X86VectorVTInfo< 8, EltVT32, VR256X>, |
| X86VectorVTInfo< 4, EltVT32, VR128X>, |
| vextract128_extract, SchedRR, SchedMR>, |
| EVEX_V256, EVEX_CD8<32, CD8VT4>; |
| |
// The remaining variants pass null_frag as the unmasked operator, so only
// their masked patterns are generated.
| // Even with DQI we'd like to only use these instructions for masking. |
| let Predicates = [HasVLX, HasDQI] in |
| defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128, |
| X86VectorVTInfo< 4, EltVT64, VR256X>, |
| X86VectorVTInfo< 2, EltVT64, VR128X>, |
| null_frag, vextract128_extract, SchedRR, SchedMR>, |
| VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; |
| |
| // Even with DQI we'd like to only use these instructions for masking. |
| let Predicates = [HasDQI] in { |
| defm NAME # "64x2Z" : vextract_for_size_split<Opcode128, |
| X86VectorVTInfo< 8, EltVT64, VR512>, |
| X86VectorVTInfo< 2, EltVT64, VR128X>, |
| null_frag, vextract128_extract, SchedRR, SchedMR>, |
| VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; |
| defm NAME # "32x8Z" : vextract_for_size_split<Opcode256, |
| X86VectorVTInfo<16, EltVT32, VR512>, |
| X86VectorVTInfo< 8, EltVT32, VR256X>, |
| null_frag, vextract256_extract, SchedRR, SchedMR>, |
| EVEX_V512, EVEX_CD8<32, CD8VT8>; |
| } |
| } |
| |
// Floating-point (f32/f64, opcodes 0x19/0x1b) and integer (i32/i64, opcodes
// 0x39/0x3b) instantiations of vextract_for_type.
| // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types. |
| defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>; |
| defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>; |
| |
| // extract_subvector codegen patterns with the alternative types. |
| // Even with AVX512DQ we'll still use these for unmasked operations. |
// 64-bit element sources extracting 128 bits from 512 bits use the 32x4
// instructions.
| defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; |
| defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; |
| |
// 32-bit element sources extracting 256 bits from 512 bits use the 64x4
// instructions.
| defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, |
| vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; |
| defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, |
| vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; |
| |
// 64-bit element sources extracting 128 bits from 256 bits (requires HasVLX).
| defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; |
| defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; |
| |
| // Codegen pattern with the alternative types extract VEC128 from VEC256 |
| defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; |
| defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; |
| defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>; |
| |
| // Codegen pattern with the alternative types extract VEC128 from VEC512 |
| defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; |
| defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; |
| defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info, |
| vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>; |
| // Codegen pattern with the alternative types extract VEC256 from VEC512 |
| defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, |
| vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; |
| defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, |
| vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; |
| defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info, |
| vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>; |
| |
| |
| // A 128-bit extract from bits [255:128] of a 512-bit vector should use a |
| // smaller extract to enable EVEX->VEX. |
// NoVLX variant: each pattern takes the sub_ymm subregister of the 512-bit
// source and selects VEXTRACTI128rr / VEXTRACTF128rr with immediate 1. The
// matched element index (2, 4, 8 or 16) is the first element of the second
// 128-bit chunk for the given element width.
| let Predicates = [NoVLX] in { |
| def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), |
| (v2i64 (VEXTRACTI128rr |
| (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))), |
| (v2f64 (VEXTRACTF128rr |
| (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))), |
| (v4i32 (VEXTRACTI128rr |
| (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))), |
| (v4f32 (VEXTRACTF128rr |
| (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), |
| (v8i16 (VEXTRACTI128rr |
| (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), |
| (v16i8 (VEXTRACTI128rr |
| (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| } |
| |
| // A 128-bit extract from bits [255:128] of a 512-bit vector should use a |
| // smaller extract to enable EVEX->VEX. |
// HasVLX variant: each pattern takes the sub_ymm subregister of the 512-bit
// source and selects the 256-bit VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr
// with immediate 1.
| let Predicates = [HasVLX] in { |
| def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), |
| (v2i64 (VEXTRACTI32x4Z256rr |
| (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))), |
| (v2f64 (VEXTRACTF32x4Z256rr |
| (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))), |
| (v4i32 (VEXTRACTI32x4Z256rr |
| (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))), |
| (v4f32 (VEXTRACTF32x4Z256rr |
| (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), |
| (v8i16 (VEXTRACTI32x4Z256rr |
| (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), |
| (v16i8 (VEXTRACTI32x4Z256rr |
| (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| } |
| |
// v8f16 extract at element index 8 of a v32f16 vector: takes the sub_ymm
// subregister and selects VEXTRACTF32x4Z256rr with immediate 1. Requires
// both HasFP16 and HasVLX.
| let Predicates = [HasFP16, HasVLX] in |
| def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))), |
| (v8f16 (VEXTRACTF32x4Z256rr |
| (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)), |
| (iPTR 1)))>; |
| |
| |
| // Additional patterns for handling a bitcast between the vselect and the |
| // extract_subvector. |
//   InstrStr                 - instruction name prefix; "rrk" / "rrkz" is
//                              appended.
//   From                     - type info of the source vector ($src).
//   To                       - type info of the extracted subvector.
//   Cast                     - type info of the vselect_mask result; supplies
//                              the mask class (KRCWM).
//   vextract_extract         - PatFrag matching the extract, bound to $ext.
//   EXTRACT_get_vextract_imm - SDNodeXForm applied to $ext for the immediate.
//   p                        - predicates applied to both patterns.
| multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From, |
| X86VectorVTInfo To, X86VectorVTInfo Cast, |
| PatFrag vextract_extract, |
| SDNodeXForm EXTRACT_get_vextract_imm, |
| list<Predicate> p> { |
| let Predicates = p in { |
// Unselected lanes taken from $src0 -> "rrk".
// NOTE(review): the source pattern names the pass-through as To.RC:$src0
// while the output uses Cast.RC:$src0 - confirm the two classes always agree.
| def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask, |
| (bitconvert |
| (To.VT (vextract_extract:$ext |
| (From.VT From.RC:$src), (iPTR imm)))), |
| To.RC:$src0)), |
| (Cast.VT (!cast<Instruction>(InstrStr#"rrk") |
| Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src, |
| (EXTRACT_get_vextract_imm To.RC:$ext)))>; |
| |
// Unselected lanes are Cast.ImmAllZerosV -> "rrkz".
| def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask, |
| (bitconvert |
| (To.VT (vextract_extract:$ext |
| (From.VT From.RC:$src), (iPTR imm)))), |
| Cast.ImmAllZerosV)), |
| (Cast.VT (!cast<Instruction>(InstrStr#"rrkz") |
| Cast.KRCWM:$mask, From.RC:$src, |
| (EXTRACT_get_vextract_imm To.RC:$ext)))>; |
| } |
| } |
| |
// 128-bit extracts from 256-bit sources. The instruction follows the Cast
// (mask) element type: 32-bit element masks use the 32x4 forms under HasVLX,
// 64-bit element masks use the 64x2 forms under HasDQI + HasVLX.
| defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, |
| v4f32x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasVLX]>; |
| defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info, |
| v2f64x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; |
| |
// Integer variants.
| defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, |
| v4i32x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasVLX]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info, |
| v4i32x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasVLX]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info, |
| v4i32x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasVLX]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info, |
| v2i64x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info, |
| v2i64x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info, |
| v2i64x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; |
| |
// 128-bit extracts from 512-bit sources. The instruction follows the Cast
// (mask) element type: 32-bit element masks use the 32x4 forms under
// HasAVX512, 64-bit element masks use the 64x2 forms under HasDQI.
| defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, |
| v4f32x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasAVX512]>; |
| defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info, |
| v2f64x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasDQI]>; |
| |
// Integer variants.
| defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, |
| v4i32x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasAVX512]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, |
| v4i32x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasAVX512]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, |
| v4i32x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasAVX512]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info, |
| v2i64x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasDQI]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info, |
| v2i64x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasDQI]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info, |
| v2i64x_info, vextract128_extract, |
| EXTRACT_get_vextract128_imm, [HasDQI]>; |
| |
// 256-bit extracts from 512-bit sources. The instruction follows the Cast
// (mask) element type: 32-bit element masks use the 32x8 forms under HasDQI,
// 64-bit element masks use the 64x4 forms under HasAVX512.
| defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info, |
| v8f32x_info, vextract256_extract, |
| EXTRACT_get_vextract256_imm, [HasDQI]>; |
| defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, |
| v4f64x_info, vextract256_extract, |
| EXTRACT_get_vextract256_imm, [HasAVX512]>; |
| |
// Integer variants.
| defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info, |
| v8i32x_info, vextract256_extract, |
| EXTRACT_get_vextract256_imm, [HasDQI]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info, |
| v8i32x_info, vextract256_extract, |
| EXTRACT_get_vextract256_imm, [HasDQI]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info, |
| v8i32x_info, vextract256_extract, |
| EXTRACT_get_vextract256_imm, [HasDQI]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, |
| v4i64x_info, vextract256_extract, |
| EXTRACT_get_vextract256_imm, [HasAVX512]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, |
| v4i64x_info, vextract256_extract, |
| EXTRACT_get_vextract256_imm, [HasAVX512]>; |
| defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, |
| v4i64x_info, vextract256_extract, |
| EXTRACT_get_vextract256_imm, [HasAVX512]>; |
| |
| // vextractps - extract 32 bits from XMM |
// Register form (opcode 0x17): the v4f32 source is viewed as v4i32 via
// bc_v4i32 and the element chosen by the immediate is written to a
// GR32orGR64 destination.
| def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst), |
| (ins VR128X:$src1, u8imm:$src2), |
| "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, |
| EVEX, VEX_WIG, Sched<[WriteVecExtract]>; |
| |
// Memory form: same extract, with the selected element stored to $dst.
| def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), |
| (ins f32mem:$dst, VR128X:$src1, u8imm:$src2), |
| "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", |
| [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), |
| addr:$dst)]>, |
| EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>; |
| |
| //===---------------------------------------------------------------------===// |
| // AVX-512 BROADCAST |
| //--- |
| // broadcast with a scalar argument. |
//   Name     - instruction name prefix; DestInfo.ZSuffix and "rr" / "rrk" /
//              "rrkz" are appended to pick the instruction.
//   DestInfo - type info of the broadcast result.
//   SrcInfo  - type info of the source; the scalar arrives in SrcInfo.FRC and
//              is copied to SrcInfo.RC before being used as the operand.
| multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo, |
| X86VectorVTInfo SrcInfo> { |
// Unmasked broadcast.
| def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)), |
| (!cast<Instruction>(Name#DestInfo.ZSuffix#rr) |
| (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; |
// Masked broadcast, unselected lanes taken from $src0.
| def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask, |
| (X86VBroadcast SrcInfo.FRC:$src), |
| DestInfo.RC:$src0)), |
| (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk) |
| DestInfo.RC:$src0, DestInfo.KRCWM:$mask, |
| (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; |
// Masked broadcast, unselected lanes are DestInfo.ImmAllZerosV.
| def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask, |
| (X86VBroadcast SrcInfo.FRC:$src), |
| DestInfo.ImmAllZerosV)), |
| (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz) |
| DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; |
| } |
| |
// Defines the six broadcast forms (rr, rrkz, rrk, rm, rmkz, rmk) for one
// vector width.
//   opc / OpcodeStr - opcode and mnemonic shared by all forms.
//   SchedRR / SchedRM - scheduling classes for register / memory sources.
//   MaskInfo - type info of the instruction result and of the write-mask.
//   DestInfo - type info of the broadcast node, bitconverted to MaskInfo.VT.
//   SrcInfo  - type info of the broadcast source (register class, scalar
//              memory operand, broadcast-load fragment, element size).
//   IsConvertibleToThreeAddress - forwarded to the rmk form only.
//   UnmaskedOp / UnmaskedBcastOp - operators used by the unmasked rr / rm
//              patterns; the masked forms always use X86VBroadcast and
//              SrcInfo.BroadcastLdFrag.
| // Split version to allow mask and broadcast node to be different types. This |
| // helps support the 32x2 broadcasts. |
| multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, |
| SchedWrite SchedRR, SchedWrite SchedRM, |
| X86VectorVTInfo MaskInfo, |
| X86VectorVTInfo DestInfo, |
| X86VectorVTInfo SrcInfo, |
| bit IsConvertibleToThreeAddress, |
| SDPatternOperator UnmaskedOp = X86VBroadcast, |
| SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> { |
// Register source, unmasked.
| let hasSideEffects = 0 in { |
| def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src), |
| OpcodeStr # "\t{$src, $dst|$dst, $src}", |
| [(set MaskInfo.RC:$dst, |
| (MaskInfo.VT |
| (bitconvert |
| (DestInfo.VT |
| (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))], |
| DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>; |
| } |
// Register source, masked with MaskInfo.ImmAllZerosV for unselected lanes.
| def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), |
| (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src), |
| OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # |
| "${dst} {${mask}} {z}, $src}", |
| [(set MaskInfo.RC:$dst, |
| (vselect_mask MaskInfo.KRCWM:$mask, |
| (MaskInfo.VT |
| (bitconvert |
| (DestInfo.VT |
| (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), |
| MaskInfo.ImmAllZerosV))], |
| DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; |
// Register source, masked with $src0 (tied to $dst) for unselected lanes.
| let Constraints = "$src0 = $dst" in { |
| def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), |
| (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, |
| SrcInfo.RC:$src), |
| OpcodeStr # "\t{$src, ${dst} {${mask}}|" # |
| "${dst} {${mask}}, $src}", |
| [(set MaskInfo.RC:$dst, |
| (vselect_mask MaskInfo.KRCWM:$mask, |
| (MaskInfo.VT |
| (bitconvert |
| (DestInfo.VT |
| (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), |
| MaskInfo.RC:$src0))], |
| DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; |
| } |
| |
// Scalar memory source, unmasked.
| let hasSideEffects = 0, mayLoad = 1 in { |
| def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), |
| (ins SrcInfo.ScalarMemOp:$src), |
| OpcodeStr # "\t{$src, $dst|$dst, $src}", |
| [(set MaskInfo.RC:$dst, |
| (MaskInfo.VT |
| (bitconvert |
| (DestInfo.VT |
| (UnmaskedBcastOp addr:$src)))))], |
| DestInfo.ExeDomain>, T8PD, EVEX, |
| EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; |
| } |
| |
// Scalar memory source, masked with MaskInfo.ImmAllZerosV.
| def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), |
| (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src), |
| OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # |
| "${dst} {${mask}} {z}, $src}", |
| [(set MaskInfo.RC:$dst, |
| (vselect_mask MaskInfo.KRCWM:$mask, |
| (MaskInfo.VT |
| (bitconvert |
| (DestInfo.VT |
| (SrcInfo.BroadcastLdFrag addr:$src)))), |
| MaskInfo.ImmAllZerosV))], |
| DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, |
| EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; |
| |
// Scalar memory source, masked with $src0 (tied to $dst).
| let Constraints = "$src0 = $dst", |
| isConvertibleToThreeAddress = IsConvertibleToThreeAddress in { |
| def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), |
| (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, |
| SrcInfo.ScalarMemOp:$src), |
| OpcodeStr # "\t{$src, ${dst} {${mask}}|" # |
| "${dst} {${mask}}, $src}", |
| [(set MaskInfo.RC:$dst, |
| (vselect_mask MaskInfo.KRCWM:$mask, |
| (MaskInfo.VT |
| (bitconvert |
| (DestInfo.VT |
| (SrcInfo.BroadcastLdFrag addr:$src)))), |
| MaskInfo.RC:$src0))], |
| DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, |
| EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; |
| } |
| } |
| |
// Thin wrapper over avx512_broadcast_rm_split: DestInfo is passed for both
// the MaskInfo and DestInfo parameters; the optional operators keep their
// defaults.
| // Helper that makes the mask type and the broadcast result type identical. |
| multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, |
| SchedWrite SchedRR, SchedWrite SchedRM, |
| X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, |
| bit IsConvertibleToThreeAddress> |
| : avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM, |
| DestInfo, DestInfo, SrcInfo, |
| IsConvertibleToThreeAddress>; |
| |
// Floating-point scalar broadcast with 512-bit (HasAVX512) and 256-bit
// (HasVLX) destinations only; the source is always _.info128. Each width
// combines the instruction definitions from avx512_broadcast_rm with the
// scalar-register patterns from avx512_broadcast_scalar.
| multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr, |
| AVX512VLVectorVTInfo _> { |
| let Predicates = [HasAVX512] in { |
| defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256, |
| WriteFShuffle256Ld, _.info512, _.info128, 1>, |
| avx512_broadcast_scalar<NAME, _.info512, _.info128>, |
| EVEX_V512; |
| } |
| |
| let Predicates = [HasVLX] in { |
| defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256, |
| WriteFShuffle256Ld, _.info256, _.info128, 1>, |
| avx512_broadcast_scalar<NAME, _.info256, _.info128>, |
| EVEX_V256; |
| } |
| } |
| |
// Floating-point scalar broadcast with 512-bit (HasAVX512), 256-bit and
// 128-bit (HasVLX) destinations; the source is always _.info128. Each width
// combines the instruction definitions from avx512_broadcast_rm with the
// scalar-register patterns from avx512_broadcast_scalar.
| multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr, |
| AVX512VLVectorVTInfo _> { |
| let Predicates = [HasAVX512] in { |
| defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256, |
| WriteFShuffle256Ld, _.info512, _.info128, 1>, |
| avx512_broadcast_scalar<NAME, _.info512, _.info128>, |
| EVEX_V512; |
| } |
| |
| let Predicates = [HasVLX] in { |
| defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256, |
| WriteFShuffle256Ld, _.info256, _.info128, 1>, |
| avx512_broadcast_scalar<NAME, _.info256, _.info128>, |
| EVEX_V256; |
| defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256, |
| WriteFShuffle256Ld, _.info128, _.info128, 1>, |
| avx512_broadcast_scalar<NAME, _.info128, _.info128>, |
| EVEX_V128; |
| } |
| } |
// vbroadcastss (opcode 0x18, f32) and vbroadcastsd (opcode 0x19, f64, with
// VEX_W1X applied).
| defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss", |
| avx512vl_f32_info>; |
| defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", |
| avx512vl_f64_info>, VEX_W1X; |
| |
// Integer broadcast from a scalar register of class SrcRC. The mnemonic is
// "vpbroadcast" followed by _.Suffix, and OpNode is the node matched on the
// source register.
| multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR, |
| X86VectorVTInfo _, SDPatternOperator OpNode, |
| RegisterClass SrcRC> { |
| // Fold with a mask even if it has multiple uses since it is cheap. |
// (vselect is passed as the final AVX512_maskable argument for that purpose.)
| let ExeDomain = _.ExeDomain in |
| defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), |
| (ins SrcRC:$src), |
| "vpbroadcast"#_.Suffix, "$src", "$src", |
| (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0, |
| /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>, |
| T8PD, EVEX, Sched<[SchedRR]>; |
| } |
| |
// Integer broadcast whose instruction operand is always GR32 while the
// matched source register is SrcRC. The instruction itself is declared with
// empty pattern lists; the three Pat records below supply selection by
// placing SrcRC:$src into an undefined i32 at subregister index Subreg.
//   Name - full instruction name prefix; "rr" / "rrk" / "rrkz" is appended.
| multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR, |
| X86VectorVTInfo _, SDPatternOperator OpNode, |
| RegisterClass SrcRC, SubRegIndex Subreg> { |
| let hasSideEffects = 0, ExeDomain = _.ExeDomain in |
| defm rr : AVX512_maskable_custom<opc, MRMSrcReg, |
| (outs _.RC:$dst), (ins GR32:$src), |
| !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), |
| !con((ins _.KRCWM:$mask), (ins GR32:$src)), |
| "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [], |
| "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>; |
| |
// Unmasked broadcast.
| def : Pat <(_.VT (OpNode SrcRC:$src)), |
| (!cast<Instruction>(Name#rr) |
| (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; |
| |
| // Fold with a mask even if it has multiple uses since it is cheap. |
| def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), |
| (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask, |
| (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; |
| |
// Same, with _.ImmAllZerosV for the unselected lanes.
| def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), |
| (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask, |
| (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; |
| } |
| |
// Instantiates avx512_int_broadcastbw_reg for the 512-bit width under [prd]
// and for the 256-bit and 128-bit widths under [prd, HasVLX]. The width
// suffix (Z, Z256, Z128) is appended to Name before it is forwarded.
| multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name, |
| AVX512VLVectorVTInfo _, SDPatternOperator OpNode, |
| RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { |
| let Predicates = [prd] in |
| defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512, |
| OpNode, SrcRC, Subreg>, EVEX_V512; |
| let Predicates = [prd, HasVLX] in { |
| defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256, |
| _.info256, OpNode, SrcRC, Subreg>, EVEX_V256; |
| defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle, |
| _.info128, OpNode, SrcRC, Subreg>, EVEX_V128; |
| } |
| } |
| |
// Instantiates avx512_int_broadcast_reg for the 512-bit width under [prd]
// and for the 256-bit and 128-bit widths under [prd, HasVLX]. The 128-bit
// variant uses WriteShuffle; the wider ones use WriteShuffle256.
| multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _, |
| SDPatternOperator OpNode, |
| RegisterClass SrcRC, Predicate prd> { |
| let Predicates = [prd] in |
| defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode, |
| SrcRC>, EVEX_V512; |
| let Predicates = [prd, HasVLX] in { |
| defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode, |
| SrcRC>, EVEX_V256; |
| defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode, |
| SrcRC>, EVEX_V128; |
| } |
| } |
| |
// Broadcasts from general-purpose registers. The byte (0x7A, GR8) and word
// (0x7B, GR16) forms require HasBWI and go through the GR32-operand
// multiclass with sub_8bit / sub_16bit. The dword (GR32) and qword (GR64)
// forms share opcode 0x7C under HasAVX512, with VEX_W applied to the qword.
| defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", |
| avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; |
| defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", |
| avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, |
| HasBWI>; |
| defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, |
| X86VBroadcast, GR32, HasAVX512>; |
| defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, |
| X86VBroadcast, GR64, HasAVX512>, VEX_W; |
| |
// Instantiates avx512_broadcast_rm for the 512-bit width under [prd] and for
// the 256-bit and 128-bit widths under [prd, HasVLX]. The source is always
// _.info128, and IsConvertibleToThreeAddress is forwarded unchanged.
| multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr, |
| AVX512VLVectorVTInfo _, Predicate prd, |
| bit IsConvertibleToThreeAddress> { |
| let Predicates = [prd] in { |
| defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256, |
| WriteShuffle256Ld, _.info512, _.info128, |
| IsConvertibleToThreeAddress>, |
| EVEX_V512; |
| } |
| let Predicates = [prd, HasVLX] in { |
| defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256, |
| WriteShuffle256Ld, _.info256, _.info128, |
| IsConvertibleToThreeAddress>, |
| EVEX_V256; |
| defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle, |
| WriteShuffleXLd, _.info128, _.info128, |
| IsConvertibleToThreeAddress>, |
| EVEX_V128; |
| } |
| } |
| |
// Integer broadcasts from a vector register or scalar memory. The byte
// (0x78) and word (0x79) forms require HasBWI and pass 0 for
// IsConvertibleToThreeAddress; the dword (0x58) and qword (0x59) forms
// require HasAVX512 and pass 1. VEX_W1X is applied to the qword form.
| defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", |
| avx512vl_i8_info, HasBWI, 0>; |
| defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", |
| avx512vl_i16_info, HasBWI, 0>; |
| defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", |
| avx512vl_i32_info, HasAVX512, 1>; |
| defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", |
| avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; |
| |
// Subvector broadcast from memory: defines a maskable "rm" form whose
// pattern is OpNode applied to the address, producing _Dst.VT. The memory
// operand type comes from _Src.
| multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, |
| SDPatternOperator OpNode, |
| X86VectorVTInfo _Dst, |
| X86VectorVTInfo _Src> { |
| defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), |
| (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", |
| (_Dst.VT (OpNode addr:$src))>, |
| Sched<[SchedWriteShuffle.YMM.Folded]>, |
| AVX5128IBase, EVEX; |
| } |
| |
| // This should be used for the AVX512DQ broadcast instructions. It disables |
| // the unmasked patterns so that we only use the DQ instructions when masking |
| // is requested. |
// The unmasked pattern slot receives null_frag, so mayLoad is set explicitly
// on the definition.
| multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, |
| SDPatternOperator OpNode, |
| X86VectorVTInfo _Dst, |
| X86VectorVTInfo _Src> { |
| let hasSideEffects = 0, mayLoad = 1 in |
| defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), |
| (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", |
| (null_frag), |
| (_Dst.VT (OpNode addr:$src))>, |
| Sched<[SchedWriteShuffle.YMM.Folded]>, |
| AVX5128IBase, EVEX; |
| } |
| let Predicates = [HasFP16] in { // v32f16 broadcasts are selected to the 512-bit VPBROADCASTW records.
| def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)), // Broadcast from memory -> rm form.
| (VPBROADCASTWZrm addr:$src)>;
| 
| def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))), // Broadcast from a v8f16 register -> rr form.
| (VPBROADCASTWZrr VR128X:$src)>;
| def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))), // Scalar f16 source: copied into the VR128X class before using the rr form.
| (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
| }
| let Predicates = [HasVLX, HasFP16] in { // v8f16/v16f16 broadcasts are selected to the Z128/Z256 VPBROADCASTW records.
| def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)), // Broadcast from memory -> rm forms.
| (VPBROADCASTWZ128rm addr:$src)>;
| def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
| (VPBROADCASTWZ256rm addr:$src)>;
| 
| def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))), // Broadcast from a v8f16 register -> rr forms.
| (VPBROADCASTWZ128rr VR128X:$src)>;
| def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
| (VPBROADCASTWZ256rr VR128X:$src)>;
| 
| def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))), // Scalar f16 source: copied into the VR128X class before using the rr forms.
| (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
| def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
| (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
| }
| |
| //===----------------------------------------------------------------------===// |
| // AVX-512 BROADCAST SUBVECTORS |
| // |
| |
| defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", // 32X4 forms: X86SubVBroadcastld128 from a v4i32/v4f32 memory operand into a 16 x 32-bit result.
| X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
| EVEX_V512, EVEX_CD8<32, CD8VT4>;
| defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
| X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
| EVEX_V512, EVEX_CD8<32, CD8VT4>;
| defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", // 64X4 forms: X86SubVBroadcastld256 from a v4i64/v4f64 memory operand into an 8 x 64-bit result; these add VEX_W.
| X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
| EVEX_V512, EVEX_CD8<64, CD8VT4>;
| defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
| X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
| EVEX_V512, EVEX_CD8<64, CD8VT4>;
| |
| let Predicates = [HasAVX512] in { // Extra selection patterns that reuse the VBROADCAST{F,I}{32X4,64X4} records for other element types.
| def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)), // Unmasked ld256: every FP result type selects VBROADCASTF64X4rm.
| (VBROADCASTF64X4rm addr:$src)>;
| def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
| (VBROADCASTF64X4rm addr:$src)>;
| def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
| (VBROADCASTF64X4rm addr:$src)>;
| def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)), // Unmasked ld256: every integer result type selects VBROADCASTI64X4rm.
| (VBROADCASTI64X4rm addr:$src)>;
| def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
| (VBROADCASTI64X4rm addr:$src)>;
| def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
| (VBROADCASTI64X4rm addr:$src)>;
| def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
| (VBROADCASTI64X4rm addr:$src)>;
| 
| def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)), // Unmasked ld128: every FP result type selects VBROADCASTF32X4rm.
| (VBROADCASTF32X4rm addr:$src)>;
| def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTF32X4rm addr:$src)>;
| def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTF32X4rm addr:$src)>;
| def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)), // Unmasked ld128: every integer result type selects VBROADCASTI32X4rm.
| (VBROADCASTI32X4rm addr:$src)>;
| def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTI32X4rm addr:$src)>;
| def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTI32X4rm addr:$src)>;
| def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTI32X4rm addr:$src)>;
| 
| // Patterns for selects of bitcasted operations: the broadcast is typed with one element width and bitcast to the width the mask applies to.
| def : Pat<(vselect_mask VK16WM:$mask, // Zero passthru (immAllZerosV) selects the rmkz form.
| (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
| (v16f32 immAllZerosV)),
| (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK16WM:$mask, // Register passthru $src0 selects the rmk form.
| (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
| VR512:$src0),
| (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK16WM:$mask,
| (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
| (v16i32 immAllZerosV)),
| (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK16WM:$mask,
| (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
| VR512:$src0),
| (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
| 
| def : Pat<(vselect_mask VK8WM:$mask, // Same idea with an 8-bit mask: ld256 typed as 16 x 32-bit, masked as 8 x 64-bit.
| (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
| (v8f64 immAllZerosV)),
| (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask,
| (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
| VR512:$src0),
| (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask,
| (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
| (v8i64 immAllZerosV)),
| (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask,
| (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
| VR512:$src0),
| (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
| }
| |
| let Predicates = [HasVLX] in { // 256-bit (EVEX_V256) 32X4 subvector broadcasts plus their extra selection patterns.
| defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", // Same opcodes and mnemonics as the 512-bit 32X4 records, with 8 x 32-bit result infos.
| X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
| EVEX_V256, EVEX_CD8<32, CD8VT4>;
| defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
| X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
| EVEX_V256, EVEX_CD8<32, CD8VT4>;
| 
| def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)), // Unmasked ld128: every FP result type selects VBROADCASTF32X4Z256rm.
| (VBROADCASTF32X4Z256rm addr:$src)>;
| def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTF32X4Z256rm addr:$src)>;
| def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTF32X4Z256rm addr:$src)>;
| def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)), // Unmasked ld128: every integer result type selects VBROADCASTI32X4Z256rm.
| (VBROADCASTI32X4Z256rm addr:$src)>;
| def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTI32X4Z256rm addr:$src)>;
| def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTI32X4Z256rm addr:$src)>;
| def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
| (VBROADCASTI32X4Z256rm addr:$src)>;
| 
| // Patterns for selects of bitcasted operations: the broadcast is typed as 4 x 64-bit and bitcast to the 8 x 32-bit type the mask applies to.
| def : Pat<(vselect_mask VK8WM:$mask, // Zero passthru (immAllZerosV) selects the rmkz form.
| (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
| (v8f32 immAllZerosV)),
| (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask, // Register passthru $src0 selects the rmk form.
| (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
| VR256X:$src0),
| (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask,
| (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
| (v8i32 immAllZerosV)),
| (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask,
| (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
| VR256X:$src0),
| (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
| }
| |
| let Predicates = [HasVLX, HasDQI] in { // 64X2 subvector broadcasts via the _dq multiclass, plus masked bitcast patterns.
| defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", // Note: despite the Z128 suffix, the result info is v4i64x_info/v4f64x_info and the encoding is EVEX_V256.
| X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
| EVEX_V256, EVEX_CD8<64, CD8VT2>;
| defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
| X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
| EVEX_V256, EVEX_CD8<64, CD8VT2>;
| 
| // Patterns for selects of bitcasted operations: the broadcast is typed as 8 x 32-bit and bitcast to the 4 x 64-bit type the mask applies to.
| def : Pat<(vselect_mask VK4WM:$mask, // Zero passthru (immAllZerosV) selects the rmkz form.
| (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
| (v4f64 immAllZerosV)),
| (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK4WM:$mask, // Register passthru $src0 selects the rmk form.
| (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
| VR256X:$src0),
| (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK4WM:$mask,
| (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
| (v4i64 immAllZerosV)),
| (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK4WM:$mask,
| (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
| VR256X:$src0),
| (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
| }
| |
| let Predicates = [HasDQI] in { // 512-bit (EVEX_V512) 64X2 and 32X8 subvector broadcasts via the _dq multiclass, plus masked bitcast patterns.
| defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", // 64X2 forms use X86SubVBroadcastld128 and add VEX_W.
| X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
| EVEX_V512, EVEX_CD8<64, CD8VT2>;
| defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8", // 32X8 forms use X86SubVBroadcastld256 and CD8VT8.
| X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
| EVEX_V512, EVEX_CD8<32, CD8VT8>;
| defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
| X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
| EVEX_V512, EVEX_CD8<64, CD8VT2>;
| defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
| X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
| EVEX_V512, EVEX_CD8<32, CD8VT8>;
| 
| // Patterns for selects of bitcasted operations: the broadcast is typed with one element width and bitcast to the width the mask applies to.
| def : Pat<(vselect_mask VK16WM:$mask, // ld256 typed as 8 x 64-bit but masked as 16 x 32-bit -> 32X8; zero passthru selects rmkz.
| (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
| (v16f32 immAllZerosV)),
| (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK16WM:$mask, // Register passthru $src0 selects the rmk form.
| (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
| VR512:$src0),
| (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK16WM:$mask,
| (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
| (v16i32 immAllZerosV)),
| (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK16WM:$mask,
| (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
| VR512:$src0),
| (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
| 
| def : Pat<(vselect_mask VK8WM:$mask, // ld128 typed as 16 x 32-bit but masked as 8 x 64-bit -> 64X2.
| (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
| (v8f64 immAllZerosV)),
| (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask,
| (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
| VR512:$src0),
| (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask,
| (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
| (v8i64 immAllZerosV)),
| (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
| def : Pat<(vselect_mask VK8WM:$mask,
| (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
| VR512:$src0),
| (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
| }
| |
| multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, // Z (512-bit) and Z256 forms of a 32x2 broadcast, built on avx512_broadcast_rm_split.
| AVX512VLVectorVTInfo _Dst, // Result VT infos, one per width.
| AVX512VLVectorVTInfo _Src> { // Source VT infos; info128 is also passed as the last VT-info argument at every width.
| let Predicates = [HasDQI] in
| defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
| WriteShuffle256Ld, _Dst.info512,
| _Src.info512, _Src.info128, 0, null_frag, null_frag>, // Both pattern operators are null_frag, so presumably no selection patterns come from here - TODO confirm against avx512_broadcast_rm_split.
| EVEX_V512;
| let Predicates = [HasDQI, HasVLX] in // The 256-bit form additionally requires HasVLX.
| defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
| WriteShuffle256Ld, _Dst.info256,
| _Src.info256, _Src.info128, 0, null_frag, null_frag>,
| EVEX_V256;
| }
| |
| multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, // Inherits Z and Z256 from avx512_common_broadcast_32x2 and adds a Z128 form.
| AVX512VLVectorVTInfo _Dst,
| AVX512VLVectorVTInfo _Src> :
| avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
| 
| let Predicates = [HasDQI, HasVLX] in
| defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle, // The 128-bit form uses WriteShuffle/WriteShuffleXLd rather than the 256 classes.
| WriteShuffleXLd, _Dst.info128,
| _Src.info128, _Src.info128, 0, null_frag, null_frag>,
| EVEX_V128;
| }
| |
| defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", // Integer variant uses the i32x2 multiclass, so it gets Z, Z256 and Z128.
| avx512vl_i32_info, avx512vl_i64_info>;
| defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", // FP variant uses the base multiclass, so it gets only Z and Z256.
| avx512vl_f32_info, avx512vl_f64_info>;
| |
| //===----------------------------------------------------------------------===// |
| // AVX-512 BROADCAST MASK TO VECTOR REGISTER |
| //--- |
| multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr, // Defines `rr`: a mask-register source selected from X86VBroadcastm into a vector register.
| X86VectorVTInfo _, RegisterClass KRC> { // `_` describes the vector result; KRC is the source mask register class.
| def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
| !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
| [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
| EVEX, Sched<[WriteShuffle]>; // WriteShuffle is used whatever width `_` has.
| }
| |
| multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr, // Instantiates avx512_mask_broadcastm as Z (512-bit), Z256 and Z128.
| AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { // The same KRC is used as the source at every vector width.
| let Predicates = [HasCDI] in // The 512-bit form requires HasCDI only.
| defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
| let Predicates = [HasCDI, HasVLX] in { // The 256- and 128-bit forms additionally require HasVLX.
| defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
| defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
| }
| }
| |
| defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", // VK16 mask source into i32-element vectors.
| avx512vl_i32_info, VK16>;
| defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", // VK8 mask source into i64-element vectors; adds VEX_W.
| avx512vl_i64_info, VK8>, VEX_W;
| |
| //===----------------------------------------------------------------------===// |
| // -- VPERMI2 - 3 source operands form -- |
| multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, // Register (`rr`) and full-vector memory (`rm`) forms of a VPERMI2-style instruction.
| X86FoldableSchedWrite sched,
| X86VectorVTInfo _, X86VectorVTInfo IdxVT> { // `_` describes the data vectors, IdxVT the index vector.
| let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, // $src1 is the IdxVT.RC index operand in the patterns below, so the index register is tied to $dst.
| hasSideEffects = 0 in {
| defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
| (ins _.RC:$src2, _.RC:$src3), // $src1 is not listed; presumably AVX512_maskable_3src_cast adds the tied operand - TODO confirm.
| OpcodeStr, "$src3, $src2", "$src2, $src3",
| (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, // Selected from X86VPermt2 with the index as the middle operand.
| EVEX_4V, AVX5128IBase, Sched<[sched]>;
| 
| let mayLoad = 1 in
| defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
| (ins _.RC:$src2, _.MemOp:$src3),
| OpcodeStr, "$src3, $src2", "$src2, $src3",
| (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
| (_.VT (_.LdFrag addr:$src3)))), 1>, // Same pattern with $src3 loaded through _.LdFrag.
| EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
| }
| }
| |
| multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, // Broadcast-memory (`rmb`) form of a VPERMI2-style instruction.
| X86FoldableSchedWrite sched,
| X86VectorVTInfo _, X86VectorVTInfo IdxVT> { // `_` describes the data vectors, IdxVT the index vector.
| let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, // As in avx512_perm_i, the IdxVT.RC index operand $src1 is tied to $dst.
| hasSideEffects = 0, mayLoad = 1 in
| defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
| (ins _.RC:$src2, _.ScalarMemOp:$src3), // The memory operand is scalar-sized (_.ScalarMemOp).
| OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), // _.BroadcastStr is appended to $src3 in both asm syntaxes.
| !strconcat("$src2, ${src3}", _.BroadcastStr ),
| (_.VT (X86VPermt2 _.RC:$src2,
| IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, // $src3 is matched through _.BroadcastLdFrag.
| AVX5128IBase, EVEX_4V, EVEX_B, // EVEX_B marks the broadcast encoding.
| Sched<[sched.Folded, sched.ReadAfterFold]>;
| }
| |
| multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, // Instantiates avx512_perm_i plus avx512_perm_i_mb at 512, 128 and 256 bits.
| X86FoldableSchedWrite sched,
| AVX512VLVectorVTInfo VTInfo, // Data vector infos.
| AVX512VLVectorVTInfo ShuffleMask> { // Index vector infos, passed as IdxVT.
| defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, // The 512-bit records use the bare NAME; no Predicates are set for them in this multiclass.
| ShuffleMask.info512>,
| avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
| ShuffleMask.info512>, EVEX_V512;
| let Predicates = [HasVLX] in { // The 128/256-bit records get a NAME#128 / NAME#256 prefix and require HasVLX.
| defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
| ShuffleMask.info128>,
| avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
| ShuffleMask.info128>, EVEX_V128;
| defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
| ShuffleMask.info256>,
| avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
| ShuffleMask.info256>, EVEX_V256;
| }
| }
| |
| multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr, // Like avx512_perm_i_sizes but without the broadcast (`rmb`) forms and with a caller-chosen predicate.
| X86FoldableSchedWrite sched,
| AVX512VLVectorVTInfo VTInfo, // Data vector infos.
| AVX512VLVectorVTInfo Idx, // Index vector infos, passed as IdxVT.
| Predicate Prd> { // Gates every width; HasVLX is added for 128/256 bits.
| let Predicates = [Prd] in
| defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
| Idx.info512>, EVEX_V512;
| let Predicates = [Prd, HasVLX] in {
| defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
| Idx.info128>, EVEX_V128;
| defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
| Idx.info256>, EVEX_V256;
| }
| }
| |
| defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, // D/Q: opcode 0x76, with broadcast forms; Q adds VEX_W.
| avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
| defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
| avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
| defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, // W/B: opcode 0x75, no broadcast forms; W is gated on HasBWI and adds VEX_W.
| avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
| VEX_W, EVEX_CD8<16, CD8VF>;
| defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, // B is gated on HasVBMI.
| avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
| EVEX_CD8<8, CD8VF>;
| defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, // PS/PD: opcode 0x77, FP data with integer index infos; PD adds VEX_W.
| avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
| defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
| avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
| |
| // Extra patterns to deal with extra bitcasts due to passthru and index being
| // different types on the fp versions. Each pattern matches a masked select whose index and passthru are both $src1 bitcast from CastVT.
| multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, // InstrStr is the record-name prefix; "rrk"/"rmk"/"rmbk" is appended below.
| X86VectorVTInfo IdxVT, // Type the index is bitcast to.
| X86VectorVTInfo CastVT> { // Type $src1 is bitcast from.
| def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, // Register $src3 -> rrk.
| (X86VPermt2 (_.VT _.RC:$src2),
| (IdxVT.VT (bitconvert
| (CastVT.VT _.RC:$src1))),
| _.RC:$src3),
| (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
| (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
| _.RC:$src2, _.RC:$src3)>;
| def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, // Full-vector load of $src3 (_.LdFrag) -> rmk.
| (X86VPermt2 _.RC:$src2,
| (IdxVT.VT (bitconvert
| (CastVT.VT _.RC:$src1))),
| (_.LdFrag addr:$src3)),
| (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
| (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
| _.RC:$src2, addr:$src3)>;
| def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, // Broadcast load of $src3 (_.BroadcastLdFrag) -> rmbk.
| (X86VPermt2 _.RC:$src2,
| (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
| (_.BroadcastLdFrag addr:$src3)),
| (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
| (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
| _.RC:$src2, addr:$src3)>;
| }
| |
| // TODO: Should we add more casts? The vXi64 case is common due to ABI. Only VPERMI2PS with a vXi64 cast type is covered here.
| defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
| defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
| defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
| |
| // VPERMT2 |
| multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, // Register (`rr`) and full-vector memory (`rm`) forms of a VPERMT2-style instruction.
| X86FoldableSchedWrite sched,
| X86VectorVTInfo _, X86VectorVTInfo IdxVT> { // `_` describes the data vectors, IdxVT the index vector.
| let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { // Here the tied $src1 is a data operand (_.RC); the index is $src2 (IdxVT.RC).
| defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
| (ins IdxVT.RC:$src2, _.RC:$src3),
| OpcodeStr, "$src3, $src2", "$src2, $src3",
| (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, // Same X86VPermt2 node as avx512_perm_i, with the index still the middle operand.
| EVEX_4V, AVX5128IBase, Sched<[sched]>;
| 
| defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
| (ins IdxVT.RC:$src2, _.MemOp:$src3),
| OpcodeStr, "$src3, $src2", "$src2, $src3",
| (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
| (_.LdFrag addr:$src3))), 1>, // Same pattern with $src3 loaded through _.LdFrag.
| EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
| }
| }
| multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, // Broadcast-memory (`rmb`) form of a VPERMT2-style instruction.
| X86FoldableSchedWrite sched,
| X86VectorVTInfo _, X86VectorVTInfo IdxVT> { // `_` describes the data vectors, IdxVT the index vector.
| let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in // The data operand $src1 is tied to $dst; the index is $src2.
| defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
| (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3), // The memory operand is scalar-sized (_.ScalarMemOp).
| OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), // _.BroadcastStr is appended to $src3 in both asm syntaxes.
| !strconcat("$src2, ${src3}", _.BroadcastStr ),
| (_.VT (X86VPermt2 _.RC:$src1,
| IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, // $src3 is matched through _.BroadcastLdFrag.
| AVX5128IBase, EVEX_4V, EVEX_B, // EVEX_B marks the broadcast encoding.
| Sched<[sched.Folded, sched.ReadAfterFold]>;
| }
| |
| multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, // Instantiates avx512_perm_t plus avx512_perm_t_mb at 512, 128 and 256 bits.
| X86FoldableSchedWrite sched,
| AVX512VLVectorVTInfo VTInfo, // Data vector infos.
| AVX512VLVectorVTInfo ShuffleMask> { // Index vector infos, passed as IdxVT.
| defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, // The 512-bit records use the bare NAME; no Predicates are set for them in this multiclass.
| ShuffleMask.info512>,
| avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
| ShuffleMask.info512>, EVEX_V512;
| let Predicates = [HasVLX] in { // The 128/256-bit records get a NAME#128 / NAME#256 prefix and require HasVLX.
| defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
| ShuffleMask.info128>,
| avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
| ShuffleMask.info128>, EVEX_V128;
| defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
| ShuffleMask.info256>,
| avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
| ShuffleMask.info256>, EVEX_V256;
| }
| }
| |
| multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, // Like avx512_perm_t_sizes but without the broadcast (`rmb`) forms and with a caller-chosen predicate.
| X86FoldableSchedWrite sched,
| AVX512VLVectorVTInfo VTInfo, // Data vector infos.
| AVX512VLVectorVTInfo Idx, Predicate Prd> { // Idx is passed as IdxVT; Prd gates every width, with HasVLX added for 128/256 bits.
| let Predicates = [Prd] in
| defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
| Idx.info512>, EVEX_V512;
| let Predicates = [Prd, HasVLX] in {
| defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
| Idx.info128>, EVEX_V128;
| defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
| Idx.info256>, EVEX_V256;
| }
| }
| |
| defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, // D/Q: opcode 0x7E, with broadcast forms; Q adds VEX_W.
| avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
| defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
| avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
| defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, // W/B: opcode 0x7D, no broadcast forms; W is gated on HasBWI and adds VEX_W.
| avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
| VEX_W, EVEX_CD8<16, CD8VF>;
| defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, // B is gated on HasVBMI.
| avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
| EVEX_CD8<8, CD8VF>;
| defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, // PS/PD: opcode 0x7F, FP data with integer index infos; PD adds VEX_W.
| avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
| defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
| avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
| |
| //===----------------------------------------------------------------------===// |
| // AVX-512 - BLEND using mask |
| // |
| |
| multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, // Mask-blend instruction forms rr/rrk/rrkz and rm/rmk/rmkz. NOTE(review): the name reads like a scheduling class rather than a blend multiclass; blendmask_dq and blendmask_bw reference it, so any rename must be coordinated.
| X86FoldableSchedWrite sched, X86VectorVTInfo _> {
| let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { // Every def below has an empty pattern list ([]), so hasSideEffects/mayLoad are stated explicitly.
| def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), // Unmasked register form.
| (ins _.RC:$src1, _.RC:$src2),
| !strconcat(OpcodeStr,
| "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
| EVEX_4V, Sched<[sched]>;
| def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), // Register form with a {mask} operand (EVEX_K).
| (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
| !strconcat(OpcodeStr,
| "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
| []>, EVEX_4V, EVEX_K, Sched<[sched]>;
| def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), // Register form with {mask} {z} (EVEX_KZ); marked NotMemoryFoldable.
| (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
| !strconcat(OpcodeStr,
| "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
| []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
| let mayLoad = 1 in { // Memory forms: $src2 becomes _.MemOp, and EVEX_CD8<_.EltSize, CD8VF> is added on each def.
| def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
| (ins _.RC:$src1, _.MemOp:$src2),
| !strconcat(OpcodeStr,
| "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
| []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
| Sched<[sched.Folded, sched.ReadAfterFold]>;
| def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
| (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
| !strconcat(OpcodeStr,
| "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
| []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
| Sched<[sched.Folded, sched.ReadAfterFold]>;
| def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), // Like rrkz, the zero-masked memory form is NotMemoryFoldable.
| (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
| !strconcat(OpcodeStr,
| "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
| []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
| Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
| }
| }
| }
| multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, // Broadcast-memory mask-blend forms rmbk, rmbkz and rmb.
| X86FoldableSchedWrite sched, X86VectorVTInfo _> {
| let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { // All three defs have empty pattern lists and carry EVEX_B.
| def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), // Broadcast form with a {mask} operand (EVEX_K).
| (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
| !strconcat(OpcodeStr,
| "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", // _.BroadcastStr follows $src2 in both asm syntaxes.
| "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
| EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
| Sched<[sched.Folded, sched.ReadAfterFold]>;
| 
| def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), // Broadcast form with {mask} {z} (EVEX_KZ); marked NotMemoryFoldable.
| (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
| !strconcat(OpcodeStr,
| "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
| "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
| EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
| Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
| 
| def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), // Unmasked broadcast form.
| (ins _.RC:$src1, _.ScalarMemOp:$src2),
| !strconcat(OpcodeStr,
| "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
| "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
| EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
| Sched<[sched.Folded, sched.ReadAfterFold]>;
| }
| }
| |
| multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, // Blend forms including broadcast (`rmb*`) variants at 512, 256 and 128 bits.
| AVX512VLVectorVTInfo VTInfo> {
| defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, // No Predicates are set for the 512-bit form in this multiclass.
| WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
| EVEX_V512;
| 
| let Predicates = [HasVLX] in { // The 256/128-bit forms require HasVLX and use sched.YMM / sched.XMM.
| defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
| WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
| EVEX_V256;
| defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
| WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
| EVEX_V128;
| }
| }
| |
| multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, // Blend forms without broadcast variants, gated on HasBWI, at 512, 256 and 128 bits.
| AVX512VLVectorVTInfo VTInfo> {
| let Predicates = [HasBWI] in
| defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
| EVEX_V512;
| 
| let Predicates = [HasBWI, HasVLX] in { // The 256/128-bit forms additionally require HasVLX.
| defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
| EVEX_V256;
| defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
| EVEX_V128;
| }
| }
| |
| defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, // FP blends: opcode 0x65, SchedWriteFVarBlend; PD adds VEX_W.
| avx512vl_f32_info>;
| defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
| avx512vl_f64_info>, VEX_W;
| defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, // Dword/qword integer blends: opcode 0x64, SchedWriteVarBlend; Q adds VEX_W.
| avx512vl_i32_info>;
| defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
| avx512vl_i64_info>, VEX_W;
| defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, // Byte/word integer blends: opcode 0x66 via blendmask_bw; W adds VEX_W.
| avx512vl_i8_info>;
| defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
| avx512vl_i16_info>, VEX_W;
| |
| //===----------------------------------------------------------------------===// |
| // Compare Instructions |
| //===----------------------------------------------------------------------===// |
| |
| // avx512_cmp_scalar - AVX512 scalar compares: VCMPSS, VCMPSD and (with FP16) VCMPSH
| |
| multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, // Scalar FP compare (opcode 0xC2) writing a mask register: rr_Int, rm_Int, rrb_Int, plus codegen-only rr/rm.
| PatFrag OpNode_su, PatFrag OpNodeSAE_su, // The *_su fragments are passed as the second pattern of each AVX512_maskable_cmp instantiation.
| X86FoldableSchedWrite sched> {
| defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, // Register form on vector-class operands (_.RC), with condition code immediate $cc.
| (outs _.KRC:$dst),
| (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
| "vcmp"#_.Suffix,
| "$cc, $src2, $src1", "$src1, $src2, $cc",
| (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
| (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
| timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
| let mayLoad = 1 in
| defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, // Memory form: $src2 is matched through _.ScalarIntMemFrags.
| (outs _.KRC:$dst),
| (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
| "vcmp"#_.Suffix,
| "$cc, $src2, $src1", "$src1, $src2, $cc",
| (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
| timm:$cc),
| (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
| timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
| Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
| 
| let Uses = [MXCSR] in // Unlike the other forms, the {sae} form lists MXCSR in Uses and does not carry SIMD_EXC.
| defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
| (outs _.KRC:$dst),
| (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
| "vcmp"#_.Suffix,
| "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", // {sae} is printed in the asm string; selected from OpNodeSAE.
| (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
| timm:$cc),
| (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
| timm:$cc)>,
| EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
| 
| let isCodeGenOnly = 1 in { // Codegen-only variants operating on the scalar FP register class (_.FRC); unmasked only.
| let isCommutable = 1 in // Only the register-register variant is marked commutable.
| def rr : AVX512Ii8<0xC2, MRMSrcReg,
| (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
| !strconcat("vcmp", _.Suffix,
| "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
| [(set _.KRC:$dst, (OpNode _.FRC:$src1,
| _.FRC:$src2,
| timm:$cc))]>,
| EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
| def rm : AVX512Ii8<0xC2, MRMSrcMem,
| (outs _.KRC:$dst),
| (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
| !strconcat("vcmp", _.Suffix,
| "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
| [(set _.KRC:$dst, (OpNode _.FRC:$src1,
| (_.ScalarLdFrag addr:$src2), // $src2 is matched through _.ScalarLdFrag.
| timm:$cc))]>,
| EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
| Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
| }
| }
| |
| def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), // Matches X86cmpms only when the matched node has exactly one use.
| (X86cmpms node:$src1, node:$src2, node:$cc), [{
| return N->hasOneUse();
| }]>;
| def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), // Matches X86cmpmsSAE only when the matched node has exactly one use.
| (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
| return N->hasOneUse();
| }]>;
| |
| let Predicates = [HasAVX512] in { // Single- and double-precision scalar compares; both share the X86cmpms/X86cmpmsSAE nodes and SchedWriteFCmp.Scl.
| let ExeDomain = SSEPackedSingle in
| defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
| X86cmpms_su, X86cmpmsSAE_su,
| SchedWriteFCmp.Scl>, AVX512XSIi8Base;
| let ExeDomain = SSEPackedDouble in
| defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
| X86cmpms_su, X86cmpmsSAE_su,
| SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W; // The double-precision form uses AVX512XDIi8Base and adds VEX_W.
| }
| let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in // Half-precision scalar compare: gated on HasFP16 and placed in the SSEPackedSingle domain.
| defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
| X86cmpms_su, X86cmpmsSAE_su,
| SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA; // Same base class as VCMPSSZ, with TA added.
| |
| multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, // Packed integer compare writing a mask register: rr, rm, rrk and rmk, all with empty pattern lists.
| X86FoldableSchedWrite sched,
| X86VectorVTInfo _, bit IsCommutable> { // IsCommutable is applied to the register-register forms only.
| let isCommutable = IsCommutable, hasSideEffects = 0 in
| def rr : AVX512BI<opc, MRMSrcReg, // Unmasked register form.
| (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
| []>, EVEX_4V, Sched<[sched]>;
| let mayLoad = 1, hasSideEffects = 0 in
| def rm : AVX512BI<opc, MRMSrcMem, // Unmasked memory form; $src2 is _.MemOp.
| (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
| []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
| let isCommutable = IsCommutable, hasSideEffects = 0 in
| def rrk : AVX512BI<opc, MRMSrcReg, // Register form with a {mask} operand (EVEX_K).
| (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
| "$dst {${mask}}, $src1, $src2}"),
| []>, EVEX_4V, EVEX_K, Sched<[sched]>;
| let mayLoad = 1, hasSideEffects = 0 in
| def rmk : AVX512BI<opc, MRMSrcMem, // Memory form with a {mask} operand (EVEX_K).
| (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
| !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
| "$dst {${mask}}, $src1, $src2}"),
| []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
| }
| |
| multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, // Inherits rr/rm/rrk/rmk from avx512_icmp_packed and adds broadcast forms rmb and rmbk.
| X86FoldableSchedWrite sched, X86VectorVTInfo _,
| bit IsCommutable> :
| avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
| let mayLoad = 1, hasSideEffects = 0 in { // Both broadcast forms have empty pattern lists and carry EVEX_B.
| def rmb : AVX512BI<opc, MRMSrcMem, // Unmasked broadcast form; $src2 is _.ScalarMemOp.
| (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
| !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", // _.BroadcastStr follows $src2 in both asm syntaxes.
| "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
| []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
| def rmbk : AVX512BI<opc, MRMSrcMem, // Broadcast form with a {mask} operand (EVEX_K).
| (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
| _.ScalarMemOp:$src2),
| !strconcat(OpcodeStr,
| "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
| "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
| []>, EVEX_4V, EVEX_K, EVEX_B,
| Sched<[sched.Folded, sched.ReadAfterFold]>;
| }
| }
| |
// Instantiates avx512_icmp_packed at three vector lengths:
//   Z    - VTInfo.info512, sched.ZMM, EVEX_V512, predicated on [prd]
//   Z256 - VTInfo.info256, sched.YMM, EVEX_V256, predicated on [prd, HasVLX]
//   Z128 - VTInfo.info128, sched.XMM, EVEX_V128, predicated on [prd, HasVLX]
// Parameters:
//   opc, OpcodeStr - forwarded unchanged to avx512_icmp_packed.
//   sched          - per-width scheduling info (XMM/YMM/ZMM members are used).
//   VTInfo         - per-width vector type info (info128/info256/info512).
//   prd            - base predicate required by all three widths.
//   IsCommutable   - forwarded to avx512_icmp_packed; defaults to 0.
| multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, |
| X86SchedWriteWidths sched, |
| AVX512VLVectorVTInfo VTInfo, Predicate prd, |
| bit IsCommutable = 0> { |
| let Predicates = [prd] in |
| defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM, |
| VTInfo.info512, IsCommutable>, EVEX_V512; |
|
// The 256-bit and 128-bit forms additionally require HasVLX.
| let Predicates = [prd, HasVLX] in { |
| defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM, |
| VTInfo.info256, IsCommutable>, EVEX_V256; |
| defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM, |
| VTInfo.info128, IsCommutable>, EVEX_V128; |
| } |
| } |
| |
// Instantiates avx512_icmp_packed_rmb (the variant that also defines the
// broadcast-memory forms) at three vector lengths:
//   Z    - VTInfo.info512, sched.ZMM, EVEX_V512, predicated on [prd]
//   Z256 - VTInfo.info256, sched.YMM, EVEX_V256, predicated on [prd, HasVLX]
//   Z128 - VTInfo.info128, sched.XMM, EVEX_V128, predicated on [prd, HasVLX]
// Parameters have the same roles as in the instantiated multiclass;
// IsCommutable defaults to 0.
| multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, |
| X86SchedWriteWidths sched, |
| AVX512VLVectorVTInfo VTInfo, |
| Predicate prd, bit IsCommutable = 0> { |
| let Predicates = [prd] in |
| defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM, |
| VTInfo.info512, IsCommutable>, EVEX_V512; |
|
// The 256-bit and 128-bit forms additionally require HasVLX.
| let Predicates = [prd, HasVLX] in { |
| defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM, |
| VTInfo.info256, IsCommutable>, EVEX_V256; |
| defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM, |
| VTInfo.info128, IsCommutable>, EVEX_V128; |
| } |
| } |
| |
| // This node is ISD::SETCC marked SDNPCommutative, to help match loads in both |
| // operands for PCMPEQ. |
| def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>; |
// Pattern fragment matching a setcc of $src1 and $src2 whose condition code
// is SETGT.
| def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), |
| (setcc node:$src1, node:$src2, SETGT)>; |
| |
| // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't |
| // increase the pattern complexity the way an immediate would. |
// NOTE(review): the multiclasses instantiated below attach an empty pattern
// list ([]) to every record they define, so AddedComplexity may no longer
// influence selection for these instructions - confirm before relying on it.
//
// Layout of this group:
//   - Byte/word forms (B, W) use avx512_icmp_packed_vl (no broadcast-memory
//     records), are predicated on HasBWI, and carry VEX_WIG.
//   - Dword/qword forms (D, Q) use avx512_icmp_packed_rmb_vl (adds broadcast
//     records) and are predicated on HasAVX512; the Q forms add T8PD and VEX_W.
//   - The EQ forms pass IsCommutable = 1; the GT forms omit it and therefore
//     take the multiclass default of 0.
| let AddedComplexity = 2 in { |
| // FIXME: Is there a better scheduler class for VPCMP? |
| defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", |
| SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, |
| EVEX_CD8<8, CD8VF>, VEX_WIG; |
|
| defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", |
| SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, |
| EVEX_CD8<16, CD8VF>, VEX_WIG; |
|
| defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", |
| SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, |
| EVEX_CD8<32, CD8VF>; |
|
| defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", |
| SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, |
| T8PD, VEX_W, EVEX_CD8<64, CD8VF>; |
|
| defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", |
| SchedWriteVecALU, avx512vl_i8_info, HasBWI>, |
| EVEX_CD8<8, CD8VF>, VEX_WIG; |
|
| defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", |
| SchedWriteVecALU, avx512vl_i16_info, HasBWI>, |
| EVEX_CD8<16, CD8VF>, VEX_WIG; |
|
| defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", |
| SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, |
| EVEX_CD8<32, CD8VF>; |
|
| defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", |
| SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, |
| T8PD, VEX_W, EVEX_CD8<64, CD8VF>; |
| } |
| |
// Node transform on setcc: reads the condition code from operand 2 of the
// node, converts it with X86::getVPCMPImmForCond, and returns the result as
// an i8 immediate.
| def X86pcmpm_imm : SDNodeXForm<setcc, [{ |
| const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2)); |
| const uint8_t Imm = X86::getVPCMPImmForCond(CondNode->get()); |
| return getI8Imm(Imm, SDLoc(N)); |
| }]>; |
| |
// Swapped-operand counterpart of X86pcmpm_imm: computes the VPCMP immediate
// for the setcc condition code (operand 2), then maps it through
// X86::getSwappedVPCMPImm before returning it as an i8 immediate.
| def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{ |
| const ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); |
| const uint8_t DirectImm = X86::getVPCMPImmForCond(Cond); |
| const uint8_t SwappedImm = X86::getSwappedVPCMPImm(DirectImm); |
| return getI8Imm(SwappedImm, SDLoc(N)); |
| }]>; |
| |
| multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, |
| PatFrag Frag_su, |
| X86FoldableSchedWrite sched, |
| X86VectorVTInfo _, string Name> { |
| let isCommutable = 1 in |
| def rri : AVX512AIi8<opc, MRMSrcReg, |
| (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), |
| !strconcat("vpcmp", Suffix, |
| "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), |
| [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), |
| (_.VT _.RC:$src2), |
| cond)))]>, |
| EVEX_4V, Sched<[sched]>; |
| def rmi : AVX512AIi8<opc, MRMSrcMem, |
| (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), |
| !strconcat("vpcmp", Suffix, |
| "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), |
|