| //===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// \file |
| /// This file implements the targeting of the Machinelegalizer class for X86. |
| /// \todo This should be generated by TableGen. |
| //===----------------------------------------------------------------------===// |
| |
| #include "X86LegalizerInfo.h" |
| #include "X86Subtarget.h" |
| #include "X86TargetMachine.h" |
| #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
| #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
| #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
| #include "llvm/CodeGen/MachineConstantPool.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/TargetOpcodes.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Type.h" |
| |
| using namespace llvm; |
| using namespace TargetOpcode; |
| using namespace LegalizeActions; |
| using namespace LegalityPredicates; |
| |
| X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, |
| const X86TargetMachine &TM) |
| : Subtarget(STI) { |
| |
| bool Is64Bit = Subtarget.is64Bit(); |
| bool HasCMOV = Subtarget.canUseCMOV(); |
| bool HasSSE1 = Subtarget.hasSSE1(); |
| bool HasSSE2 = Subtarget.hasSSE2(); |
| bool HasSSE41 = Subtarget.hasSSE41(); |
| bool HasAVX = Subtarget.hasAVX(); |
| bool HasAVX2 = Subtarget.hasAVX2(); |
| bool HasAVX512 = Subtarget.hasAVX512(); |
| bool HasVLX = Subtarget.hasVLX(); |
| bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI(); |
| bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI(); |
| bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); |
| bool HasPOPCNT = Subtarget.hasPOPCNT(); |
| bool HasLZCNT = Subtarget.hasLZCNT(); |
| bool HasBMI = Subtarget.hasBMI(); |
| |
| const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0)); |
| const LLT s1 = LLT::scalar(1); |
| const LLT s8 = LLT::scalar(8); |
| const LLT s16 = LLT::scalar(16); |
| const LLT s32 = LLT::scalar(32); |
| const LLT s64 = LLT::scalar(64); |
| const LLT s80 = LLT::scalar(80); |
| const LLT s128 = LLT::scalar(128); |
| const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32; |
| const LLT v2s32 = LLT::fixed_vector(2, 32); |
| const LLT v4s8 = LLT::fixed_vector(4, 8); |
| |
| const LLT v16s8 = LLT::fixed_vector(16, 8); |
| const LLT v8s16 = LLT::fixed_vector(8, 16); |
| const LLT v4s32 = LLT::fixed_vector(4, 32); |
| const LLT v2s64 = LLT::fixed_vector(2, 64); |
| const LLT v2p0 = LLT::fixed_vector(2, p0); |
| |
| const LLT v32s8 = LLT::fixed_vector(32, 8); |
| const LLT v16s16 = LLT::fixed_vector(16, 16); |
| const LLT v8s32 = LLT::fixed_vector(8, 32); |
| const LLT v4s64 = LLT::fixed_vector(4, 64); |
| const LLT v4p0 = LLT::fixed_vector(4, p0); |
| |
| const LLT v64s8 = LLT::fixed_vector(64, 8); |
| const LLT v32s16 = LLT::fixed_vector(32, 16); |
| const LLT v16s32 = LLT::fixed_vector(16, 32); |
| const LLT v8s64 = LLT::fixed_vector(8, 64); |
| |
| const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8; |
| const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16; |
| const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32; |
| const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64; |
| |
| // todo: AVX512 bool vector predicate types |
| |
| // implicit/constants |
| // 32/64-bits needs support for s64/s128 to handle cases: |
| // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF |
| // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF |
| getActionDefinitionsBuilder(G_IMPLICIT_DEF) |
| .legalFor({p0, s1, s8, s16, s32, s64}) |
| .legalFor(Is64Bit, {s128}); |
| |
| getActionDefinitionsBuilder(G_CONSTANT) |
| .legalFor({p0, s8, s16, s32}) |
| .legalFor(Is64Bit, {s64}) |
| .widenScalarToNextPow2(0, /*Min=*/8) |
| .clampScalar(0, s8, sMaxScalar); |
| |
| getActionDefinitionsBuilder( |
| {G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH, |
| G_FASIN, G_FTAN, G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP, |
| G_FEXP2, G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS}) |
| .libcall(); |
| |
| getActionDefinitionsBuilder(G_FSQRT) |
| .legalFor(HasSSE1 || UseX87, {s32}) |
| .legalFor(HasSSE2 || UseX87, {s64}) |
| .legalFor(UseX87, {s80}); |
| |
| getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32}); |
| |
| // merge/unmerge |
| for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { |
| unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; |
| unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; |
| getActionDefinitionsBuilder(Op) |
| .widenScalarToNextPow2(LitTyIdx, /*Min=*/8) |
| .widenScalarToNextPow2(BigTyIdx, /*Min=*/16) |
| .minScalar(LitTyIdx, s8) |
| .minScalar(BigTyIdx, s32) |
| .legalIf([=](const LegalityQuery &Q) { |
| switch (Q.Types[BigTyIdx].getSizeInBits()) { |
| case 16: |
| case 32: |
| case 64: |
| case 128: |
| case 256: |
| case 512: |
| break; |
| default: |
| return false; |
| } |
| switch (Q.Types[LitTyIdx].getSizeInBits()) { |
| case 8: |
| case 16: |
| case 32: |
| case 64: |
| case 128: |
| case 256: |
| return true; |
| default: |
| return false; |
| } |
| }); |
| } |
| |
| // integer addition/subtraction |
| getActionDefinitionsBuilder({G_ADD, G_SUB}) |
| .legalFor({s8, s16, s32}) |
| .legalFor(Is64Bit, {s64}) |
| .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64}) |
| .legalFor(HasAVX2, {v32s8, v16s16, v8s32, v4s64}) |
| .legalFor(HasAVX512, {v16s32, v8s64}) |
| .legalFor(HasBWI, {v64s8, v32s16}) |
| .clampMinNumElements(0, s8, 16) |
| .clampMinNumElements(0, s16, 8) |
| .clampMinNumElements(0, s32, 4) |
| .clampMinNumElements(0, s64, 2) |
| .clampMaxNumElements(0, s8, HasBWI ? 64 : (HasAVX2 ? 32 : 16)) |
| .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8)) |
| .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4)) |
| .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX2 ? 4 : 2)) |
| .widenScalarToNextPow2(0, /*Min=*/32) |
| .clampScalar(0, s8, sMaxScalar) |
| .scalarize(0); |
| |
| getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO}) |
| .legalFor({{s8, s1}, {s16, s1}, {s32, s1}}) |
| .legalFor(Is64Bit, {{s64, s1}}) |
| .widenScalarToNextPow2(0, /*Min=*/32) |
| .clampScalar(0, s8, sMaxScalar) |
| .clampScalar(1, s1, s1) |
| .scalarize(0); |
| |
| // integer multiply |
| getActionDefinitionsBuilder(G_MUL) |
| .legalFor({s8, s16, s32}) |
| .legalFor(Is64Bit, {s64}) |
| .legalFor(HasSSE2, {v8s16}) |
| .legalFor(HasSSE41, {v4s32}) |
| .legalFor(HasAVX2, {v16s16, v8s32}) |
| .legalFor(HasAVX512, {v16s32}) |
| .legalFor(HasDQI, {v8s64}) |
| .legalFor(HasDQI && HasVLX, {v2s64, v4s64}) |
| .legalFor(HasBWI, {v32s16}) |
| .clampMinNumElements(0, s16, 8) |
| .clampMinNumElements(0, s32, 4) |
| .clampMinNumElements(0, s64, HasVLX ? 2 : 8) |
| .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8)) |
| .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4)) |
| .clampMaxNumElements(0, s64, 8) |
| .widenScalarToNextPow2(0, /*Min=*/32) |
| .clampScalar(0, s8, sMaxScalar) |
| .scalarize(0); |
| |
| getActionDefinitionsBuilder({G_SMULH, G_UMULH}) |
| .legalFor({s8, s16, s32}) |
| .legalFor(Is64Bit, {s64}) |
| .widenScalarToNextPow2(0, /*Min=*/32) |
| .clampScalar(0, s8, sMaxScalar) |
| .scalarize(0); |
| |
| // integer divisions |
| getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM}) |
| .legalFor({s8, s16, s32}) |
| .legalFor(Is64Bit, {s64}) |
| .libcallFor({s64}) |
| .clampScalar(0, s8, sMaxScalar); |
| |
| // integer shifts |
| getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR}) |
| .legalFor({{s8, s8}, {s16, s8}, {s32, s8}}) |
| .legalFor(Is64Bit, {{s64, s8}}) |
| .clampScalar(0, s8, sMaxScalar) |
| .clampScalar(1, s8, s8); |
| |
| // integer logic |
| getActionDefinitionsBuilder({G_AND, G_OR, G_XOR}) |
| .legalFor({s8, s16, s32}) |
| .legalFor(Is64Bit, {s64}) |
| .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64}) |
| .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64}) |
| .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64}) |
| .clampMinNumElements(0, s8, 16) |
| .clampMinNumElements(0, s16, 8) |
| .clampMinNumElements(0, s32, 4) |
| .clampMinNumElements(0, s64, 2) |
| .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16)) |
| .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8)) |
| .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4)) |
| .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2)) |
| .widenScalarToNextPow2(0, /*Min=*/32) |
| .clampScalar(0, s8, sMaxScalar) |
| .scalarize(0); |
| |
| // integer comparison |
| const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0}; |
| const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0}; |
| |
| getActionDefinitionsBuilder(G_ICMP) |
| .legalForCartesianProduct({s8}, Is64Bit ? IntTypes64 : IntTypes32) |
| .clampScalar(0, s8, s8) |
| .clampScalar(1, s8, sMaxScalar); |
| |
| // bswap |
| getActionDefinitionsBuilder(G_BSWAP) |
| .legalFor({s32}) |
| .legalFor(Is64Bit, {s64}) |
| .widenScalarToNextPow2(0, /*Min=*/32) |
| .clampScalar(0, s32, sMaxScalar); |
| |
| // popcount |
| getActionDefinitionsBuilder(G_CTPOP) |
| .legalFor(HasPOPCNT, {{s16, s16}, {s32, s32}}) |
| .legalFor(HasPOPCNT && Is64Bit, {{s64, s64}}) |
| .widenScalarToNextPow2(1, /*Min=*/16) |
| .clampScalar(1, s16, sMaxScalar) |
| .scalarSameSizeAs(0, 1); |
| |
| // count leading zeros (LZCNT) |
| getActionDefinitionsBuilder(G_CTLZ) |
| .legalFor(HasLZCNT, {{s16, s16}, {s32, s32}}) |
| .legalFor(HasLZCNT && Is64Bit, {{s64, s64}}) |
| .widenScalarToNextPow2(1, /*Min=*/16) |
| .clampScalar(1, s16, sMaxScalar) |
| .scalarSameSizeAs(0, 1); |
| |
| // count trailing zeros |
| getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF) |
| .legalFor({{s16, s16}, {s32, s32}}) |
| .legalFor(Is64Bit, {{s64, s64}}) |
| .widenScalarToNextPow2(1, /*Min=*/16) |
| .clampScalar(1, s16, sMaxScalar) |
| .scalarSameSizeAs(0, 1); |
| |
| getActionDefinitionsBuilder(G_CTTZ) |
| .legalFor(HasBMI, {{s16, s16}, {s32, s32}}) |
| .legalFor(HasBMI && Is64Bit, {{s64, s64}}) |
| .widenScalarToNextPow2(1, /*Min=*/16) |
| .clampScalar(1, s16, sMaxScalar) |
| .scalarSameSizeAs(0, 1); |
| |
| // control flow |
| getActionDefinitionsBuilder(G_PHI) |
| .legalFor({s8, s16, s32, p0}) |
| .legalFor(UseX87, {s80}) |
| .legalFor(Is64Bit, {s64}) |
| .legalFor(HasSSE1, {v16s8, v8s16, v4s32, v2s64}) |
| .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64}) |
| .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64}) |
| .clampMinNumElements(0, s8, 16) |
| .clampMinNumElements(0, s16, 8) |
| .clampMinNumElements(0, s32, 4) |
| .clampMinNumElements(0, s64, 2) |
| .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16)) |
| .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8)) |
| .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4)) |
| .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2)) |
| .widenScalarToNextPow2(0, /*Min=*/32) |
| .clampScalar(0, s8, sMaxScalar) |
| .scalarize(0); |
| |
| getActionDefinitionsBuilder(G_BRCOND).legalFor({s1}); |
| |
| // pointer handling |
| const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32}; |
| const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64}; |
| |
| getActionDefinitionsBuilder(G_PTRTOINT) |
| .legalForCartesianProduct(Is64Bit ? PtrTypes64 : PtrTypes32, {p0}) |
| .maxScalar(0, sMaxScalar) |
| .widenScalarToNextPow2(0, /*Min*/ 8); |
| |
| getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, sMaxScalar}}); |
| |
| getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({p0}); |
| |
| getActionDefinitionsBuilder(G_PTR_ADD) |
| .legalFor({{p0, s32}}) |
| .legalFor(Is64Bit, {{p0, s64}}) |
| .widenScalarToNextPow2(1, /*Min*/ 32) |
| .clampScalar(1, s32, sMaxScalar); |
| |
| getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0}); |
| |
| // load/store: add more corner cases |
| for (unsigned Op : {G_LOAD, G_STORE}) { |
| auto &Action = getActionDefinitionsBuilder(Op); |
| Action.legalForTypesWithMemDesc({{s8, p0, s8, 1}, |
| {s16, p0, s16, 1}, |
| {s32, p0, s32, 1}, |
| {s80, p0, s80, 1}, |
| {p0, p0, p0, 1}, |
| {v4s8, p0, v4s8, 1}}); |
| if (Is64Bit) |
| Action.legalForTypesWithMemDesc( |
| {{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}}); |
| |
| if (HasSSE1) |
| Action.legalForTypesWithMemDesc({{v4s32, p0, v4s32, 1}}); |
| if (HasSSE2) |
| Action.legalForTypesWithMemDesc({{v16s8, p0, v16s8, 1}, |
| {v8s16, p0, v8s16, 1}, |
| {v2s64, p0, v2s64, 1}, |
| {v2p0, p0, v2p0, 1}}); |
| if (HasAVX) |
| Action.legalForTypesWithMemDesc({{v32s8, p0, v32s8, 1}, |
| {v16s16, p0, v16s16, 1}, |
| {v8s32, p0, v8s32, 1}, |
| {v4s64, p0, v4s64, 1}, |
| {v4p0, p0, v4p0, 1}}); |
| if (HasAVX512) |
| Action.legalForTypesWithMemDesc({{v64s8, p0, v64s8, 1}, |
| {v32s16, p0, v32s16, 1}, |
| {v16s32, p0, v16s32, 1}, |
| {v8s64, p0, v8s64, 1}}); |
| |
| // X86 supports extending loads but not stores for GPRs |
| if (Op == G_LOAD) { |
| Action.legalForTypesWithMemDesc({{s8, p0, s1, 1}, |
| {s16, p0, s8, 1}, |
| {s32, p0, s8, 1}, |
| {s32, p0, s16, 1}}); |
| if (Is64Bit) |
| Action.legalForTypesWithMemDesc( |
| {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}}); |
| } else { |
| Action.customIf([=](const LegalityQuery &Query) { |
| return Query.Types[0] != Query.MMODescrs[0].MemoryTy; |
| }); |
| } |
| Action.widenScalarToNextPow2(0, /*Min=*/8) |
| .clampScalar(0, s8, sMaxScalar) |
| .scalarize(0); |
| } |
| |
| for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) { |
| auto &Action = getActionDefinitionsBuilder(Op); |
| Action.legalForTypesWithMemDesc( |
| {{s16, p0, s8, 1}, {s32, p0, s8, 1}, {s32, p0, s16, 1}}); |
| if (Is64Bit) |
| Action.legalForTypesWithMemDesc( |
| {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}}); |
| // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions |
| } |
| |
| // sext, zext, and anyext |
| getActionDefinitionsBuilder(G_ANYEXT) |
| .legalFor({s8, s16, s32, s128}) |
| .legalFor(Is64Bit, {s64}) |
| .widenScalarToNextPow2(0, /*Min=*/8) |
| .clampScalar(0, s8, sMaxScalar) |
| .widenScalarToNextPow2(1, /*Min=*/8) |
| .clampScalar(1, s8, sMaxScalar) |
| .scalarize(0); |
| |
| getActionDefinitionsBuilder({G_SEXT, G_ZEXT}) |
| .legalFor({s8, s16, s32}) |
| .legalFor(Is64Bit, {s64}) |
| .widenScalarToNextPow2(0, /*Min=*/8) |
| .clampScalar(0, s8, sMaxScalar) |
| .widenScalarToNextPow2(1, /*Min=*/8) |
| .clampScalar(1, s8, sMaxScalar) |
| .scalarize(0); |
| |
| getActionDefinitionsBuilder(G_SEXT_INREG).lower(); |
| |
| // fp constants |
| getActionDefinitionsBuilder(G_FCONSTANT) |
| .legalFor({s32, s64}) |
| .legalFor(UseX87, {s80}); |
| |
| // fp arithmetic |
| getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV}) |
| .legalFor({s32, s64}) |
| .legalFor(HasSSE1, {v4s32}) |
| .legalFor(HasSSE2, {v2s64}) |
| .legalFor(HasAVX, {v8s32, v4s64}) |
| .legalFor(HasAVX512, {v16s32, v8s64}) |
| .legalFor(UseX87, {s80}); |
| |
| getActionDefinitionsBuilder(G_FABS) |
| .legalFor(UseX87, {s80}) |
| .legalFor(UseX87 && !Is64Bit, {s64}) |
| .lower(); |
| |
| // fp comparison |
| getActionDefinitionsBuilder(G_FCMP) |
| .legalFor(HasSSE1 || UseX87, {s8, s32}) |
| .legalFor(HasSSE2 || UseX87, {s8, s64}) |
| .legalFor(UseX87, {s8, s80}) |
| .clampScalar(0, s8, s8) |
| .clampScalar(1, s32, HasSSE2 ? s64 : s32) |
| .widenScalarToNextPow2(1); |
| |
| // fp conversions |
| getActionDefinitionsBuilder(G_FPEXT) |
| .legalFor(HasSSE2, {{s64, s32}}) |
| .legalFor(HasAVX, {{v4s64, v4s32}}) |
| .legalFor(HasAVX512, {{v8s64, v8s32}}); |
| |
| getActionDefinitionsBuilder(G_FPTRUNC) |
| .legalFor(HasSSE2, {{s32, s64}}) |
| .legalFor(HasAVX, {{v4s32, v4s64}}) |
| .legalFor(HasAVX512, {{v8s32, v8s64}}); |
| |
| getActionDefinitionsBuilder(G_SITOFP) |
| .legalFor(HasSSE1, {{s32, s32}}) |
| .legalFor(HasSSE1 && Is64Bit, {{s32, s64}}) |
| .legalFor(HasSSE2, {{s64, s32}}) |
| .legalFor(HasSSE2 && Is64Bit, {{s64, s64}}) |
| .clampScalar(1, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar) |
| .widenScalarToNextPow2(1) |
| .customForCartesianProduct(UseX87, {s32, s64, s80}, {s16, s32, s64}) |
| .clampScalar(0, s32, HasSSE2 ? s64 : s32) |
| .widenScalarToNextPow2(0); |
| |
| getActionDefinitionsBuilder(G_FPTOSI) |
| .legalFor(HasSSE1, {{s32, s32}}) |
| .legalFor(HasSSE1 && Is64Bit, {{s64, s32}}) |
| .legalFor(HasSSE2, {{s32, s64}}) |
| .legalFor(HasSSE2 && Is64Bit, {{s64, s64}}) |
| .clampScalar(0, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar) |
| .widenScalarToNextPow2(0) |
| .customForCartesianProduct(UseX87, {s16, s32, s64}, {s32, s64, s80}) |
| .clampScalar(1, s32, HasSSE2 ? s64 : s32) |
| .widenScalarToNextPow2(1); |
| |
| // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types |
| // <= s32 manually. Otherwise, in custom handler there is no way to |
| // understand whether s32 is an original type and we need to promote it to |
| // s64 or s32 is obtained after widening and we shouldn't widen it to s64. |
| // |
| // For AVX512 we simply widen types as there is direct mapping from opcodes |
| // to asm instructions. |
| getActionDefinitionsBuilder(G_UITOFP) |
| .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}}) |
| .customIf([=](const LegalityQuery &Query) { |
| return !HasAVX512 && |
| ((HasSSE1 && typeIs(0, s32)(Query)) || |
| (HasSSE2 && typeIs(0, s64)(Query))) && |
| scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query); |
| }) |
| .lowerIf([=](const LegalityQuery &Query) { |
| // Lower conversions from s64 |
| return !HasAVX512 && |
| ((HasSSE1 && typeIs(0, s32)(Query)) || |
| (HasSSE2 && typeIs(0, s64)(Query))) && |
| (Is64Bit && typeIs(1, s64)(Query)); |
| }) |
| .clampScalar(0, s32, HasSSE2 ? s64 : s32) |
| .widenScalarToNextPow2(0) |
| .clampScalar(1, s32, sMaxScalar) |
| .widenScalarToNextPow2(1); |
| |
| getActionDefinitionsBuilder(G_FPTOUI) |
| .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}}) |
| .customIf([=](const LegalityQuery &Query) { |
| return !HasAVX512 && |
| ((HasSSE1 && typeIs(1, s32)(Query)) || |
| (HasSSE2 && typeIs(1, s64)(Query))) && |
| scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query); |
| }) |
| // TODO: replace with customized legalization using |
| // specifics of cvttsd2si. The selection of this node requires |
| // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced |
| // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand. |
| .lowerIf([=](const LegalityQuery &Query) { |
| return !HasAVX512 && |
| ((HasSSE1 && typeIs(1, s32)(Query)) || |
| (HasSSE2 && typeIs(1, s64)(Query))) && |
| (Is64Bit && typeIs(0, s64)(Query)); |
| }) |
| .clampScalar(0, s32, sMaxScalar) |
| .widenScalarToNextPow2(0) |
| .clampScalar(1, s32, HasSSE2 ? s64 : s32) |
| .widenScalarToNextPow2(1); |
| |
| // vector ops |
| getActionDefinitionsBuilder(G_BUILD_VECTOR) |
| .customIf([=](const LegalityQuery &Query) { |
| return (HasSSE1 && typeInSet(0, {v4s32})(Query)) || |
| (HasSSE2 && typeInSet(0, {v2s64, v8s16, v16s8})(Query)) || |
| (HasAVX && typeInSet(0, {v4s64, v8s32, v16s16, v32s8})(Query)) || |
| (HasAVX512 && typeInSet(0, {v8s64, v16s32, v32s16, v64s8})); |
| }) |
| .clampNumElements(0, v16s8, s8MaxVector) |
| .clampNumElements(0, v8s16, s16MaxVector) |
| .clampNumElements(0, v4s32, s32MaxVector) |
| .clampNumElements(0, v2s64, s64MaxVector) |
| .moreElementsToNextPow2(0); |
| |
| getActionDefinitionsBuilder({G_EXTRACT, G_INSERT}) |
| .legalIf([=](const LegalityQuery &Query) { |
| unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1; |
| unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0; |
| return (HasAVX && typePairInSet(SubIdx, FullIdx, |
| {{v16s8, v32s8}, |
| {v8s16, v16s16}, |
| {v4s32, v8s32}, |
| {v2s64, v4s64}})(Query)) || |
| (HasAVX512 && typePairInSet(SubIdx, FullIdx, |
| {{v16s8, v64s8}, |
| {v32s8, v64s8}, |
| {v8s16, v32s16}, |
| {v16s16, v32s16}, |
| {v4s32, v16s32}, |
| {v8s32, v16s32}, |
| {v2s64, v8s64}, |
| {v4s64, v8s64}})(Query)); |
| }); |
| |
| // todo: only permit dst types up to max legal vector register size? |
| getActionDefinitionsBuilder(G_CONCAT_VECTORS) |
| .legalFor( |
| HasSSE1, |
| {{v32s8, v16s8}, {v16s16, v8s16}, {v8s32, v4s32}, {v4s64, v2s64}}) |
| .legalFor(HasAVX, {{v64s8, v16s8}, |
| {v64s8, v32s8}, |
| {v32s16, v8s16}, |
| {v32s16, v16s16}, |
| {v16s32, v4s32}, |
| {v16s32, v8s32}, |
| {v8s64, v2s64}, |
| {v8s64, v4s64}}); |
| |
| // todo: vectors and address spaces |
| getActionDefinitionsBuilder(G_SELECT) |
| .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}}) |
| .widenScalarToNextPow2(0, /*Min=*/8) |
| .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar) |
| .clampScalar(1, s32, s32); |
| |
| // memory intrinsics |
| getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); |
| |
| getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE}) |
| .lower(); |
| |
| // fp intrinsics |
| getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN) |
| .scalarize(0) |
| .minScalar(0, LLT::scalar(32)) |
| .libcall(); |
| |
| getActionDefinitionsBuilder({G_FREEZE, G_CONSTANT_FOLD_BARRIER}) |
| .legalFor({s8, s16, s32, s64, p0}) |
| .widenScalarToNextPow2(0, /*Min=*/8) |
| .clampScalar(0, s8, sMaxScalar); |
| |
| getLegacyLegalizerInfo().computeTables(); |
| verify(*STI.getInstrInfo()); |
| } |
| |
| bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, |
| LostDebugLocObserver &LocObserver) const { |
| MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
| switch (MI.getOpcode()) { |
| default: |
| // No idea what to do. |
| return false; |
| case TargetOpcode::G_BUILD_VECTOR: |
| return legalizeBuildVector(MI, MRI, Helper); |
| case TargetOpcode::G_FPTOUI: |
| return legalizeFPTOUI(MI, MRI, Helper); |
| case TargetOpcode::G_UITOFP: |
| return legalizeUITOFP(MI, MRI, Helper); |
| case TargetOpcode::G_STORE: |
| return legalizeNarrowingStore(MI, MRI, Helper); |
| case TargetOpcode::G_SITOFP: |
| return legalizeSITOFP(MI, MRI, Helper); |
| case TargetOpcode::G_FPTOSI: |
| return legalizeFPTOSI(MI, MRI, Helper); |
| case TargetOpcode::G_GET_ROUNDING: |
| return legalizeGETROUNDING(MI, MRI, Helper); |
| } |
| llvm_unreachable("expected switch to return"); |
| } |
| |
| bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI, |
| MachineRegisterInfo &MRI, |
| LegalizerHelper &Helper) const { |
| MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| MachineFunction &MF = *MI.getMF(); |
| auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); |
| |
| assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 || |
| SrcTy.getSizeInBits() == 64) && |
| "Unexpected source type for SITOFP in X87 mode."); |
| |
| TypeSize MemSize = SrcTy.getSizeInBytes(); |
| MachinePointerInfo PtrInfo; |
| Align Alignmt = Helper.getStackTemporaryAlignment(SrcTy); |
| auto SlotPointer = Helper.createStackTemporary(MemSize, Alignmt, PtrInfo); |
| MachineMemOperand *StoreMMO = MF.getMachineMemOperand( |
| PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize)); |
| |
| // Store the integer value on the FPU stack. |
| MIRBuilder.buildStore(Src, SlotPointer, *StoreMMO); |
| |
| MachineMemOperand *LoadMMO = MF.getMachineMemOperand( |
| PtrInfo, MachineMemOperand::MOLoad, MemSize, Align(MemSize)); |
| MIRBuilder.buildInstr(X86::G_FILD) |
| .addDef(Dst) |
| .addUse(SlotPointer.getReg(0)) |
| .addMemOperand(LoadMMO); |
| |
| MI.eraseFromParent(); |
| return true; |
| } |
| |
| bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI, |
| MachineRegisterInfo &MRI, |
| LegalizerHelper &Helper) const { |
| MachineFunction &MF = *MI.getMF(); |
| MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); |
| |
| TypeSize MemSize = DstTy.getSizeInBytes(); |
| MachinePointerInfo PtrInfo; |
| Align Alignmt = Helper.getStackTemporaryAlignment(DstTy); |
| auto SlotPointer = Helper.createStackTemporary(MemSize, Alignmt, PtrInfo); |
| MachineMemOperand *StoreMMO = MF.getMachineMemOperand( |
| PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize)); |
| |
| MIRBuilder.buildInstr(X86::G_FIST) |
| .addUse(Src) |
| .addUse(SlotPointer.getReg(0)) |
| .addMemOperand(StoreMMO); |
| |
| MIRBuilder.buildLoad(Dst, SlotPointer, PtrInfo, Align(MemSize)); |
| MI.eraseFromParent(); |
| return true; |
| } |
| |
| bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI, |
| MachineRegisterInfo &MRI, |
| LegalizerHelper &Helper) const { |
| MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| const auto &BuildVector = cast<GBuildVector>(MI); |
| Register Dst = BuildVector.getReg(0); |
| LLT DstTy = MRI.getType(Dst); |
| MachineFunction &MF = MIRBuilder.getMF(); |
| LLVMContext &Ctx = MF.getFunction().getContext(); |
| uint64_t DstTySize = DstTy.getScalarSizeInBits(); |
| |
| SmallVector<Constant *, 4> CstIdxs; |
| for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) { |
| Register Source = BuildVector.getSourceReg(i); |
| |
| auto ValueAndReg = getIConstantVRegValWithLookThrough(Source, MRI); |
| if (ValueAndReg) { |
| CstIdxs.emplace_back(ConstantInt::get(Ctx, ValueAndReg->Value)); |
| continue; |
| } |
| |
| auto FPValueAndReg = getFConstantVRegValWithLookThrough(Source, MRI); |
| if (FPValueAndReg) { |
| CstIdxs.emplace_back(ConstantFP::get(Ctx, FPValueAndReg->Value)); |
| continue; |
| } |
| |
| if (getOpcodeDef<GImplicitDef>(Source, MRI)) { |
| CstIdxs.emplace_back(UndefValue::get(Type::getIntNTy(Ctx, DstTySize))); |
| continue; |
| } |
| return false; |
| } |
| |
| Constant *ConstVal = ConstantVector::get(CstIdxs); |
| |
| const DataLayout &DL = MIRBuilder.getDataLayout(); |
| unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace(); |
| Align Alignment(DL.getABITypeAlign(ConstVal->getType())); |
| auto Addr = MIRBuilder.buildConstantPool( |
| LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)), |
| MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment)); |
| MachineMemOperand *MMO = |
| MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), |
| MachineMemOperand::MOLoad, DstTy, Alignment); |
| |
| MIRBuilder.buildLoad(Dst, Addr, *MMO); |
| MI.eraseFromParent(); |
| return true; |
| } |
| |
| bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI, |
| MachineRegisterInfo &MRI, |
| LegalizerHelper &Helper) const { |
| MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); |
| unsigned DstSizeInBits = DstTy.getScalarSizeInBits(); |
| const LLT s32 = LLT::scalar(32); |
| const LLT s64 = LLT::scalar(64); |
| |
| // Simply reuse FPTOSI when it is possible to widen the type |
| if (DstSizeInBits <= 32) { |
| auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src); |
| MIRBuilder.buildTrunc(Dst, Casted); |
| MI.eraseFromParent(); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI, |
| MachineRegisterInfo &MRI, |
| LegalizerHelper &Helper) const { |
| MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); |
| const LLT s32 = LLT::scalar(32); |
| const LLT s64 = LLT::scalar(64); |
| |
| // Simply reuse SITOFP when it is possible to widen the type |
| if (SrcTy.getSizeInBits() <= 32) { |
| auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src); |
| MIRBuilder.buildSITOFP(Dst, Ext); |
| MI.eraseFromParent(); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI, |
| MachineRegisterInfo &MRI, |
| LegalizerHelper &Helper) const { |
| auto &Store = cast<GStore>(MI); |
| MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| MachineMemOperand &MMO = **Store.memoperands_begin(); |
| MachineFunction &MF = MIRBuilder.getMF(); |
| LLT ValTy = MRI.getType(Store.getValueReg()); |
| auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), ValTy); |
| |
| Helper.Observer.changingInstr(Store); |
| Store.setMemRefs(MF, {NewMMO}); |
| Helper.Observer.changedInstr(Store); |
| return true; |
| } |
| |
| bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI, |
| MachineRegisterInfo &MRI, |
| LegalizerHelper &Helper) const { |
| /* |
| The rounding mode is in bits 11:10 of FPSR, and has the following |
| settings: |
| 00 Round to nearest |
| 01 Round to -inf |
| 10 Round to +inf |
| 11 Round to 0 |
| |
| GET_ROUNDING, on the other hand, expects the following: |
| -1 Undefined |
| 0 Round to 0 |
| 1 Round to nearest |
| 2 Round to +inf |
| 3 Round to -inf |
| |
| To perform the conversion, we use a packed lookup table of the four 2-bit |
| values that we can index by FPSP[11:10] |
| 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10] |
| |
| (0x2d >> ((FPSR >> 9) & 6)) & 3 |
| */ |
| |
| MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| MachineFunction &MF = MIRBuilder.getMF(); |
| Register Dst = MI.getOperand(0).getReg(); |
| LLT DstTy = MRI.getType(Dst); |
| const LLT s8 = LLT::scalar(8); |
| const LLT s16 = LLT::scalar(16); |
| const LLT s32 = LLT::scalar(32); |
| |
| // Save FP Control Word to stack slot |
| int MemSize = 2; |
| Align Alignment = Align(2); |
| MachinePointerInfo PtrInfo; |
| auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize), |
| Alignment, PtrInfo); |
| Register StackPtr = StackTemp.getReg(0); |
| |
| auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, |
| MemSize, Alignment); |
| |
| // Store FP Control Word to stack slot using G_FNSTCW16 |
| MIRBuilder.buildInstr(X86::G_FNSTCW16) |
| .addUse(StackPtr) |
| .addMemOperand(StoreMMO); |
| |
| // Load FP Control Word from stack slot |
| auto LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, |
| MemSize, Alignment); |
| |
| auto CWD32 = |
| MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO)); |
| auto Shifted8 = MIRBuilder.buildTrunc( |
| s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9))); |
| auto Masked32 = MIRBuilder.buildZExt( |
| s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6))); |
| |
| // LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding |
| // mode (from bits 11:10 of the control word) to the values expected by |
| // GET_ROUNDING. The mapping is performed by shifting LUT right by the |
| // extracted rounding mode and masking the result with 3 to obtain the final |
| auto LUT = MIRBuilder.buildConstant(s32, 0x2d); |
| auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32); |
| auto RetVal = |
| MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3)); |
| auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal); |
| |
| MIRBuilder.buildCopy(Dst, RetValTrunc); |
| |
| MI.eraseFromParent(); |
| return true; |
| } |
| |
// Custom legalization hook for intrinsics. No intrinsic is rewritten here;
// unconditionally reporting success leaves the instruction unchanged.
// NOTE(review): presumably any intrinsic routed here is already selectable
// as-is — confirm against the rule set if new custom intrinsics are added.
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                         MachineInstr &MI) const {
  return true;
}