//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//
#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
#define DEBUG_TYPE "loongarch-isel-lowering"
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
cl::desc("Trap on integer division by zero."),
cl::init(false));
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
const LoongArchSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
MVT GRLenVT = Subtarget.getGRLenVT();
// Set up the register classes.
addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
if (Subtarget.hasBasicF())
addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
if (Subtarget.hasBasicD())
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
static const MVT::SimpleValueType LSXVTs[] = {
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
static const MVT::SimpleValueType LASXVTs[] = {
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
if (Subtarget.hasExtLSX())
for (MVT VT : LSXVTs)
addRegisterClass(VT, &LoongArch::LSX128RegClass);
if (Subtarget.hasExtLASX())
for (MVT VT : LASXVTs)
addRegisterClass(VT, &LoongArch::LASX256RegClass);
// Set operations for LA32 and LA64.
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
MVT::i1, Promote);
setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
setOperationAction(ISD::ROTL, GRLenVT, Expand);
setOperationAction(ISD::CTPOP, GRLenVT, Expand);
setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
ISD::JumpTable, ISD::GlobalTLSAddress},
GRLenVT, Custom);
setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  // Expand bitreverse.i16 with native-width bitrev and shift for now, until we
  // know which of sll and revb.2h is faster.
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
// LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
// the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
// and i32 could still be byte-swapped relatively cheaply.
setOperationAction(ISD::BSWAP, MVT::i16, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, GRLenVT, Expand);
setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
// Set operations for LA64 only.
if (Subtarget.is64Bit()) {
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::ROTR, MVT::i32, Custom);
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTLZ, MVT::i32, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
}
// Set operations for LA32 only.
if (!Subtarget.is64Bit()) {
setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
// Set libcalls.
setLibcallName(RTLIB::MUL_I128, nullptr);
}
static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
ISD::SETGE, ISD::SETNE, ISD::SETGT};
// Set operations for 'F' feature.
if (Subtarget.hasBasicF()) {
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f32, Legal);
if (!Subtarget.hasBasicD()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
}
}
}
// Set operations for 'D' feature.
if (Subtarget.hasBasicD()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
setOperationAction(ISD::FMA, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
// Set operations for 'LSX' feature.
if (Subtarget.hasExtLSX())
setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN},
{MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal);
// Set operations for 'LASX' feature.
if (Subtarget.hasExtLASX())
setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN},
{MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8},
Legal);
// Set DAG combine for LA32 and LA64.
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::SRL);
// Set DAG combine for 'LSX' feature.
if (Subtarget.hasExtLSX())
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget.getRegisterInfo());
setStackPointerRegisterToSaveRestore(LoongArch::R3);
setBooleanContents(ZeroOrOneBooleanContent);
setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
setMinCmpXchgSizeInBits(32);
// Function alignments.
setMinFunctionAlignment(Align(4));
// Set preferred alignments.
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
}
bool LoongArchTargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// In order to maximise the opportunity for common subexpression elimination,
// keep a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
return false;
}
SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
case ISD::EH_DWARF_CFA:
return lowerEH_DWARF_CFA(Op, DAG);
case ISD::GlobalAddress:
return lowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
return lowerGlobalTLSAddress(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return lowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
return lowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID:
return lowerINTRINSIC_VOID(Op, DAG);
case ISD::BlockAddress:
return lowerBlockAddress(Op, DAG);
case ISD::JumpTable:
return lowerJumpTable(Op, DAG);
case ISD::SHL_PARTS:
return lowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS:
return lowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return lowerShiftRightParts(Op, DAG, false);
case ISD::ConstantPool:
return lowerConstantPool(Op, DAG);
case ISD::FP_TO_SINT:
return lowerFP_TO_SINT(Op, DAG);
case ISD::BITCAST:
return lowerBITCAST(Op, DAG);
case ISD::UINT_TO_FP:
return lowerUINT_TO_FP(Op, DAG);
case ISD::SINT_TO_FP:
return lowerSINT_TO_FP(Op, DAG);
case ISD::VASTART:
return lowerVASTART(Op, DAG);
case ISD::FRAMEADDR:
return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
return lowerRETURNADDR(Op, DAG);
case ISD::WRITE_REGISTER:
return lowerWRITE_REGISTER(Op, DAG);
}
return SDValue();
}
SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
DAG.getContext()->emitError(
"On LA64, only 64-bit registers can be written.");
return Op.getOperand(0);
}
if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
DAG.getContext()->emitError(
"On LA32, only 32-bit registers can be written.");
return Op.getOperand(0);
}
return Op;
}
SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
"be a constant integer");
return SDValue();
}
MachineFunction &MF = DAG.getMachineFunction();
MF.getFrameInfo().setFrameAddressIsTaken(true);
Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
int GRLenInBytes = Subtarget.getGRLen() / 8;
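  // Walk up the chain of saved frame pointers. Each iteration assumes the
  // caller's frame pointer was spilled at fp - 2 * GRLenInBytes (just below
  // the saved return address), which is what the prologue is expected to
  // produce.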
while (Depth--) {
int Offset = -(GRLenInBytes * 2);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
DAG.getIntPtrConstant(Offset, DL));
FrameAddr =
DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
}
return FrameAddr;
}
SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
  // Currently we only support lowering the return address for the current
  // frame.
if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
DAG.getContext()->emitError(
"return address can only be determined for the current frame");
return SDValue();
}
MachineFunction &MF = DAG.getMachineFunction();
MF.getFrameInfo().setReturnAddressIsTaken(true);
MVT GRLenVT = Subtarget.getGRLenVT();
// Return the value of the return address register, marking it an implicit
// live-in.
Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
getRegClassFor(GRLenVT));
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}
SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
SelectionDAG &DAG) const {
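  // The canonical frame address is taken to be the incoming stack pointer, so
  // it is modelled as a fixed stack object at offset 0 of the frame.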
MachineFunction &MF = DAG.getMachineFunction();
auto Size = Subtarget.getGRLen() / 8;
auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}
SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
SDLoc DL(Op);
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy(MF.getDataLayout()));
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
!Subtarget.hasBasicD() && "unexpected target features");
SDLoc DL(Op);
SDValue Op0 = Op.getOperand(0);
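  // Keep the node when the input is already known to be narrow: masked by an
  // AND with a mask below 0xFFFFFFFF, a BSTRPICK of the low bits, or an
  // AssertZext to fewer than 32 bits. Otherwise soften the conversion to a
  // libcall.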
if (Op0->getOpcode() == ISD::AND) {
auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
return Op;
}
if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
Op0.getConstantOperandVal(2) == UINT64_C(0))
return Op;
if (Op0.getOpcode() == ISD::AssertZext &&
dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
return Op;
EVT OpVT = Op0.getValueType();
EVT RetVT = Op.getValueType();
RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
MakeLibCallOptions CallOptions;
CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
SDValue Chain = SDValue();
SDValue Result;
std::tie(Result, Chain) =
makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
return Result;
}
SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
!Subtarget.hasBasicD() && "unexpected target features");
SDLoc DL(Op);
SDValue Op0 = Op.getOperand(0);
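  // Keep the node when the input is known to be sign-extended from at most 32
  // bits; otherwise soften the conversion to a libcall.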
if ((Op0.getOpcode() == ISD::AssertSext ||
Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
return Op;
EVT OpVT = Op0.getValueType();
EVT RetVT = Op.getValueType();
RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
MakeLibCallOptions CallOptions;
CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
SDValue Chain = SDValue();
SDValue Result;
std::tie(Result, Chain) =
makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
return Result;
}
SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Op0 = Op.getOperand(0);
if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
Subtarget.is64Bit() && Subtarget.hasBasicF()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
}
return Op;
}
SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
!Subtarget.hasBasicD()) {
SDValue Dst =
DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
}
EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
Flags);
}
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flags);
}
static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
bool IsLocal) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
switch (DAG.getTarget().getCodeModel()) {
default:
report_fatal_error("Unsupported code model");
case CodeModel::Large: {
assert(Subtarget.is64Bit() && "Large code model requires LA64");
// This is not actually used, but is necessary for successfully matching
// the PseudoLA_*_LARGE nodes.
SDValue Tmp = DAG.getConstant(0, DL, Ty);
if (IsLocal)
// This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
// eventually becomes the desired 5-insn code sequence.
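      // (Schematically, the sequence is expected to be pcalau12i + addi.d +
      //  lu32i.d + lu52i.d + add.d, using the %pc_hi20/%pc_lo12/%pc64_lo20/
      //  %pc64_hi12 relocations.)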
return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
Tmp, Addr),
0);
// This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
// becomes the desired 5-insn code sequence.
return SDValue(
DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
0);
}
case CodeModel::Small:
case CodeModel::Medium:
if (IsLocal)
// This generates the pattern (PseudoLA_PCREL sym), which expands to
// (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
return SDValue(
DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
// This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
// (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
0);
}
}
SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
return getAddr(cast<BlockAddressSDNode>(Op), DAG);
}
SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
return getAddr(cast<JumpTableSDNode>(Op), DAG);
}
SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
}
SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
assert(N->getOffset() == 0 && "unexpected offset in global node");
return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
}
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG,
unsigned Opc,
bool Large) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
MVT GRLenVT = Subtarget.getGRLenVT();
// This is not actually used, but is necessary for successfully matching the
// PseudoLA_*_LARGE nodes.
SDValue Tmp = DAG.getConstant(0, DL, Ty);
SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
SDValue Offset = Large
? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
// Add the thread pointer.
return DAG.getNode(ISD::ADD, DL, Ty, Offset,
DAG.getRegister(LoongArch::R2, GRLenVT));
}
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG,
unsigned Opc,
bool Large) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
// This is not actually used, but is necessary for successfully matching the
// PseudoLA_*_LARGE nodes.
SDValue Tmp = DAG.getConstant(0, DL, Ty);
// Use a PC-relative addressing mode to access the dynamic GOT address.
SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
// Prepare argument list to generate call.
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Load;
Entry.Ty = CallTy;
Args.push_back(Entry);
// Setup call to __tls_get_addr.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::C, CallTy,
DAG.getExternalSymbol("__tls_get_addr", Ty),
std::move(Args));
return LowerCallTo(CLI).first;
}
SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().getCallingConv() ==
CallingConv::GHC)
report_fatal_error("In GHC calling convention TLS is not supported");
bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
assert(N->getOffset() == 0 && "unexpected offset in global node");
SDValue Addr;
switch (getTargetMachine().getTLSModel(N->getGlobal())) {
case TLSModel::GeneralDynamic:
// In this model, application code calls the dynamic linker function
// __tls_get_addr to locate TLS offsets into the dynamic thread vector at
// runtime.
Addr = getDynamicTLSAddr(N, DAG,
Large ? LoongArch::PseudoLA_TLS_GD_LARGE
: LoongArch::PseudoLA_TLS_GD,
Large);
break;
case TLSModel::LocalDynamic:
// Same as GeneralDynamic, except for assembly modifiers and relocation
// records.
Addr = getDynamicTLSAddr(N, DAG,
Large ? LoongArch::PseudoLA_TLS_LD_LARGE
: LoongArch::PseudoLA_TLS_LD,
Large);
break;
case TLSModel::InitialExec:
// This model uses the GOT to resolve TLS offsets.
Addr = getStaticTLSAddr(N, DAG,
Large ? LoongArch::PseudoLA_TLS_IE_LARGE
: LoongArch::PseudoLA_TLS_IE,
Large);
break;
case TLSModel::LocalExec:
    // This model is used when linking statically, as the TLS offsets are
    // resolved during program linking.
//
// This node doesn't need an extra argument for the large code model.
Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
break;
}
return Addr;
}
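// Checks that operand ImmOp of the intrinsic fits in an N-bit signed or
// unsigned immediate field. On failure, an error is emitted and an UNDEF node
// of the intrinsic's result type is returned; on success, an empty SDValue is
// returned so the caller keeps the original node.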
template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
SelectionDAG &DAG, bool IsSigned = false) {
auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
// Check the ImmArg.
if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
DAG.getContext()->emitError(Op->getOperationName(0) +
": argument out of range.");
return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
}
return SDValue();
}
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
switch (Op.getConstantOperandVal(0)) {
default:
return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getRegister(LoongArch::R2, PtrVT);
}
case Intrinsic::loongarch_lsx_vpickve2gr_d:
case Intrinsic::loongarch_lsx_vpickve2gr_du:
case Intrinsic::loongarch_lsx_vreplvei_d:
case Intrinsic::loongarch_lasx_xvrepl128vei_d:
return checkIntrinsicImmArg<1>(Op, 2, DAG);
case Intrinsic::loongarch_lsx_vreplvei_w:
case Intrinsic::loongarch_lasx_xvrepl128vei_w:
case Intrinsic::loongarch_lasx_xvpickve2gr_d:
case Intrinsic::loongarch_lasx_xvpickve2gr_du:
case Intrinsic::loongarch_lasx_xvpickve_d:
case Intrinsic::loongarch_lasx_xvpickve_d_f:
return checkIntrinsicImmArg<2>(Op, 2, DAG);
case Intrinsic::loongarch_lasx_xvinsve0_d:
return checkIntrinsicImmArg<2>(Op, 3, DAG);
case Intrinsic::loongarch_lsx_vsat_b:
case Intrinsic::loongarch_lsx_vsat_bu:
case Intrinsic::loongarch_lsx_vrotri_b:
case Intrinsic::loongarch_lsx_vsllwil_h_b:
case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
case Intrinsic::loongarch_lsx_vsrlri_b:
case Intrinsic::loongarch_lsx_vsrari_b:
case Intrinsic::loongarch_lsx_vreplvei_h:
case Intrinsic::loongarch_lasx_xvsat_b:
case Intrinsic::loongarch_lasx_xvsat_bu:
case Intrinsic::loongarch_lasx_xvrotri_b:
case Intrinsic::loongarch_lasx_xvsllwil_h_b:
case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
case Intrinsic::loongarch_lasx_xvsrlri_b:
case Intrinsic::loongarch_lasx_xvsrari_b:
case Intrinsic::loongarch_lasx_xvrepl128vei_h:
case Intrinsic::loongarch_lasx_xvpickve_w:
case Intrinsic::loongarch_lasx_xvpickve_w_f:
return checkIntrinsicImmArg<3>(Op, 2, DAG);
case Intrinsic::loongarch_lasx_xvinsve0_w:
return checkIntrinsicImmArg<3>(Op, 3, DAG);
case Intrinsic::loongarch_lsx_vsat_h:
case Intrinsic::loongarch_lsx_vsat_hu:
case Intrinsic::loongarch_lsx_vrotri_h:
case Intrinsic::loongarch_lsx_vsllwil_w_h:
case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
case Intrinsic::loongarch_lsx_vsrlri_h:
case Intrinsic::loongarch_lsx_vsrari_h:
case Intrinsic::loongarch_lsx_vreplvei_b:
case Intrinsic::loongarch_lasx_xvsat_h:
case Intrinsic::loongarch_lasx_xvsat_hu:
case Intrinsic::loongarch_lasx_xvrotri_h:
case Intrinsic::loongarch_lasx_xvsllwil_w_h:
case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
case Intrinsic::loongarch_lasx_xvsrlri_h:
case Intrinsic::loongarch_lasx_xvsrari_h:
case Intrinsic::loongarch_lasx_xvrepl128vei_b:
return checkIntrinsicImmArg<4>(Op, 2, DAG);
case Intrinsic::loongarch_lsx_vsrlni_b_h:
case Intrinsic::loongarch_lsx_vsrani_b_h:
case Intrinsic::loongarch_lsx_vsrlrni_b_h:
case Intrinsic::loongarch_lsx_vsrarni_b_h:
case Intrinsic::loongarch_lsx_vssrlni_b_h:
case Intrinsic::loongarch_lsx_vssrani_b_h:
case Intrinsic::loongarch_lsx_vssrlni_bu_h:
case Intrinsic::loongarch_lsx_vssrani_bu_h:
case Intrinsic::loongarch_lsx_vssrlrni_b_h:
case Intrinsic::loongarch_lsx_vssrarni_b_h:
case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
case Intrinsic::loongarch_lsx_vssrarni_bu_h:
case Intrinsic::loongarch_lasx_xvsrlni_b_h:
case Intrinsic::loongarch_lasx_xvsrani_b_h:
case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
case Intrinsic::loongarch_lasx_xvsrarni_b_h:
case Intrinsic::loongarch_lasx_xvssrlni_b_h:
case Intrinsic::loongarch_lasx_xvssrani_b_h:
case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
case Intrinsic::loongarch_lasx_xvssrani_bu_h:
case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
case Intrinsic::loongarch_lasx_xvssrarni_b_h:
case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
return checkIntrinsicImmArg<4>(Op, 3, DAG);
case Intrinsic::loongarch_lsx_vsat_w:
case Intrinsic::loongarch_lsx_vsat_wu:
case Intrinsic::loongarch_lsx_vrotri_w:
case Intrinsic::loongarch_lsx_vsllwil_d_w:
case Intrinsic::loongarch_lsx_vsllwil_du_wu:
case Intrinsic::loongarch_lsx_vsrlri_w:
case Intrinsic::loongarch_lsx_vsrari_w:
case Intrinsic::loongarch_lsx_vslei_bu:
case Intrinsic::loongarch_lsx_vslei_hu:
case Intrinsic::loongarch_lsx_vslei_wu:
case Intrinsic::loongarch_lsx_vslei_du:
case Intrinsic::loongarch_lsx_vslti_bu:
case Intrinsic::loongarch_lsx_vslti_hu:
case Intrinsic::loongarch_lsx_vslti_wu:
case Intrinsic::loongarch_lsx_vslti_du:
case Intrinsic::loongarch_lsx_vbsll_v:
case Intrinsic::loongarch_lsx_vbsrl_v:
case Intrinsic::loongarch_lasx_xvsat_w:
case Intrinsic::loongarch_lasx_xvsat_wu:
case Intrinsic::loongarch_lasx_xvrotri_w:
case Intrinsic::loongarch_lasx_xvsllwil_d_w:
case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
case Intrinsic::loongarch_lasx_xvsrlri_w:
case Intrinsic::loongarch_lasx_xvsrari_w:
case Intrinsic::loongarch_lasx_xvslei_bu:
case Intrinsic::loongarch_lasx_xvslei_hu:
case Intrinsic::loongarch_lasx_xvslei_wu:
case Intrinsic::loongarch_lasx_xvslei_du:
case Intrinsic::loongarch_lasx_xvslti_bu:
case Intrinsic::loongarch_lasx_xvslti_hu:
case Intrinsic::loongarch_lasx_xvslti_wu:
case Intrinsic::loongarch_lasx_xvslti_du:
case Intrinsic::loongarch_lasx_xvbsll_v:
case Intrinsic::loongarch_lasx_xvbsrl_v:
return checkIntrinsicImmArg<5>(Op, 2, DAG);
case Intrinsic::loongarch_lsx_vseqi_b:
case Intrinsic::loongarch_lsx_vseqi_h:
case Intrinsic::loongarch_lsx_vseqi_w:
case Intrinsic::loongarch_lsx_vseqi_d:
case Intrinsic::loongarch_lsx_vslei_b:
case Intrinsic::loongarch_lsx_vslei_h:
case Intrinsic::loongarch_lsx_vslei_w:
case Intrinsic::loongarch_lsx_vslei_d:
case Intrinsic::loongarch_lsx_vslti_b:
case Intrinsic::loongarch_lsx_vslti_h:
case Intrinsic::loongarch_lsx_vslti_w:
case Intrinsic::loongarch_lsx_vslti_d:
case Intrinsic::loongarch_lasx_xvseqi_b:
case Intrinsic::loongarch_lasx_xvseqi_h:
case Intrinsic::loongarch_lasx_xvseqi_w:
case Intrinsic::loongarch_lasx_xvseqi_d:
case Intrinsic::loongarch_lasx_xvslei_b:
case Intrinsic::loongarch_lasx_xvslei_h:
case Intrinsic::loongarch_lasx_xvslei_w:
case Intrinsic::loongarch_lasx_xvslei_d:
case Intrinsic::loongarch_lasx_xvslti_b:
case Intrinsic::loongarch_lasx_xvslti_h:
case Intrinsic::loongarch_lasx_xvslti_w:
case Intrinsic::loongarch_lasx_xvslti_d:
return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
case Intrinsic::loongarch_lsx_vsrlni_h_w:
case Intrinsic::loongarch_lsx_vsrani_h_w:
case Intrinsic::loongarch_lsx_vsrlrni_h_w:
case Intrinsic::loongarch_lsx_vsrarni_h_w:
case Intrinsic::loongarch_lsx_vssrlni_h_w:
case Intrinsic::loongarch_lsx_vssrani_h_w:
case Intrinsic::loongarch_lsx_vssrlni_hu_w:
case Intrinsic::loongarch_lsx_vssrani_hu_w:
case Intrinsic::loongarch_lsx_vssrlrni_h_w:
case Intrinsic::loongarch_lsx_vssrarni_h_w:
case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
case Intrinsic::loongarch_lsx_vssrarni_hu_w:
case Intrinsic::loongarch_lsx_vfrstpi_b:
case Intrinsic::loongarch_lsx_vfrstpi_h:
case Intrinsic::loongarch_lasx_xvsrlni_h_w:
case Intrinsic::loongarch_lasx_xvsrani_h_w:
case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
case Intrinsic::loongarch_lasx_xvsrarni_h_w:
case Intrinsic::loongarch_lasx_xvssrlni_h_w:
case Intrinsic::loongarch_lasx_xvssrani_h_w:
case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
case Intrinsic::loongarch_lasx_xvssrani_hu_w:
case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
case Intrinsic::loongarch_lasx_xvssrarni_h_w:
case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
case Intrinsic::loongarch_lasx_xvfrstpi_b:
case Intrinsic::loongarch_lasx_xvfrstpi_h:
return checkIntrinsicImmArg<5>(Op, 3, DAG);
case Intrinsic::loongarch_lsx_vsat_d:
case Intrinsic::loongarch_lsx_vsat_du:
case Intrinsic::loongarch_lsx_vrotri_d:
case Intrinsic::loongarch_lsx_vsrlri_d:
case Intrinsic::loongarch_lsx_vsrari_d:
case Intrinsic::loongarch_lasx_xvsat_d:
case Intrinsic::loongarch_lasx_xvsat_du:
case Intrinsic::loongarch_lasx_xvrotri_d:
case Intrinsic::loongarch_lasx_xvsrlri_d:
case Intrinsic::loongarch_lasx_xvsrari_d:
return checkIntrinsicImmArg<6>(Op, 2, DAG);
case Intrinsic::loongarch_lsx_vsrlni_w_d:
case Intrinsic::loongarch_lsx_vsrani_w_d:
case Intrinsic::loongarch_lsx_vsrlrni_w_d:
case Intrinsic::loongarch_lsx_vsrarni_w_d:
case Intrinsic::loongarch_lsx_vssrlni_w_d:
case Intrinsic::loongarch_lsx_vssrani_w_d:
case Intrinsic::loongarch_lsx_vssrlni_wu_d:
case Intrinsic::loongarch_lsx_vssrani_wu_d:
case Intrinsic::loongarch_lsx_vssrlrni_w_d:
case Intrinsic::loongarch_lsx_vssrarni_w_d:
case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
case Intrinsic::loongarch_lsx_vssrarni_wu_d:
case Intrinsic::loongarch_lasx_xvsrlni_w_d:
case Intrinsic::loongarch_lasx_xvsrani_w_d:
case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
case Intrinsic::loongarch_lasx_xvsrarni_w_d:
case Intrinsic::loongarch_lasx_xvssrlni_w_d:
case Intrinsic::loongarch_lasx_xvssrani_w_d:
case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
case Intrinsic::loongarch_lasx_xvssrani_wu_d:
case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
case Intrinsic::loongarch_lasx_xvssrarni_w_d:
case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
return checkIntrinsicImmArg<6>(Op, 3, DAG);
case Intrinsic::loongarch_lsx_vsrlni_d_q:
case Intrinsic::loongarch_lsx_vsrani_d_q:
case Intrinsic::loongarch_lsx_vsrlrni_d_q:
case Intrinsic::loongarch_lsx_vsrarni_d_q:
case Intrinsic::loongarch_lsx_vssrlni_d_q:
case Intrinsic::loongarch_lsx_vssrani_d_q:
case Intrinsic::loongarch_lsx_vssrlni_du_q:
case Intrinsic::loongarch_lsx_vssrani_du_q:
case Intrinsic::loongarch_lsx_vssrlrni_d_q:
case Intrinsic::loongarch_lsx_vssrarni_d_q:
case Intrinsic::loongarch_lsx_vssrlrni_du_q:
case Intrinsic::loongarch_lsx_vssrarni_du_q:
case Intrinsic::loongarch_lasx_xvsrlni_d_q:
case Intrinsic::loongarch_lasx_xvsrani_d_q:
case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
case Intrinsic::loongarch_lasx_xvsrarni_d_q:
case Intrinsic::loongarch_lasx_xvssrlni_d_q:
case Intrinsic::loongarch_lasx_xvssrani_d_q:
case Intrinsic::loongarch_lasx_xvssrlni_du_q:
case Intrinsic::loongarch_lasx_xvssrani_du_q:
case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
case Intrinsic::loongarch_lasx_xvssrarni_d_q:
case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
case Intrinsic::loongarch_lasx_xvssrarni_du_q:
return checkIntrinsicImmArg<7>(Op, 3, DAG);
case Intrinsic::loongarch_lsx_vnori_b:
case Intrinsic::loongarch_lsx_vshuf4i_b:
case Intrinsic::loongarch_lsx_vshuf4i_h:
case Intrinsic::loongarch_lsx_vshuf4i_w:
case Intrinsic::loongarch_lasx_xvnori_b:
case Intrinsic::loongarch_lasx_xvshuf4i_b:
case Intrinsic::loongarch_lasx_xvshuf4i_h:
case Intrinsic::loongarch_lasx_xvshuf4i_w:
case Intrinsic::loongarch_lasx_xvpermi_d:
return checkIntrinsicImmArg<8>(Op, 2, DAG);
case Intrinsic::loongarch_lsx_vshuf4i_d:
case Intrinsic::loongarch_lsx_vpermi_w:
case Intrinsic::loongarch_lsx_vbitseli_b:
case Intrinsic::loongarch_lsx_vextrins_b:
case Intrinsic::loongarch_lsx_vextrins_h:
case Intrinsic::loongarch_lsx_vextrins_w:
case Intrinsic::loongarch_lsx_vextrins_d:
case Intrinsic::loongarch_lasx_xvshuf4i_d:
case Intrinsic::loongarch_lasx_xvpermi_w:
case Intrinsic::loongarch_lasx_xvpermi_q:
case Intrinsic::loongarch_lasx_xvbitseli_b:
case Intrinsic::loongarch_lasx_xvextrins_b:
case Intrinsic::loongarch_lasx_xvextrins_h:
case Intrinsic::loongarch_lasx_xvextrins_w:
case Intrinsic::loongarch_lasx_xvextrins_d:
return checkIntrinsicImmArg<8>(Op, 3, DAG);
case Intrinsic::loongarch_lsx_vrepli_b:
case Intrinsic::loongarch_lsx_vrepli_h:
case Intrinsic::loongarch_lsx_vrepli_w:
case Intrinsic::loongarch_lsx_vrepli_d:
case Intrinsic::loongarch_lasx_xvrepli_b:
case Intrinsic::loongarch_lasx_xvrepli_h:
case Intrinsic::loongarch_lasx_xvrepli_w:
case Intrinsic::loongarch_lasx_xvrepli_d:
return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
case Intrinsic::loongarch_lsx_vldi:
case Intrinsic::loongarch_lasx_xvldi:
return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
}
}
// Helper function that emits an error message for intrinsics with a chain and
// returns the merge values of an UNDEF and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
StringRef ErrorMsg,
SelectionDAG &DAG) {
DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
SDLoc(Op));
}
SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT GRLenVT = Subtarget.getGRLenVT();
EVT VT = Op.getValueType();
SDValue Chain = Op.getOperand(0);
const StringRef ErrorMsgOOR = "argument out of range";
const StringRef ErrorMsgReqLA64 = "requires loongarch64";
const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
switch (Op.getConstantOperandVal(1)) {
default:
return Op;
case Intrinsic::loongarch_crc_w_b_w:
case Intrinsic::loongarch_crc_w_h_w:
case Intrinsic::loongarch_crc_w_w_w:
case Intrinsic::loongarch_crc_w_d_w:
case Intrinsic::loongarch_crcc_w_b_w:
case Intrinsic::loongarch_crcc_w_h_w:
case Intrinsic::loongarch_crcc_w_w_w:
case Intrinsic::loongarch_crcc_w_d_w:
return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
case Intrinsic::loongarch_csrrd_w:
case Intrinsic::loongarch_csrrd_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
return !isUInt<14>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
}
case Intrinsic::loongarch_csrwr_w:
case Intrinsic::loongarch_csrwr_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
return !isUInt<14>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
{Chain, Op.getOperand(2),
DAG.getConstant(Imm, DL, GRLenVT)});
}
case Intrinsic::loongarch_csrxchg_w:
case Intrinsic::loongarch_csrxchg_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
return !isUInt<14>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
{Chain, Op.getOperand(2), Op.getOperand(3),
DAG.getConstant(Imm, DL, GRLenVT)});
}
case Intrinsic::loongarch_iocsrrd_d: {
return DAG.getNode(
LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
}
#define IOCSRRD_CASE(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
{Chain, Op.getOperand(2)}); \
}
IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
case Intrinsic::loongarch_cpucfg: {
return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
{Chain, Op.getOperand(2)});
}
case Intrinsic::loongarch_lddir_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
return !isUInt<8>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: Op;
}
case Intrinsic::loongarch_movfcsr2gr: {
if (!Subtarget.hasBasicF())
return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
return !isUInt<2>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
}
case Intrinsic::loongarch_lsx_vld:
case Intrinsic::loongarch_lsx_vldrepl_b:
case Intrinsic::loongarch_lasx_xvld:
case Intrinsic::loongarch_lasx_xvldrepl_b:
return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vldrepl_h:
case Intrinsic::loongarch_lasx_xvldrepl_h:
return !isShiftedInt<11, 1>(
cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
? emitIntrinsicWithChainErrorMessage(
Op, "argument out of range or not a multiple of 2", DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vldrepl_w:
case Intrinsic::loongarch_lasx_xvldrepl_w:
return !isShiftedInt<10, 2>(
cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
? emitIntrinsicWithChainErrorMessage(
Op, "argument out of range or not a multiple of 4", DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vldrepl_d:
case Intrinsic::loongarch_lasx_xvldrepl_d:
return !isShiftedInt<9, 3>(
cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
? emitIntrinsicWithChainErrorMessage(
Op, "argument out of range or not a multiple of 8", DAG)
: SDValue();
}
}
// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
SelectionDAG &DAG) {
DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
return Op.getOperand(0);
}
SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT GRLenVT = Subtarget.getGRLenVT();
SDValue Chain = Op.getOperand(0);
uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
SDValue Op2 = Op.getOperand(2);
const StringRef ErrorMsgOOR = "argument out of range";
const StringRef ErrorMsgReqLA64 = "requires loongarch64";
const StringRef ErrorMsgReqLA32 = "requires loongarch32";
const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
switch (IntrinsicEnum) {
default:
// TODO: Add more Intrinsics.
return SDValue();
case Intrinsic::loongarch_cacop_d:
case Intrinsic::loongarch_cacop_w: {
if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
// call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
return Op;
}
case Intrinsic::loongarch_dbar: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
return !isUInt<15>(Imm)
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
DAG.getConstant(Imm, DL, GRLenVT));
}
case Intrinsic::loongarch_ibar: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
return !isUInt<15>(Imm)
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
DAG.getConstant(Imm, DL, GRLenVT));
}
case Intrinsic::loongarch_break: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
return !isUInt<15>(Imm)
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
DAG.getConstant(Imm, DL, GRLenVT));
}
case Intrinsic::loongarch_movgr2fcsr: {
if (!Subtarget.hasBasicF())
return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
return !isUInt<2>(Imm)
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
DAG.getConstant(Imm, DL, GRLenVT),
DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
Op.getOperand(3)));
}
case Intrinsic::loongarch_syscall: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
return !isUInt<15>(Imm)
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
DAG.getConstant(Imm, DL, GRLenVT));
}
#define IOCSRWR_CASE(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
SDValue Op3 = Op.getOperand(3); \
return Subtarget.is64Bit() \
? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
: DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
Op3); \
}
IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
case Intrinsic::loongarch_iocsrwr_d: {
return !Subtarget.is64Bit()
? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
: DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
Op2,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
Op.getOperand(3)));
}
#define ASRT_LE_GT_CASE(NAME) \
case Intrinsic::loongarch_##NAME: { \
return !Subtarget.is64Bit() \
? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
: Op; \
}
ASRT_LE_GT_CASE(asrtle_d)
ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
case Intrinsic::loongarch_ldpte_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
return !Subtarget.is64Bit()
? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
: !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: Op;
}
case Intrinsic::loongarch_lsx_vst:
case Intrinsic::loongarch_lasx_xvst:
return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: SDValue();
case Intrinsic::loongarch_lasx_xvstelm_b:
return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
!isUInt<5>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vstelm_b:
return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
!isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: SDValue();
case Intrinsic::loongarch_lasx_xvstelm_h:
return (!isShiftedInt<8, 1>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
!isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 2", DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vstelm_h:
return (!isShiftedInt<8, 1>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
!isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 2", DAG)
: SDValue();
case Intrinsic::loongarch_lasx_xvstelm_w:
return (!isShiftedInt<8, 2>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
!isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 4", DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vstelm_w:
return (!isShiftedInt<8, 2>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
!isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 4", DAG)
: SDValue();
case Intrinsic::loongarch_lasx_xvstelm_d:
return (!isShiftedInt<8, 3>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
!isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 8", DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vstelm_d:
return (!isShiftedInt<8, 3>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
!isUInt<1>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 8", DAG)
: SDValue();
}
}
SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
EVT VT = Lo.getValueType();
// if Shamt-GRLen < 0: // Shamt < GRLen
// Lo = Lo << Shamt
// Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
// else:
// Lo = 0
// Hi = Lo << (Shamt-GRLen)
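  // For example, with GRLen=32 and Shamt=4:
  //   Lo = Lo << 4
  //   Hi = (Hi << 4) | (Lo >>u 28)
  // and with Shamt=40 (>= GRLen):
  //   Lo = 0
  //   Hi = Lo << 8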
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
SDValue ShiftRightLo =
DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
SDValue Parts[2] = {Lo, Hi};
return DAG.getMergeValues(Parts, DL);
}
SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
SelectionDAG &DAG,
bool IsSRA) const {
SDLoc DL(Op);
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
EVT VT = Lo.getValueType();
// SRA expansion:
// if Shamt-GRLen < 0: // Shamt < GRLen
// Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
// Hi = Hi >>s Shamt
// else:
// Lo = Hi >>s (Shamt-GRLen);
// Hi = Hi >>s (GRLen-1)
//
// SRL expansion:
// if Shamt-GRLen < 0: // Shamt < GRLen
// Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
// Hi = Hi >>u Shamt
// else:
// Lo = Hi >>u (Shamt-GRLen);
// Hi = 0;
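  // For example, with GRLen=32 and Shamt=4 (SRL):
  //   Lo = (Lo >>u 4) | (Hi << 28)
  //   Hi = Hi >>u 4
  // and with Shamt=40 (>= GRLen):
  //   Lo = Hi >>u 8
  //   Hi = 0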
unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
SDValue ShiftLeftHi =
DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
SDValue HiFalse =
IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
SDValue Parts[2] = {Lo, Hi};
return DAG.getMergeValues(Parts, DL);
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Unexpected opcode");
case ISD::SHL:
return LoongArchISD::SLL_W;
case ISD::SRA:
return LoongArchISD::SRA_W;
case ISD::SRL:
return LoongArchISD::SRL_W;
case ISD::ROTR:
return LoongArchISD::ROTR_W;
case ISD::ROTL:
return LoongArchISD::ROTL_W;
case ISD::CTTZ:
return LoongArchISD::CTZ_W;
case ISD::CTLZ:
return LoongArchISD::CLZ_W;
}
}
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W instructions later on, because the fact that the operation was
// originally of type i8/i16/i32 is lost.
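// For example, (shl i32 %a, %b) becomes
//   (trunc i32 (SLL_W (any_extend i64 %a), (any_extend i64 %b)))
// so the 32-bit shift semantics survive the i64 promotion.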
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
unsigned ExtOpc = ISD::ANY_EXTEND) {
SDLoc DL(N);
LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
SDValue NewOp0, NewRes;
switch (NumOp) {
default:
llvm_unreachable("Unexpected NumOp");
case 1: {
NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
break;
}
case 2: {
NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
break;
}
  // TODO: Handle more NumOp.
}
// ReplaceNodeResults requires we maintain the same type for the return
// value.
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
// Helper function that emits an error message for intrinsics with or without a
// chain, and pushes an UNDEF (and, if present, the chain) as the results.
static void emitErrorAndReplaceIntrinsicResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
StringRef ErrorMsg, bool WithChain = true) {
DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
Results.push_back(DAG.getUNDEF(N->getValueType(0)));
if (!WithChain)
return;
Results.push_back(N->getOperand(0));
}
template <unsigned N>
static void
replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
unsigned ResOp) {
const StringRef ErrorMsgOOR = "argument out of range";
unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
if (!isUInt<N>(Imm)) {
emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
/*WithChain=*/false);
return;
}
SDLoc DL(Node);
SDValue Vec = Node->getOperand(1);
SDValue PickElt =
DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
DAG.getValueType(Vec.getValueType().getVectorElementType()));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
PickElt.getValue(0)));
}
static void replaceVecCondBranchResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const LoongArchSubtarget &Subtarget,
unsigned ResOp) {
SDLoc DL(N);
SDValue Vec = N->getOperand(1);
SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
Results.push_back(
DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
}
static void
replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const LoongArchSubtarget &Subtarget) {
switch (N->getConstantOperandVal(0)) {
default:
llvm_unreachable("Unexpected Intrinsic.");
case Intrinsic::loongarch_lsx_vpickve2gr_b:
replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
LoongArchISD::VPICK_SEXT_ELT);
break;
case Intrinsic::loongarch_lsx_vpickve2gr_h:
case Intrinsic::loongarch_lasx_xvpickve2gr_w:
replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
LoongArchISD::VPICK_SEXT_ELT);
break;
case Intrinsic::loongarch_lsx_vpickve2gr_w:
replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
LoongArchISD::VPICK_SEXT_ELT);
break;
case Intrinsic::loongarch_lsx_vpickve2gr_bu:
replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
LoongArchISD::VPICK_ZEXT_ELT);
break;
case Intrinsic::loongarch_lsx_vpickve2gr_hu:
case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
LoongArchISD::VPICK_ZEXT_ELT);
break;
case Intrinsic::loongarch_lsx_vpickve2gr_wu:
replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
LoongArchISD::VPICK_ZEXT_ELT);
break;
case Intrinsic::loongarch_lsx_bz_b:
case Intrinsic::loongarch_lsx_bz_h:
case Intrinsic::loongarch_lsx_bz_w:
case Intrinsic::loongarch_lsx_bz_d:
case Intrinsic::loongarch_lasx_xbz_b:
case Intrinsic::loongarch_lasx_xbz_h:
case Intrinsic::loongarch_lasx_xbz_w:
case Intrinsic::loongarch_lasx_xbz_d:
replaceVecCondBranchResults(N, Results, DAG, Subtarget,
LoongArchISD::VALL_ZERO);
break;
case Intrinsic::loongarch_lsx_bz_v:
case Intrinsic::loongarch_lasx_xbz_v:
replaceVecCondBranchResults(N, Results, DAG, Subtarget,
LoongArchISD::VANY_ZERO);
break;
case Intrinsic::loongarch_lsx_bnz_b:
case Intrinsic::loongarch_lsx_bnz_h:
case Intrinsic::loongarch_lsx_bnz_w:
case Intrinsic::loongarch_lsx_bnz_d:
case Intrinsic::loongarch_lasx_xbnz_b:
case Intrinsic::loongarch_lasx_xbnz_h:
case Intrinsic::loongarch_lasx_xbnz_w:
case Intrinsic::loongarch_lasx_xbnz_d:
replaceVecCondBranchResults(N, Results, DAG, Subtarget,
LoongArchISD::VALL_NONZERO);
break;
case Intrinsic::loongarch_lsx_bnz_v:
case Intrinsic::loongarch_lasx_xbnz_v:
replaceVecCondBranchResults(N, Results, DAG, Subtarget,
LoongArchISD::VANY_NONZERO);
break;
}
}
void LoongArchTargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
EVT VT = N->getValueType(0);
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to legalize this operation");
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTR:
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (N->getOperand(1).getOpcode() != ISD::Constant) {
Results.push_back(customLegalizeToWOp(N, DAG, 2));
break;
}
break;
case ISD::ROTL:
ConstantSDNode *CN;
if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
Results.push_back(customLegalizeToWOp(N, DAG, 2));
break;
}
break;
case ISD::FP_TO_SINT: {
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
SDValue Src = N->getOperand(0);
EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
TargetLowering::TypeSoftenFloat) {
SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
return;
}
// If the FP type needs to be softened, emit a library call using the 'si'
// version. If we left it to default legalization we'd end up with 'di'.
RTLIB::Libcall LC;
LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
MakeLibCallOptions CallOptions;
EVT OpVT = Src.getValueType();
CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
SDValue Chain = SDValue();
SDValue Result;
std::tie(Result, Chain) =
makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
Results.push_back(Result);
break;
}
case ISD::BITCAST: {
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
Subtarget.hasBasicF()) {
SDValue Dst =
DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
}
break;
}
case ISD::FP_TO_UINT: {
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
auto &TLI = DAG.getTargetLoweringInfo();
SDValue Tmp1, Tmp2;
TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
break;
}
case ISD::BSWAP: {
SDValue Src = N->getOperand(0);
assert((VT == MVT::i16 || VT == MVT::i32) &&
"Unexpected custom legalization");
MVT GRLenVT = Subtarget.getGRLenVT();
SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
SDValue Tmp;
switch (VT.getSizeInBits()) {
default:
llvm_unreachable("Unexpected operand width");
case 16:
Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
break;
case 32:
// Only LA64 will get here, due to the size mismatch between VT and
// GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
break;
}
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
break;
}
case ISD::BITREVERSE: {
SDValue Src = N->getOperand(0);
assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
"Unexpected custom legalization");
MVT GRLenVT = Subtarget.getGRLenVT();
SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
SDValue Tmp;
switch (VT.getSizeInBits()) {
default:
llvm_unreachable("Unexpected operand width");
case 8:
Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
break;
case 32:
Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
break;
}
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
break;
}
case ISD::CTLZ:
case ISD::CTTZ: {
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
Results.push_back(customLegalizeToWOp(N, DAG, 1));
break;
}
case ISD::INTRINSIC_W_CHAIN: {
SDValue Chain = N->getOperand(0);
SDValue Op2 = N->getOperand(2);
MVT GRLenVT = Subtarget.getGRLenVT();
const StringRef ErrorMsgOOR = "argument out of range";
const StringRef ErrorMsgReqLA64 = "requires loongarch64";
const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
switch (N->getConstantOperandVal(1)) {
default:
llvm_unreachable("Unexpected Intrinsic.");
case Intrinsic::loongarch_movfcsr2gr: {
if (!Subtarget.hasBasicF()) {
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
return;
}
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
if (!isUInt<2>(Imm)) {
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
}
SDValue MOVFCSR2GRResults = DAG.getNode(
LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
Results.push_back(
DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
Results.push_back(MOVFCSR2GRResults.getValue(1));
break;
}
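// CRC intrinsics whose value operands both need promotion: any-extend both
// operands to i64, emit the target node and truncate the result back to the
// original type.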
#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
SDValue NODE = DAG.getNode( \
LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
Results.push_back(NODE.getValue(1)); \
break; \
}
CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP
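// Same as above for the *_d_w variants: the first value operand is passed
// through unchanged and only operand 3 is any-extended to i64.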
#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
SDValue NODE = DAG.getNode( \
LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
{Chain, Op2, \
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
Results.push_back(NODE.getValue(1)); \
break; \
}
CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
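// The *_d CSR/IOCSR intrinsics require LA64; emit a diagnostic when they are
// used on LA32.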
#define CSR_CASE(ID) \
case Intrinsic::loongarch_##ID: { \
if (!Subtarget.is64Bit()) \
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
break; \
}
CSR_CASE(csrrd_d);
CSR_CASE(csrwr_d);
CSR_CASE(csrxchg_d);
CSR_CASE(iocsrrd_d);
#undef CSR_CASE
case Intrinsic::loongarch_csrrd_w: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
if (!isUInt<14>(Imm)) {
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
}
SDValue CSRRDResults =
DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
Results.push_back(
DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
Results.push_back(CSRRDResults.getValue(1));
break;
}
case Intrinsic::loongarch_csrwr_w: {
unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
if (!isUInt<14>(Imm)) {
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
}
SDValue CSRWRResults =
DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
DAG.getConstant(Imm, DL, GRLenVT)});
Results.push_back(
DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
Results.push_back(CSRWRResults.getValue(1));
break;
}
case Intrinsic::loongarch_csrxchg_w: {
unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
if (!isUInt<14>(Imm)) {
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
}
SDValue CSRXCHGResults = DAG.getNode(
LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
DAG.getConstant(Imm, DL, GRLenVT)});
Results.push_back(
DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
Results.push_back(CSRXCHGResults.getValue(1));
break;
}
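// 8/16/32-bit IOCSR reads: any-extend the address operand to i64, emit the
// read node and truncate the read value back to the result type.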
#define IOCSRRD_CASE(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
SDValue IOCSRRDResults = \
DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
Results.push_back( \
DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
Results.push_back(IOCSRRDResults.getValue(1)); \
break; \
}
IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
case Intrinsic::loongarch_cpucfg: {
SDValue CPUCFGResults =
DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
Results.push_back(
DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
Results.push_back(CPUCFGResults.getValue(1));
break;
}
case Intrinsic::loongarch_lddir_d: {
if (!Subtarget.is64Bit()) {
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
return;
}
break;
}
}
break;
}
case ISD::READ_REGISTER: {
if (Subtarget.is64Bit())
DAG.getContext()->emitError(
"On LA64, only 64-bit registers can be read.");
else
DAG.getContext()->emitError(
"On LA32, only 32-bit registers can be read.");
Results.push_back(DAG.getUNDEF(VT));
Results.push_back(N->getOperand(0));
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
break;
}
}
}
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const LoongArchSubtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue FirstOperand = N->getOperand(0);
SDValue SecondOperand = N->getOperand(1);
unsigned FirstOperandOpc = FirstOperand.getOpcode();
EVT ValTy = N->getValueType(0);
SDLoc DL(N);
uint64_t lsb, msb;
unsigned SMIdx, SMLen;
ConstantSDNode *CN;
SDValue NewOperand;
MVT GRLenVT = Subtarget.getGRLenVT();
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
!isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
return SDValue();
if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
// Pattern match BSTRPICK.
// $dst = and ((sra or srl) $src, lsb), (2**len - 1)
// => BSTRPICK $dst, $src, msb, lsb
// where msb = lsb + len - 1
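// e.g. (and (srl $src, 8), 0xff) becomes (BSTRPICK $src, 15, 8).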
// The second operand of the shift must be an immediate.
if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
return SDValue();
lsb = CN->getZExtValue();
// Return if the shifted mask does not start at bit 0 or the sum of its
// length and lsb exceeds the word's size.
if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
return SDValue();
NewOperand = FirstOperand.getOperand(0);
} else {
// Pattern match BSTRPICK.
// $dst = and $src, (2**len - 1), if len > 12
// => BSTRPICK $dst, $src, msb, lsb
// where lsb = 0 and msb = len - 1
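// e.g. (and $src, 0xfffff) becomes (BSTRPICK $src, 19, 0).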
// If the mask is <= 0xfff, andi can be used instead.
if (CN->getZExtValue() <= 0xfff)
return SDValue();
// Return if the mask would extend past the value's bit width.
if (SMIdx + SMLen > ValTy.getSizeInBits())
return SDValue();
if (SMIdx > 0) {
// Omit if the constant has more than 2 uses. This is a conservative
// decision: whether it is a win depends on the HW microarchitecture.
// However, it should always be better for 1 and 2 uses.
if (CN->use_size() > 2)
return SDValue();
// Return if the constant can be materialized with a single LU12I.W.
if ((CN->getZExtValue() & 0xfff) == 0)
return SDValue();
// Return if the constant can be materialized with a single ADDI using
// the zero register.
if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
return SDValue();
}
lsb = SMIdx;
NewOperand = FirstOperand;
}
msb = lsb + SMLen - 1;
SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
DAG.getConstant(msb, DL, GRLenVT),
DAG.getConstant(lsb, DL, GRLenVT));
if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
return NR0;
// Try to optimize to
// bstrpick $Rd, $Rs, msb, lsb
// slli $Rd, $Rd, lsb
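// e.g. the mask 0xff0000 gives bstrpick $Rd, $Rs, 23, 16; slli $Rd, $Rd, 16.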
return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
DAG.getConstant(lsb, DL, GRLenVT));
}
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const LoongArchSubtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
// $dst = srl (and $src, Mask), Shamt
// =>
// BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
// when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
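// e.g. (srl (and $src, 0xff00), 8) becomes (BSTRPICK $src, 15, 8).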
//
SDValue FirstOperand = N->getOperand(0);
ConstantSDNode *CN;
EVT ValTy = N->getValueType(0);
SDLoc DL(N);
MVT GRLenVT = Subtarget.getGRLenVT();
unsigned MaskIdx, MaskLen;
uint64_t Shamt;
// The first operand must be an AND and the second operand of the AND must be
// a shifted mask.
if (FirstOperand.getOpcode() != ISD::AND ||
!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
!isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
return SDValue();
// The second operand (shift amount) must be an immediate.
if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
return SDValue();
Shamt = CN->getZExtValue();
if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
FirstOperand->getOperand(0),
DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
DAG.getConstant(Shamt, DL, GRLenVT));
return SDValue();
}
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const LoongArchSubtarget &Subtarget) {
MVT GRLenVT = Subtarget.getGRLenVT();
EVT ValTy = N->getValueType(0);
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *CN0, *CN1;
SDLoc DL(N);
unsigned ValBits = ValTy.getSizeInBits();
unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
unsigned Shamt;
bool SwapAndRetried = false;
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (ValBits != 32 && ValBits != 64)
return SDValue();
Retry:
// 1st pattern to match BSTRINS:
// R = or (and X, mask0), (and (shl Y, lsb), mask1)
// where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
// =>
// R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
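// e.g. R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
// => R = BSTRINS X, Y, 15, 8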
if (N0.getOpcode() == ISD::AND &&
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
(Shamt = CN1->getZExtValue()) == MaskIdx0 &&
(MaskIdx0 + MaskLen0 <= ValBits)) {
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
N1.getOperand(0).getOperand(0),
DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
DAG.getConstant(MaskIdx0, DL, GRLenVT));
}
// 2nd pattern to match BSTRINS:
// R = or (and X, mask0), (shl (and Y, mask1), lsb)
// where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
// =>
// R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
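// e.g. R = or (and X, 0xffff00ff), (shl (and Y, 0xff), 8)
// => R = BSTRINS X, Y, 15, 8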
if (N0.getOpcode() == ISD::AND &&
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
(Shamt = CN1->getZExtValue()) == MaskIdx0 &&
(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
(MaskIdx0 + MaskLen0 <= ValBits)) {
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
N1.getOperand(0).getOperand(0),
DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
DAG.getConstant(MaskIdx0, DL, GRLenVT));
}
// 3rd pattern to match BSTRINS:
// R = or (and X, mask0), (and Y, mask1)
// where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
// =>
// R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
// where msb = lsb + size - 1
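// e.g. R = or (and X, 0xffff00ff), (and Y, 0xff00)
// => R = BSTRINS X, (srl (and Y, 0xff00), 8), 15, 8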
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
(MaskIdx0 + MaskLen0 <= 64) &&
(CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
(CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
DAG.getConstant(MaskIdx0, DL, GRLenVT)),
DAG.getConstant(ValBits == 32
? (MaskIdx0 + (MaskLen0 & 31) - 1)
: (MaskIdx0 + MaskLen0 - 1),
DL, GRLenVT),
DAG.getConstant(MaskIdx0, DL, GRLenVT));
}
// 4th pattern to match BSTRINS:
// R = or (and X, mask), (shl Y, shamt)
// where mask = (2**shamt - 1)
// =>
// R = BSTRINS X, Y, ValBits - 1, shamt
// where ValBits = 32 or 64
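// e.g. R = or (and X, 0xffff), (shl Y, 16) => R = BSTRINS X, Y, 31, 16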
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
(Shamt = CN1->getZExtValue()) == MaskLen0 &&
(MaskIdx0 + MaskLen0 <= ValBits)) {
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
N1.getOperand(0),
DAG.getConstant((ValBits - 1), DL, GRLenVT),
DAG.getConstant(Shamt, DL, GRLenVT));
}
// 5th pattern to match BSTRINS:
// R = or (and X, mask), const
// where ~mask = (2**size - 1) << lsb, mask & const = 0
// =>
// R = BSTRINS X, (const >> lsb), msb, lsb
// where msb = lsb + size - 1
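// e.g. R = or (and X, 0xffff00ff), 0x2a00 => R = BSTRINS X, 0x2a, 15, 8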
if (N0.getOpcode() == ISD::AND &&
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
(CN1 = dyn_cast<ConstantSDNode>(N1)) &&
(CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
return DAG.getNode(
LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
DAG.getConstant(MaskIdx0, DL, GRLenVT));
}
// 6th pattern.
// a = b | ((c & mask) << shamt), where all positions in b to be overwritten
// by the incoming bits are known to be zero.
// =>
// a = BSTRINS b, c, shamt + MaskLen - 1, shamt
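// e.g. if bits [15:8] of b are known to be zero:
// a = b | ((c & 0xff) << 8) => a = BSTRINS b, c, 15, 8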
//
// Note that the 1st pattern is a special case of the 6th, i.e. the 6th
// pattern is more general than the 1st. So we put the 1st before the 6th in
// order to match as many nodes as possible.
ConstantSDNode *CNMask, *CNShamt;
unsigned MaskIdx, MaskLen;
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
CNShamt->getZExtValue() + MaskLen <= ValBits) {
Shamt = CNShamt->getZExtValue();
APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
N1.getOperand(0).getOperand(0),
DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
DAG.getConstant(Shamt, DL, GRLenVT));
}
}
// 7th pattern.
// a = b | ((c << shamt) & shifted_mask), where all positions in b to be
// overwritten by the incoming bits are known to be zero.
// =>
// a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
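// e.g. if bits [15:8] of b are known to be zero:
// a = b | ((c << 8) & 0xff00) => a = BSTRINS b, c, 15, 8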
//
// Similarly, the 7th pattern is more general than the 2nd. So we put the 2nd
// before the 7th in order to match as many nodes as possible.
if (N1.getOpcode() == ISD::AND &&
(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
N1.getOperand(0).getOpcode() == ISD::SHL &&
(CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
CNShamt->getZExtValue() == MaskIdx) {
APInt ShMask(ValBits, CNMask->getZExtValue());
if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
N1.getOperand(0).getOperand(0),
DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
DAG.getConstant(MaskIdx, DL, GRLenVT));
}
}
// (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
if (!SwapAndRetried) {
std::swap(N0, N1);
SwapAndRetried = true;
goto Retry;
}
SwapAndRetried = false;
Retry2:
// 8th pattern.
// a = b | (c & shifted_mask), where all positions in b to be overwritten by
// the incoming bits are known to be zero.
// =>
// a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
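// e.g. if bits [15:8] of b are known to be zero:
// a = b | (c & 0xff00) => a = BSTRINS b, (c >> 8), 15, 8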
//
// Similarly, the 8th pattern is more general than the 4th and 5th patterns, so
// we put it here in order to match as many nodes as possible or generate fewer
// instructions.
if (N1.getOpcode() == ISD::AND &&
(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
APInt ShMask(ValBits, CNMask->getZExtValue());
if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
N1->getOperand(0),
DAG.getConstant(MaskIdx, DL, GRLenVT)),
DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
DAG.getConstant(MaskIdx, DL, GRLenVT));
}
}
// Swap N0/N1 and retry.
if (!SwapAndRetried) {
std::swap(N0, N1);
SwapAndRetried = true;
goto Retry2;
}
return SDValue();
}
// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const LoongArchSubtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue Src = N->getOperand(0);
if (Src.getOpcode() != LoongArchISD::REVB_2W)
return SDValue();
return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
Src.getOperand(0));
}
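// Check that an intrinsic's immediate operand fits in N bits (signed or
// unsigned); emit a diagnostic and return UNDEF if it does not, otherwise
// return the value as a GRLen-typed constant.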
template <unsigned N>
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
SelectionDAG &DAG,
const LoongArchSubtarget &Subtarget,
bool IsSigned = false) {
SDLoc DL(Node);
auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
// Check the ImmArg.
if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
DAG.getContext()->emitError(Node->getOperationName(0) +
": argument out of range.");
return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
}
return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
}
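// Range-check an intrinsic's immediate operand as above, then splat it across
// every lane of the result vector type.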
template <unsigned N>
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
SelectionDAG &DAG, bool IsSigned = false) {
SDLoc DL(Node);
EVT ResTy = Node->getValueType(0);
auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
// Check the ImmArg.
if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
DAG.getContext()->emitError(Node->getOperationName(0) +
": argument out of range.");
return DAG.getNode(ISD::UNDEF, DL, ResTy);
}
return DAG.getConstant(
APInt(ResTy.getScalarType().getSizeInBits(),
IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
DL, ResTy);
}
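// Mask each lane of the shift-amount/bit-index vector (operand 2) down to the
// element's bit range, i.e. reduce it modulo the element width.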
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
SDLoc DL(Node);
EVT ResTy = Node->getValueType(0);
SDValue Vec = Node->getOperand(2);
SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
}
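// Per-lane bit clear: dst = opnd1 & ~(1 << (opnd2 % EltBits)).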
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
SDLoc DL(Node);
EVT ResTy = Node->getValueType(0);
SDValue One = DAG.getConstant(1, DL, ResTy);
SDValue Bit =
DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
DAG.getNOT(DL, Bit, ResTy));
}
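// Immediate form of the bit clear above: clear the same bit in every lane.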
template <unsigned N>
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
SDLoc DL(Node);
EVT ResTy = Node->getValueType(0);
auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
// Check the unsigned ImmArg.
if (!isUInt<N>(CImm->getZExtValue())) {
DAG.getContext()->emitError(Node->getOperationName(0) +
": argument out of range.");
return DAG.getNode(ISD::UNDEF, DL, ResTy);
}
APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
}
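// Immediate bit set: OR every lane with (1 << Imm).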
template <unsigned N>
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
SDLoc DL(Node);
EVT ResTy = Node->getValueType(0);
auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
// Check the unsigned ImmArg.
if (!isUInt<N>(CImm->getZExtValue())) {
DAG.getContext()->emitError(Node->getOperationName(0) +
": argument out of range.");
return DAG.getNode(ISD::UNDEF, DL, ResTy);
}
APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
}
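// Immediate bit flip: XOR every lane with (1 << Imm).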
template <unsigned N>
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
SDLoc DL(Node);
EVT ResTy = Node->getValueType(0);
auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
// Check the unsigned ImmArg.
if (!isUInt<N>(CImm->getZExtValue())) {
DAG.getContext()->emitError(Node->getOperationName(0) +
": argument out of range.");
return DAG.getNode(ISD::UNDEF, DL, ResTy);
}
APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
}
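// Rewrite element-wise LSX/LASX intrinsics as their generic ISD equivalents
// (e.g. vadd.* -> ISD::ADD), range-checking any immediate operands on the way.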
static SDValue
performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const LoongArchSubtarget &Subtarget) {
SDLoc DL(N);
switch (N->getConstantOperandVal(0)) {
default:
break;
case Intrinsic::loongarch_lsx_vadd_b:
case Intrinsic::loongarch_lsx_vadd_h:
case Intrinsic::loongarch_lsx_vadd_w:
case Intrinsic::loongarch_lsx_vadd_d:
case Intrinsic::loongarch_lasx_xvadd_b:
case Intrinsic::loongarch_lasx_xvadd_h:
case Intrinsic::loongarch_lasx_xvadd_w:
case Intrinsic::loongarch_lasx_xvadd_d:
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
N->getOperand(2));
case Intrinsic::loongarch_lsx_vaddi_bu:
case Intrinsic::loongarch_lsx_vaddi_hu:
case Intrinsic::loongarch_lsx_vaddi_wu:
case Intrinsic::loongarch_lsx_vaddi_du:
case Intrinsic::loongarch_lasx_xvaddi_bu:
case Intrinsic::loongarch_lasx_xvaddi_hu:
case Intrinsic::loongarch_lasx_xvaddi_wu:
case Intrinsic::loongarch_lasx_xvaddi_du:
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
lowerVectorSplatImm<5>(N, 2, DAG));
case Intrinsic::loongarch_lsx_vsub_b:
case Intrinsic::loongarch_lsx_vsub_h:
case Intrinsic::loongarch_lsx_vsub_w:
case Intrinsic::loongarch_lsx_vsub_d:
case Intrinsic::loongarch_lasx_xvsub_b:
case Intrinsic::loongarch_lasx_xvsub_h:
case Intrinsic::loongarch_lasx_xvsub_w:
case Intrinsic::loongarch_lasx_xvsub_d:
return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
N->getOperand(2));
case Intrinsic::loongarch_lsx_vsubi_bu:
case Intrinsic::loongarch_lsx_vsubi_hu:
case Intrinsic::loongarch_lsx_vsubi_wu:
case Intrinsic::loongarch_lsx_vsubi_du:
case Intrinsic::loongarch_lasx_xvsubi_bu:
case Intrinsic::loongarch_lasx_xvsubi_hu:
case Intrinsic::loongarch_lasx_xvsubi_wu:
case Intrinsic::loongarch_lasx_xvsubi_du:
return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
lowerVectorSplatImm<5>(N, 2, DAG));
case Intrinsic::loongarch_lsx_vneg_b:
case Intrinsic::loongarch_lsx_vneg_h:
case Intrinsic::loongarch_lsx_vneg_w:
case Intrinsic::loongarch_lsx_vneg_d:
case Intrinsic::loongarch_lasx_xvneg_b:
case Intrinsic::loongarch_lasx_xvneg_h:
case Intrinsic::loongarch_lasx_xvneg_w:
case Intrinsic::loongarch_lasx_xvneg_d:
return DAG.getNode(
ISD::SUB, DL, N->getValueType(0),
DAG.getConstant(
APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
/*isSigned=*/true),
SDLoc(N), N->getValueType(0)),
N->getOperand(1));
case Intrinsic::loongarch_lsx_vmax_b:
case Intrinsic::loongarch_lsx_vmax_h:
case Intrinsic::loongarch_lsx_vmax_w:
case Intrinsic::loongarch_lsx_vmax_d:
case Intrinsic::loongarch_lasx_xvmax_b:
case Intrinsic::loongarch_lasx_xvmax_h:
case Intrinsic::loongarch_lasx_xvmax_w:
case Intrinsic::loongarch_lasx_xvmax_d:
return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
N->getOperand(2));
case Intrinsic::loongarch_lsx_vmax_bu:
case Intrinsic::loongarch_lsx_vmax_hu:
case Intrinsic::loongarch_lsx_vmax_wu:
case Intrinsic::loongarch_lsx_vmax_du:
case Intrinsic::loongarch_lasx_xvmax_bu:
case Intrinsic::loongarch_lasx_xvmax_hu:
case Intrinsic::loongarch_lasx_xvmax_wu:
case Intrinsic::loongarch_lasx_xvmax_du:
return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
N->getOperand(2));
case Intrinsic::loongarch_lsx_vmaxi_b:
case Intrinsic::loongarch_lsx_vmaxi_h:
case Intrinsic::loongarch_lsx_vmaxi_w:
case Intrinsic::loongarch_lsx_vmaxi_d:
case Intrinsic::loongarch_lasx_xvmaxi_b:
case Intrinsic::loongarch_lasx_xvmaxi_h:
case Intrinsic::loongarch_lasx_xvmaxi_w:
case Intrinsic::loongarch_lasx_xvmaxi_d:
return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
case Intrinsic::loongarch_lsx_vmaxi_bu:
case Intrinsic::loongarch_lsx_vmaxi_hu:
case Intrinsic::loongarch_lsx_vmaxi_wu:
case Intrinsic::loongarch_lsx_vmaxi_du:
case Intrinsic::loongarch_lasx_xvmaxi_bu:
case Intrinsic::loongarch_lasx_xvmaxi_hu:
case Intrinsic::loongarch_lasx_xvmaxi_wu:
case Intrinsic::loongarch_lasx_xvmaxi_du:
return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
lowerVectorSplatImm<5>(N, 2, DAG));
case Intrinsic::loongarch_lsx_vmin_b:
case Intrinsic::loongarch_lsx_vmin_h:
case Intrinsic::loongarch_lsx_vmin_w:
case Intrinsic::loongarch_lsx_vmin_d: