// blob: 5e3af53dc9188c6c4a7572da639149f16c6bfa84 [file] [log] [blame]
//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// This file implements the SystemZTargetLowering class.
#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
using namespace llvm;
#define DEBUG_TYPE "systemz-lower"
namespace {
/// Represents information about a comparison: its two operands, the chain
/// (for strict floating-point comparisons), the opcode selected to perform
/// it, and the condition-code masks that describe its outcome.
struct Comparison {
  /// Record the operands and chain. The opcode, integer-comparison type and
  /// both CC masks start out as zero.
  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
      : Op0(Op0In), Op1(Op1In), Chain(ChainIn) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // Chain if this is a strict floating-point comparison.
  SDValue Chain;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode = 0;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType = 0;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid = 0;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask = 0;
};
} // end anonymous namespace
// Classify VT as either 32 or 64 bit: returns true for i32 and false for
// i64.  Any other type is a programming error and hits llvm_unreachable.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    // Without this label the unreachable marker would sit after the i64
    // return and other types would fall out of the switch with no result.
    llvm_unreachable("Unsupported type");
  }
}
// Return a version of MachineOperand that can be safely used before the
// final use.  Op is taken by value, so the caller's operand is untouched.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  // A register operand may carry a kill flag that is only valid on its last
  // use; clear it on the copy so that an earlier use does not end the
  // register's live range prematurely.
  if (Op.isReg())
    Op.setIsKill(false);
  // Every operand kind (register or not) is returned.
  return Op;
}
// Construct the SystemZ lowering object for target machine TM and subtarget
// STI.  The body registers a register class for each value type the
// subtarget supports and then records, per operation and value type, whether
// that operation is Legal, Custom, Promote or Expand.  It finishes by setting
// the memcpy/memset store limits and enabling strict FP handling.
// NOTE(review): this copy of the function appears to have lost its
// block-closing braces, several `else` keywords, the start of some loop
// headers and a number of statements (some comments below are followed by
// no code).  Confirm the intended nesting against the upstream file before
// relying on the control flow exactly as shown.
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
// Integer type as wide (in bits) as a pointer in address space 0.
MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
// Set up the register classes.
// NOTE(review): as written the GR32Bit registration below is unconditional
// and follows the GRX32Bit one; presumably it belongs to an `else` — confirm.
if (Subtarget.hasHighWord())
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
// Floating-point and vector register classes are only registered when the
// subtarget is not using software floating point.
if (!useSoftFloat()) {
if (Subtarget.hasVector()) {
addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
} else {
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
// NOTE(review): the FP128Bit registration below is unconditional as written;
// presumably it is the `else` of the vector-enhancements check — confirm.
if (Subtarget.hasVectorEnhancements1())
addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
if (Subtarget.hasVector()) {
addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
// NOTE(review): the comments from here down to the alignment remark are not
// followed by any statements in this copy; the calls they describe seem to
// be missing.
// Compute derived properties from the register classes
// Set up special registers.
// TODO: It may be better to default to latency-oriented scheduling, however
// LLVM's current latency-oriented scheduler can't handle physreg definitions
// such as SystemZ has with CC, so set this to the register-pressure
// scheduler, because it can.
// Instructions are strings of 2-byte aligned 2-byte values.
// For performance reasons we prefer 16-byte alignment.
// Handle operations that are handled in a similar way for all types.
// NOTE(review): the beginning of this loop header (initialisation and bound
// of I) is not present here.
++I) {
MVT VT = MVT::SimpleValueType(I);
if (isTypeLegal(VT)) {
// Lower SET_CC into an IPM-based sequence.
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
// Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
setOperationAction(ISD::SELECT, VT, Expand);
// Lower SELECT_CC and BR_CC into separate comparisons and branches.
setOperationAction(ISD::SELECT_CC, VT, Custom);
setOperationAction(ISD::BR_CC, VT, Custom);
// Expand jump table branches as address arithmetic followed by an
// indirect jump.
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
// Expand BRCOND into a BR_CC (see above).
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
// Handle integer types.
// NOTE(review): the beginning of this loop header is not present here.
++I) {
MVT VT = MVT::SimpleValueType(I);
if (isTypeLegal(VT)) {
setOperationAction(ISD::ABS, VT, Legal);
// Expand individual DIV and REMs into DIVREMs.
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Custom);
setOperationAction(ISD::UDIVREM, VT, Custom);
// Support addition/subtraction with overflow.
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
// Support addition/subtraction with carry.
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
// Support carry in as value rather than glue.
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
// Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
// stores, putting a serialization instruction after the stores.
setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
// NOTE(review): the first line of the following comment seems to be
// missing; the surviving text reads:
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
// Use POPCNT on z196 and above.
// NOTE(review): as written the Expand setting below is unconditional and
// overrides the Custom one; presumably it belongs to an `else` — confirm.
if (Subtarget.hasPopulationCount())
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Expand);
// No special instructions for these.
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
// Use *MUL_LOHI where possible instead of MULH*.
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Custom);
setOperationAction(ISD::UMUL_LOHI, VT, Custom);
// Only z196 and above have native support for conversions to unsigned.
// On z10, promoting to i64 doesn't generate an inexact condition for
// values that are outside the i32 range but in the i64 range, so use
// the default expansion.
if (!Subtarget.hasFPExtension())
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
// Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
// default to Expand, so need to be modified to Legal where appropriate.
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
if (Subtarget.hasFPExtension())
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
// And similarly for STRICT_[SU]INT_TO_FP.
setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
if (Subtarget.hasFPExtension())
setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
// Type legalization will convert 8- and 16-bit atomic operations into
// forms that operate on i32s (but still keeping the original memory VT).
// Lower them into full i32 operations.
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
// Even though i128 is not a legal type, we still need to custom lower
// the atomic operations in order to exploit SystemZ instructions.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
// We can use the CC result of compare-and-swap to implement
// the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Traps are legal, as we will convert them to "j .+2".
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// z10 has instructions for signed but not unsigned FP conversion.
// Handle unsigned 32-bit types as signed 64-bit types.
if (!Subtarget.hasFPExtension()) {
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
// We have native support for a 64-bit CTLZ, via FLOGR.
setOperationAction(ISD::CTLZ, MVT::i32, Promote);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);
// On z15 we have native support for a 64-bit CTPOP.
if (Subtarget.hasMiscellaneousExtensions3()) {
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
// FIXME: Can we support these natively?
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
// We have native instructions for i8, i16 and i32 extensions, but not i1.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
// Handle the various types of symbolic address.
setOperationAction(ISD::ConstantPool, PtrVT, Custom);
setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
setOperationAction(ISD::BlockAddress, PtrVT, Custom);
setOperationAction(ISD::JumpTable, PtrVT, Custom);
// We need to handle dynamic allocations specially because of the
// 160-byte area at the bottom of the stack.
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
// Use custom expanders so that we can force the function to use
// a frame pointer.
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
// Handle prefetches with PFD or PFDRL.
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
// Assume by default that all vector operations need to be expanded.
// Only actions that are currently Legal are switched to Expand.
for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
if (getOperationAction(Opcode, VT) == Legal)
setOperationAction(Opcode, VT, Expand);
// Likewise all truncating stores and extending loads.
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
if (isTypeLegal(VT)) {
// These operations are legal for anything that can be stored in a
// vector register, even if there is no native support for the format
// as such. In particular, we can do these for v4f32 even though there
// are no specific instructions for that format.
setOperationAction(ISD::LOAD, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::BITCAST, VT, Legal);
setOperationAction(ISD::UNDEF, VT, Legal);
// Likewise, except that we need to replace the nodes with something
// more specific.
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// Handle integer vector types.
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
if (isTypeLegal(VT)) {
// These operations have direct equivalents.
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
// v2i64 multiplication is not marked Legal.
if (VT != MVT::v2i64)
setOperationAction(ISD::MUL, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::AND, VT, Legal);
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
// NOTE(review): as written the Custom setting below is unconditional and
// overrides the Legal one; presumably it belongs to an `else` — confirm.
if (Subtarget.hasVectorEnhancements1())
setOperationAction(ISD::CTPOP, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Legal);
setOperationAction(ISD::CTLZ, VT, Legal);
// Convert a GPR scalar to a vector by inserting it into element 0.
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
// Use a series of unpacks for extensions.
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
// Custom-lower shifts so that shifts by a scalar amount can be detected
// and converted (the original comment is truncated at this point).
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
// At present ROTL isn't matched by DAGCombiner. ROTR should be
// converted into ROTL.
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
// Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
// and inverting the result as necessary.
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
if (Subtarget.hasVectorEnhancements1())
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
if (Subtarget.hasVector()) {
// There should be no need to check for float types other than v2f64
// since <2 x f32> isn't a legal type.
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
// With vector enhancements 2 the same conversions are Legal for the
// 4 x 32-bit vector types.
if (Subtarget.hasVectorEnhancements2()) {
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
// Handle floating-point types.
// NOTE(review): the loop bound between the initialisation and the increment
// is not present here.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
++I) {
MVT VT = MVT::SimpleValueType(I);
if (isTypeLegal(VT)) {
// We can use FI for FRINT.
setOperationAction(ISD::FRINT, VT, Legal);
// We can use the extended form of FI for other rounding operations.
if (Subtarget.hasFPExtension()) {
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FROUND, VT, Legal);
// No special instructions for these.
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
// Handle constrained floating-point operations.
setOperationAction(ISD::STRICT_FADD, VT, Legal);
setOperationAction(ISD::STRICT_FSUB, VT, Legal);
setOperationAction(ISD::STRICT_FMUL, VT, Legal);
setOperationAction(ISD::STRICT_FDIV, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
if (Subtarget.hasFPExtension()) {
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
setOperationAction(ISD::STRICT_FROUND, VT, Legal);
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
// Handle floating-point vector types.
if (Subtarget.hasVector()) {
// Scalar-to-vector conversion is just a subreg.
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
// Some insertions and extractions can be done directly but others
// need to go via integers.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
// These operations have direct equivalents.
setOperationAction(ISD::FADD, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
// Handle constrained floating-point operations.
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
// The vector enhancements facility 1 has instructions for these.
if (Subtarget.hasVectorEnhancements1()) {
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
// Minimum/maximum operations, scalar and vector.
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
// Handle constrained floating-point operations.
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
// We only have fused f128 multiply-addition on vector registers.
if (!Subtarget.hasVectorEnhancements1()) {
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
// We don't have a copysign instruction on vector registers.
if (Subtarget.hasVectorEnhancements1())
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
// Needed so that we don't try to implement f128 constant loads using
// a load-and-extend of a f80 constant (in cases where the constant
// would fit in an f80).
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
// We don't have extending load instruction on vector registers.
if (Subtarget.hasVectorEnhancements1()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
// Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
// We have 64-bit FPR<->GPR moves, but need special handling for
// 32-bit forms.
if (!Subtarget.hasVector()) {
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
// VASTART and VACOPY need to deal with the SystemZ-specific varargs
// structure, but VAEND is a no-op.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Codes for which we want to perform some z-specific combinations.
// NOTE(review): no statements follow the comment above in this copy; the
// calls it describes seem to be missing.
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
MaxStoresPerMemcpyOptSize = 0;
// The main memset sequence is a byte store followed by an MVC.
// Two STC or MV..I stores win over that, but the kind of fused stores
// generated by target-independent code don't when the byte value is
// variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
// than "STC;MVC". Handle the choice in target-specific code instead.
MaxStoresPerMemset = 0;
MaxStoresPerMemsetOptSize = 0;
// Default to having -disable-strictnode-mutation on
IsStrictFPEnabled = true;
/// Report whether floating point is handled in software, as stated by the
/// subtarget.
bool SystemZTargetLowering::useSoftFloat() const {
  return Subtarget.hasSoftFloat();
}
/// Return the value type produced by a SETCC whose operands have type VT.
/// Vector comparisons yield the integer-element counterpart of VT; every
/// scalar comparison yields i32.  The data layout and context are unused.
EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();
  return MVT::i32;
}
bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF, EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
return true;
case MVT::f128:
return Subtarget.hasVectorEnhancements1();
return false;
// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
// On the successful paths visible here, Opcode and VecVT are set to describe
// the node to build; the replicate and rotate-mask forms also append their
// operands to OpVals.  Returns false when the subtarget has no vector
// support, when an FP128 constant is requested without vector
// enhancements 1, or when the splat is wider than 64 bits and is not a byte
// mask.
// NOTE(review): closing braces and at least one statement appear to be
// missing from this copy (see the notes below); confirm against upstream.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
const SystemZSubtarget &Subtarget) {
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
if (!Subtarget.hasVector() ||
(isFP128 && !Subtarget.hasVectorEnhancements1()))
return false;
// Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
// preferred way of creating all-zero and all-one vectors so give it
// priority over other methods below.
// Mask collects one bit per byte of IntBits that is 0xff; I is declared
// outside the loop so that it can be compared with VectorBytes afterwards.
unsigned Mask = 0;
unsigned I = 0;
for (; I < SystemZ::VectorBytes; ++I) {
uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
if (Byte == 0xff)
Mask |= 1ULL << I;
// NOTE(review): the statement controlled by this `else if` appears to be
// missing (presumably an early exit from the loop for a byte that is
// neither 0x00 nor 0xff) — confirm.
else if (Byte != 0)
// I reaching VectorBytes means every byte was examined.
if (I == SystemZ::VectorBytes) {
Opcode = SystemZISD::BYTE_MASK;
VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
return true;
if (SplatBitSize > 64)
return false;
// Helper: try to materialise the splat element Value, first as a
// sign-extended 16-bit replicated immediate and then as a rotate mask.
auto tryValue = [&](uint64_t Value) -> bool {
int64_t SignedValue = SignExtend64(Value, SplatBitSize);
if (isInt<16>(SignedValue)) {
OpVals.push_back(((unsigned) SignedValue));
Opcode = SystemZISD::REPLICATE;
VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
SystemZ::VectorBits / SplatBitSize);
return true;
unsigned Start, End;
if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
// isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
// denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
// an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
OpVals.push_back(Start - (64 - SplatBitSize));
OpVals.push_back(End - (64 - SplatBitSize));
Opcode = SystemZISD::ROTATE_MASK;
VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
SystemZ::VectorBits / SplatBitSize);
return true;
return false;
// First try assuming that any undefined bits above the highest set bit
// and below the lowest set bit are 1s. This increases the likelihood of
// being able to use a sign-extended element value in VECTOR REPLICATE
// IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
uint64_t SplatBitsZ = SplatBits.getZExtValue();
uint64_t SplatUndefZ = SplatUndef.getZExtValue();
// Lower: undefined bits below the lowest set bit of the splat value.
uint64_t Lower =
(SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
// Upper: undefined bits at or above the highest set bit of the splat value.
uint64_t Upper =
(SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
if (tryValue(SplatBitsZ | Upper | Lower))
return true;
// Now try assuming that any undefined bits between the first and
// last defined set bits are set. This increases the chances of
// using a non-wraparound mask.
uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
return tryValue(SplatBitsZ | Middle);
// Build the constant description from a floating-point immediate.
// IntBits receives the value's bit pattern zero-extended to 128 bits and
// then shifted left by (VectorBits - Width); isFP128 records whether the
// value uses IEEE quad semantics.  SplatBits starts as the full bit pattern
// and is repeatedly halved while both halves match, so SplatBitSize ends up
// as the width of the smallest repeating element found.  SplatUndef is
// cleared.
// NOTE(review): closing braces and the loop exit appear to be missing from
// this copy; confirm against upstream.
SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
SplatBits = FPImm.bitcastToAPInt();
unsigned Width = SplatBits.getBitWidth();
IntBits <<= (SystemZ::VectorBits - Width);
// Find the smallest splat.
while (Width > 8) {
unsigned HalfSize = Width / 2;
APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
APInt LowValue = SplatBits.trunc(HalfSize);
// If the two halves do not match, stop here.
// NOTE(review): the statement that leaves the loop seems to be missing; as
// written the assignment below is what this `if` controls — confirm.
if (HighValue != LowValue || 8 > HalfSize)
SplatBits = HighValue;
Width = HalfSize;
SplatUndef = 0;
SplatBitSize = Width;
// Build the constant description from a constant BUILD_VECTOR node (asserted
// below).  isConstantSplat is queried twice: once with a minimum splat size
// of 128 to fill IntBits, and once with a minimum of 8 to fill SplatBits;
// both calls also write SplatUndef, SplatBitSize and HasAnyUndefs.
// NOTE(review): both calls are cut off after their sixth argument in this
// copy (the remaining argument(s) and the closing `);` are missing), as is
// the function's closing brace; confirm against upstream.
SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
bool HasAnyUndefs;
// Get IntBits by finding the 128 bit splat.
BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
// Get SplatBits by finding the 8 bit or greater splat.
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
/// Return true if the floating-point immediate Imm can be materialised
/// directly: either it is a (positive or negative) zero, or it can be
/// generated with a vector instruction.  VT and ForCodeSize are unused.
bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  const bool IsAnyZero = Imm.isZero() || Imm.isNegZero();
  if (IsAnyZero)
    return true;

  // Anything else has to be expressible as a vector constant.
  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
/// Returns true if stack probing through inline assembly is requested.
/// That is the case only when the function carries a "probe-stack"
/// attribute whose string value is "inline-asm".
bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
  // If the function specifically requests inline stack probes, emit them.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
           "inline-asm";
  return false;
}
/// Return true if Imm can be used directly as a compare immediate, i.e. it
/// fits in either a signed or an unsigned 32-bit field.
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  const bool FitsSigned = isInt<32>(Imm);
  const bool FitsUnsigned = isUInt<32>(Imm);
  return FitsSigned || FitsUnsigned;
}
/// Return true if Imm can be used directly as an add immediate, i.e. either
/// Imm or its negation fits in an unsigned 32-bit field.
bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  // Negate in unsigned arithmetic: evaluating -Imm on int64_t overflows
  // (undefined behaviour) when Imm is INT64_MIN.  The wrapped result for
  // that input is 2^63, which is correctly rejected.
  const uint64_t Negated = uint64_t(0) - static_cast<uint64_t>(Imm);
  return isUInt<32>(Imm) || isUInt<32>(Negated);
}
/// Report that misaligned memory accesses are always allowed.  When the
/// caller supplies a non-null Fast pointer it is set to true as well.  The
/// type, address space, alignment and flags arguments are ignored.
bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast != nullptr)
    *Fast = true;
  return true;
}
// Information about the addressing mode for a memory access: whether a long
// displacement and whether an index register may be used.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  // Both properties must be given explicitly; there is no default mode.
  AddressingMode(bool LongDispl, bool IdxReg)
      : LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.  HasVector says whether vector support
// is available; Ty is the type of the loaded value.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                          Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special: for an 8-bit integer neither a
  // long displacement nor an index register is allowed, for every other
  // type both are.
  const bool IsMVC = Ty->isIntegerTy(8);
  return AddressingMode(!IsMVC/*LongDispl*/, !IsMVC/*IdxReg*/);
}
// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
// I is the memory-accessing instruction being classified and HasVector says
// whether vector support is available.  The checks run in order: memory
// intrinsics, a single-use load feeding a compare or store in the same
// block, a store of a single-use load, and (with vector support) FP or
// vector accesses.  If none applies, both a long displacement and an index
// register are allowed.
// NOTE(review): closing braces and part of the MemAccessTy initializer are
// missing from this copy; confirm the nesting against upstream.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
// Memory intrinsics get neither a long displacement nor an index register.
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
if (isa<LoadInst>(I) && I->hasOneUse()) {
auto *SingleUser = cast<Instruction>(*I->user_begin());
if (SingleUser->getParent() == I->getParent()) {
if (isa<ICmpInst>(SingleUser)) {
if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
if (C->getBitWidth() <= 64 &&
(isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
// Comparison of memory with 16 bit signed / unsigned immediate
return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
} else if (isa<StoreInst>(SingleUser))
// Load->Store
return getLoadStoreAddrMode(HasVector, I->getType());
} else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
// Load->Store
return getLoadStoreAddrMode(HasVector, LoadI->getType());
if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
// * Use LDE instead of LE/LEY for z13 to avoid partial register
// dependencies (LDE only supports small offsets).
// * Utilize the vector registers to hold floating point
// values (vector load / store instructions only support small
// offsets).
// NOTE(review): the false arm of this conditional expression (the accessed
// type for the store case) and the terminating `);` are missing here.
Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
bool IsFPAccess = MemAccessTy->isFloatingPointTy();
bool IsVectorAccess = MemAccessTy->isVectorTy();
// A store of an extracted vector element will be combined into a VSTE type
// instruction.
if (!IsVectorAccess && isa<StoreInst>(I)) {
Value *DataOp = I->getOperand(0);
if (isa<ExtractElementInst>(DataOp))
IsVectorAccess = true;
// A load which gets inserted into a vector element will be combined into a
// VLE type instruction.
if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
User *LoadUser = *I->user_begin();
if (isa<InsertElementInst>(LoadUser))
IsVectorAccess = true;
// FP and vector accesses keep the index register but give up the long
// displacement.
if (IsFPAccess || IsVectorAccess)
return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
// Default: both a long displacement and an index register are allowed.
return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
// Punt on globals for now, although they can be used in limited
if (AM.BaseGV)
return false;
// Require a 20-bit signed offset.
if (!isInt<20>(AM.BaseOffs))
return false;
AddressingMode SupportedAM(true, true);
if (I != nullptr)
SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
return false;
if (!SupportedAM.IndexReg)
// No indexing allowed.
return AM.Scale == 0;
// Indexing is OK but no scale factor can be applied.
return AM.Scale == 0 || AM.Scale == 1;
// Return true if truncating FromType to ToType is free. This holds only for
// integer types where the source is strictly wider than the destination.
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  const bool BothIntegers = FromType->isIntegerTy() && ToType->isIntegerTy();
  if (!BothIntegers)
    return false;

  const unsigned SrcWidth = FromType->getPrimitiveSizeInBits().getFixedSize();
  const unsigned DstWidth = ToType->getPrimitiveSizeInBits().getFixedSize();
  return DstWidth < SrcWidth;
}
// EVT flavour of isTruncateFree: true only for integer value types where
// the source is strictly wider than the destination.
bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  const bool BothIntegers = FromVT.isInteger() && ToVT.isInteger();
  if (!BothIntegers)
    return false;

  const unsigned SrcWidth = FromVT.getFixedSizeInBits();
  const unsigned DstWidth = ToVT.getFixedSizeInBits();
  return DstWidth < SrcWidth;
}
// Inline asm support
// Classify an inline-asm constraint string. Single-letter constraints known
// to this target are mapped to C_RegisterClass, C_Memory or C_Immediate;
// everything else is handed to TargetLowering::getConstraintType.
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
// Register constraints.
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
case 'f': // Floating-point register
case 'h': // High-part register
case 'r': // General-purpose register
case 'v': // Vector register
return C_RegisterClass;
// Memory constraints.
case 'Q': // Memory with base and unsigned 12-bit displacement
case 'R': // Likewise, plus an index
case 'S': // Memory with base and signed 20-bit displacement
case 'T': // Likewise, plus an index
case 'm': // Equivalent to 'T'.
return C_Memory;
// Immediate constraints.
case 'I': // Unsigned 8-bit constant
case 'J': // Unsigned 12-bit constant
case 'K': // Signed 16-bit constant
case 'L': // Signed 20-bit displacement (on all targets we support)
case 'M': // 0x7fffffff
return C_Immediate;
// Not a constraint handled here: use the generic classification.
return TargetLowering::getConstraintType(Constraint);
// Compute how well the operand described by info matches the single
// constraint letter pointed to by constraint. The weight starts out as
// CW_Invalid and is raised to CW_Register or CW_Constant when the operand's
// type or constant value satisfies the constraint. Operands without an IR
// value get CW_Default.
TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
// Generic handling from the base class.
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
// The general-purpose style register constraints match integer operands.
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
case 'h': // High-part register
case 'r': // General-purpose register
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_Register;
case 'f': // Floating-point register
if (type->isFloatingPointTy())
weight = CW_Register;
case 'v': // Vector register
if ((type->isVectorTy() || type->isFloatingPointTy()) &&
weight = CW_Register;
// The immediate constraints match only ConstantInt operands whose value is
// in the range the letter stands for.
case 'I': // Unsigned 8-bit constant
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (isUInt<8>(C->getZExtValue()))
weight = CW_Constant;
case 'J': // Unsigned 12-bit constant
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (isUInt<12>(C->getZExtValue()))
weight = CW_Constant;
case 'K': // Signed 16-bit constant
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (isInt<16>(C->getSExtValue()))
weight = CW_Constant;
case 'L': // Signed 20-bit displacement (on all targets we support)
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (isInt<20>(C->getSExtValue()))
weight = CW_Constant;
case 'M': // 0x7fffffff
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (C->getZExtValue() == 0x7fffffff)
weight = CW_Constant;
return weight;
// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified. RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers; Size bounds
// the valid indices into Map.
//
// Returns the (register, class) pair on success. Returns (0, nullptr) when
// NNN is absent, is not a decimal number, is not below Size, or maps to a
// zero entry in Map.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  // "{tN}" is the shortest constraint that can carry a number, so anything
  // shorter cannot match; checking the length first also keeps the index
  // below in bounds when assertions are compiled out. isdigit() has
  // undefined behavior for negative arguments, hence the conversion through
  // unsigned char.
  if (Constraint.size() > 3 &&
      isdigit(static_cast<unsigned char>(Constraint[2]))) {
    unsigned Index;
    // The number is everything between the type letter and the closing '}'.
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
// Map an inline-asm register constraint to a register class and, for
// explicit "{...}" constraints, to a specific register. Single-letter GCC
// constraints yield (0, register class) with the class chosen from VT;
// "{rN}", "{fN}" and "{vN}" are parsed with parseRegisterNumber using the
// register table that matches VT. Anything else is delegated to
// TargetLowering::getRegForInlineAsmConstraint.
std::pair<unsigned, const TargetRegisterClass *>
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
// General-purpose registers: 64-bit and 128-bit classes for i64 / i128,
// the 32-bit class otherwise.
case 'd': // Data register (equivalent to 'r')
case 'r': // General-purpose register
if (VT == MVT::i64)
return std::make_pair(0U, &SystemZ::GR64BitRegClass);
else if (VT == MVT::i128)
return std::make_pair(0U, &SystemZ::GR128BitRegClass);
return std::make_pair(0U, &SystemZ::GR32BitRegClass);
// Address registers follow the same VT split as 'r'.
case 'a': // Address register
if (VT == MVT::i64)
return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
else if (VT == MVT::i128)
return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
case 'h': // High-part register (an LLVM extension)
return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
// Floating-point registers are only handed out when not using soft float.
case 'f': // Floating-point register
if (!useSoftFloat()) {
if (VT == MVT::f64)
return std::make_pair(0U, &SystemZ::FP64BitRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SystemZ::FP128BitRegClass);
return std::make_pair(0U, &SystemZ::FP32BitRegClass);
// Vector registers require the vector facility; f32 / f64 use the narrower
// vector register classes.
case 'v': // Vector register
if (Subtarget.hasVector()) {
if (VT == MVT::f32)
return std::make_pair(0U, &SystemZ::VR32BitRegClass);
if (VT == MVT::f64)
return std::make_pair(0U, &SystemZ::VR64BitRegClass);
return std::make_pair(0U, &SystemZ::VR128BitRegClass);
if (Constraint.size() > 0 && Constraint[0] == '{') {
// We need to override the default register parsing for GPRs and FPRs
// because the interpretation depends on VT. The internal names of
// the registers are also different from the external names
// (F0D and F0S instead of F0, etc.).
if (Constraint[1] == 'r') {
// 16 GPRs; i32 and i128 have their own tables, everything else uses the
// 64-bit registers.
if (VT == MVT::i32)
return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
SystemZMC::GR32Regs, 16);
if (VT == MVT::i128)
return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
SystemZMC::GR128Regs, 16);
return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
SystemZMC::GR64Regs, 16);
if (Constraint[1] == 'f') {
// No FP register can be named when using soft float.
if (useSoftFloat())
return std::make_pair(
0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
SystemZMC::FP32Regs, 16);
if (VT == MVT::f128)
return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
SystemZMC::FP128Regs, 16);
return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
SystemZMC::FP64Regs, 16);
if (Constraint[1] == 'v') {
// No vector register can be named without the vector facility. There are
// 32 of them otherwise.
if (!Subtarget.hasVector())
return std::make_pair(
0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
SystemZMC::VR32Regs, 32);
if (VT == MVT::f64)
return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
SystemZMC::VR64Regs, 32);
return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
SystemZMC::VR128Regs, 32);
// Fall back to the generic handling.
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Look up a physical register by its textual name. Of the names visible
// here only "r15" is recognized (mapped to SystemZ::R15D); if the lookup
// produces no register a fatal error is reported.
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
.Case("r15", SystemZ::R15D)
// A null Register means the name was not recognized.
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
// Lower an inline-asm operand for the given constraint. For the
// single-letter immediate constraints handled here, a target constant is
// appended to Ops when Op is a ConstantSDNode whose value is in the range
// the constraint allows. The generic TargetLowering implementation is
// called for everything else.
void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Only support length 1 constraints for now.
if (Constraint.length() == 1) {
switch (Constraint[0]) {
// The unsigned constraints read the constant with getZExtValue(), the
// signed ones with getSExtValue().
case 'I': // Unsigned 8-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isUInt<8>(C->getZExtValue()))
Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
case 'J': // Unsigned 12-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isUInt<12>(C->getZExtValue()))
Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
case 'K': // Signed 16-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isInt<16>(C->getSExtValue()))
Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
case 'L': // Signed 20-bit displacement (on all targets we support)
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isInt<20>(C->getSExtValue()))
Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
case 'M': // 0x7fffffff
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (C->getZExtValue() == 0x7fffffff)
Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
// Generic handling.
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
// Calling conventions
#include ""
// Return the list of scratch registers. The same list is returned for every
// calling convention and ends with a 0 entry.
const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
    CallingConv::ID) const {
  static const MCPhysReg Regs[] = {
      SystemZ::R0D,
      SystemZ::R1D,
      SystemZ::R14D,
      0,
  };
  return Regs;
}
// A truncation is acceptable for a tail call exactly when it is free.
bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  const bool TruncIsFree = isTruncateFree(FromType, ToType);
  return TruncIsFree;
}
// A call may be emitted as a tail call if it is marked as one.
bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  const bool MarkedTail = CI->isTailCall();
  return MarkedTail;
}
// 128-bit single-element vector types are not supported yet. When such a
// type shows up as a function argument or return type it is better to stop
// with an error than to emit code that violates the ABI. This is detected
// as an original vector type (ArgVT) whose assigned type (VT) is no longer
// a vector.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (!ArgVT.isVector() || VT.isVector())
    return;
  report_fatal_error("Unsupported vector argument or return type");
}
static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
for (unsigned i = 0; i < Ins.size(); ++i)
VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
for (unsigned i = 0; i < Outs.size(); ++i)
VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
//
// Returns the converted value. Sign- and zero-extended locations first get
// an AssertSext / AssertZext node so the extension is known, then any
// extended location is truncated to the value type. BCvt locations are
// rebuilt as a vector and bitcast; Full locations are returned unchanged.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
CCValAssign &VA, SDValue Chain,
SDValue Value) {
// If the argument has been promoted from a smaller type, insert an
// assertion to capture this.
if (VA.getLocInfo() == CCValAssign::SExt)
Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
else if (VA.getLocInfo() == CCValAssign::ZExt)
Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
// Undo the promotion.
if (VA.isExtInLoc())
Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
else if (VA.getLocInfo() == CCValAssign::BCvt) {
// If this is a short vector argument loaded from the stack,
// extend from i64 to full vector size and then bitcast.
assert(VA.getLocVT() == MVT::i64);
// The i64 becomes element 0 of a v2i64; element 1 is undefined.
Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
} else
assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
return Value;
// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA. Return a copy of Value converted to
// VA.getLocVT(). The caller is responsible for handling indirect values.
//
// SExt / ZExt / AExt locations are widened with the matching extension
// node, BCvt locations are bitcast to v2i64 with element 0 extracted, and
// Full locations are returned unchanged.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
CCValAssign &VA, SDValue Value) {
switch (VA.getLocInfo()) {
case CCValAssign::SExt:
return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::ZExt:
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::BCvt:
// If this is a short vector argument to be stored to the stack,
// bitcast to v2i64 and then extract first element.
assert(VA.getLocVT() == MVT::i64);
Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
DAG.getConstant(0, DL, MVT::i32));
case CCValAssign::Full:
return Value;
llvm_unreachable("Unhandled getLocInfo()");
// Lower the incoming (formal) arguments of the current function. For each
// argument location computed by CC_SystemZ this creates either a copy from
// a live-in virtual register or a load from a fixed stack object, converts
// it to the value type and appends it to InVals. For variadic functions it
// additionally records the frame indexes needed by va_start and stores the
// unused FPR argument registers to their save slots. Returns the updated
// chain.
SDValue SystemZTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SystemZMachineFunctionInfo *FuncInfo =
auto *TFL =
static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Detect unsupported vector argument types.
if (Subtarget.hasVector())
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
// Counts of GPR / FPR argument registers taken by fixed arguments. The FPR
// count decides below which FPRs still need to be saved for varargs.
unsigned NumFixedGPRs = 0;
unsigned NumFixedFPRs = 0;
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
SDValue ArgValue;
CCValAssign &VA = ArgLocs[I];
EVT LocVT = VA.getLocVT();
if (VA.isRegLoc()) {
// Arguments passed in registers
// Pick the register class from the location type.
const TargetRegisterClass *RC;
switch (LocVT.getSimpleVT().SimpleTy) {
// Integers smaller than i64 should be promoted to i64.
llvm_unreachable("Unexpected argument type");
case MVT::i32:
NumFixedGPRs += 1;
RC = &SystemZ::GR32BitRegClass;
case MVT::i64:
NumFixedGPRs += 1;
RC = &SystemZ::GR64BitRegClass;
case MVT::f32:
NumFixedFPRs += 1;
RC = &SystemZ::FP32BitRegClass;
case MVT::f64:
NumFixedFPRs += 1;
RC = &SystemZ::FP64BitRegClass;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
RC = &SystemZ::VR128BitRegClass;
// Make the physical argument register live-in and read it through a
// fresh virtual register.
Register VReg = MRI.createVirtualRegister(RC);
MRI.addLiveIn(VA.getLocReg(), VReg);
ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
} else {
assert(VA.isMemLoc() && "Argument not register or memory");
// Create the frame index object for this incoming parameter.
int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
VA.getLocMemOffset(), true);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter. Unpromoted ints and floats are
// passed as right-justified 8-byte values.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
// A 4-byte value sits in the second half of its 8-byte slot.
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getIntPtrConstant(4, DL));
ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
// Convert the value of the argument register into the value that's
// being passed.
if (VA.getLocInfo() == CCValAssign::Indirect) {
// For indirect arguments ArgValue is the address of the real value.
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
// If the original argument was split (e.g. i128), we need
// to load all parts of it here (using the same address).
unsigned ArgIndex = Ins[I].OrigArgIndex;
assert (Ins[I].PartOffset == 0);
// Consume the following Ins entries that belong to the same original
// argument, loading each part at its PartOffset from the same address.
while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[I + 1];
unsigned PartOffset = Ins[I + 1].PartOffset;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
DAG.getIntPtrConstant(PartOffset, DL));
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
} else
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
if (IsVarArg) {
// Save the number of non-varargs registers for later use by va_start, etc.
// Likewise the address (in the form of a frame index) of where the
// first stack vararg would be. The 1-byte size here is arbitrary.
int64_t StackSize = CCInfo.getNextStackOffset();
FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
// ...and a similar frame index for the caller-allocated save area
// that will be used to store the incoming registers.
int64_t RegSaveOffset =
-SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
// Store the FPR varargs in the reserved frame slots. (We store the
// GPRs as part of the prologue.)
if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
// Only the FPRs not already used by fixed arguments are saved, so the
// first NumFixedFPRs entries of MemOps are never written.
SDValue MemOps[SystemZ::ELFNumArgFPRs];
for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
int FI =
MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
unsigned VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
// The store is chained on the register copy (result 1 of the copy).
MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
// Join the stores, which are independent of one another.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
return Chain;
static bool canUseSiblingCall(const CCState &ArgCCInfo,
SmallVectorImpl<CCValAssign> &ArgLocs,
SmallVectorImpl<ISD::OutputArg> &Outs) {
// Punt if there are any indirect or stack arguments, or if the call
// needs the callee-saved argument register R6, or if the call uses
// the callee-saved register arguments SwiftSelf and SwiftError.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (!VA.isRegLoc())
return false;
Register Reg = VA.getLocReg();
if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
return false;
if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
return false;
return true;
// Lower an outgoing call described by CLI. The steps visible here are:
// assign locations to the operands with CC_SystemZ, downgrade a requested
// tail call when canUseSiblingCall() rejects it, place each argument in its
// register or stack slot (spilling indirect arguments to a stack
// temporary), build the CALL / SIBCALL node, and finally copy the results
// described by RetCC_SystemZ out of their registers into InVals. Returns
// the resulting chain (or the SIBCALL node itself for a tail call).
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
// Reference into CLI: clearing it below is visible to the caller.
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy(MF.getDataLayout());
LLVMContext &Ctx = *DAG.getContext();
// Detect unsupported vector argument and return types.
if (Subtarget.hasVector()) {
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
// We don't support GuaranteedTailCallOpt, only automatically-detected
// sibling calls.
if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
IsTailCall = false;
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
// Mark the start of the call.
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
// Copy argument values to their designated locations.
SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// Created lazily, the first time a stack-passed argument is seen.
SDValue StackPtr;
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
SDValue ArgValue = OutVals[I];
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
unsigned ArgIndex = Outs[I].OrigArgIndex;
if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
// Allocate the full stack space for a promoted (and split) argument.
// The slot is sized as (register type width) * (number of registers).
Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
} else {
SlotVT = Outs[I].ArgVT;
SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
MachinePointerInfo::getFixedStack(MF, FI)));
// If the original argument was split (e.g. i128), we need
// to store all parts of it here (and pass just one address).
assert (Outs[I].PartOffset == 0);
while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[I + 1];
unsigned PartOffset = Outs[I + 1].PartOffset;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
DAG.getIntPtrConstant(PartOffset, DL));
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
// Every part must lie within the slot allocated above.
assert((PartOffset + PartValue.getValueType().getStoreSize() <=
SlotVT.getStoreSize()) && "Not enough space for argument part!");
// From here on the argument being passed is the slot's address.
ArgValue = SpillSlot;
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
if (VA.isRegLoc())
// Queue up the argument copies and emit them at the end.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
else {
assert(VA.isMemLoc() && "Argument not register or memory");
// Work out the address of the stack slot. Unpromoted ints and
// floats are passed as right-justified 8-byte values.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
// A 4-byte value goes in the second half of its 8-byte slot.
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
Offset += 4;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
DAG.getIntPtrConstant(Offset, DL));
// Emit the store.
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
// Join the stores, which are independent of one another.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Accept direct calls by converting symbolic call addresses to the
// associated Target* opcodes. Force %r1 to be used for indirect
// tail calls.
SDValue Glue;
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
} else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
} else if (IsTailCall) {
// Indirect tail call: move the target address into R1D and call via it.
Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
Glue = Chain.getValue(1);
Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
// Build a sequence of copy-to-reg nodes, chained and glued together.
for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
RegsToPass[I].second, Glue);
Glue = Chain.getValue(1);
// The first call operand is the chain and the second is the target address.
SmallVector<SDValue, 8> Ops;
// Add argument registers to the end of the list so that they are
// known live into the call.
for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
// Glue the call to the argument copies, if any.
if (Glue.getNode())
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// A sibling call is returned directly: no CALLSEQ_END and no result
// copies are emitted for it.
if (IsTailCall)
return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getConstant(NumBytes, DL, PtrVT, true),
DAG.getConstant(0, DL, PtrVT, true),
Glue, DL);
Glue = Chain.getValue(1);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
// Copy all of the result registers out of their specified physreg.
for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
CCValAssign &VA = RetLocs[I];
// Copy the value out, gluing the copy to the end of the call sequence.
SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
VA.getLocVT(), Glue);
// Result 1 of the copy is the new chain, result 2 the new glue.
Chain = RetValue.getValue(1);
Glue = RetValue.getValue(2);
// Convert the value of the return register into the value that's
// being returned.
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
return Chain;
// Return true if the return values described by Outs can be lowered.
// Any i128 return value is rejected up front; otherwise the answer comes
// from checking Outs against RetCC_SystemZ.
bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
// Detect unsupported vector return types.
if (Subtarget.hasVector())
// Special case that we cannot easily detect in RetCC_SystemZ since
// i128 is not a legal type.
for (auto &Out : Outs)
if (Out.ArgVT == MVT::i128)
return false;
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
// Lower a function return. Return locations are assigned with
// RetCC_SystemZ; a void return becomes a bare RET_FLAG on Chain. Otherwise
// each value is converted to its location type and copied into its return
// register, with the copies chained and glued together, and a RET_FLAG
// node is built from the collected operands. Returning a value from a GHC
// calling-convention function is a fatal error.
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
// Detect unsupported vector return types.
if (Subtarget.hasVector())
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
// Quick exit for void returns
if (RetLocs.empty())
return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
if (CallConv == CallingConv::GHC)
report_fatal_error("GHC functions return void only");
// Copy the result values into the output registers.
SDValue Glue;
SmallVector<SDValue, 4> RetOps;
for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
CCValAssign &VA = RetLocs[I];
SDValue RetValue = OutVals[I];
// Make the return register live on exit.
assert(VA.isRegLoc() && "Can only return in registers!");
// Promote the value as required.
RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
// Chain and glue the copies together.
Register Reg = VA.getLocReg();
Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
Glue = Chain.getValue(1);
// Each return register is also listed as an operand of the return node.
RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
// Update chain and glue.
// Operand 0 of the return node is the final chain.
RetOps[0] = Chain;
if (Glue.getNode())
return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument. Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
//
// The intrinsic ID is read from operand 1 of Op (operand 0 of a chained
// intrinsic node being the chain). Returns false, leaving the outputs
// untouched, for any ID not listed below.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
unsigned &CCValid) {
unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (Id) {
case Intrinsic::s390_tbegin:
Opcode = SystemZISD::TBEGIN;
return true;
case Intrinsic::s390_tbegin_nofloat:
return true;
case Intrinsic::s390_tend:
Opcode = SystemZISD::TEND;
CCValid = SystemZ::CCMASK_TEND;
return true;
return false;
// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument. Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
//
// The intrinsic ID is read from operand 0 of Op. Intrinsics that differ
// only in element size share one SystemZISD opcode. Returns false, leaving
// the outputs untouched, for any ID not listed below.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (Id) {
// Vector pack intrinsics.
case Intrinsic::s390_vpkshs:
case Intrinsic::s390_vpksfs:
case Intrinsic::s390_vpksgs:
Opcode = SystemZISD::PACKS_CC;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vpklshs:
case Intrinsic::s390_vpklsfs:
case Intrinsic::s390_vpklsgs:
Opcode = SystemZISD::PACKLS_CC;
CCValid = SystemZ::CCMASK_VCMP;
return true;
// Vector integer compare intrinsics.
case Intrinsic::s390_vceqbs:
case Intrinsic::s390_vceqhs:
case Intrinsic::s390_vceqfs:
case Intrinsic::s390_vceqgs:
Opcode = SystemZISD::VICMPES;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vchbs:
case Intrinsic::s390_vchhs:
case Intrinsic::s390_vchfs:
case Intrinsic::s390_vchgs:
Opcode = SystemZISD::VICMPHS;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vchlbs:
case Intrinsic::s390_vchlhs:
case Intrinsic::s390_vchlfs:
case Intrinsic::s390_vchlgs:
Opcode = SystemZISD::VICMPHLS;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vtm:
Opcode = SystemZISD::VTM;
CCValid = SystemZ::CCMASK_VCMP;
return true;
// Vector string intrinsics. All but VISTR may produce any CC value.
case Intrinsic::s390_vfaebs:
case Intrinsic::s390_vfaehs:
case Intrinsic::s390_vfaefs:
Opcode = SystemZISD::VFAE_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vfaezbs:
case Intrinsic::s390_vfaezhs:
case Intrinsic::s390_vfaezfs:
Opcode = SystemZISD::VFAEZ_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vfeebs:
case Intrinsic::s390_vfeehs:
case Intrinsic::s390_vfeefs:
Opcode = SystemZISD::VFEE_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vfeezbs:
case Intrinsic::s390_vfeezhs:
case Intrinsic::s390_vfeezfs:
Opcode = SystemZISD::VFEEZ_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vfenebs:
case Intrinsic::s390_vfenehs:
case Intrinsic::s390_vfenefs:
Opcode = SystemZISD::VFENE_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vfenezbs:
case Intrinsic::s390_vfenezhs:
case Intrinsic::s390_vfenezfs:
Opcode = SystemZISD::VFENEZ_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
// VISTR only produces CC 0 or CC 3.
case Intrinsic::s390_vistrbs:
case Intrinsic::s390_vistrhs:
case Intrinsic::s390_vistrfs:
Opcode = SystemZISD::VISTR_CC;
CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
return true;
case Intrinsic::s390_vstrcbs:
case Intrinsic::s390_vstrchs:
case Intrinsic::s390_vstrcfs:
Opcode = SystemZISD::VSTRC_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vstrczbs:
case Intrinsic::s390_vstrczhs:
case Intrinsic::s390_vstrczfs:
Opcode = SystemZISD::VSTRCZ_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vstrsb:
case Intrinsic::s390_vstrsh:
case Intrinsic::s390_vstrsf:
Opcode = SystemZISD::VSTRS_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vstrszb:
case Intrinsic::s390_vstrszh:
case Intrinsic::s390_vstrszf:
Opcode = SystemZISD::VSTRSZ_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
// Vector floating-point compare intrinsics.
case Intrinsic::s390_vfcedbs:
case Intrinsic::s390_vfcesbs:
Opcode = SystemZISD::VFCMPES;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vfchdbs:
case Intrinsic::s390_vfchsbs:
Opcode = SystemZISD::VFCMPHS;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vfchedbs:
case Intrinsic::s390_vfchesbs:
Opcode = SystemZISD::VFCMPHES;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vftcidb:
case Intrinsic::s390_vftcisb:
Opcode = SystemZISD::VFTCI;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_tdc:
Opcode = SystemZISD::TDC;
CCValid = SystemZ::CCMASK_TDC;
return true;
return false;
// Emit an intrinsic with chain and an explicit CC register result.
//
// Op is the original intrinsic node and Opcode is the opcode of the node to
// create.  The new node receives operand 0 of Op followed by operands 2
// onwards (operand 1, the intrinsic ID, is dropped) and produces an i32
// value plus a chain.  Every user of Op's chain result (value 1) is
// redirected to the chain of the new node.  Returns the new node.
static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
                                           unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);

  // Anything that was chained after the original intrinsic must now be
  // chained after its replacement.
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr.getNode();
}
// Emit an intrinsic with an explicit CC register result.
//
// Op is the original intrinsic node and Opcode is the opcode of the node to
// create.  The new node receives operands 1 onwards of Op (operand 0, the
// intrinsic ID, is dropped) and has the same list of result types as Op.
// Returns the new node.
static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
                                   unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
  return Intr.getNode();
}
// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
// For a relation X, map SETX and SETOX to CCMASK_CMP_X, and SETUX to
// CCMASK_CMP_X with CCMASK_CMP_UO added.
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}
// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
//
// Only comparisons that are not unsigned-only and whose second operand is
// a constant are considered.  "> -1" and "<= -1" become ">= 0" and "< 0";
// "< 1" and ">= 1" become "<= 0" and "> 0".
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    // Toggling the "equal" bit turns the strict form into the non-strict
    // form (or vice versa), which compensates for moving the constant to 0.
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}
// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
//
// On success C.Op0 is an i32 extending load of the original 8- or 16-bit
// memory value, C.Op1 is an i32 constant, and C.ICmpType (and possibly
// C.CCMask) describe the comparison to perform on them.  C is left
// untouched if the comparison does not have the required form.
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
                             Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
  if ((NumBits != 8 && NumBits != 16) ||
      NumBits != Load->getMemoryVT().getStoreSizeInBits())
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // If the constant is in range, we can use any comparison.
    C.ICmpType = SystemZICMP::Any;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType) {
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
                           Load->getBasePtr(), Load->getPointerInfo(),
                           Load->getMemoryVT(), Load->getAlignment(),
                           Load->getMemOperand()->getFlags());
    // Update the chain uses.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
  }

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}
// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
//
// Returns false if Op is not a load at all, or if it loads a single byte.
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      // A sign-extending load only matches a comparison that may be signed.
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      // A zero-extending load only matches a comparison that may be unsigned.
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}
// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
// Leave f128 comparisons alone, since they have no memory forms.
if (C.Op0.getValueType() == MVT::f128)
return false;
// Always keep a floating-point constant second, since comparisons with
// zero can use LOAD TEST and comparisons with other constants make a
// natural memory operand.
if (isa<ConstantFPSDNode>(C.Op1))
return false;
// Never swap comparisons with zero since there are many ways to optimize
// those later.
auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
if (ConstOp1 && ConstOp1->getZExtValue() == 0)
return false;
// Also keep natural memory operands second if the loaded value is
// only used here. Several comparisons have memory forms.
if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
return false;
// Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
// In that case we generally prefer the memory to be second.
if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
// The only exceptions are when the second operand is a constant and
// we can use things like CHHSI.
if (!ConstOp1)
return true;
// The unsigned memory-immediate instructions can handle 16-bit
// unsigned integers.
if (C.ICmpType != SystemZICMP::SignedOnly &&
return false;
// The signed memory-immediate instructions can handle 16-bit
// signed integers.
if (C.ICmpType != SystemZICMP::UnsignedOnly &&
return false;
return true;
// Try to promote the use of CGFR and CLGFR.
unsigned Opcode0 = C.Op0.getOpcode();
if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
return true;
if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
return true;
if (C.ICmpType != SystemZICMP::SignedOnly &&
Opcode0 == ISD::AND &&
C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
return true;
return false;
// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed.  In that case it's better to compare the
// result of the subtraction against zero.
//
// Only EQ and NE comparisons are rewritten.  The first matching SUB found
// among the users of C.Op0 is used.
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
                                 Comparison &C) {
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::SUB &&
          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
        C.Op0 = SDValue(N, 0);
        C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
        // Stop here: C.Op0 has been replaced, so the use list being
        // walked no longer belongs to the comparison operand.
        return;
      }
    }
  }
}
// Check whether C compares a floating-point value with zero and if that
// floating-point value is also negated.  In this case we can use the
// negation to set CC, so avoiding separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
//
// On a match, C.Op0 becomes the FNEG node and C.CCMask is reversed to
// account for the change of sign.
static void adjustForFNeg(Comparison &C) {
  // This optimization is invalid for strict comparisons, since FNEG
  // does not raise any exceptions.
  if (C.Chain)
    return;
  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
  if (C1 && C1->isZero()) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::FNEG) {
        C.Op0 = SDValue(N, 0);
        C.CCMask = SystemZ::reverseCCMask(C.CCMask);
        // Apply the rewrite at most once; a second match would reverse
        // the mask again.
        return;
      }
    }
  }
}
// Check whether C compares (shl X, 32) with 0 and whether X is
// also sign-extended.  In that case it is better to test the result
// of the sign extension using LTGFR.
//
// This case is important because InstCombine transforms a comparison
// with (sext (trunc X)) into a comparison with (shl X, 32).
//
// On a match, C.Op0 becomes the existing SIGN_EXTEND_INREG (from i32) of X;
// C.Op1 stays the zero constant.
static void adjustForLTGFR(Comparison &C) {
  // Check for a comparison between (shl X, 32) and 0.
  if (C.Op0.getOpcode() == ISD::SHL &&
      C.Op0.getValueType() == MVT::i64 &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
    if (C1 && C1->getZExtValue() == 32) {
      SDValue ShlOp0 = C.Op0.getOperand(0);
      // See whether X has any SIGN_EXTEND_INREG uses.
      for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
        SDNode *N = *I;
        if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
          C.Op0 = SDValue(N, 0);
          return;
        }
      }
    }
  }
}
// If C compares the truncation of an extending load, try to compare
// the untruncated value instead.  This exposes more opportunities to
// reuse CC.
//
// The rewrite only applies to comparisons against zero, and only when the
// loaded memory type is no wider than the truncated value, so that the
// truncation discards nothing but extension bits.
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
                               Comparison &C) {
  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
      C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
    if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
        C.Op0.getValueSizeInBits().getFixedSize()) {
      unsigned Type = L->getExtensionType();
      // The kind of extension must be compatible with the signedness of
      // the comparison.
      if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
          (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
        C.Op0 = C.Op0.getOperand(0);
        // The zero constant must be recreated in the wider type of the load.
        C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
      }
    }
  }
}