blob: 6f811ea1d3e1a241997cefb82c3fc659ced43d1d [file] [log] [blame]
/* LLVM LOCAL begin (ENTIRE FILE!) */
/* High-level LLVM backend interface
Copyright (C) 2008, 2009 Apple Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA. */
//===----------------------------------------------------------------------===//
// This is a C++ source file that implements specific llvm ARM ABI.
//===----------------------------------------------------------------------===//
#include "llvm-abi.h"
#include "llvm-internal.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm-arm-target.h"
extern "C" {
#include "insn-codes.h"
#include "toplev.h"
#include "rtl.h"
#include "insn-config.h"
#include "recog.h"
static LLVMContext &Context = getGlobalContext();
enum neon_itype { neon_itype_dummy };
extern enum insn_code locate_neon_builtin_icode
(int fcode, neon_itype *itype, enum neon_builtins *neon_code);
}
static ConstantInt *getInt32Const(int c) {
return ConstantInt::get(Type::getInt32Ty(Context), c);
}
/// UnexpectedError - Report errors about unexpected uses of builtins. The
/// msg argument should begin with a "%H" so that the location of the
/// expression is printed in the error message.
static bool UnexpectedError(const char *msg, tree exp, Value *&Result) {
error(msg, &EXPR_LOCATION(exp));
// Set the Result to an undefined value.
Type *ResTy = ConvertType(TREE_TYPE(exp));
if (ResTy->isSingleValueType())
Result = UndefValue::get(ResTy);
// Return true, which can be propagated as the return value of
// TargetIntrinsicLower, to indicate that no further error message
// is needed.
return true;
}
static bool NonImmediateError(tree exp, Value *&Result) {
return UnexpectedError("%Hlast builtin argument must be an immediate",
exp, Result);
}
static bool BadImmediateError(tree exp, Value *&Result) {
return UnexpectedError("%Hunexpected immediate argument for builtin",
exp, Result);
}
static bool BadModeError(tree exp, Value *&Result) {
return UnexpectedError("%Hunexpected mode for builtin argument",
exp, Result);
}
enum neon_datatype {
neon_datatype_unspecified,
neon_datatype_signed,
neon_datatype_unsigned,
neon_datatype_float,
neon_datatype_polynomial
};
/// GetBuiltinExtraInfo - Decipher the extra integer immediate argument
/// used with many of GCC's builtins for NEON to distinguish variants of an
/// operation. The following values for that argument are used:
/// - bit0: For integer types (i.e., bit2 == 0), 0 = unsigned, 1 = signed;
/// otherwise, 0 = polynomial, 1 = float.
/// - bit1: The operation rounds its results.
/// - bit2: 0 = integer datatypes, 1 = floating-point or polynomial.
/// .
/// Returns false if the extra argument is not an integer immediate.
static bool GetBuiltinExtraInfo(const Value *extra_arg,
neon_datatype &datatype, bool &isRounded) {
const ConstantInt *arg = dyn_cast<ConstantInt>(extra_arg);
if (!arg)
return false;
int argval = arg->getZExtValue();
isRounded = ((argval & 2) != 0);
if ((argval & 4) == 0) {
if ((argval & 1) == 0)
datatype = neon_datatype_unsigned;
else
datatype = neon_datatype_signed;
} else {
if ((argval & 1) == 0)
datatype = neon_datatype_polynomial;
else
datatype = neon_datatype_float;
}
return true;
}
/// BuildConstantSplatVector - Create a ConstantVector with the same value
/// replicated in each element.
static Value *BuildConstantSplatVector(unsigned NumElements, ConstantInt *Val) {
std::vector<Constant*> CstOps;
for (unsigned i = 0; i != NumElements; ++i)
CstOps.push_back(Val);
return ConstantVector::get(CstOps);
}
/// BuildDup - Build a splat operation to duplicate a value into every
/// element of a vector.
static Value *BuildDup(Type *ResultType, Value *Val,
LLVMBuilder &Builder) {
// GCC may promote the scalar argument; cast it back.
VectorType *VTy = dyn_cast<VectorType>(ResultType);
assert(VTy && "expected a vector type");
Type *ElTy = VTy->getElementType();
if (Val->getType() != ElTy) {
assert(!ElTy->isFloatingPointTy() &&
"only integer types expected to be promoted");
Val = Builder.CreateTrunc(Val, ElTy);
}
// Insert the value into lane 0 of an undef vector.
Value *Undef = UndefValue::get(ResultType);
Value *Result = Builder.CreateInsertElement(Undef, Val, getInt32Const(0));
// Use a shuffle to move the value into the other lanes.
unsigned NUnits = VTy->getNumElements();
if (NUnits > 1) {
std::vector<Constant*> Idxs;
for (unsigned i = 0; i != NUnits; ++i)
Idxs.push_back(getInt32Const(0));
Result = Builder.CreateShuffleVector(Result, Undef,
ConstantVector::get(Idxs));
}
return Result;
}
/// BuildDupLane - Build a splat operation to take a value from one element
/// of a vector and splat it into another vector.
static Value *BuildDupLane(Value *Vec, unsigned LaneVal, unsigned NUnits,
LLVMBuilder &Builder) {
// Translate this to a vector shuffle.
std::vector<Constant*> Idxs;
for (unsigned i = 0; i != NUnits; ++i)
Idxs.push_back(getInt32Const(LaneVal));
return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
ConstantVector::get(Idxs));
}
// NEON vector shift counts must be in the range 0..ElemBits-1 for left shifts
// or 1..ElemBits for right shifts. For narrowing shifts, compare against the
// destination element size. For widening shifts, the upper bound can be
// equal to the element size. Define separate functions to check these
// constraints, so that the rest of the code for handling vector shift counts
// can be shared.
typedef bool (*ShiftCountChecker)(int Cnt, int ElemBits);
static bool CheckLeftShiftCount(int Cnt, int ElemBits) {
return (Cnt >= 0 && Cnt < ElemBits);
}
static bool CheckLongLeftShiftCount(int Cnt, int ElemBits) {
return (Cnt >= 0 && Cnt <= ElemBits);
}
static bool CheckRightShiftCount(int Cnt, int ElemBits) {
return (Cnt >= 1 && Cnt <= ElemBits);
}
static bool CheckNarrowRightShiftCount(int Cnt, int ElemBits) {
return (Cnt >= 1 && Cnt <= ElemBits / 2);
}
/// BuildShiftCountVector - Check that the shift count argument to a constant
/// shift builtin is a constant in the appropriate range for the shift
/// operation. It expands the shift count into a vector, optionally with the
/// count negated for right shifts. Returns true on success.
static bool BuildShiftCountVector(Value *&Op, enum machine_mode Mode,
ShiftCountChecker CheckCount,
bool NegateRightShift) {
ConstantInt *Cnt = dyn_cast<ConstantInt>(Op);
if (!Cnt)
return false;
int CntVal = Cnt->getSExtValue();
assert (VECTOR_MODE_P (Mode) && "expected vector mode for shift");
unsigned ElemBits = GET_MODE_BITSIZE (GET_MODE_INNER (Mode));
if (!CheckCount(CntVal, ElemBits))
return false;
// Right shifts are represented in NEON intrinsics by a negative shift count.
Cnt = ConstantInt::get(IntegerType::get(Context, ElemBits),
NegateRightShift ? -CntVal : CntVal);
Op = BuildConstantSplatVector(GET_MODE_NUNITS(Mode), Cnt);
return true;
}
/// isValidLane - Check if the lane operand for a vector intrinsic is a
/// ConstantInt in the range 0..NUnits. If pLaneVal is not null, store
/// the lane value to it.
static bool isValidLane(Value *LnOp, int NUnits, unsigned *pLaneVal = 0) {
ConstantInt *Lane = dyn_cast<ConstantInt>(LnOp);
if (!Lane)
return false;
int LaneVal = Lane->getSExtValue();
if (LaneVal < 0 || LaneVal >= NUnits)
return false;
if (pLaneVal)
*pLaneVal = LaneVal;
return true;
}
/// TargetIntrinsicLower - To handle builtins, we want to expand the
/// invocation into normal LLVM code. If the target can handle the builtin,
/// this function should emit the expanded code and return true.
bool TreeToLLVM::TargetIntrinsicLower(tree exp,
unsigned FnCode,
const MemRef *DestLoc,
Value *&Result,
Type *ResultType,
std::vector<Value*> &Ops) {
neon_datatype datatype = neon_datatype_unspecified;
bool isRounded = false;
Intrinsic::ID intID = Intrinsic::not_intrinsic;
Function *intFn;
Type* intOpTypes[2];
if (FnCode < ARM_BUILTIN_NEON_BASE)
return false;
neon_builtins neon_code;
enum insn_code icode = locate_neon_builtin_icode (FnCode, 0, &neon_code);
// Read the extra immediate argument to the builtin.
switch (neon_code) {
default:
return false;
case NEON_BUILTIN_vpaddl:
case NEON_BUILTIN_vneg:
case NEON_BUILTIN_vqneg:
case NEON_BUILTIN_vabs:
case NEON_BUILTIN_vqabs:
case NEON_BUILTIN_vcls:
case NEON_BUILTIN_vclz:
case NEON_BUILTIN_vcnt:
case NEON_BUILTIN_vrecpe:
case NEON_BUILTIN_vrsqrte:
case NEON_BUILTIN_vmvn:
case NEON_BUILTIN_vcvt:
case NEON_BUILTIN_vmovn:
case NEON_BUILTIN_vqmovn:
case NEON_BUILTIN_vqmovun:
case NEON_BUILTIN_vmovl:
case NEON_BUILTIN_vrev64:
case NEON_BUILTIN_vrev32:
case NEON_BUILTIN_vrev16:
if (!GetBuiltinExtraInfo(Ops[1], datatype, isRounded))
return NonImmediateError(exp, Result);
break;
case NEON_BUILTIN_vadd:
case NEON_BUILTIN_vaddl:
case NEON_BUILTIN_vaddw:
case NEON_BUILTIN_vhadd:
case NEON_BUILTIN_vqadd:
case NEON_BUILTIN_vaddhn:
case NEON_BUILTIN_vmul:
case NEON_BUILTIN_vqdmulh:
case NEON_BUILTIN_vmull:
case NEON_BUILTIN_vqdmull:
case NEON_BUILTIN_vsub:
case NEON_BUILTIN_vsubl:
case NEON_BUILTIN_vsubw:
case NEON_BUILTIN_vqsub:
case NEON_BUILTIN_vhsub:
case NEON_BUILTIN_vsubhn:
case NEON_BUILTIN_vceq:
case NEON_BUILTIN_vcge:
case NEON_BUILTIN_vcgt:
case NEON_BUILTIN_vcage:
case NEON_BUILTIN_vcagt:
case NEON_BUILTIN_vtst:
case NEON_BUILTIN_vabd:
case NEON_BUILTIN_vabdl:
case NEON_BUILTIN_vmax:
case NEON_BUILTIN_vmin:
case NEON_BUILTIN_vpadd:
case NEON_BUILTIN_vpadal:
case NEON_BUILTIN_vpmax:
case NEON_BUILTIN_vpmin:
case NEON_BUILTIN_vrecps:
case NEON_BUILTIN_vrsqrts:
case NEON_BUILTIN_vshl:
case NEON_BUILTIN_vqshl:
case NEON_BUILTIN_vshr_n:
case NEON_BUILTIN_vshrn_n:
case NEON_BUILTIN_vqshrn_n:
case NEON_BUILTIN_vqshrun_n:
case NEON_BUILTIN_vshl_n:
case NEON_BUILTIN_vqshl_n:
case NEON_BUILTIN_vqshlu_n:
case NEON_BUILTIN_vshll_n:
case NEON_BUILTIN_vget_lane:
case NEON_BUILTIN_vcvt_n:
case NEON_BUILTIN_vmul_n:
case NEON_BUILTIN_vmull_n:
case NEON_BUILTIN_vqdmull_n:
case NEON_BUILTIN_vqdmulh_n:
case NEON_BUILTIN_vand:
case NEON_BUILTIN_vorr:
case NEON_BUILTIN_veor:
case NEON_BUILTIN_vbic:
case NEON_BUILTIN_vorn:
if (!GetBuiltinExtraInfo(Ops[2], datatype, isRounded))
return NonImmediateError(exp, Result);
break;
case NEON_BUILTIN_vmla:
case NEON_BUILTIN_vmls:
case NEON_BUILTIN_vmlal:
case NEON_BUILTIN_vmlsl:
case NEON_BUILTIN_vqdmlal:
case NEON_BUILTIN_vqdmlsl:
case NEON_BUILTIN_vaba:
case NEON_BUILTIN_vabal:
case NEON_BUILTIN_vsra_n:
case NEON_BUILTIN_vmul_lane:
case NEON_BUILTIN_vmull_lane:
case NEON_BUILTIN_vqdmull_lane:
case NEON_BUILTIN_vqdmulh_lane:
case NEON_BUILTIN_vmla_n:
case NEON_BUILTIN_vmlal_n:
case NEON_BUILTIN_vqdmlal_n:
case NEON_BUILTIN_vmls_n:
case NEON_BUILTIN_vmlsl_n:
case NEON_BUILTIN_vqdmlsl_n:
if (!GetBuiltinExtraInfo(Ops[3], datatype, isRounded))
return NonImmediateError(exp, Result);
break;
case NEON_BUILTIN_vmla_lane:
case NEON_BUILTIN_vmlal_lane:
case NEON_BUILTIN_vqdmlal_lane:
case NEON_BUILTIN_vmls_lane:
case NEON_BUILTIN_vmlsl_lane:
case NEON_BUILTIN_vqdmlsl_lane:
if (!GetBuiltinExtraInfo(Ops[4], datatype, isRounded))
return NonImmediateError(exp, Result);
break;
case NEON_BUILTIN_vsri_n:
case NEON_BUILTIN_vsli_n:
case NEON_BUILTIN_vset_lane:
case NEON_BUILTIN_vcreate:
case NEON_BUILTIN_vdup_n:
case NEON_BUILTIN_vdup_lane:
case NEON_BUILTIN_vcombine:
case NEON_BUILTIN_vget_high:
case NEON_BUILTIN_vget_low:
case NEON_BUILTIN_vtbl1:
case NEON_BUILTIN_vtbl2:
case NEON_BUILTIN_vtbl3:
case NEON_BUILTIN_vtbl4:
case NEON_BUILTIN_vtbx1:
case NEON_BUILTIN_vtbx2:
case NEON_BUILTIN_vtbx3:
case NEON_BUILTIN_vtbx4:
case NEON_BUILTIN_vext:
case NEON_BUILTIN_vbsl:
case NEON_BUILTIN_vtrn:
case NEON_BUILTIN_vzip:
case NEON_BUILTIN_vuzp:
case NEON_BUILTIN_vld1:
case NEON_BUILTIN_vld2:
case NEON_BUILTIN_vld3:
case NEON_BUILTIN_vld4:
case NEON_BUILTIN_vld1_lane:
case NEON_BUILTIN_vld2_lane:
case NEON_BUILTIN_vld3_lane:
case NEON_BUILTIN_vld4_lane:
case NEON_BUILTIN_vld1_dup:
case NEON_BUILTIN_vld2_dup:
case NEON_BUILTIN_vld3_dup:
case NEON_BUILTIN_vld4_dup:
case NEON_BUILTIN_vst1:
case NEON_BUILTIN_vst2:
case NEON_BUILTIN_vst3:
case NEON_BUILTIN_vst4:
case NEON_BUILTIN_vst1_lane:
case NEON_BUILTIN_vst2_lane:
case NEON_BUILTIN_vst3_lane:
case NEON_BUILTIN_vst4_lane:
case NEON_BUILTIN_vreinterpretv8qi:
case NEON_BUILTIN_vreinterpretv4hi:
case NEON_BUILTIN_vreinterpretv2si:
case NEON_BUILTIN_vreinterpretv2sf:
case NEON_BUILTIN_vreinterpretv1di:
case NEON_BUILTIN_vreinterpretv16qi:
case NEON_BUILTIN_vreinterpretv8hi:
case NEON_BUILTIN_vreinterpretv4si:
case NEON_BUILTIN_vreinterpretv4sf:
case NEON_BUILTIN_vreinterpretv2di:
// No extra argument used here.
break;
}
// Check that the isRounded flag is only set when it is supported.
if (isRounded) {
switch (neon_code) {
case NEON_BUILTIN_vhadd:
case NEON_BUILTIN_vaddhn:
case NEON_BUILTIN_vqdmulh:
case NEON_BUILTIN_vsubhn:
case NEON_BUILTIN_vshl:
case NEON_BUILTIN_vqshl:
case NEON_BUILTIN_vshr_n:
case NEON_BUILTIN_vshrn_n:
case NEON_BUILTIN_vqshrn_n:
case NEON_BUILTIN_vqshrun_n:
case NEON_BUILTIN_vsra_n:
case NEON_BUILTIN_vqdmulh_lane:
case NEON_BUILTIN_vqdmulh_n:
// These all support a rounded variant.
break;
default:
return BadImmediateError(exp, Result);
}
}
// Check for supported vector modes.
// Set defaults for mode checking.
int modeCheckOpnd = 1;
bool allow_64bit_modes = true;
bool allow_128bit_modes = true;
bool allow_8bit_elements = true;
bool allow_16bit_elements = true;
bool allow_32bit_elements = true;
bool allow_64bit_elements = false;
bool allow_16bit_polynomials = false;
switch (neon_code) {
default:
assert(0 && "unexpected builtin");
break;
case NEON_BUILTIN_vadd:
case NEON_BUILTIN_vsub:
case NEON_BUILTIN_vqadd:
case NEON_BUILTIN_vqsub:
case NEON_BUILTIN_vshl:
case NEON_BUILTIN_vqshl:
case NEON_BUILTIN_vshr_n:
case NEON_BUILTIN_vshl_n:
case NEON_BUILTIN_vqshl_n:
case NEON_BUILTIN_vqshlu_n:
case NEON_BUILTIN_vsra_n:
case NEON_BUILTIN_vsri_n:
case NEON_BUILTIN_vsli_n:
case NEON_BUILTIN_vmvn:
case NEON_BUILTIN_vext:
case NEON_BUILTIN_vbsl:
case NEON_BUILTIN_vand:
case NEON_BUILTIN_vorr:
case NEON_BUILTIN_veor:
case NEON_BUILTIN_vbic:
case NEON_BUILTIN_vorn:
case NEON_BUILTIN_vdup_lane:
allow_64bit_elements = true;
break;
case NEON_BUILTIN_vhadd:
case NEON_BUILTIN_vhsub:
case NEON_BUILTIN_vmul:
case NEON_BUILTIN_vceq:
case NEON_BUILTIN_vcge:
case NEON_BUILTIN_vcgt:
case NEON_BUILTIN_vcage:
case NEON_BUILTIN_vcagt:
case NEON_BUILTIN_vtst:
case NEON_BUILTIN_vabd:
case NEON_BUILTIN_vabdl:
case NEON_BUILTIN_vaba:
case NEON_BUILTIN_vmax:
case NEON_BUILTIN_vmin:
case NEON_BUILTIN_vpaddl:
case NEON_BUILTIN_vrecps:
case NEON_BUILTIN_vrsqrts:
case NEON_BUILTIN_vneg:
case NEON_BUILTIN_vqneg:
case NEON_BUILTIN_vabs:
case NEON_BUILTIN_vqabs:
case NEON_BUILTIN_vcls:
case NEON_BUILTIN_vclz:
case NEON_BUILTIN_vtrn:
case NEON_BUILTIN_vzip:
case NEON_BUILTIN_vuzp:
break;
case NEON_BUILTIN_vabal:
case NEON_BUILTIN_vmla:
case NEON_BUILTIN_vmls:
case NEON_BUILTIN_vpadal:
modeCheckOpnd = 2;
break;
case NEON_BUILTIN_vaddhn:
case NEON_BUILTIN_vsubhn:
case NEON_BUILTIN_vshrn_n:
case NEON_BUILTIN_vqshrn_n:
case NEON_BUILTIN_vqshrun_n:
case NEON_BUILTIN_vmovn:
case NEON_BUILTIN_vqmovn:
case NEON_BUILTIN_vqmovun:
allow_64bit_modes = false;
allow_8bit_elements = false;
allow_64bit_elements = true;
break;
case NEON_BUILTIN_vqdmulh:
case NEON_BUILTIN_vqdmulh_lane:
case NEON_BUILTIN_vqdmulh_n:
case NEON_BUILTIN_vmul_lane:
case NEON_BUILTIN_vmul_n:
case NEON_BUILTIN_vmla_lane:
case NEON_BUILTIN_vmla_n:
case NEON_BUILTIN_vmls_lane:
case NEON_BUILTIN_vmls_n:
allow_8bit_elements = false;
break;
case NEON_BUILTIN_vqdmull:
case NEON_BUILTIN_vqdmull_lane:
case NEON_BUILTIN_vqdmull_n:
case NEON_BUILTIN_vmull_lane:
case NEON_BUILTIN_vmull_n:
allow_128bit_modes = false;
allow_8bit_elements = false;
break;
case NEON_BUILTIN_vqdmlal:
case NEON_BUILTIN_vqdmlal_lane:
case NEON_BUILTIN_vqdmlal_n:
case NEON_BUILTIN_vqdmlsl:
case NEON_BUILTIN_vmlal_lane:
case NEON_BUILTIN_vmlal_n:
case NEON_BUILTIN_vmlsl_lane:
case NEON_BUILTIN_vmlsl_n:
case NEON_BUILTIN_vqdmlsl_lane:
case NEON_BUILTIN_vqdmlsl_n:
modeCheckOpnd = 2;
allow_128bit_modes = false;
allow_8bit_elements = false;
break;
case NEON_BUILTIN_vaddw:
case NEON_BUILTIN_vmlal:
case NEON_BUILTIN_vmlsl:
case NEON_BUILTIN_vsubw:
modeCheckOpnd = 2;
allow_128bit_modes = false;
break;
case NEON_BUILTIN_vaddl:
case NEON_BUILTIN_vmull:
case NEON_BUILTIN_vsubl:
case NEON_BUILTIN_vpadd:
case NEON_BUILTIN_vpmax:
case NEON_BUILTIN_vpmin:
case NEON_BUILTIN_vshll_n:
case NEON_BUILTIN_vmovl:
allow_128bit_modes = false;
break;
case NEON_BUILTIN_vcnt:
allow_16bit_elements = false;
allow_32bit_elements = false;
break;
case NEON_BUILTIN_vtbl1:
case NEON_BUILTIN_vtbl2:
case NEON_BUILTIN_vtbl3:
case NEON_BUILTIN_vtbl4:
case NEON_BUILTIN_vtbx1:
case NEON_BUILTIN_vtbx2:
case NEON_BUILTIN_vtbx3:
case NEON_BUILTIN_vtbx4:
allow_16bit_elements = false;
allow_32bit_elements = false;
allow_128bit_modes = false;
modeCheckOpnd = 0;
break;
case NEON_BUILTIN_vrecpe:
case NEON_BUILTIN_vrsqrte:
case NEON_BUILTIN_vcvt:
case NEON_BUILTIN_vcvt_n:
allow_8bit_elements = false;
allow_16bit_elements = false;
break;
case NEON_BUILTIN_vget_lane:
allow_64bit_elements = true;
allow_16bit_polynomials = true;
break;
case NEON_BUILTIN_vset_lane:
allow_64bit_elements = true;
allow_16bit_polynomials = true;
modeCheckOpnd = 2;
break;
case NEON_BUILTIN_vrev64:
allow_16bit_polynomials = true;
break;
case NEON_BUILTIN_vrev32:
allow_16bit_polynomials = true;
allow_32bit_elements = false;
break;
case NEON_BUILTIN_vrev16:
allow_16bit_elements = false;
allow_32bit_elements = false;
break;
case NEON_BUILTIN_vcreate:
modeCheckOpnd = 0;
allow_128bit_modes = false;
allow_64bit_elements = true;
break;
case NEON_BUILTIN_vdup_n:
modeCheckOpnd = 0;
allow_64bit_elements = true;
break;
case NEON_BUILTIN_vcombine:
case NEON_BUILTIN_vreinterpretv8qi:
case NEON_BUILTIN_vreinterpretv4hi:
case NEON_BUILTIN_vreinterpretv2si:
case NEON_BUILTIN_vreinterpretv2sf:
case NEON_BUILTIN_vreinterpretv1di:
allow_128bit_modes = false;
allow_64bit_elements = true;
break;
case NEON_BUILTIN_vget_high:
case NEON_BUILTIN_vget_low:
case NEON_BUILTIN_vreinterpretv16qi:
case NEON_BUILTIN_vreinterpretv8hi:
case NEON_BUILTIN_vreinterpretv4si:
case NEON_BUILTIN_vreinterpretv4sf:
case NEON_BUILTIN_vreinterpretv2di:
allow_64bit_modes = false;
allow_64bit_elements = true;
break;
case NEON_BUILTIN_vld1:
case NEON_BUILTIN_vld2:
case NEON_BUILTIN_vld3:
case NEON_BUILTIN_vld4:
case NEON_BUILTIN_vld1_lane:
case NEON_BUILTIN_vld2_lane:
case NEON_BUILTIN_vld3_lane:
case NEON_BUILTIN_vld4_lane:
case NEON_BUILTIN_vld1_dup:
case NEON_BUILTIN_vld2_dup:
case NEON_BUILTIN_vld3_dup:
case NEON_BUILTIN_vld4_dup:
case NEON_BUILTIN_vst1:
case NEON_BUILTIN_vst2:
case NEON_BUILTIN_vst3:
case NEON_BUILTIN_vst4:
case NEON_BUILTIN_vst1_lane:
case NEON_BUILTIN_vst2_lane:
case NEON_BUILTIN_vst3_lane:
case NEON_BUILTIN_vst4_lane:
// Most of the load/store builtins do not have operands with the mode of
// the operation. Skip the mode check, since there is no extra operand
// to check against the mode anyway.
modeCheckOpnd = -1;
break;
}
if (modeCheckOpnd >= 0) {
switch (insn_data[icode].operand[modeCheckOpnd].mode) {
case V8QImode: case V4HImode: case V2SImode: case V1DImode: case V2SFmode:
if (!allow_64bit_modes)
return BadModeError(exp, Result);
break;
case V16QImode: case V8HImode: case V4SImode: case V2DImode: case V4SFmode:
if (!allow_128bit_modes)
return BadModeError(exp, Result);
break;
default:
return BadModeError(exp, Result);
}
if (datatype == neon_datatype_polynomial) {
switch (insn_data[icode].operand[modeCheckOpnd].mode) {
case V8QImode: case V16QImode:
break;
case V4HImode: case V8HImode:
if (!allow_16bit_polynomials)
return BadModeError(exp, Result);
break;
default:
return BadModeError(exp, Result);
}
} else if (datatype == neon_datatype_float) {
switch (insn_data[icode].operand[modeCheckOpnd].mode) {
case V2SFmode: case V4SFmode:
break;
default:
return BadModeError(exp, Result);
}
} else {
switch (insn_data[icode].operand[modeCheckOpnd].mode) {
case V8QImode: case V16QImode:
if (!allow_8bit_elements)
return BadModeError(exp, Result);
break;
case V4HImode: case V8HImode:
if (!allow_16bit_elements)
return BadModeError(exp, Result);
break;
case V2SImode: case V4SImode:
case V2SFmode: case V4SFmode:
if (!allow_32bit_elements)
return BadModeError(exp, Result);
break;
case V1DImode: case V2DImode:
if (!allow_64bit_elements)
return BadModeError(exp, Result);
break;
default:
return BadModeError(exp, Result);
}
}
}
// Now translate the builtin to LLVM.
switch (neon_code) {
default:
assert(0 && "unimplemented builtin");
break;
case NEON_BUILTIN_vadd:
if (datatype == neon_datatype_polynomial)
return BadImmediateError(exp, Result);
if (datatype == neon_datatype_float)
Result = Builder.CreateFAdd(Ops[0], Ops[1]);
else
Result = Builder.CreateAdd(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vaddl:
if (datatype == neon_datatype_signed) {
Ops[0] = Builder.CreateSExt(Ops[0], ResultType);
Ops[1] = Builder.CreateSExt(Ops[1], ResultType);
} else if (datatype == neon_datatype_unsigned) {
Ops[0] = Builder.CreateZExt(Ops[0], ResultType);
Ops[1] = Builder.CreateZExt(Ops[1], ResultType);
} else
return BadImmediateError(exp, Result);
Result = Builder.CreateAdd(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vaddw:
if (datatype == neon_datatype_signed)
Ops[1] = Builder.CreateSExt(Ops[1], ResultType);
else if (datatype == neon_datatype_unsigned)
Ops[1] = Builder.CreateZExt(Ops[1], ResultType);
else
return BadImmediateError(exp, Result);
Result = Builder.CreateAdd(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vhadd:
if (datatype == neon_datatype_signed)
intID = (isRounded ?
Intrinsic::arm_neon_vrhadds :
Intrinsic::arm_neon_vhadds);
else if (datatype == neon_datatype_unsigned)
intID = (isRounded ?
Intrinsic::arm_neon_vrhaddu :
Intrinsic::arm_neon_vhaddu);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vqadd:
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vqadds;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vqaddu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vaddhn:
if (datatype == neon_datatype_signed ||
datatype == neon_datatype_unsigned)
intID = (isRounded ?
Intrinsic::arm_neon_vraddhn :
Intrinsic::arm_neon_vaddhn);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vmul_lane:
case NEON_BUILTIN_vmul_n:
if (datatype == neon_datatype_polynomial)
return BadImmediateError(exp, Result);
// fall through....
case NEON_BUILTIN_vmul:
if (neon_code == NEON_BUILTIN_vmul_n) {
Ops[1] = BuildDup(Ops[0]->getType(), Ops[1], Builder);
} else if (neon_code == NEON_BUILTIN_vmul_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
if (!isValidLane(Ops[2], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[1] = BuildDupLane(Ops[1], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_polynomial) {
intID = Intrinsic::arm_neon_vmulp;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
} else if (datatype == neon_datatype_float)
Result = Builder.CreateFMul(Ops[0], Ops[1]);
else
Result = Builder.CreateMul(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vmla_lane:
case NEON_BUILTIN_vmla_n:
case NEON_BUILTIN_vmla:
if (neon_code == NEON_BUILTIN_vmla_n) {
Ops[2] = BuildDup(Ops[1]->getType(), Ops[2], Builder);
} else if (neon_code == NEON_BUILTIN_vmla_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[2].mode);
if (!isValidLane(Ops[3], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[2] = BuildDupLane(Ops[2], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_polynomial)
return BadImmediateError(exp, Result);
if (datatype == neon_datatype_float)
Result = Builder.CreateFAdd(Ops[0], Builder.CreateFMul(Ops[1], Ops[2]));
else
Result = Builder.CreateAdd(Ops[0], Builder.CreateMul(Ops[1], Ops[2]));
break;
case NEON_BUILTIN_vmls_lane:
case NEON_BUILTIN_vmls_n:
case NEON_BUILTIN_vmls:
if (neon_code == NEON_BUILTIN_vmls_n) {
Ops[2] = BuildDup(Ops[1]->getType(), Ops[2], Builder);
} else if (neon_code == NEON_BUILTIN_vmls_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[2].mode);
if (!isValidLane(Ops[3], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[2] = BuildDupLane(Ops[2], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_polynomial)
return BadImmediateError(exp, Result);
if (datatype == neon_datatype_float)
Result = Builder.CreateFSub(Ops[0], Builder.CreateFMul(Ops[1], Ops[2]));
else
Result = Builder.CreateSub(Ops[0], Builder.CreateMul(Ops[1], Ops[2]));
break;
case NEON_BUILTIN_vmlal_lane:
case NEON_BUILTIN_vmlal_n:
case NEON_BUILTIN_vmlal:
if (neon_code == NEON_BUILTIN_vmlal_n) {
Ops[2] = BuildDup(Ops[1]->getType(), Ops[2], Builder);
} else if (neon_code == NEON_BUILTIN_vmlal_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[2].mode);
if (!isValidLane(Ops[3], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[2] = BuildDupLane(Ops[2], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vmulls;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vmullu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[1], Ops[2]);
Result = Builder.CreateAdd(Ops[0], Result);
break;
case NEON_BUILTIN_vmlsl_lane:
case NEON_BUILTIN_vmlsl_n:
case NEON_BUILTIN_vmlsl:
if (neon_code == NEON_BUILTIN_vmlsl_n) {
Ops[2] = BuildDup(Ops[1]->getType(), Ops[2], Builder);
} else if (neon_code == NEON_BUILTIN_vmlsl_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[2].mode);
if (!isValidLane(Ops[3], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[2] = BuildDupLane(Ops[2], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vmulls;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vmullu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[1], Ops[2]);
Result = Builder.CreateSub(Ops[0], Result);
break;
case NEON_BUILTIN_vqdmulh_lane:
case NEON_BUILTIN_vqdmulh_n:
case NEON_BUILTIN_vqdmulh:
if (neon_code == NEON_BUILTIN_vqdmulh_n) {
Ops[1] = BuildDup(Ops[0]->getType(), Ops[1], Builder);
} else if (neon_code == NEON_BUILTIN_vqdmulh_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
if (!isValidLane(Ops[2], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[1] = BuildDupLane(Ops[1], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_signed)
intID = (isRounded ?
Intrinsic::arm_neon_vqrdmulh :
Intrinsic::arm_neon_vqdmulh);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vqdmlal_lane:
case NEON_BUILTIN_vqdmlal_n:
case NEON_BUILTIN_vqdmlal:
if (neon_code == NEON_BUILTIN_vqdmlal_n) {
Ops[2] = BuildDup(Ops[1]->getType(), Ops[2], Builder);
} else if (neon_code == NEON_BUILTIN_vqdmlal_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[2].mode);
if (!isValidLane(Ops[3], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[2] = BuildDupLane(Ops[2], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vqdmlal;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall3(intFn, Ops[0], Ops[1], Ops[2]);
break;
case NEON_BUILTIN_vqdmlsl_lane:
case NEON_BUILTIN_vqdmlsl_n:
case NEON_BUILTIN_vqdmlsl:
if (neon_code == NEON_BUILTIN_vqdmlsl_n) {
Ops[2] = BuildDup(Ops[1]->getType(), Ops[2], Builder);
} else if (neon_code == NEON_BUILTIN_vqdmlsl_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[2].mode);
if (!isValidLane(Ops[3], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[2] = BuildDupLane(Ops[2], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vqdmlsl;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall3(intFn, Ops[0], Ops[1], Ops[2]);
break;
case NEON_BUILTIN_vmull_lane:
case NEON_BUILTIN_vmull_n:
case NEON_BUILTIN_vmull:
if (neon_code == NEON_BUILTIN_vmull_n) {
Ops[1] = BuildDup(Ops[0]->getType(), Ops[1], Builder);
} else if (neon_code == NEON_BUILTIN_vmull_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
if (!isValidLane(Ops[2], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[1] = BuildDupLane(Ops[1], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_polynomial)
intID = Intrinsic::arm_neon_vmullp;
else if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vmulls;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vmullu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vqdmull_n:
case NEON_BUILTIN_vqdmull_lane:
case NEON_BUILTIN_vqdmull:
if (neon_code == NEON_BUILTIN_vqdmull_n) {
Ops[1] = BuildDup(Ops[0]->getType(), Ops[1], Builder);
} else if (neon_code == NEON_BUILTIN_vqdmull_lane) {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
if (!isValidLane(Ops[2], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Ops[1] = BuildDupLane(Ops[1], LaneVal, NUnits, Builder);
}
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vqdmull;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vshl_n:
if (!BuildShiftCountVector(Ops[1], insn_data[icode].operand[1].mode,
CheckLeftShiftCount, false))
return UnexpectedError("%Hinvalid shift count", exp, Result);
if (datatype == neon_datatype_signed ||
datatype == neon_datatype_unsigned)
Result = Builder.CreateShl(Ops[0], Ops[1]);
else
return BadImmediateError(exp, Result);
break;
case NEON_BUILTIN_vshr_n:
if (!BuildShiftCountVector(Ops[1], insn_data[icode].operand[1].mode,
CheckRightShiftCount, isRounded))
return UnexpectedError("%Hinvalid shift count", exp, Result);
if (!isRounded) {
if (datatype == neon_datatype_signed)
Result = Builder.CreateAShr(Ops[0], Ops[1]);
else if (datatype == neon_datatype_unsigned)
Result = Builder.CreateLShr(Ops[0], Ops[1]);
else
return BadImmediateError(exp, Result);
break;
}
// fall through....
case NEON_BUILTIN_vshl:
if (datatype == neon_datatype_signed)
intID = (isRounded ?
Intrinsic::arm_neon_vrshifts :
Intrinsic::arm_neon_vshifts);
else if (datatype == neon_datatype_unsigned)
intID = (isRounded ?
Intrinsic::arm_neon_vrshiftu :
Intrinsic::arm_neon_vshiftu);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vshrn_n:
if (!BuildShiftCountVector(Ops[1], insn_data[icode].operand[1].mode,
CheckNarrowRightShiftCount, true))
return UnexpectedError("%Hinvalid shift count", exp, Result);
if (datatype == neon_datatype_signed ||
datatype == neon_datatype_unsigned)
intID = (isRounded ?
Intrinsic::arm_neon_vrshiftn :
Intrinsic::arm_neon_vshiftn);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vqshl_n:
if (!BuildShiftCountVector(Ops[1], insn_data[icode].operand[1].mode,
CheckLeftShiftCount, false))
return UnexpectedError("%Hinvalid shift count", exp, Result);
// fall through....
case NEON_BUILTIN_vqshl:
if (datatype == neon_datatype_signed)
intID = (isRounded ?
Intrinsic::arm_neon_vqrshifts :
Intrinsic::arm_neon_vqshifts);
else if (datatype == neon_datatype_unsigned)
intID = (isRounded ?
Intrinsic::arm_neon_vqrshiftu :
Intrinsic::arm_neon_vqshiftu);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vqshlu_n:
if (!BuildShiftCountVector(Ops[1], insn_data[icode].operand[1].mode,
CheckLeftShiftCount, false))
return UnexpectedError("%Hinvalid shift count", exp, Result);
if (datatype != neon_datatype_signed)
return BadImmediateError(exp, Result);
intID = Intrinsic::arm_neon_vqshiftsu;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vqshrn_n:
if (!BuildShiftCountVector(Ops[1], insn_data[icode].operand[1].mode,
CheckNarrowRightShiftCount, true))
return UnexpectedError("%Hinvalid shift count", exp, Result);
if (datatype == neon_datatype_signed)
intID = (isRounded ?
Intrinsic::arm_neon_vqrshiftns :
Intrinsic::arm_neon_vqshiftns);
else if (datatype == neon_datatype_unsigned)
intID = (isRounded ?
Intrinsic::arm_neon_vqrshiftnu :
Intrinsic::arm_neon_vqshiftnu);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vqshrun_n:
if (!BuildShiftCountVector(Ops[1], insn_data[icode].operand[1].mode,
CheckNarrowRightShiftCount, true))
return UnexpectedError("%Hinvalid shift count", exp, Result);
if (datatype == neon_datatype_signed)
intID = (isRounded ?
Intrinsic::arm_neon_vqrshiftnsu :
Intrinsic::arm_neon_vqshiftnsu);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vshll_n:
if (!BuildShiftCountVector(Ops[1], insn_data[icode].operand[1].mode,
CheckLongLeftShiftCount, false))
return UnexpectedError("%Hinvalid shift count", exp, Result);
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vshiftls;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vshiftlu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vsra_n:
if (!BuildShiftCountVector(Ops[2], insn_data[icode].operand[1].mode,
CheckRightShiftCount, isRounded))
return UnexpectedError("%Hinvalid shift count", exp, Result);
if (!isRounded) {
if (datatype == neon_datatype_signed)
Result = Builder.CreateAShr(Ops[1], Ops[2]);
else if (datatype == neon_datatype_unsigned)
Result = Builder.CreateLShr(Ops[1], Ops[2]);
else
return BadImmediateError(exp, Result);
} else {
if (datatype == neon_datatype_signed)
intID = (isRounded ?
Intrinsic::arm_neon_vrshifts :
Intrinsic::arm_neon_vshifts);
else if (datatype == neon_datatype_unsigned)
intID = (isRounded ?
Intrinsic::arm_neon_vrshiftu :
Intrinsic::arm_neon_vshiftu);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[1], Ops[2]);
}
Result = Builder.CreateAdd(Ops[0], Result);
break;
case NEON_BUILTIN_vsub:
if (datatype == neon_datatype_polynomial)
return BadImmediateError(exp, Result);
if (datatype == neon_datatype_float)
Result = Builder.CreateFSub(Ops[0], Ops[1]);
else
Result = Builder.CreateSub(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vsubl:
if (datatype == neon_datatype_signed) {
Ops[0] = Builder.CreateSExt(Ops[0], ResultType);
Ops[1] = Builder.CreateSExt(Ops[1], ResultType);
} else if (datatype == neon_datatype_unsigned) {
Ops[0] = Builder.CreateZExt(Ops[0], ResultType);
Ops[1] = Builder.CreateZExt(Ops[1], ResultType);
} else
return BadImmediateError(exp, Result);
Result = Builder.CreateSub(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vsubw:
if (datatype == neon_datatype_signed)
Ops[1] = Builder.CreateSExt(Ops[1], ResultType);
else if (datatype == neon_datatype_unsigned)
Ops[1] = Builder.CreateZExt(Ops[1], ResultType);
else
return BadImmediateError(exp, Result);
Result = Builder.CreateSub(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vqsub:
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vqsubs;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vqsubu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vhsub:
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vhsubs;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vhsubu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vsubhn:
if (datatype == neon_datatype_signed ||
datatype == neon_datatype_unsigned)
intID = (isRounded ?
Intrinsic::arm_neon_vrsubhn :
Intrinsic::arm_neon_vsubhn);
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vceq:
if (datatype == neon_datatype_float)
Result = Builder.CreateFCmp(FCmpInst::FCMP_OEQ, Ops[0], Ops[1]);
else
Result = Builder.CreateICmp(ICmpInst::ICMP_EQ, Ops[0], Ops[1]);
Result = Builder.CreateSExt(Result, ResultType);
break;
case NEON_BUILTIN_vcge:
if (datatype == neon_datatype_float)
Result = Builder.CreateFCmp(FCmpInst::FCMP_OGE, Ops[0], Ops[1]);
else if (datatype == neon_datatype_signed)
Result = Builder.CreateICmp(ICmpInst::ICMP_SGE, Ops[0], Ops[1]);
else if (datatype == neon_datatype_unsigned)
Result = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[0], Ops[1]);
else
return BadImmediateError(exp, Result);
Result = Builder.CreateSExt(Result, ResultType);
break;
case NEON_BUILTIN_vcgt:
if (datatype == neon_datatype_float)
Result = Builder.CreateFCmp(FCmpInst::FCMP_OGT, Ops[0], Ops[1]);
else if (datatype == neon_datatype_signed)
Result = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
else if (datatype == neon_datatype_unsigned)
Result = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
else
return BadImmediateError(exp, Result);
Result = Builder.CreateSExt(Result, ResultType);
break;
case NEON_BUILTIN_vcage:
if (datatype != neon_datatype_float)
return BadImmediateError(exp, Result);
switch (insn_data[icode].operand[1].mode) {
case V2SFmode:
intID = Intrinsic::arm_neon_vacged;
break;
case V4SFmode:
intID = Intrinsic::arm_neon_vacgeq;
break;
default:
return BadModeError(exp, Result);
}
intFn = Intrinsic::getDeclaration(TheModule, intID);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vcagt:
if (datatype != neon_datatype_float)
return BadImmediateError(exp, Result);
switch (insn_data[icode].operand[1].mode) {
case V2SFmode:
intID = Intrinsic::arm_neon_vacgtd;
break;
case V4SFmode:
intID = Intrinsic::arm_neon_vacgtq;
break;
default:
return BadModeError(exp, Result);
}
intFn = Intrinsic::getDeclaration(TheModule, intID);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vtst:
if (datatype == neon_datatype_float)
return BadImmediateError(exp, Result);
Result = Builder.CreateICmp(ICmpInst::ICMP_NE,
Builder.CreateAnd(Ops[0], Ops[1]),
ConstantAggregateZero::get(ResultType));
Result = Builder.CreateSExt(Result, ResultType);
break;
case NEON_BUILTIN_vabd:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vabds;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vabdu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vabdl: {
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vabds;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vabdu;
else
return BadImmediateError(exp, Result);
VectorType *VTy = dyn_cast<VectorType>(ResultType);
assert(VTy && "expected a vector type for vabdl result");
llvm::Type *DTy = VectorType::getTruncatedElementVectorType(VTy);
intFn = Intrinsic::getDeclaration(TheModule, intID, DTy);
Ops[0] = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
Result = Builder.CreateZExt(Ops[0], ResultType);
break;
}
case NEON_BUILTIN_vaba:
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vabds;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vabdu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Ops[1] = Builder.CreateCall2(intFn, Ops[1], Ops[2]);
Result = Builder.CreateAdd(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vabal: {
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vabds;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vabdu;
else
return BadImmediateError(exp, Result);
VectorType *VTy = dyn_cast<VectorType>(ResultType);
assert(VTy && "expected a vector type for vabal result");
llvm::Type *DTy = VectorType::getTruncatedElementVectorType(VTy);
intFn = Intrinsic::getDeclaration(TheModule, intID, DTy);
Ops[1] = Builder.CreateCall2(intFn, Ops[1], Ops[2]);
Ops[1] = Builder.CreateZExt(Ops[1], ResultType);
Result = Builder.CreateAdd(Ops[0], Ops[1]);
break;
}
case NEON_BUILTIN_vmax:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vmaxs;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vmaxu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vmin:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vmins;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vminu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vpadd:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_signed ||
datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vpadd;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vpaddl:
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vpaddls;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vpaddlu;
else
return BadImmediateError(exp, Result);
intOpTypes[0] = ResultType;
intOpTypes[1] = Ops[0]->getType();
intFn = Intrinsic::getDeclaration(TheModule, intID, intOpTypes);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vpadal:
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vpadals;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vpadalu;
else
return BadImmediateError(exp, Result);
intOpTypes[0] = ResultType;
intOpTypes[1] = Ops[1]->getType();
intFn = Intrinsic::getDeclaration(TheModule, intID, intOpTypes);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vpmax:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vpmaxs;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vpmaxu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vpmin:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vpmins;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vpminu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vrecps:
if (datatype != neon_datatype_float)
return BadImmediateError(exp, Result);
intID = Intrinsic::arm_neon_vrecps;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vrsqrts:
if (datatype != neon_datatype_float)
return BadImmediateError(exp, Result);
intID = Intrinsic::arm_neon_vrsqrts;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vsri_n:
if (!BuildShiftCountVector(Ops[2], insn_data[icode].operand[1].mode,
CheckRightShiftCount, true))
return UnexpectedError("%Hinvalid shift count", exp, Result);
intID = Intrinsic::arm_neon_vshiftins;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall3(intFn, Ops[0], Ops[1], Ops[2]);
break;
case NEON_BUILTIN_vsli_n:
if (!BuildShiftCountVector(Ops[2], insn_data[icode].operand[1].mode,
CheckLeftShiftCount, false))
return UnexpectedError("%Hinvalid shift count", exp, Result);
intID = Intrinsic::arm_neon_vshiftins;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall3(intFn, Ops[0], Ops[1], Ops[2]);
break;
case NEON_BUILTIN_vabs:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vabs;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vqabs:
if (datatype != neon_datatype_signed)
return BadImmediateError(exp, Result);
intID = Intrinsic::arm_neon_vqabs;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vneg:
if (datatype != neon_datatype_signed &&
datatype != neon_datatype_float)
return BadImmediateError(exp, Result);
if (datatype == neon_datatype_float)
Result = Builder.CreateFNeg(Ops[0]);
else
Result = Builder.CreateNeg(Ops[0]);
break;
case NEON_BUILTIN_vqneg:
if (datatype != neon_datatype_signed)
return BadImmediateError(exp, Result);
intID = Intrinsic::arm_neon_vqneg;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vcls:
if (datatype != neon_datatype_signed)
return BadImmediateError(exp, Result);
intID = Intrinsic::arm_neon_vcls;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vclz:
if (datatype != neon_datatype_signed &&
datatype != neon_datatype_unsigned)
return BadImmediateError(exp, Result);
intID = Intrinsic::arm_neon_vclz;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vcnt:
if (datatype == neon_datatype_float)
return BadImmediateError(exp, Result);
intID = Intrinsic::arm_neon_vcnt;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vrecpe:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vrecpe;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vrsqrte:
if (datatype == neon_datatype_float ||
datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vrsqrte;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vmvn:
if (datatype == neon_datatype_float)
return BadImmediateError(exp, Result);
Result = Builder.CreateNot(Ops[0]);
break;
case NEON_BUILTIN_vget_lane: {
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
if (!isValidLane(Ops[1], NUnits))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Result = Builder.CreateExtractElement(Ops[0], Ops[1]);
break;
}
case NEON_BUILTIN_vset_lane: {
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[2].mode);
if (!isValidLane(Ops[2], NUnits))
return UnexpectedError("%Hinvalid lane number", exp, Result);
// GCC may promote the scalar argument; cast it back.
const VectorType *VTy = dyn_cast<const VectorType>(Ops[1]->getType());
assert(VTy && "expected a vector type for vset_lane vector operand");
Type *ElTy = VTy->getElementType();
if (Ops[0]->getType() != ElTy) {
assert(!ElTy->isFloatingPointTy() &&
"only integer types expected to be promoted");
Ops[0] = Builder.CreateTrunc(Ops[0], ElTy);
}
Result = Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2]);
break;
}
case NEON_BUILTIN_vcreate:
Result = Builder.CreateBitCast(Ops[0], ResultType);
break;
case NEON_BUILTIN_vdup_n:
Result = BuildDup(ResultType, Ops[0], Builder);
break;
case NEON_BUILTIN_vdup_lane: {
unsigned LaneVal;
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
if (!isValidLane(Ops[1], NUnits, &LaneVal))
return UnexpectedError("%Hinvalid lane number", exp, Result);
unsigned DstUnits = GET_MODE_NUNITS(insn_data[icode].operand[0].mode);
Result = BuildDupLane(Ops[0], LaneVal, DstUnits, Builder);
break;
}
case NEON_BUILTIN_vcombine: {
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[0].mode);
std::vector<Constant*> Idxs;
for (unsigned i = 0; i != NUnits; ++i)
Idxs.push_back(getInt32Const(i));
Result = Builder.CreateShuffleVector(Ops[0], Ops[1],
ConstantVector::get(Idxs));
break;
}
case NEON_BUILTIN_vget_high:
case NEON_BUILTIN_vget_low: {
Type *v2f64Ty = VectorType::get(Type::getDoubleTy(Context), 2);
unsigned Idx = (neon_code == NEON_BUILTIN_vget_low ? 0 : 1);
Result = Builder.CreateBitCast(Ops[0], v2f64Ty);
Result = Builder.CreateExtractElement(Result, getInt32Const(Idx));
Result = Builder.CreateBitCast(Result, ResultType);
break;
}
case NEON_BUILTIN_vmovn:
if (datatype == neon_datatype_signed ||
datatype == neon_datatype_unsigned)
Result = Builder.CreateTrunc(Ops[0], ResultType);
else
return BadImmediateError(exp, Result);
break;
case NEON_BUILTIN_vqmovn:
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vqmovns;
else if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vqmovnu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vqmovun:
if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vqmovnsu;
else
return BadImmediateError(exp, Result);
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Result = Builder.CreateCall(intFn, Ops[0]);
break;
case NEON_BUILTIN_vmovl:
if (datatype == neon_datatype_signed)
Result = Builder.CreateSExt(Ops[0], ResultType);
else if (datatype == neon_datatype_unsigned)
Result = Builder.CreateZExt(Ops[0], ResultType);
else
return BadImmediateError(exp, Result);
break;
case NEON_BUILTIN_vext: {
// Check if immediate operand is valid.
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
ConstantInt *Imm = dyn_cast<ConstantInt>(Ops[2]);
if (!Imm)
return UnexpectedError("%Hinvalid immediate for vext", exp, Result);
int ImmVal = Imm->getSExtValue();
if (ImmVal < 0 || ImmVal >= (int)NUnits)
return UnexpectedError("%Hout of range immediate for vext", exp, Result);
if (ImmVal == 0) {
Result = Ops[0];
break;
}
// Translate to a vector shuffle.
std::vector<Constant*> Idxs;
for (unsigned i = 0; i != NUnits; ++i)
Idxs.push_back(getInt32Const(i + ImmVal));
Result = Builder.CreateShuffleVector(Ops[0], Ops[1],
ConstantVector::get(Idxs));
break;
}
case NEON_BUILTIN_vrev64:
case NEON_BUILTIN_vrev32:
case NEON_BUILTIN_vrev16: {
unsigned ChunkBits = 0;
switch (neon_code) {
case NEON_BUILTIN_vrev64: ChunkBits = 64; break;
case NEON_BUILTIN_vrev32: ChunkBits = 32; break;
case NEON_BUILTIN_vrev16: ChunkBits = 16; break;
default: assert(false);
}
const VectorType *VTy = dyn_cast<const VectorType>(ResultType);
assert(VTy && "expected a vector type");
Type *ElTy = VTy->getElementType();
unsigned ChunkElts = ChunkBits / ElTy->getPrimitiveSizeInBits();
// Translate to a vector shuffle.
std::vector<Constant*> Idxs;
unsigned NUnits = VTy->getNumElements();
for (unsigned c = ChunkElts; c <= NUnits; c += ChunkElts) {
for (unsigned i = 0; i != ChunkElts; ++i) {
Idxs.push_back(getInt32Const(c - i - 1));
}
}
Result = Builder.CreateShuffleVector(Ops[0], UndefValue::get(ResultType),
ConstantVector::get(Idxs));
break;
}
case NEON_BUILTIN_vcvt:
if (FLOAT_MODE_P(insn_data[icode].operand[1].mode)) {
if (datatype == neon_datatype_unsigned)
Result = Builder.CreateFPToUI(Ops[0], ResultType);
else if (datatype == neon_datatype_signed)
Result = Builder.CreateFPToSI(Ops[0], ResultType);
else
return BadImmediateError(exp, Result);
} else {
if (datatype == neon_datatype_unsigned)
Result = Builder.CreateUIToFP(Ops[0], ResultType);
else if (datatype == neon_datatype_signed)
Result = Builder.CreateSIToFP(Ops[0], ResultType);
else
return BadImmediateError(exp, Result);
}
break;
case NEON_BUILTIN_vcvt_n: {
// Check if the fractional bits argument is between 1 and 32.
ConstantInt *FBits = dyn_cast<ConstantInt>(Ops[1]);
if (!FBits)
return UnexpectedError("%Hinvalid fractional bit count", exp, Result);
int FBitsVal = FBits->getSExtValue();
if (FBitsVal < 1 || FBitsVal > 32)
return UnexpectedError("%Hinvalid fractional bit count", exp, Result);
if (FLOAT_MODE_P(insn_data[icode].operand[1].mode)) {
if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vcvtfp2fxu;
else if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vcvtfp2fxs;
else
return BadImmediateError(exp, Result);
} else {
if (datatype == neon_datatype_unsigned)
intID = Intrinsic::arm_neon_vcvtfxu2fp;
else if (datatype == neon_datatype_signed)
intID = Intrinsic::arm_neon_vcvtfxs2fp;
else
return BadImmediateError(exp, Result);
}
intOpTypes[0] = ResultType;
intOpTypes[1] = Ops[0]->getType();
intFn = Intrinsic::getDeclaration(TheModule, intID, intOpTypes);
Result = Builder.CreateCall2(intFn, Ops[0], Ops[1]);
break;
}
case NEON_BUILTIN_vbsl:
Ops[1] = BitCastToType(Ops[1], Ops[0]->getType());
Ops[2] = BitCastToType(Ops[2], Ops[0]->getType());
Result = Builder.CreateOr(Builder.CreateAnd(Ops[1], Ops[0]),
Builder.CreateAnd(Ops[2],
Builder.CreateNot(Ops[0])));
break;
case NEON_BUILTIN_vtbl1:
case NEON_BUILTIN_vtbl2:
case NEON_BUILTIN_vtbl3:
case NEON_BUILTIN_vtbl4: {
unsigned TblVecs = 0;
switch (neon_code) {
case NEON_BUILTIN_vtbl1:
intID = Intrinsic::arm_neon_vtbl1;
TblVecs = 1;
break;
case NEON_BUILTIN_vtbl2:
intID = Intrinsic::arm_neon_vtbl2;
TblVecs = 2;
break;
case NEON_BUILTIN_vtbl3:
intID = Intrinsic::arm_neon_vtbl3;
TblVecs = 3;
break;
case NEON_BUILTIN_vtbl4:
intID = Intrinsic::arm_neon_vtbl4;
TblVecs = 4;
break;
default:
assert(false);
}
intFn = Intrinsic::getDeclaration(TheModule, intID);
std::vector<Value*> Args;
if (TblVecs == 1) {
Args.push_back(Ops[0]);
} else {
for (unsigned n = 0; n < TblVecs; ++n) {
Args.push_back(Builder.CreateExtractValue(Ops[0], n));
}
}
Args.push_back(Ops[1]);
Result = Builder.CreateCall(intFn, Args);
break;
}
case NEON_BUILTIN_vtbx1:
case NEON_BUILTIN_vtbx2:
case NEON_BUILTIN_vtbx3:
case NEON_BUILTIN_vtbx4: {
unsigned TblVecs = 0;
switch (neon_code) {
case NEON_BUILTIN_vtbx1:
intID = Intrinsic::arm_neon_vtbx1;
TblVecs = 1;
break;
case NEON_BUILTIN_vtbx2:
intID = Intrinsic::arm_neon_vtbx2;
TblVecs = 2;
break;
case NEON_BUILTIN_vtbx3:
intID = Intrinsic::arm_neon_vtbx3;
TblVecs = 3;
break;
case NEON_BUILTIN_vtbx4:
intID = Intrinsic::arm_neon_vtbx4;
TblVecs = 4;
break;
default:
assert(false);
}
intFn = Intrinsic::getDeclaration(TheModule, intID);
std::vector<Value*> Args;
Args.push_back(Ops[0]);
if (TblVecs == 1) {
Args.push_back(Ops[1]);
} else {
for (unsigned n = 0; n < TblVecs; ++n) {
Args.push_back(Builder.CreateExtractValue(Ops[1], n));
}
}
Args.push_back(Ops[2]);
Result = Builder.CreateCall(intFn, Args);
break;
}
case NEON_BUILTIN_vtrn: {
// Translate this to a pair of vector shuffles.
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
for (unsigned Elt = 0; Elt != 2; ++Elt) {
std::vector<Constant*> Idxs;
for (unsigned i = 0; i < NUnits; i += 2) {
Idxs.push_back(getInt32Const(i + Elt));
Idxs.push_back(getInt32Const(i + NUnits + Elt));
}
Result = Builder.CreateShuffleVector(Ops[0], Ops[1],
ConstantVector::get(Idxs));
Value *Addr = Builder.CreateConstInBoundsGEP2_32(DestLoc->Ptr, 0, Elt);
Builder.CreateStore(Result, Addr);
}
Result = 0;
break;
}
case NEON_BUILTIN_vzip: {
// Translate this to a pair of vector shuffles.
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
unsigned Idx = 0;
for (unsigned Elt = 0; Elt != 2; ++Elt) {
std::vector<Constant*> Idxs;
for (unsigned i = 0; i != NUnits; i += 2) {
Idxs.push_back(getInt32Const(Idx));
Idxs.push_back(getInt32Const(Idx + NUnits));
Idx += 1;
}
Result = Builder.CreateShuffleVector(Ops[0], Ops[1],
ConstantVector::get(Idxs));
Value *Addr = Builder.CreateConstInBoundsGEP2_32(DestLoc->Ptr, 0, Elt);
Builder.CreateStore(Result, Addr);
}
Result = 0;
break;
}
case NEON_BUILTIN_vuzp: {
// Translate this to a pair of vector shuffles.
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
for (unsigned Elt = 0; Elt != 2; ++Elt) {
std::vector<Constant*> Idxs;
for (unsigned i = 0; i != NUnits; ++i)
Idxs.push_back(getInt32Const(2 * i + Elt));
Result = Builder.CreateShuffleVector(Ops[0], Ops[1],
ConstantVector::get(Idxs));
Value *Addr = Builder.CreateConstInBoundsGEP2_32(DestLoc->Ptr, 0, Elt);
Builder.CreateStore(Result, Addr);
}
Result = 0;
break;
}
case NEON_BUILTIN_vreinterpretv8qi:
case NEON_BUILTIN_vreinterpretv4hi:
case NEON_BUILTIN_vreinterpretv2si:
case NEON_BUILTIN_vreinterpretv2sf:
case NEON_BUILTIN_vreinterpretv1di:
case NEON_BUILTIN_vreinterpretv16qi:
case NEON_BUILTIN_vreinterpretv8hi:
case NEON_BUILTIN_vreinterpretv4si:
case NEON_BUILTIN_vreinterpretv4sf:
case NEON_BUILTIN_vreinterpretv2di:
Result = Builder.CreateBitCast(Ops[0], ResultType);
break;
case NEON_BUILTIN_vld1: {
intID = Intrinsic::arm_neon_vld1;
intFn = Intrinsic::getDeclaration(TheModule, intID, ResultType);
Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context));
unsigned Align = getPointerAlignment(TREE_VALUE(TREE_OPERAND(exp, 1)));
Result = Builder.CreateCall2(intFn, BitCastToType(Ops[0], VPTy),
getInt32Const(Align));
break;
}
case NEON_BUILTIN_vld2:
case NEON_BUILTIN_vld3:
case NEON_BUILTIN_vld4: {
const StructType *STy = dyn_cast<const StructType>(ResultType);
assert(STy && "expected a struct type");
Type *VTy = STy->getElementType(0);
switch (neon_code) {
case NEON_BUILTIN_vld2: intID = Intrinsic::arm_neon_vld2; break;
case NEON_BUILTIN_vld3: intID = Intrinsic::arm_neon_vld3; break;
case NEON_BUILTIN_vld4: intID = Intrinsic::arm_neon_vld4; break;
default: assert(false);
}
intFn = Intrinsic::getDeclaration(TheModule, intID, VTy);
Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context));
unsigned Align = getPointerAlignment(TREE_VALUE(TREE_OPERAND(exp, 1)));
Result = Builder.CreateCall2(intFn, BitCastToType(Ops[0], VPTy),
getInt32Const(Align));
Builder.CreateStore(Result, DestLoc->Ptr);
Result = 0;
break;
}
case NEON_BUILTIN_vld1_lane: {
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[0].mode);
if (!isValidLane(Ops[2], NUnits))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Value *Elt = Builder.CreateLoad(Ops[0]);
Result = Builder.CreateInsertElement(Ops[1], Elt, Ops[2]);
break;
}
case NEON_BUILTIN_vld2_lane:
case NEON_BUILTIN_vld3_lane:
case NEON_BUILTIN_vld4_lane: {
StructType *STy = dyn_cast<StructType>(ResultType);
assert(STy && "expected a struct type");
VectorType *VTy = dyn_cast<VectorType>(STy->getElementType(0));
assert(VTy && "expected a vector type");
if (!isValidLane(Ops[2], VTy->getNumElements()))
return UnexpectedError("%Hinvalid lane number", exp, Result);
switch (neon_code) {
case NEON_BUILTIN_vld2_lane: intID = Intrinsic::arm_neon_vld2lane; break;
case NEON_BUILTIN_vld3_lane: intID = Intrinsic::arm_neon_vld3lane; break;
case NEON_BUILTIN_vld4_lane: intID = Intrinsic::arm_neon_vld4lane; break;
default: assert(false);
}
intOpTypes[0] = VTy;
intFn = Intrinsic::getDeclaration(TheModule, intID, intOpTypes);
unsigned NumVecs = 0;
switch (neon_code) {
case NEON_BUILTIN_vld2_lane: NumVecs = 2; break;
case NEON_BUILTIN_vld3_lane: NumVecs = 3; break;
case NEON_BUILTIN_vld4_lane: NumVecs = 4; break;
default: assert(false);
}
std::vector<Value*> Args;
Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context));
Args.push_back(BitCastToType(Ops[0], VPTy));
for (unsigned n = 0; n != NumVecs; ++n) {
Args.push_back(Builder.CreateExtractValue(Ops[1], n));
}
Args.push_back(Ops[2]); // lane number
unsigned Align = getPointerAlignment(TREE_VALUE(TREE_OPERAND(exp, 1)));
Args.push_back(getInt32Const(Align));
Result = Builder.CreateCall(intFn, Args);
Builder.CreateStore(Result, DestLoc->Ptr);
Result = 0;
break;
}
case NEON_BUILTIN_vld1_dup:
Result = BuildDup(ResultType, Builder.CreateLoad(Ops[0]), Builder);
break;
case NEON_BUILTIN_vld2_dup:
case NEON_BUILTIN_vld3_dup:
case NEON_BUILTIN_vld4_dup: {
StructType *STy = dyn_cast<StructType>(ResultType);
assert(STy && "expected a struct type");
VectorType *VTy = dyn_cast<VectorType>(STy->getElementType(0));
assert(VTy && "expected a vector type");
intOpTypes[0] = VTy;
// Handle 64-bit elements as a special-case. There is no "dup" needed.
if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
switch (neon_code) {
case NEON_BUILTIN_vld2_dup: intID = Intrinsic::arm_neon_vld2; break;
case NEON_BUILTIN_vld3_dup: intID = Intrinsic::arm_neon_vld3; break;
case NEON_BUILTIN_vld4_dup: intID = Intrinsic::arm_neon_vld4; break;
default: assert(false);
}
intFn = Intrinsic::getDeclaration(TheModule, intID, intOpTypes);
Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context));
unsigned Align = getPointerAlignment(TREE_VALUE(TREE_OPERAND(exp, 1)));
Result = Builder.CreateCall2(intFn, BitCastToType(Ops[0], VPTy),
getInt32Const(Align));
Builder.CreateStore(Result, DestLoc->Ptr);
Result = 0;
break;
}
// First use a vldN_lane intrinsic to load into lane 0 of undef vectors.
switch (neon_code) {
case NEON_BUILTIN_vld2_dup: intID = Intrinsic::arm_neon_vld2lane; break;
case NEON_BUILTIN_vld3_dup: intID = Intrinsic::arm_neon_vld3lane; break;
case NEON_BUILTIN_vld4_dup: intID = Intrinsic::arm_neon_vld4lane; break;
default: assert(false);
}
intFn = Intrinsic::getDeclaration(TheModule, intID, intOpTypes);
unsigned NumVecs = 0;
switch (neon_code) {
case NEON_BUILTIN_vld2_dup: NumVecs = 2; break;
case NEON_BUILTIN_vld3_dup: NumVecs = 3; break;
case NEON_BUILTIN_vld4_dup: NumVecs = 4; break;
default: assert(false);
}
std::vector<Value*> Args;
Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context));
Args.push_back(BitCastToType(Ops[0], VPTy));
for (unsigned n = 0; n != NumVecs; ++n) {
Args.push_back(UndefValue::get(VTy));
}
Args.push_back(getInt32Const(0));
unsigned Align = getPointerAlignment(TREE_VALUE(TREE_OPERAND(exp, 1)));
Args.push_back(getInt32Const(Align));
Result = Builder.CreateCall(intFn, Args);
// Now splat the values in lane 0 to the rest of the elements.
for (unsigned n = 0; n != NumVecs; ++n) {
Value *Vec = Builder.CreateExtractValue(Result, n);
Vec = BuildDupLane(Vec, 0, VTy->getNumElements(), Builder);
Result = Builder.CreateInsertValue(Result, Vec, n);
}
Builder.CreateStore(Result, DestLoc->Ptr);
Result = 0;
break;
}
case NEON_BUILTIN_vst1: {
Type *VTy = Ops[1]->getType();
intID = Intrinsic::arm_neon_vst1;
intFn = Intrinsic::getDeclaration(TheModule, intID, VTy);
Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context));
unsigned Align = getPointerAlignment(TREE_VALUE(TREE_OPERAND(exp, 1)));
Builder.CreateCall3(intFn, BitCastToType(Ops[0], VPTy), Ops[1],
getInt32Const(Align));
Result = 0;
break;
}
case NEON_BUILTIN_vst2:
case NEON_BUILTIN_vst3:
case NEON_BUILTIN_vst4: {
const StructType *STy = dyn_cast<const StructType>(Ops[1]->getType());
assert(STy && "expected a struct type");
Type *VTy = STy->getElementType(0);
switch (neon_code) {
case NEON_BUILTIN_vst2: intID = Intrinsic::arm_neon_vst2; break;
case NEON_BUILTIN_vst3: intID = Intrinsic::arm_neon_vst3; break;
case NEON_BUILTIN_vst4: intID = Intrinsic::arm_neon_vst4; break;
default: assert(false);
}
intFn = Intrinsic::getDeclaration(TheModule, intID, VTy);
unsigned NumVecs = 0;
switch (neon_code) {
case NEON_BUILTIN_vst2: NumVecs = 2; break;
case NEON_BUILTIN_vst3: NumVecs = 3; break;
case NEON_BUILTIN_vst4: NumVecs = 4; break;
default: assert(false);
}
std::vector<Value*> Args;
Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context));
Args.push_back(BitCastToType(Ops[0], VPTy));
for (unsigned n = 0; n != NumVecs; ++n) {
Args.push_back(Builder.CreateExtractValue(Ops[1], n));
}
unsigned Align = getPointerAlignment(TREE_VALUE(TREE_OPERAND(exp, 1)));
Args.push_back(getInt32Const(Align));
Builder.CreateCall(intFn, Args);
Result = 0;
break;
}
case NEON_BUILTIN_vst1_lane: {
unsigned NUnits = GET_MODE_NUNITS(insn_data[icode].operand[1].mode);
if (!isValidLane(Ops[2], NUnits))
return UnexpectedError("%Hinvalid lane number", exp, Result);
Builder.CreateStore(Builder.CreateExtractElement(Ops[1], Ops[2]), Ops[0]);
Result = 0;
break;
}
case NEON_BUILTIN_vst2_lane:
case NEON_BUILTIN_vst3_lane:
case NEON_BUILTIN_vst4_lane: {
StructType *STy = dyn_cast<StructType>(Ops[1]->getType());
assert(STy && "expected a struct type");
VectorType *VTy = dyn_cast<VectorType>(STy->getElementType(0));
assert(VTy && "expected a vector type");
if (!isValidLane(Ops[2], VTy->getNumElements()))
return UnexpectedError("%Hinvalid lane number", exp, Result);
switch (neon_code) {
case NEON_BUILTIN_vst2_lane: intID = Intrinsic::arm_neon_vst2lane; break;
case NEON_BUILTIN_vst3_lane: intID = Intrinsic::arm_neon_vst3lane; break;
case NEON_BUILTIN_vst4_lane: intID = Intrinsic::arm_neon_vst4lane; break;
default: assert(false);
}
intOpTypes[0] = VTy;
intFn = Intrinsic::getDeclaration(TheModule, intID, intOpTypes);
unsigned NumVecs = 0;
switch (neon_code) {
case NEON_BUILTIN_vst2_lane: NumVecs = 2; break;
case NEON_BUILTIN_vst3_lane: NumVecs = 3; break;
case NEON_BUILTIN_vst4_lane: NumVecs = 4; break;
default: assert(false);
}
std::vector<Value*> Args;
Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context));
Args.push_back(BitCastToType(Ops[0], VPTy));
for (unsigned n = 0; n != NumVecs; ++n) {
Args.push_back(Builder.CreateExtractValue(Ops[1], n));
}
Args.push_back(Ops[2]); // lane number
unsigned Align = getPointerAlignment(TREE_VALUE(TREE_OPERAND(exp, 1)));
Args.push_back(getInt32Const(Align));
Builder.CreateCall(intFn, Args);
Result = 0;
break;
}
case NEON_BUILTIN_vand:
if (datatype != neon_datatype_signed &&
datatype != neon_datatype_unsigned)
return BadImmediateError(exp, Result);
Result = Builder.CreateAnd(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vorr:
if (datatype != neon_datatype_signed &&
datatype != neon_datatype_unsigned)
return BadImmediateError(exp, Result);
Result = Builder.CreateOr(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_veor:
if (datatype != neon_datatype_signed &&
datatype != neon_datatype_unsigned)
return BadImmediateError(exp, Result);
Result = Builder.CreateXor(Ops[0], Ops[1]);
break;
case NEON_BUILTIN_vbic:
if (datatype != neon_datatype_signed &&
datatype != neon_datatype_unsigned)
return BadImmediateError(exp, Result);
Result = Builder.CreateAnd(Ops[0], Builder.CreateNot(Ops[1]));
break;
case NEON_BUILTIN_vorn:
if (datatype != neon_datatype_signed &&
datatype != neon_datatype_unsigned)
return BadImmediateError(exp, Result);
Result = Builder.CreateOr(Ops[0], Builder.CreateNot(Ops[1]));
break;
}
return true;
}
// "Fundamental Data Types" according to the AAPCS spec. These are used
// to check that a given aggregate meets the criteria for a "homogeneous
// aggregate."
enum arm_fdts {
ARM_FDT_INVALID,
ARM_FDT_HALF_FLOAT,
ARM_FDT_FLOAT,
ARM_FDT_DOUBLE,
ARM_FDT_VECTOR_64,
ARM_FDT_VECTOR_128,
ARM_FDT_MAX
};
// Classify type according to the number of fundamental data types contained
// among its members. Returns true if type is a homogeneous aggregate.
static bool
vfp_arg_homogeneous_aggregate_p(enum machine_mode mode, tree type,
int *fdt_counts)
{
bool result = false;
HOST_WIDE_INT bytes =
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
if (type && AGGREGATE_TYPE_P (type))
{
int i;
int cnt = 0;
tree field;
// Zero sized arrays or structures are not homogeneous aggregates.
if (!bytes)
return 0;
// Classify each field of records.
switch (TREE_CODE (type))
{
case RECORD_TYPE:
// For classes first merge in the field of the subclasses.
if (TYPE_BINFO (type)) {
tree binfo, base_binfo;
int basenum;
for (binfo = TYPE_BINFO (type), basenum = 0;
BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++) {
int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 4;
tree type = BINFO_TYPE (base_binfo);
result = vfp_arg_homogeneous_aggregate_p(TYPE_MODE (type), type,
fdt_counts);
if (!result)
return false;
}
}
// And now merge the fields of structure.
for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) {
if (TREE_CODE (field) == FIELD_DECL) {
if (TREE_TYPE (field) == error_mark_node)
continue;
result = vfp_arg_homogeneous_aggregate_p(TYPE_MODE(TREE_TYPE(field)),
TREE_TYPE(field),
fdt_counts);
if (!result)
return false;
}
}
break;
case ARRAY_TYPE:
// Arrays are handled as small records.
{
int array_fdt_counts[ARM_FDT_MAX] = { 0 };
result = vfp_arg_homogeneous_aggregate_p(TYPE_MODE(TREE_TYPE(type)),
TREE_TYPE(type),
array_fdt_counts);
cnt = bytes / int_size_in_bytes(TREE_TYPE(type));
for (i = 0; i < ARM_FDT_MAX; ++i)
fdt_counts[i] += array_fdt_counts[i] * cnt;
if (!result)
return false;
}
break;
case UNION_TYPE:
case QUAL_UNION_TYPE:
{
// Unions are similar to RECORD_TYPE.
int union_fdt_counts[ARM_FDT_MAX] = { 0 };
// Unions are not derived.
gcc_assert (!TYPE_BINFO (type)
|| !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) {
int union_field_fdt_counts[ARM_FDT_MAX] = { 0 };
if (TREE_CODE (field) == FIELD_DECL) {
if (TREE_TYPE (field) == error_mark_node)
continue;
result = vfp_arg_homogeneous_aggregate_p(
TYPE_MODE(TREE_TYPE(field)),
TREE_TYPE(field),
union_field_fdt_counts);
if (!result)
return false;
// track largest union field
for (i = 0; i < ARM_FDT_MAX; ++i) {
if (union_field_fdt_counts[i] > 4) // bail early if we can
return false;
union_fdt_counts[i] = MAX(union_fdt_counts[i],
union_field_fdt_counts[i]);
union_field_fdt_counts[i] = 0; // clear it out for next iter
}
}
}
// check for only one type across all union fields
cnt = 0;
for (i = 0; i < ARM_FDT_MAX; ++i) {
if (union_fdt_counts[i])
++cnt;
if (cnt > 1)
return false;
fdt_counts[i] += union_fdt_counts[i];
}
}
break;
default:
assert(0 && "What type is this?");
}
// Walk through fdt_counts. This is a homogeneous aggregate if
// only one FDT is used.
cnt = 0;
for (i = 0; i < ARM_FDT_MAX; ++i) {
if (fdt_counts[i]) {
// Make sure that one FDT is 4 or less elements in size.
if (fdt_counts[i] > 4)
return false;
++cnt;
}
if (cnt > 1)
return false;
}
if (cnt == 0)
return false;
return true;
}
if (type)
{
int idx = 0;
int cnt = 0;
switch (TREE_CODE(type))
{
case REAL_TYPE:
idx = (TYPE_PRECISION(type) == 32) ?
ARM_FDT_FLOAT :
((TYPE_PRECISION(type) == 64) ?
ARM_FDT_DOUBLE :
ARM_FDT_INVALID);
cnt = 1;
break;
case COMPLEX_TYPE:
{
tree subtype = TREE_TYPE(type);
idx = (TYPE_PRECISION(subtype) == 32) ?
ARM_FDT_FLOAT :
((TYPE_PRECISION(subtype) == 64) ?
ARM_FDT_DOUBLE :
ARM_FDT_INVALID);
cnt = 2;
}
break;
case VECTOR_TYPE:
idx = (bytes == 8) ?
ARM_FDT_VECTOR_64 :
(bytes == 16) ?
ARM_FDT_VECTOR_128 :
ARM_FDT_INVALID;
cnt = 1;
break;
case INTEGER_TYPE:
case POINTER_TYPE:
case ENUMERAL_TYPE:
case BOOLEAN_TYPE:
case REFERENCE_TYPE:
case FUNCTION_TYPE:
case METHOD_TYPE:
default:
return false; // All of these disqualify.
}
fdt_counts[idx] += cnt;
return true;
}
else
assert(0 && "what type was this?");
return false;
}
// Walk over an LLVM Type that we know is a homogeneous aggregate and
// push the proper LLVM Types that represent the register types to pass
// that struct member in.
static void push_elts(Type *Ty, std::vector<Type*> &Elts)
{
for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
I != E; ++I) {
Type *STy = *I;
if (const VectorType *VTy = dyn_cast<VectorType>(STy)) {
switch (VTy->getBitWidth())
{
case 64: // v2f32
Elts.push_back(VectorType::get(Type::getFloatTy(Context), 2));
break;
case 128: // v2f64
Elts.push_back(VectorType::get(Type::getDoubleTy(Context), 2));
break;
default:
assert (0 && "invalid vector type");
}
} else if (ArrayType *ATy = dyn_cast<ArrayType>(STy)) {
Type *ETy = ATy->getElementType();
for (uint64_t i = ATy->getNumElements(); i > 0; --i)
Elts.push_back(ETy);
} else if (STy->getNumContainedTypes())
push_elts(STy, Elts);
else
Elts.push_back(STy);
}
}
static unsigned count_num_words(std::vector<Type*> &ScalarElts) {
unsigned NumWords = 0;
for (unsigned i = 0, e = ScalarElts.size(); i != e; ++i) {
Type *Ty = ScalarElts[i];
if (Ty->isPointerTy()) {
NumWords++;
} else if (Ty->isIntegerTy()) {
const unsigned TypeSize = Ty->getPrimitiveSizeInBits();
const unsigned NumWordsForType = (TypeSize + 31) / 32;
NumWords += NumWordsForType;
} else {
assert (0 && "Unexpected type.");
}
}
return NumWords;
}
// This function is used only on AAPCS. The difference from the generic
// handling of arguments is that arguments larger than 32 bits are split
// and padding arguments are added as necessary for alignment. This makes
// the IL a bit more explicit about how arguments are handled.
extern bool
llvm_arm_try_pass_aggregate_custom(tree type,
std::vector<Type*>& ScalarElts,
CallingConv::ID& CC,
struct DefaultABIClient* C) {
if (CC != CallingConv::ARM_AAPCS && CC != CallingConv::C)
return false;
if (CC == CallingConv::C && !TARGET_AAPCS_BASED)
return false;
if (TARGET_HARD_FLOAT_ABI)
return false;
Type *Ty = ConvertType(type);
if (Ty->isPointerTy())
return false;
const unsigned Size = TREE_INT_CST_LOW(TYPE_SIZE(type))/8;
const unsigned Alignment = TYPE_ALIGN(type)/8;
const unsigned NumWords = count_num_words(ScalarElts);
const bool AddPad = Alignment >= 8 && (NumWords % 2);
// First, build a type that will be bitcast to the original one and
// from where elements will be extracted.
std::vector<Type*> Elts;
Type* Int32Ty = Type::getInt32Ty(getGlobalContext());
const unsigned NumRegularArgs = Size / 4;
for (unsigned i = 0; i < NumRegularArgs; ++i) {
Elts.push_back(Int32Ty);
}
const unsigned RestSize = Size % 4;
llvm::Type *RestType = NULL;
if (RestSize> 2) {
RestType = Type::getInt32Ty(getGlobalContext());
} else if (RestSize > 1) {
RestType = Type::getInt16Ty(getGlobalContext());
} else if (RestSize > 0) {
RestType = Type::getInt8Ty(getGlobalContext());
}
if (RestType)
Elts.push_back(RestType);
StructType *STy = StructType::get(getGlobalContext(), Elts, false);
if (AddPad) {
ScalarElts.push_back(Int32Ty);
C->HandlePad(Int32Ty);
}
for (unsigned i = 0; i < NumRegularArgs; ++i) {
C->EnterField(i, STy);
C->HandleScalarArgument(Int32Ty, 0);
ScalarElts.push_back(Int32Ty);
C->ExitField();
}
if (RestType) {
C->EnterField(NumRegularArgs, STy);
C->HandleScalarArgument(RestType, 0, RestSize);
ScalarElts.push_back(RestType);
C->ExitField();
}
return true;
}
// Target hook for llvm-abi.h. It returns true if an aggregate of the
// specified type should be passed in a number of registers of mixed types.
// It also returns a vector of types that correspond to the registers used
// for parameter passing. This only applies to AAPCS-VFP "homogeneous
// aggregates" as specified in 4.3.5 of the AAPCS spec.
bool
llvm_arm_should_pass_aggregate_in_mixed_regs(tree TreeType, Type *Ty,
CallingConv::ID &CC,
std::vector<Type*> &Elts) {
if (!llvm_arm_should_pass_or_return_aggregate_in_regs(TreeType, CC))
return false;
// Walk Ty and push LLVM types corresponding to register types onto
// Elts.
push_elts(Ty, Elts);
return true;
}
static bool alloc_next_spr(bool *SPRs)
{
for (int i = 0; i < 16; ++i)
if (!SPRs[i]) {
SPRs[i] = true;
return true;
}
return false;
}
static bool alloc_next_dpr(bool *SPRs)
{
for (int i = 0; i < 16; i += 2)
if (!SPRs[i]) {
SPRs[i] = SPRs[i+1] = true;
return true;
}
return false;
}
static bool alloc_next_qpr(bool *SPRs) {
for (int i = 0; i < 16; i += 4)
if (!SPRs[i]) {
SPRs[i] = SPRs[i+1] = SPRs[i+2] = SPRs[i+3] = true;
return true;
}
return false;
}
// count_num_registers_uses - Simulate argument passing reg allocation in SPRs.
// Caller is expected to zero out SPRs. Returns true if all of ScalarElts fit
// in registers.
static bool count_num_registers_uses(std::vector<Type*> &ScalarElts,
bool *SPRs) {
for (unsigned i = 0, e = ScalarElts.size(); i != e; ++i) {
Type *Ty = ScalarElts[i];
if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
switch (VTy->getBitWidth())
{
case 64:
if (!alloc_next_dpr(SPRs))
return false;
break;
case 128:
if (!alloc_next_qpr(SPRs))
return false;
break;
default:
assert(0);
}
} else if (Ty->isIntegerTy() || Ty->isPointerTy() ||
Ty==Type::getVoidTy(Context)) {
;
} else {
// Floating point scalar argument.
assert(Ty->isFloatingPointTy() && Ty->isPrimitiveType() &&
"Expecting a floating point primitive type!");
switch (Ty->getTypeID())
{
case Type::FloatTyID:
if (!alloc_next_spr(SPRs))
return false;
break;
case Type::DoubleTyID:
if (!alloc_next_spr(SPRs))
return false;
break;
default:
assert(0);
}
}
}
return true;
}
// Target hook for llvm-abi.h. This is called when an aggregate is being passed
// in registers. If there are only enough available parameter registers to pass
// part of the aggregate, return true. That means the aggregate should instead
// be passed in memory.
bool
llvm_arm_aggregate_partially_passed_in_regs(std::vector<Type*> &Elts,
std::vector<Type*> &ScalarElts,
CallingConv::ID &CC) {
// Homogeneous aggregates are an AAPCS-VFP feature.
if ((CC != CallingConv::ARM_AAPCS_VFP) ||
!(TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT_ABI))
return true;
bool SPRs[16] = { 0 }; // represents S0-S16
// Figure out which SPRs are available.
if (!count_num_registers_uses(ScalarElts, SPRs))
return true;
if (!count_num_registers_uses(Elts, SPRs))
return true;
return false; // it all fit in registers!
}
// Return LLVM Type if TYPE can be returned as an aggregate,
// otherwise return NULL.
Type *llvm_arm_aggr_type_for_struct_return(tree TreeType,
CallingConv::ID &CC) {
if (!llvm_arm_should_pass_or_return_aggregate_in_regs(TreeType, CC))
return NULL;
// Walk Ty and push LLVM types corresponding to register types onto
// Elts.
std::vector<Type*> Elts;
Type *Ty = ConvertType(TreeType);
push_elts(Ty, Elts);
return StructType::get(Context, Elts, false);
}
// llvm_arm_extract_mrv_array_element - Helper function that helps extract
// an array element from multiple return value.
//
// Here, SRC is returning multiple values. DEST's DESTFIELDNO field is an array.
// Extract SRCFIELDNO's ELEMENO value and store it in DEST's FIELDNO field's
// ELEMENTNO.
//
static void llvm_arm_extract_mrv_array_element(Value *Src, Value *Dest,
unsigned SrcFieldNo,
unsigned SrcElemNo,
unsigned DestFieldNo,
unsigned DestElemNo,
LLVMBuilder &Builder,
bool isVolatile) {
Value *EVI = Builder.CreateExtractValue(Src, SrcFieldNo, "mrv_gr");
const StructType *STy = cast<StructType>(Src->getType());
llvm::Value *Idxs[3];
Idxs[0] = ConstantInt::get(llvm::Type::getInt32Ty(Context), 0);
Idxs[1] = ConstantInt::get(llvm::Type::getInt32Ty(Context), DestFieldNo);
Idxs[2] = ConstantInt::get(llvm::Type::getInt32Ty(Context), DestElemNo);
Value *GEP = Builder.CreateGEP(Dest, Idxs, "mrv_gep");
if (STy->getElementType(SrcFieldNo)->isVectorTy()) {
Value *ElemIndex = ConstantInt::get(Type::getInt32Ty(Context), SrcElemNo);
Value *EVIElem = Builder.CreateExtractElement(EVI, ElemIndex, "mrv");
Builder.CreateStore(EVIElem, GEP, isVolatile);
} else {
Builder.CreateStore(EVI, GEP, isVolatile);
}
}
// llvm_arm_extract_multiple_return_value - Extract multiple values returned
// by SRC and store them in DEST. It is expected that SRC and
// DEST types are StructType, but they may not match.
void llvm_arm_extract_multiple_return_value(Value *Src, Value *Dest,
bool isVolatile,
LLVMBuilder &Builder) {
const StructType *STy = cast<StructType>(Src->getType());
unsigned NumElements = STy->getNumElements();
const PointerType *PTy = cast<PointerType>(Dest->getType());
const StructType *DestTy = cast<StructType>(PTy->getElementType());
unsigned SNO = 0;
unsigned DNO = 0;
while (SNO < NumElements) {
Type *DestElemType = DestTy->getElementType(DNO);
// Directly access first class values.
if (DestElemType->isSingleValueType()) {
Value *GEP = Builder.CreateStructGEP(Dest, DNO, "mrv_gep");
Value *EVI = Builder.CreateExtractValue(Src, SNO, "mrv_gr");
Builder.CreateStore(EVI, GEP, isVolatile);
++DNO; ++SNO;
continue;
}
// Access array elements individually. Note, Src and Dest type may
// not match. For example { <2 x float>, float } and { float[3]; }
const ArrayType *ATy = cast<ArrayType>(DestElemType);
unsigned ArraySize = ATy->getNumElements();
unsigned DElemNo = 0; // DestTy's DNO field's element number
while (DElemNo < ArraySize) {
unsigned i = 0;
unsigned Size = 1;
if (const VectorType *SElemTy =
dyn_cast<VectorType>(STy->getElementType(SNO))) {
Size = SElemTy->getNumElements();
}
while (i < Size) {
llvm_arm_extract_mrv_array_element(Src, Dest, SNO, i++,
DNO, DElemNo++,
Builder, isVolatile);
}
// Consumed this src field. Try next one.
++SNO;
}
// Finished building current dest field.
++DNO;
}
}
// Target hook for llvm-abi.h for LLVM_SHOULD_NOT_USE_SHADOW_RETURN and is
// also a utility function used for other target hooks in this file. Returns
// true if the aggregate should be passed or returned in registers.
bool llvm_arm_should_pass_or_return_aggregate_in_regs(tree TreeType,
CallingConv::ID &CC) {
// Homogeneous aggregates are an AAPCS-VFP feature.
if ((CC != CallingConv::ARM_AAPCS_VFP) ||
!(TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT_ABI))
return false;
// Alas, we can't use LLVM Types to figure this out because we need to
// examine unions closely. We'll have to walk the GCC TreeType.
int fdt_counts[ARM_FDT_MAX] = { 0 };
bool result = false;
result = vfp_arg_homogeneous_aggregate_p(TYPE_MODE(TreeType), TreeType,
fdt_counts);
return result && !TREE_ADDRESSABLE(TreeType);
}
/* LLVM LOCAL end (ENTIRE FILE!) */