| //===--------------- Target.cpp - Implements the IA-32 ABI. ---------------===// |
| // |
| // Copyright (C) 2005 to 2013 Evan Cheng, Duncan Sands et al. |
| // |
| // This file is part of DragonEgg. |
| // |
| // DragonEgg is free software; you can redistribute it and/or modify it under |
| // the terms of the GNU General Public License as published by the Free Software |
| // Foundation; either version 2, or (at your option) any later version. |
| // |
| // DragonEgg is distributed in the hope that it will be useful, but WITHOUT ANY |
| // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
| // A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| // |
| // You should have received a copy of the GNU General Public License along with |
| // DragonEgg; see the file COPYING. If not, write to the Free Software |
| // Foundation, 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA. |
| // |
| //===----------------------------------------------------------------------===// |
| // This file implements the IA-32 specific LLVM ABI. |
| //===----------------------------------------------------------------------===// |
| |
| // Plugin headers |
| #include "dragonegg/ABI.h" |
| #include "dragonegg/Target.h" |
| |
| // LLVM headers |
| #include "llvm/MC/SubtargetFeature.h" |
| #include "llvm/IR/Module.h" |
| |
| // System headers |
| #include <gmp.h> |
| |
| // GCC headers |
| #include "auto-host.h" |
| #ifndef ENABLE_BUILD_WITH_CXX |
| #include <cstring> // Otherwise included by system.h with C linkage. |
| extern "C" { |
| #endif |
| #include "config.h" |
| // Stop GCC declaring 'getopt' as it can clash with the system's declaration. |
| #undef HAVE_DECL_GETOPT |
| #include "system.h" |
| #include "coretypes.h" |
| #include "target.h" |
| #include "tree.h" |
| |
| #include "diagnostic.h" |
| #include "gimple.h" |
| #if (GCC_MINOR > 6) |
| #include "gimple-pretty-print.h" |
| #endif |
| #include "toplev.h" |
| |
| #if (GCC_MINOR == 6) |
| extern void debug_gimple_stmt(union gimple_statement_d *); |
| #endif |
| |
| #ifndef ENABLE_BUILD_WITH_CXX |
| } // extern "C" |
| #endif |
| |
| // Trees header. |
| #include "dragonegg/Trees.h" |
| |
| // One day we will do parameter marshalling right: by using CUMULATIVE_ARGS. |
| // While waiting for that happy day, just include a chunk of i386.c. |
| #include "ABIHack.inc" |
| |
| using namespace llvm; |
| |
| // File-wide shorthand for the context returned by getGlobalContext(); the |
| // helpers and builtin lowering code below pass it wherever an LLVM type or |
| // constant has to be created. |
| static LLVMContext &Context(getGlobalContext()); |
| |
| /// BitCastToIntVector - Reinterpret a vector value as a vector of integers. |
| /// The result has the same number of elements as Op, and each integer element |
| /// is as wide (in bits) as Op's original element type. Op's type is cast<> to |
| /// VectorType, so callers must pass a vector-typed value. |
| static Value *BitCastToIntVector(Value *Op, LLVMBuilder &Builder) { |
|   VectorType *SrcTy = cast<VectorType>(Op->getType()); |
|   // Width of one source element; the integer element type mirrors it. |
|   unsigned EltBits = SrcTy->getElementType()->getPrimitiveSizeInBits(); |
|   Type *IntEltTy = IntegerType::get(Context, EltBits); |
|   Type *DstTy = VectorType::get(IntEltTy, SrcTy->getNumElements()); |
|   return Builder.CreateBitCast(Op, DstTy); |
| } |
| |
| /// BuiltinCode - An enumerated type with one value for each supported builtin, |
| /// plus two sentinel values (SearchForHandler and UnsupportedBuiltin). |
| enum BuiltinCode { |
|   // Builtin not seen before - search for a handler. This must stay zero: |
|   // FunctionCodeMap is grown with resize(), whose new entries are zero, and |
|   // those entries are then compared against SearchForHandler. |
|   SearchForHandler = 0, |
|   // Builtins with exceptional names (no "__builtin_ia32_" prefix). |
|   clzs, |
|   ctzs, |
| // One enumerator per DEFINE_BUILTIN entry of the x86_builtins list. |
| #define DEFINE_BUILTIN(x) x |
| #include "x86_builtins" |
| #undef DEFINE_BUILTIN |
|   , |
|   // There is no handler for this builtin. |
|   UnsupportedBuiltin |
| }; |
| |
| /// HandlerEntry - Associates a builtin's name with the BuiltinCode used to |
| /// dispatch on it. |
| struct HandlerEntry { |
|   const char *Name;    // Name of the builtin; the key compared by HandlerLT. |
|   BuiltinCode Handler; // Code associated with that name. |
| }; |
| |
| /// HandlerLT - Strict ordering on HandlerEntry by name: returns true when E's |
| /// name sorts before F's according to strcmp. The Handler field is ignored. |
| static bool HandlerLT(const HandlerEntry &E, const HandlerEntry &F) { |
|   const int Order = strcmp(E.Name, F.Name); |
|   return Order < 0; |
| } |
| |
| /* TargetIntrinsicLower - For builtins that we want to expand to normal LLVM |
| * code, emit the code now. If we can handle the builtin, this function emits |
| * the code and returns true; it returns false for builtins it does not handle. |
| */ |
| bool TreeToLLVM::TargetIntrinsicLower( |
| gimple stmt, tree fndecl, const MemRef */*DestLoc*/, Value *&Result, |
| Type *ResultType, std::vector<Value *> &Ops) { |
| // DECL_FUNCTION_CODE contains a value of the enumerated type ix86_builtins, |
| // declared in i386.c. If this type was visible to us then we could simply |
| // use a switch statement on DECL_FUNCTION_CODE to jump to the right code for |
| // handling the builtin. But the type isn't visible, so instead we generate |
| // at run-time a map from the values of DECL_FUNCTION_CODE to values of the |
| // enumerated type BuiltinCode (defined above), the analog of ix86_builtins, |
| // and do the switch on the BuiltinCode value instead. |
| |
| // The map from DECL_FUNCTION_CODE values to BuiltinCode. |
| static std::vector<BuiltinCode> FunctionCodeMap; |
| if (FunctionCodeMap.size() <= DECL_FUNCTION_CODE(fndecl)) |
| FunctionCodeMap.resize(DECL_FUNCTION_CODE(fndecl) + 1); |
| |
| // See if we already associated a BuiltinCode with this DECL_FUNCTION_CODE. |
| BuiltinCode &Handler = FunctionCodeMap[DECL_FUNCTION_CODE(fndecl)]; |
| if (Handler == SearchForHandler) { |
| // No associated BuiltinCode. Work out what value to use based on the |
| // builtin's name. |
| |
| // List of builtin names and associated BuiltinCode. |
| static const HandlerEntry Handlers[] = { |
| { "__builtin_clzs", clzs }, // Builtin with exceptional name. |
| { "__builtin_ctzs", ctzs }, // Builtin with exceptional name. |
| #define DEFINE_BUILTIN(x) \ |
| { "__builtin_ia32_" #x, x } |
| #include "x86_builtins" |
| #undef DEFINE_BUILTIN |
| }; |
| size_t N = sizeof(Handlers) / sizeof(Handlers[0]); |
| #ifndef NDEBUG |
| // Check that the list of handlers is sorted by name. |
| static bool Checked = false; |
| if (!Checked) { |
| for (unsigned i = 1; i < N; ++i) |
| assert(HandlerLT(Handlers[i - 1], Handlers[i]) && |
| "Handlers not sorted!"); |
| Checked = true; |
| } |
| #endif |
| |
| Handler = UnsupportedBuiltin; |
| const char *Identifier = IDENTIFIER_POINTER(DECL_NAME(fndecl)); |
| HandlerEntry ToFind = { Identifier, SearchForHandler }; |
| const HandlerEntry *E = |
| std::lower_bound(Handlers, Handlers + N, ToFind, HandlerLT); |
| if ((E < Handlers + N) && !strcmp(E->Name, ToFind.Name)) |
| Handler = E->Handler; |
| } |
| |
| bool flip = false; |
| unsigned PredCode; |
| |
| switch (Handler) { |
| case SearchForHandler: |
| debug_gimple_stmt(stmt); |
| llvm_unreachable("Unexpected builtin code!"); |
| case UnsupportedBuiltin: |
| return false; |
| case addps: |
| case addps256: |
| case addpd: |
| case addpd256: |
| Result = Builder.CreateFAdd(Ops[0], Ops[1]); |
| return true; |
| case copysignpd: |
| case copysignpd256: |
| case copysignps: |
| case copysignps256: { |
| if (Ops.size() != 2) |
| return false; |
| VectorType *VecTy = llvm::dyn_cast<VectorType>(Ops[0]->getType()); |
| if (Ops[1]->getType() != VecTy) |
| return false; |
| Type *EltTy = VecTy->getElementType(); |
| unsigned EltBitWidth = EltTy->getPrimitiveSizeInBits(); |
| Type *IntEltTy = IntegerType::get(Context, EltBitWidth); |
| Type *IntVecTy = VectorType::get(IntEltTy, VecTy->getNumElements()); |
| APInt SignBit = APInt::getSignBit(EltBitWidth); |
| Constant *SignMask = ConstantInt::get(IntVecTy, SignBit); |
| Value *IntLHS = Builder.CreateBitCast(Ops[0], IntVecTy); |
| Value *IntRHS = Builder.CreateBitCast(Ops[1], IntVecTy); |
| Value *Sign = Builder.CreateAnd(IntRHS, SignMask); |
| Value *Abs = Builder.CreateAnd(IntLHS, ConstantExpr::getNot(SignMask)); |
| Value *IntRes = Builder.CreateOr(Abs, Sign); |
| Result = Builder.CreateBitCast(IntRes, VecTy); |
| return true; |
| } |
| case paddb: |
| case paddw: |
| case paddd: |
| case paddq: |
| case paddb128: |
| case paddw128: |
| case paddd128: |
| case paddq128: |
| Result = Builder.CreateAdd(Ops[0], Ops[1]); |
| return true; |
| case subps: |
| case subps256: |
| case subpd: |
| case subpd256: |
| Result = Builder.CreateFSub(Ops[0], Ops[1]); |
| return true; |
| case psubb: |
| case psubw: |
| case psubd: |
| case psubq: |
| case psubb128: |
| case psubw128: |
| case psubd128: |
| case psubq128: |
| Result = Builder.CreateSub(Ops[0], Ops[1]); |
| return true; |
| case mulps: |
| case mulps256: |
| case mulpd: |
| case mulpd256: |
| Result = Builder.CreateFMul(Ops[0], Ops[1]); |
| return true; |
| case pmullw: |
| case pmullw128: |
| case pmulld128: |
| Result = Builder.CreateMul(Ops[0], Ops[1]); |
| return true; |
| case divps: |
| case divps256: |
| case divpd: |
| case divpd256: |
| Result = Builder.CreateFDiv(Ops[0], Ops[1]); |
| return true; |
| case pand: |
| case pand128: |
| Result = Builder.CreateAnd(Ops[0], Ops[1]); |
| return true; |
| case pandn: |
| case pandn128: |
| Ops[0] = Builder.CreateNot(Ops[0]); |
| Result = Builder.CreateAnd(Ops[0], Ops[1]); |
| return true; |
| case por: |
| case por128: |
| Result = Builder.CreateOr(Ops[0], Ops[1]); |
| return true; |
| case pxor: |
| case pxor128: |
| Result = Builder.CreateXor(Ops[0], Ops[1]); |
| return true; |
| case andps: |
| case andps256: |
| case andpd: |
| case andpd256: |
| Ops[0] = BitCastToIntVector(Ops[0], Builder); |
| Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); |
| Result = Builder.CreateAnd(Ops[0], Ops[1]); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| case orps: |
| case orps256: |
| case orpd: |
| case orpd256: |
| Ops[0] = BitCastToIntVector(Ops[0], Builder); |
| Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); |
| Result = Builder.CreateOr(Ops[0], Ops[1]); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| case xorps: |
| case xorps256: |
| case xorpd: |
| case xorpd256: |
| Ops[0] = BitCastToIntVector(Ops[0], Builder); |
| Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); |
| Result = Builder.CreateXor(Ops[0], Ops[1]); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| case andnps: |
| case andnps256: |
| case andnpd: |
| case andnpd256: |
| Ops[0] = BitCastToIntVector(Ops[0], Builder); |
| Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); |
| Ops[0] = Builder.CreateNot(Ops[0]); |
| Result = Builder.CreateAnd(Ops[0], Ops[1]); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| case shufps: |
| if (ConstantInt *Elt = llvm::dyn_cast<ConstantInt>(Ops[2])) { |
| int EV = Elt->getZExtValue(); |
| Result = BuildVectorShuffle(Ops[0], Ops[1], ((EV & 0x03) >> 0), |
| ((EV & 0x0c) >> 2), ((EV & 0x30) >> 4) + 4, |
| ((EV & 0xc0) >> 6) + 4); |
| } else { |
| error_at(gimple_location(stmt), "mask must be an immediate"); |
| Result = Ops[0]; |
| } |
| return true; |
| case shufpd: |
| if (ConstantInt *Elt = llvm::dyn_cast<ConstantInt>(Ops[2])) { |
| int EV = Elt->getZExtValue(); |
| Result = BuildVectorShuffle(Ops[0], Ops[1], ((EV & 0x01) >> 0), |
| ((EV & 0x02) >> 1) + 2); |
| } else { |
| error_at(gimple_location(stmt), "mask must be an immediate"); |
| Result = Ops[0]; |
| } |
| return true; |
| case pshufw: |
| case pshufd: |
| if (ConstantInt *Elt = llvm::dyn_cast<ConstantInt>(Ops[1])) { |
| int EV = Elt->getZExtValue(); |
| Result = BuildVectorShuffle(Ops[0], Ops[0], ((EV & 0x03) >> 0), |
| ((EV & 0x0c) >> 2), ((EV & 0x30) >> 4), |
| ((EV & 0xc0) >> 6)); |
| } else { |
| error_at(gimple_location(stmt), "mask must be an immediate"); |
| Result = Ops[0]; |
| } |
| return true; |
| case pshufhw: |
| if (ConstantInt *Elt = llvm::dyn_cast<ConstantInt>(Ops[1])) { |
| int EV = Elt->getZExtValue(); |
| Result = |
| BuildVectorShuffle(Ops[0], Ops[0], 0, 1, 2, 3, ((EV & 0x03) >> 0) + 4, |
| ((EV & 0x0c) >> 2) + 4, ((EV & 0x30) >> 4) + 4, |
| ((EV & 0xc0) >> 6) + 4); |
| return true; |
| } |
| return false; |
| case pshuflw: |
| if (ConstantInt *Elt = llvm::dyn_cast<ConstantInt>(Ops[1])) { |
| int EV = Elt->getZExtValue(); |
| Result = BuildVectorShuffle(Ops[0], Ops[0], ((EV & 0x03) >> 0), |
| ((EV & 0x0c) >> 2), ((EV & 0x30) >> 4), |
| ((EV & 0xc0) >> 6), 4, 5, 6, 7); |
| } else { |
| error_at(gimple_location(stmt), "mask must be an immediate"); |
| Result = Ops[0]; |
| } |
| |
| return true; |
| case punpckhbw: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 4, 12, 5, 13, 6, 14, 7, 15); |
| return true; |
| case punpckhwd: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 2, 6, 3, 7); |
| return true; |
| case punpckhdq: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 1, 3); |
| return true; |
| case punpcklbw: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 8, 1, 9, 2, 10, 3, 11); |
| return true; |
| case punpcklwd: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 4, 1, 5); |
| return true; |
| case punpckldq: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 2); |
| return true; |
| case punpckhbw128: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 8, 24, 9, 25, 10, 26, 11, 27, |
| 12, 28, 13, 29, 14, 30, 15, 31); |
| return true; |
| case punpckhwd128: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 4, 12, 5, 13, 6, 14, 7, 15); |
| return true; |
| case punpckhdq128: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 2, 6, 3, 7); |
| return true; |
| case punpckhqdq128: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 1, 3); |
| return true; |
| case punpcklbw128: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 16, 1, 17, 2, 18, 3, 19, 4, |
| 20, 5, 21, 6, 22, 7, 23); |
| return true; |
| case punpcklwd128: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 8, 1, 9, 2, 10, 3, 11); |
| return true; |
| case punpckldq128: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 4, 1, 5); |
| return true; |
| case punpcklqdq128: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 2); |
| return true; |
| case unpckhps: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 2, 6, 3, 7); |
| return true; |
| case unpckhpd: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 1, 3); |
| return true; |
| case unpcklps: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 4, 1, 5); |
| return true; |
| case unpcklpd: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 2); |
| return true; |
| case movhlps: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 6, 7, 2, 3); |
| return true; |
| case movlhps: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 1, 4, 5); |
| return true; |
| case movss: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 4, 1, 2, 3); |
| return true; |
| case movsd: |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 2, 1); |
| return true; |
| case movq128: { |
| Value *Zero = Constant::getNullValue(Ops[0]->getType()); |
| Result = BuildVectorShuffle(Zero, Ops[0], 2, 1); |
| return true; |
| } |
| //TODO IX86_BUILTIN_LOADQ: { |
| //TODO PointerType *i64Ptr = Type::getInt64PtrTy(Context); |
| //TODO Ops[0] = Builder.CreateBitCast(Ops[0], i64Ptr); |
| //TODO Ops[0] = Builder.CreateLoad(Ops[0]); |
| //TODO Value *Zero = ConstantInt::get(Type::getInt64Ty(Context), 0); |
| //TODO Result = BuildVector(Zero, Zero, NULL); |
| //TODO Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 0); |
| //TODO Result = Builder.CreateInsertElement(Result, Ops[0], Idx); |
| //TODO Result = Builder.CreateBitCast(Result, ResultType); |
| //TODO return true; |
| //TODO } |
| case loadups: { |
| VectorType *v4f32 = VectorType::get(Type::getFloatTy(Context), 4); |
| PointerType *v4f32Ptr = v4f32->getPointerTo(); |
| Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr); |
| Result = Builder.CreateAlignedLoad(BC, 1); |
| return true; |
| } |
| case loadupd: { |
| VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); |
| PointerType *v2f64Ptr = v2f64->getPointerTo(); |
| Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr); |
| Result = Builder.CreateAlignedLoad(BC, 1); |
| return true; |
| } |
| case loaddqu: { |
| VectorType *v16i8 = VectorType::get(Type::getInt8Ty(Context), 16); |
| PointerType *v16i8Ptr = v16i8->getPointerTo(); |
| Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr); |
| Result = Builder.CreateAlignedLoad(BC, 1); |
| return true; |
| } |
| case storeups: { |
| VectorType *v4f32 = VectorType::get(Type::getFloatTy(Context), 4); |
| PointerType *v4f32Ptr = v4f32->getPointerTo(); |
| Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr); |
| Builder.CreateAlignedStore(Ops[1], BC, 1); |
| return true; |
| } |
| case storeupd: { |
| VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); |
| PointerType *v2f64Ptr = v2f64->getPointerTo(); |
| Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr); |
| Builder.CreateAlignedStore(Ops[1], BC, 1); |
| return true; |
| } |
| case storedqu: { |
| VectorType *v16i8 = VectorType::get(Type::getInt8Ty(Context), 16); |
| PointerType *v16i8Ptr = v16i8->getPointerTo(); |
| Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr); |
| Builder.CreateAlignedStore(Ops[1], BC, 1); |
| return true; |
| } |
| case loadhps: { |
| PointerType *f64Ptr = Type::getDoublePtrTy(Context); |
| Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr); |
| Value *Load = Builder.CreateLoad(Ops[1]); |
| Ops[1] = |
| BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType); |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 1, 4, 5); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case loadlps: { |
| PointerType *f64Ptr = Type::getDoublePtrTy(Context); |
| Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr); |
| Value *Load = Builder.CreateLoad(Ops[1]); |
| Ops[1] = |
| BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType); |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 4, 5, 2, 3); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case loadhpd: { |
| PointerType *f64Ptr = Type::getDoublePtrTy(Context); |
| Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr); |
| Value *Load = Builder.CreateLoad(Ops[1]); |
| Ops[1] = |
| BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType); |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 2); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case loadlpd: { |
| PointerType *f64Ptr = Type::getDoublePtrTy(Context); |
| Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr); |
| Value *Load = Builder.CreateLoad(Ops[1]); |
| Ops[1] = |
| BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType); |
| Result = BuildVectorShuffle(Ops[0], Ops[1], 2, 1); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case storehps: { |
| VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); |
| PointerType *f64Ptr = Type::getDoublePtrTy(Context); |
| Ops[0] = Builder.CreateBitCast(Ops[0], f64Ptr); |
| Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 1); |
| Ops[1] = Builder.CreateBitCast(Ops[1], v2f64); |
| Ops[1] = Builder.CreateExtractElement(Ops[1], Idx); |
| Builder.CreateStore(Ops[1], Ops[0]); |
| return true; |
| } |
| case storelps: { |
| VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); |
| PointerType *f64Ptr = Type::getDoublePtrTy(Context); |
| Ops[0] = Builder.CreateBitCast(Ops[0], f64Ptr); |
| Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 0); |
| Ops[1] = Builder.CreateBitCast(Ops[1], v2f64); |
| Ops[1] = Builder.CreateExtractElement(Ops[1], Idx); |
| Builder.CreateStore(Ops[1], Ops[0]); |
| return true; |
| } |
| case movshdup: |
| Result = BuildVectorShuffle(Ops[0], Ops[0], 1, 1, 3, 3); |
| return true; |
| case movsldup: |
| Result = BuildVectorShuffle(Ops[0], Ops[0], 0, 0, 2, 2); |
| return true; |
| case vec_init_v2si: |
| Result = BuildVector(Ops[0], Ops[1], NULL); |
| return true; |
| case vec_init_v4hi: |
| // Sometimes G++ promotes arguments to int. |
| for (unsigned i = 0; i != 4; ++i) |
| Ops[i] = Builder.CreateIntCast(Ops[i], Type::getInt16Ty(Context), |
| /*isSigned*/ false); |
| Result = BuildVector(Ops[0], Ops[1], Ops[2], Ops[3], NULL); |
| return true; |
| case vec_init_v8qi: |
| // Sometimes G++ promotes arguments to int. |
| for (unsigned i = 0; i != 8; ++i) |
| Ops[i] = Builder.CreateIntCast(Ops[i], Type::getInt8Ty(Context), |
| /*isSigned*/ false); |
| Result = BuildVector(Ops[0], Ops[1], Ops[2], Ops[3], Ops[4], Ops[5], Ops[6], |
| Ops[7], NULL); |
| return true; |
| case vec_ext_v2si: |
| case vec_ext_v4hi: |
| case vec_ext_v2df: |
| case vec_ext_v2di: |
| case vec_ext_v4si: |
| case vec_ext_v4sf: |
| case vec_ext_v8hi: |
| case vec_ext_v16qi: |
| Result = Builder.CreateExtractElement(Ops[0], Ops[1]); |
| return true; |
| case vec_set_v16qi: |
| // Sometimes G++ promotes arguments to int. |
| Ops[1] = Builder.CreateIntCast(Ops[1], Type::getInt8Ty(Context), |
| /*isSigned*/ false); |
| Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); |
| return true; |
| case vec_set_v4hi: |
| case vec_set_v8hi: |
| // GCC sometimes doesn't produce the right element type. |
| Ops[1] = Builder.CreateIntCast(Ops[1], Type::getInt16Ty(Context), |
| /*isSigned*/ false); |
| Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); |
| return true; |
| case vec_set_v4si: |
| Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); |
| return true; |
| case vec_set_v2di: |
| Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); |
| return true; |
| |
| case cmpeqps: |
| PredCode = 0; |
| goto CMPXXPS; |
| case cmpltps: |
| PredCode = 1; |
| goto CMPXXPS; |
| case cmpgtps: |
| PredCode = 1; |
| flip = true; |
| goto CMPXXPS; |
| case cmpleps: |
| PredCode = 2; |
| goto CMPXXPS; |
| case cmpgeps: |
| PredCode = 2; |
| flip = true; |
| goto CMPXXPS; |
| case cmpunordps: |
| PredCode = 3; |
| goto CMPXXPS; |
| case cmpneqps: |
| PredCode = 4; |
| goto CMPXXPS; |
| case cmpnltps: |
| PredCode = 5; |
| goto CMPXXPS; |
| case cmpngtps: |
| PredCode = 5; |
| flip = true; |
| goto CMPXXPS; |
| case cmpnleps: |
| PredCode = 6; |
| goto CMPXXPS; |
| case cmpngeps: |
| PredCode = 6; |
| flip = true; |
| goto CMPXXPS; |
| case cmpordps: |
| PredCode = 7; |
| goto CMPXXPS; |
| CMPXXPS : { |
| Function *cmpps = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_cmp_ps); |
| Value *Pred = ConstantInt::get(Type::getInt8Ty(Context), PredCode); |
| Value *Arg0 = Ops[0]; |
| Value *Arg1 = Ops[1]; |
| if (flip) |
| std::swap(Arg0, Arg1); |
| Value *CallOps[3] = { Arg0, Arg1, Pred }; |
| Result = Builder.CreateCall(cmpps, CallOps); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case cmpeqss: |
| PredCode = 0; |
| goto CMPXXSS; |
| case cmpltss: |
| PredCode = 1; |
| goto CMPXXSS; |
| case cmpless: |
| PredCode = 2; |
| goto CMPXXSS; |
| case cmpunordss: |
| PredCode = 3; |
| goto CMPXXSS; |
| case cmpneqss: |
| PredCode = 4; |
| goto CMPXXSS; |
| case cmpnltss: |
| PredCode = 5; |
| goto CMPXXSS; |
| case cmpnless: |
| PredCode = 6; |
| goto CMPXXSS; |
| case cmpordss: |
| PredCode = 7; |
| goto CMPXXSS; |
| CMPXXSS : { |
| Function *cmpss = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_cmp_ss); |
| Value *Pred = ConstantInt::get(Type::getInt8Ty(Context), PredCode); |
| Value *CallOps[3] = { Ops[0], Ops[1], Pred }; |
| Result = Builder.CreateCall(cmpss, CallOps); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case cmpeqpd: |
| PredCode = 0; |
| goto CMPXXPD; |
| case cmpltpd: |
| PredCode = 1; |
| goto CMPXXPD; |
| case cmpgtpd: |
| PredCode = 1; |
| flip = true; |
| goto CMPXXPD; |
| case cmplepd: |
| PredCode = 2; |
| goto CMPXXPD; |
| case cmpgepd: |
| PredCode = 2; |
| flip = true; |
| goto CMPXXPD; |
| case cmpunordpd: |
| PredCode = 3; |
| goto CMPXXPD; |
| case cmpneqpd: |
| PredCode = 4; |
| goto CMPXXPD; |
| case cmpnltpd: |
| PredCode = 5; |
| goto CMPXXPD; |
| case cmpngtpd: |
| PredCode = 5; |
| flip = true; |
| goto CMPXXPD; |
| case cmpnlepd: |
| PredCode = 6; |
| goto CMPXXPD; |
| case cmpngepd: |
| PredCode = 6; |
| flip = true; |
| goto CMPXXPD; |
| case cmpordpd: |
| PredCode = 7; |
| goto CMPXXPD; |
| CMPXXPD : { |
| Function *cmppd = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse2_cmp_pd); |
| Value *Pred = ConstantInt::get(Type::getInt8Ty(Context), PredCode); |
| Value *Arg0 = Ops[0]; |
| Value *Arg1 = Ops[1]; |
| if (flip) |
| std::swap(Arg0, Arg1); |
| |
| Value *CallOps[3] = { Arg0, Arg1, Pred }; |
| Result = Builder.CreateCall(cmppd, CallOps); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case cmpeqsd: |
| PredCode = 0; |
| goto CMPXXSD; |
| case cmpltsd: |
| PredCode = 1; |
| goto CMPXXSD; |
| case cmplesd: |
| PredCode = 2; |
| goto CMPXXSD; |
| case cmpunordsd: |
| PredCode = 3; |
| goto CMPXXSD; |
| case cmpneqsd: |
| PredCode = 4; |
| goto CMPXXSD; |
| case cmpnltsd: |
| PredCode = 5; |
| goto CMPXXSD; |
| case cmpnlesd: |
| PredCode = 6; |
| goto CMPXXSD; |
| case cmpordsd: |
| PredCode = 7; |
| goto CMPXXSD; |
| CMPXXSD : { |
| Function *cmpsd = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse2_cmp_sd); |
| Value *Pred = ConstantInt::get(Type::getInt8Ty(Context), PredCode); |
| Value *CallOps[3] = { Ops[0], Ops[1], Pred }; |
| Result = Builder.CreateCall(cmpsd, CallOps); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case ldmxcsr: { |
| Function *ldmxcsr = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_ldmxcsr); |
| Value *Ptr = CreateTemporary(Type::getInt32Ty(Context)); |
| Builder.CreateStore(Ops[0], Ptr); |
| Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); |
| Builder.CreateCall(ldmxcsr, Ptr); |
| return true; |
| } |
| case stmxcsr: { |
| Function *stmxcsr = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_stmxcsr); |
| Value *Ptr = CreateTemporary(Type::getInt32Ty(Context)); |
| Value *BPtr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); |
| Builder.CreateCall(stmxcsr, BPtr); |
| |
| Result = Builder.CreateLoad(Ptr); |
| return true; |
| } |
| case palignr: { |
| if (isa<ConstantInt>(Ops[2])) { |
| |
| // In the header we multiply by 8, correct that back now. |
| unsigned shiftVal = (cast<ConstantInt>(Ops[2])->getZExtValue()) / 8; |
| |
| // If palignr is shifting the pair of input vectors less than 9 bytes, |
| // emit a shuffle instruction. |
| if (shiftVal <= 8) { |
| Type *IntTy = Type::getInt32Ty(Context); |
| Type *EltTy = Type::getInt8Ty(Context); |
| Type *VecTy = VectorType::get(EltTy, 8); |
| |
| Ops[1] = Builder.CreateBitCast(Ops[1], VecTy); |
| Ops[0] = Builder.CreateBitCast(Ops[0], VecTy); |
| |
| SmallVector<Constant *, 8> Indices; |
| for (unsigned i = 0; i != 8; ++i) |
| Indices.push_back(ConstantInt::get(IntTy, shiftVal + i)); |
| |
| Value *SV = ConstantVector::get(Indices); |
| Result = Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| |
| // If palignr is shifting the pair of input vectors more than 8 but less |
| // than 16 bytes, emit a logical right shift of the destination. |
| if (shiftVal < 16) { |
| // MMX has these as 1 x i64 vectors for some odd optimization reasons. |
| Type *EltTy = Type::getInt64Ty(Context); |
| Type *VecTy = VectorType::get(EltTy, 1); |
| Type *MMXTy = Type::getX86_MMXTy(Context); |
| |
| Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy); |
| Ops[1] = ConstantInt::get(VecTy, (shiftVal - 8) * 8); |
| Ops[1] = Builder.CreateBitCast(Ops[1], MMXTy); |
| |
| // Call the x86_mmx_psrl_q intrinsic on the value and the shift count built above. |
| Function *F = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_mmx_psrl_q); |
| Result = |
| Builder.CreateCall(F, ArrayRef<Value *>(&Ops[0], 2), "palignr"); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| |
| // If palignr is shifting the pair of vectors by 16 bytes or more, |
| // emit zero. |
| Result = Constant::getNullValue(ResultType); |
| return true; |
| } else { |
| error_at(gimple_location(stmt), "mask must be an immediate"); |
| Result = Ops[0]; |
| return true; |
| } |
| } |
| case palignr128: { |
| if (isa<ConstantInt>(Ops[2])) { |
| |
| // In the header we multiply by 8, correct that back now. |
| unsigned shiftVal = (cast<ConstantInt>(Ops[2])->getZExtValue()) / 8; |
| |
| // If palignr is shifting the pair of input vectors less than 17 bytes, |
| // emit a shuffle instruction. |
| if (shiftVal <= 16) { |
| Type *IntTy = Type::getInt32Ty(Context); |
| Type *EltTy = Type::getInt8Ty(Context); |
| Type *VecTy = VectorType::get(EltTy, 16); |
| |
| Ops[1] = Builder.CreateBitCast(Ops[1], VecTy); |
| Ops[0] = Builder.CreateBitCast(Ops[0], VecTy); |
| |
| SmallVector<Constant *, 16> Indices; |
| for (unsigned i = 0; i != 16; ++i) |
| Indices.push_back(ConstantInt::get(IntTy, shiftVal + i)); |
| |
| Value *SV = ConstantVector::get(Indices); |
| Result = Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| |
| // If palignr is shifting the pair of input vectors more than 16 but less |
| // than 32 bytes, emit a logical right shift of the destination. |
| if (shiftVal < 32) { |
| Type *EltTy = Type::getInt64Ty(Context); |
| Type *VecTy = VectorType::get(EltTy, 2); |
| Type *IntTy = Type::getInt32Ty(Context); |
| |
| Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); |
| Ops[1] = ConstantInt::get(IntTy, (shiftVal - 16) * 8); |
| |
| // create i32 constant |
| Function *F = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse2_psrl_dq); |
| Result = |
| Builder.CreateCall(F, ArrayRef<Value *>(&Ops[0], 2), "palignr"); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| |
| // If palignr is shifting the pair of vectors by 32 bytes or more, emit zero. |
| Result = Constant::getNullValue(ResultType); |
| return true; |
| } else { |
| error_at(gimple_location(stmt), "mask must be an immediate"); |
| Result = Ops[0]; |
| return true; |
| } |
| } |
| case movntdq: |
| case movntdq256: |
| case movnti: |
| case movntpd: |
| case movntpd256: |
| case movntps: |
| case movntps256: |
| case movntq: |
| case movntsd: |
| case movntss: { |
| MDNode *Node = MDNode::get(Context, Builder.getInt32(1)); |
| |
| // Convert the type of the pointer to a pointer to the stored type. |
| unsigned AS = Ops[0]->getType()->getPointerAddressSpace(); |
| Value *Ptr = Builder.CreateBitCast( |
| Ops[0], PointerType::get(Ops[1]->getType(), AS), "cast"); |
| |
| StoreInst *SI = Builder.CreateAlignedStore(Ops[1], Ptr, 16); |
| SI->setMetadata(TheModule->getMDKindID("nontemporal"), Node); |
| return true; |
| } |
| case rsqrtf: { |
| // rsqrtss with a Newton-Raphson step to improve accuracy: |
| // rsqrtf(x) = rsqrtss(x) * -0.5 * (rsqrtss(x) * x * rsqrtss(x) - 3.0) |
| Function *rsqrtss = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_rsqrt_ss); |
| // As rsqrtss is declared as taking a <4 x float> operand, mulch the operand |
| // into a vector. |
| Value *X = Ops[0]; |
| Type *FloatTy = Type::getFloatTy(Context); |
| Value *AsFloat = Builder.CreateFPTrunc(X, FloatTy); |
| Type *V4SFTy = VectorType::get(FloatTy, 4); |
| Value *AsVec = Builder.CreateInsertElement(UndefValue::get(V4SFTy), AsFloat, |
| Builder.getInt32(0)); |
| // Take the reciprocal square root of the vector and mulch it back into a |
| // scalar of the original type. |
| AsVec = Builder.CreateCall(rsqrtss, AsVec); |
| Value *R = Builder.CreateExtractElement(AsVec, Builder.getInt32(0)); |
| R = Builder.CreateFPExt(R, X->getType()); // rsqrtss(x) |
| |
| // Perform the Newton-Raphson step. |
| Value *RHS = |
| Builder.CreateFAdd(Builder.CreateFMul(Builder.CreateFMul(R, X), R), |
| ConstantFP::get(X->getType(), -3.0)); |
| Value *LHS = Builder.CreateFMul(R, ConstantFP::get(X->getType(), -0.5)); |
| Result = Builder.CreateFMul(LHS, RHS); |
| return true; |
| } |
| case rsqrtps_nr: { |
| // rsqrtps with a Newton-Raphson step to improve accuracy: |
| // rsqrtps_nr(x) = rsqrtps(x) * -0.5 * (rsqrtps(x) * x * rsqrtps(x) - 3.0) |
| Function *rsqrtps = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_rsqrt_ps); |
| Value *X = Ops[0]; // x |
| Value *R = Builder.CreateCall(rsqrtps, X); // rsqrtps(x) |
| Value *RHS = |
| Builder.CreateFAdd(Builder.CreateFMul(Builder.CreateFMul(R, X), R), |
| ConstantFP::get(X->getType(), -3.0)); |
| Value *LHS = Builder.CreateFMul(R, ConstantFP::get(X->getType(), -0.5)); |
| Result = Builder.CreateFMul(LHS, RHS); |
| return true; |
| } |
| case sqrtps_nr: { |
| // Turn this into sqrtps without a Newton-Raphson step - sqrtps is already |
| // accurate enough. |
| Function *sqrtps = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_sqrt_ps); |
| Result = Builder.CreateCall(sqrtps, Ops[0]); |
| return true; |
| } |
| case vec_perm_v16qi: |
| case vec_perm_v16qi_u: |
| case vec_perm_v2df: |
| case vec_perm_v2di: |
| case vec_perm_v2di_u: |
| case vec_perm_v4df: |
| case vec_perm_v4sf: |
| case vec_perm_v4si: |
| case vec_perm_v4si_u: |
| case vec_perm_v8hi: |
| case vec_perm_v8hi_u: |
| case vec_perm_v8sf: { |
| VectorType *VecTy = llvm::dyn_cast<VectorType>(Ops[0]->getType()); |
| if (Ops[1]->getType() != VecTy) |
| return false; |
| unsigned NElts = VecTy->getNumElements(); |
| Constant *Mask = llvm::dyn_cast<Constant>(Ops[2]); |
| if (!Mask) |
| return false; |
| VectorType *MaskTy = llvm::dyn_cast<VectorType>(Mask->getType()); |
| if (!MaskTy || MaskTy->getNumElements() != NElts || |
| !MaskTy->getElementType()->isIntegerTy()) |
| return false; |
| if (!MaskTy->getElementType()->isIntegerTy(32)) |
| Mask = ConstantExpr::getIntegerCast( |
| Mask, VectorType::get(Builder.getInt32Ty(), NElts), false); |
| Result = Builder.CreateShuffleVector(Ops[0], Ops[1], Mask); |
| return true; |
| } |
| case pcmpeqb128: |
| case pcmpeqb256: |
| case pcmpeqd128: |
| case pcmpeqd256: |
| case pcmpeqq: |
| case pcmpeqq256: |
| case pcmpeqw128: |
| case pcmpeqw256: |
| Result = Builder.CreateICmpEQ(Ops[0], Ops[1]); |
| // Need to sign extend since icmp returns a vector of i1. |
| Result = Builder.CreateSExt(Result, ResultType); |
| return true; |
| case pcmpgtb128: |
| case pcmpgtb256: |
| case pcmpgtd128: |
| case pcmpgtd256: |
| case pcmpgtq: |
| case pcmpgtq256: |
| case pcmpgtw128: |
| case pcmpgtw256: |
| Result = Builder.CreateICmpSGT(Ops[0], Ops[1]); |
| // Need to sign extend since icmp returns a vector of i1. |
| Result = Builder.CreateSExt(Result, ResultType); |
| return true; |
| case pswapdsf: |
| case pswapdsi: { |
| Type *MMXTy = Type::getX86_MMXTy(Context); |
| Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy); |
| Function *pswapd = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::x86_3dnowa_pswapd); |
| Result = Builder.CreateCall(pswapd, Ops[0]); |
| Result = Builder.CreateBitCast(Result, ResultType); |
| return true; |
| } |
| case clzs: { |
| // The value is usually passed in as an int rather than as a short. |
| Type *Int16Ty = Builder.getInt16Ty(); |
| Result = Builder.CreateTruncOrBitCast(Ops[0], Int16Ty); |
| Function *ctlz = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::ctlz, Int16Ty); |
| Result = Builder.CreateCall2(ctlz, Result, Builder.getTrue()); |
| return true; |
| } |
| case ctzs: { |
| // The value is usually passed in as an int rather than as a short. |
| Type *Int16Ty = Builder.getInt16Ty(); |
| Result = Builder.CreateTruncOrBitCast(Ops[0], Int16Ty); |
| Function *cttz = |
| Intrinsic::getDeclaration(TheModule, Intrinsic::cttz, Int16Ty); |
| Result = Builder.CreateCall2(cttz, Result, Builder.getTrue()); |
| return true; |
| } |
| case rdrand16_step: |
| case rdrand32_step: |
| case rdrand64_step: { |
| Intrinsic::ID ID; |
| if (Handler == rdrand16_step) |
| ID = Intrinsic::x86_rdrand_16; |
| else if (Handler == rdrand32_step) |
| ID = Intrinsic::x86_rdrand_32; |
| else { |
| assert(Handler == rdrand64_step && "Unexpected rdrand builtin!"); |
| ID = Intrinsic::x86_rdrand_64; |
| } |
| |
| Function *rdrand = Intrinsic::getDeclaration(TheModule, ID); |
| Value *Call = Builder.CreateCall(rdrand); |
| Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); |
| Result = Builder.CreateExtractValue(Call, 1); |
| return true; |
| } |
| } |
| llvm_unreachable("Forgot case for code?"); |
| } |
| |
| /* Target hook for llvm-abi.h. It returns true if an aggregate of the |
| specified type should be passed in memory. This is only called for |
| x86-64. */ |
| static bool llvm_x86_64_should_pass_aggregate_in_memory( |
| tree TreeType, enum machine_mode Mode) { |
| int IntRegs, SSERegs; |
| /* If examine_argument return 0, then it's passed byval in memory.*/ |
| int ret = examine_argument(Mode, TreeType, 0, &IntRegs, &SSERegs); |
| if (ret == 0) |
| return true; |
| if (ret == 1 && IntRegs == 0 && SSERegs == 0) // zero-sized struct |
| return true; |
| return false; |
| } |
| |
| /* Returns true if all elements of the type are integer types. */ |
| static bool llvm_x86_is_all_integer_types(Type *Ty) { |
| for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); |
| I != E; ++I) { |
| Type *STy = *I; |
| if (!STy->isIntOrIntVectorTy() && !STy->isPointerTy()) |
| return false; |
| } |
| return true; |
| } |
| |
| /* Target hook for llvm-abi.h. It returns true if an aggregate of the |
| specified type should be passed in a number of registers of mixed types. |
| It also returns a vector of types that correspond to the registers used |
| for parameter passing. This is only called for x86-32. */ |
| bool llvm_x86_32_should_pass_aggregate_in_mixed_regs( |
| tree TreeType, Type *Ty, std::vector<Type *> &Elts) { |
| // If this is a small fixed size type, investigate it. |
| HOST_WIDE_INT SrcSize = int_size_in_bytes(TreeType); |
| if (SrcSize <= 0 || SrcSize > 16) |
| return false; |
| |
| // X86-32 passes aggregates on the stack. If this is an extremely simple |
| // aggregate whose elements would be passed the same if passed as scalars, |
| // pass them that way in order to promote SROA on the caller and callee side. |
| // Note that we can't support passing all structs this way. For example, |
| // {i16, i16} should be passed in on 32-bit unit, which is not how "i16, i16" |
| // would be passed as stand-alone arguments. |
| StructType *STy = llvm::dyn_cast<StructType>(Ty); |
| if (!STy || STy->isPacked()) |
| return false; |
| |
| for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { |
| Type *EltTy = STy->getElementType(i); |
| // 32 and 64-bit integers are fine, as are float and double. Long double |
| // (which can be picked as the type for a union of 16 bytes) is not fine, |
| // as loads and stores of it get only 10 bytes. |
| if (EltTy == Type::getInt32Ty(Context) || |
| EltTy == Type::getInt64Ty(Context) || |
| EltTy == Type::getFloatTy(Context) || |
| EltTy == Type::getDoubleTy(Context) || EltTy->isPointerTy()) { |
| Elts.push_back(EltTy); |
| continue; |
| } |
| |
| // TODO: Vectors are also ok to pass if they don't require extra alignment. |
| // TODO: We can also pass structs like {i8, i32}. |
| |
| Elts.clear(); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* It returns true if an aggregate of the specified type should be passed as a |
| first class aggregate. */ |
| bool llvm_x86_should_pass_aggregate_as_fca(tree type, Type *Ty) { |
| if (!isa<COMPLEX_TYPE>(type)) |
| return false; |
| StructType *STy = llvm::dyn_cast<StructType>(Ty); |
| if (!STy || STy->isPacked()) |
| return false; |
| |
| // FIXME: Currently codegen isn't lowering most _Complex types in a way that |
| // makes it ABI compatible for x86-64. Same for _Complex char and _Complex |
| // short in 32-bit. |
| Type *EltTy = STy->getElementType(0); |
| return !((TARGET_64BIT && |
| (EltTy->isIntegerTy() || EltTy == Type::getFloatTy(Context) || |
| EltTy == Type::getDoubleTy(Context))) || EltTy->isIntegerTy(16) || |
| EltTy->isIntegerTy(8)); |
| } |
| |
| /* Target hook for llvm-abi.h. It returns true if an aggregate of the |
| specified type should be passed in memory. */ |
| bool llvm_x86_should_pass_aggregate_in_memory(tree TreeType, Type *Ty) { |
| if (llvm_x86_should_pass_aggregate_as_fca(TreeType, Ty)) |
| return false; |
| |
| enum machine_mode Mode = type_natural_mode(TreeType, NULL); |
| HOST_WIDE_INT Bytes = (Mode == BLKmode) ? int_size_in_bytes(TreeType) : (int) |
| GET_MODE_SIZE(Mode); |
| |
| // Zero sized array, struct, or class, not passed in memory. |
| if (Bytes == 0) |
| return false; |
| |
| if (!TARGET_64BIT) { |
| std::vector<Type *> Elts; |
| return !llvm_x86_32_should_pass_aggregate_in_mixed_regs(TreeType, Ty, Elts); |
| } |
| return llvm_x86_64_should_pass_aggregate_in_memory(TreeType, Mode); |
| } |
| |
| /* count_num_registers_uses - Return the number of GPRs and XMMs parameter |
| register used so far. Caller is responsible for initializing outputs. */ |
| static void count_num_registers_uses(std::vector<Type *> &ScalarElts, |
| unsigned &NumGPRs, unsigned &NumXMMs) { |
| for (size_t i = 0, e = ScalarElts.size(); i != e; ++i) { |
| Type *Ty = ScalarElts[i]; |
| if (VectorType *VTy = llvm::dyn_cast<VectorType>(Ty)) { |
| if (!TARGET_MACHO) |
| continue; |
| if (VTy->getNumElements() == 1) |
| // v1i64 is passed in GPRs on Darwin. |
| ++NumGPRs; |
| else |
| // All other vector scalar values are passed in XMM registers. |
| ++NumXMMs; |
| } else if (Ty->isIntegerTy() || Ty->isPointerTy()) { |
| ++NumGPRs; |
| } else if (Ty == Type::getVoidTy(Context)) { |
| // Padding bytes that are not passed anywhere |
| ; |
| } else { |
| // Floating point scalar argument. |
| assert(Ty->isFloatingPointTy() && |
| "Expecting a floating point primitive type!"); |
| if (Ty->getTypeID() == Type::FloatTyID || |
| Ty->getTypeID() == Type::DoubleTyID) |
| ++NumXMMs; |
| } |
| } |
| } |
| |
| /* Target hook for llvm-abi.h. This is called when an aggregate is being passed |
| in registers. If there are only enough available parameter registers to pass |
| part of the aggregate, return true. That means the aggregate should instead |
| be passed in memory. */ |
| bool llvm_x86_64_aggregate_partially_passed_in_regs( |
| std::vector<Type *> &Elts, std::vector<Type *> &ScalarElts, |
| bool isShadowReturn) { |
| // Counting number of GPRs and XMMs used so far. According to AMD64 ABI |
| // document: "If there are no registers available for any eightbyte of an |
| // argument, the whole argument is passed on the stack." X86-64 uses 6 |
| // integer |
| // For example, if two GPRs are required but only one is available, then |
| // both parts will be in memory. |
| // FIXME: This is a temporary solution. To be removed when llvm has first |
| // class aggregate values. |
| unsigned NumGPRs = isShadowReturn ? 1 : 0; |
| unsigned NumXMMs = 0; |
| count_num_registers_uses(ScalarElts, NumGPRs, NumXMMs); |
| |
| unsigned NumGPRsNeeded = 0; |
| unsigned NumXMMsNeeded = 0; |
| count_num_registers_uses(Elts, NumGPRsNeeded, NumXMMsNeeded); |
| |
| bool GPRsSatisfied = true; |
| if (NumGPRsNeeded) { |
| if (NumGPRs < 6) { |
| if ((NumGPRs + NumGPRsNeeded) > 6) |
| // Only partially satisfied. |
| return true; |
| } else |
| GPRsSatisfied = false; |
| } |
| |
| bool XMMsSatisfied = true; |
| if (NumXMMsNeeded) { |
| if (NumXMMs < 8) { |
| if ((NumXMMs + NumXMMsNeeded) > 8) |
| // Only partially satisfied. |
| return true; |
| } else |
| XMMsSatisfied = false; |
| } |
| |
| return !GPRsSatisfied || !XMMsSatisfied; |
| } |
| |
| /* Target hook for llvm-abi.h. It returns true if an aggregate of the |
| specified type should be passed in a number of registers of mixed types. |
| It also returns a vector of types that correspond to the registers used |
| for parameter passing. This is only called for x86-64. */ |
| bool llvm_x86_64_should_pass_aggregate_in_mixed_regs( |
| tree TreeType, Type *Ty, std::vector<Type *> &Elts) { |
| if (llvm_x86_should_pass_aggregate_as_fca(TreeType, Ty)) |
| return false; |
| |
| enum x86_64_reg_class Class[MAX_CLASSES]; |
| enum machine_mode Mode = type_natural_mode(TreeType, NULL); |
| bool totallyEmpty = true; |
| HOST_WIDE_INT Bytes = (Mode == BLKmode) ? int_size_in_bytes(TreeType) : (int) |
| GET_MODE_SIZE(Mode); |
| int NumClasses = classify_argument(Mode, TreeType, Class, 0); |
| if (!NumClasses) |
| return false; |
| |
| if (NumClasses == 1 && Class[0] == X86_64_INTEGERSI_CLASS) |
| // This will fit in one i32 register. |
| return false; |
| |
| for (int i = 0; i < NumClasses; ++i) { |
| switch (Class[i]) { |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| Elts.push_back(Type::getInt64Ty(Context)); |
| totallyEmpty = false; |
| Bytes -= 8; |
| break; |
| case X86_64_SSE_CLASS: |
| totallyEmpty = false; |
| // If it's a SSE class argument, then one of the followings are possible: |
| // 1. 1 x SSE, size is 8: 1 x Double. |
| // 2. 1 x SSE, size is 4: 1 x Float. |
| // 3. 1 x SSE + 1 x SSEUP, size is 16: 1 x <4 x i32>, <4 x f32>, |
| // <2 x i64>, or <2 x f64>. |
| // 4. 1 x SSE + 1 x SSESF, size is 12: 1 x Double, 1 x Float. |
| // 5. 2 x SSE, size is 16: 2 x Double. |
| if ((NumClasses - i) == 1) { |
| if (Bytes == 8) { |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Bytes -= 8; |
| } else if (Bytes == 4) { |
| Elts.push_back(Type::getFloatTy(Context)); |
| Bytes -= 4; |
| } else |
| llvm_unreachable("Not yet handled!"); |
| } else if ((NumClasses - i) == 2) { |
| if (Class[i + 1] == X86_64_SSEUP_CLASS) { |
| Type *LLVMTy = ConvertType(TreeType); |
| if (StructType *STy = llvm::dyn_cast<StructType>(LLVMTy)) |
| // Look pass the struct wrapper. |
| if (STy->getNumElements() == 1) |
| LLVMTy = STy->getElementType(0); |
| if (VectorType *VTy = llvm::dyn_cast<VectorType>(LLVMTy)) { |
| if (VTy->getNumElements() == 2) { |
| if (VTy->getElementType()->isIntegerTy()) { |
| Elts.push_back(VectorType::get(Type::getInt64Ty(Context), 2)); |
| } else { |
| Elts.push_back(VectorType::get(Type::getDoubleTy(Context), 2)); |
| } |
| Bytes -= 8; |
| } else { |
| assert(VTy->getNumElements() == 4); |
| if (VTy->getElementType()->isIntegerTy()) { |
| Elts.push_back(VectorType::get(Type::getInt32Ty(Context), 4)); |
| } else { |
| Elts.push_back(VectorType::get(Type::getFloatTy(Context), 4)); |
| } |
| Bytes -= 4; |
| } |
| } else if (llvm_x86_is_all_integer_types(LLVMTy)) { |
| Elts.push_back(VectorType::get(Type::getInt32Ty(Context), 4)); |
| Bytes -= 4; |
| } else { |
| Elts.push_back(VectorType::get(Type::getFloatTy(Context), 4)); |
| Bytes -= 4; |
| } |
| } else if (Class[i + 1] == X86_64_SSESF_CLASS) { |
| assert(Bytes == 12 && "Not yet handled!"); |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Elts.push_back(Type::getFloatTy(Context)); |
| Bytes -= 12; |
| } else if (Class[i + 1] == X86_64_SSE_CLASS) { |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Bytes -= 16; |
| } else if (Class[i + 1] == X86_64_SSEDF_CLASS && Bytes == 16) { |
| Elts.push_back(VectorType::get(Type::getFloatTy(Context), 2)); |
| Elts.push_back(Type::getDoubleTy(Context)); |
| } else if (Class[i + 1] == X86_64_INTEGER_CLASS) { |
| Elts.push_back(VectorType::get(Type::getFloatTy(Context), 2)); |
| Elts.push_back(Type::getInt64Ty(Context)); |
| } else if (Class[i + 1] == X86_64_NO_CLASS) { |
| // padding bytes, don't pass |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Elts.push_back(Type::getVoidTy(Context)); |
| Bytes -= 16; |
| } else |
| llvm_unreachable("Not yet handled!"); |
| ++i; // Already handled the next one. |
| } else |
| llvm_unreachable("Not yet handled!"); |
| break; |
| case X86_64_SSESF_CLASS: |
| totallyEmpty = false; |
| Elts.push_back(Type::getFloatTy(Context)); |
| Bytes -= 4; |
| break; |
| case X86_64_SSEDF_CLASS: |
| totallyEmpty = false; |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Bytes -= 8; |
| break; |
| case X86_64_X87_CLASS: |
| case X86_64_X87UP_CLASS: |
| case X86_64_COMPLEX_X87_CLASS: |
| return false; |
| case X86_64_NO_CLASS: |
| // Padding bytes that are not passed (unless the entire object consists |
| // of padding) |
| Elts.push_back(Type::getVoidTy(Context)); |
| Bytes -= 8; |
| break; |
| default: |
| llvm_unreachable("Unexpected register class!"); |
| } |
| } |
| |
| return !totallyEmpty; |
| } |
| |
| /* On Darwin x86-32, vectors which are not MMX nor SSE should be passed as |
| integers. On Darwin x86-64, such vectors bigger than 128 bits should be |
| passed in memory (byval). */ |
| bool llvm_x86_should_pass_vector_in_integer_regs(tree type) { |
| if (!TARGET_MACHO) |
| return false; |
| if (isa<VECTOR_TYPE>(type) && TYPE_SIZE(type) && |
| isa<INTEGER_CST>(TYPE_SIZE(type))) { |
| if (TREE_INT_CST_LOW(TYPE_SIZE(type)) == 64 && TARGET_MMX) |
| return false; |
| if (TREE_INT_CST_LOW(TYPE_SIZE(type)) == 128 && TARGET_SSE) |
| return false; |
| if (TARGET_64BIT && TREE_INT_CST_LOW(TYPE_SIZE(type)) > 128) |
| return false; |
| } |
| return true; |
| } |
| |
| /* On Darwin x86-64, vectors which are bigger than 128 bits should be passed |
| byval (in memory). */ |
| bool llvm_x86_should_pass_vector_using_byval_attr(tree type) { |
| if (!TARGET_MACHO) |
| return false; |
| if (!TARGET_64BIT) |
| return false; |
| if (isa<VECTOR_TYPE>(type) && TYPE_SIZE(type) && |
| isa<INTEGER_CST>(TYPE_SIZE(type))) { |
| if (TREE_INT_CST_LOW(TYPE_SIZE(type)) <= 128) |
| return false; |
| } |
| return true; |
| } |
| |
| /* The MMX vector v1i64 is returned in EAX and EDX on Darwin. Communicate |
| this by returning i64 here. Likewise, (generic) vectors such as v2i16 |
| are returned in EAX. |
| On Darwin x86-64, v1i64 is returned in RAX and other MMX vectors are |
| returned in XMM0. Judging from comments, this would not be right for |
| Win64. Don't know about Linux. */ |
| tree llvm_x86_should_return_vector_as_scalar(tree type, bool isBuiltin) { |
| if (TARGET_MACHO && !isBuiltin && isa<VECTOR_TYPE>(type) && TYPE_SIZE(type) && |
| isa<INTEGER_CST>(TYPE_SIZE(type))) { |
| if (TREE_INT_CST_LOW(TYPE_SIZE(type)) == 64 && |
| TYPE_VECTOR_SUBPARTS(type) == 1) |
| return uint64_type_node; |
| if (TARGET_64BIT && TREE_INT_CST_LOW(TYPE_SIZE(type)) == 64) |
| return double_type_node; |
| if (TREE_INT_CST_LOW(TYPE_SIZE(type)) == 32) |
| return uint32_type_node; |
| } |
| return 0; |
| } |
| |
| /* MMX vectors are returned in XMM0 on x86-64 Darwin. The easiest way to |
| communicate this is pretend they're doubles. |
| Judging from comments, this would not be right for Win64. Don't know |
| about Linux. */ |
| tree llvm_x86_should_return_selt_struct_as_scalar(tree type) { |
| tree retType = isSingleElementStructOrArray(type, true, false); |
| if (!retType || !TARGET_64BIT || !TARGET_MACHO) |
| return retType; |
| if (isa<VECTOR_TYPE>(retType) && TYPE_SIZE(retType) && |
| isa<INTEGER_CST>(TYPE_SIZE(retType)) && |
| TREE_INT_CST_LOW(TYPE_SIZE(retType)) == 64) |
| return double_type_node; |
| return retType; |
| } |
| |
| /* MMX vectors v2i32, v4i16, v8i8, v2f32 are returned using sret on Darwin |
| 32-bit. Vectors bigger than 128 are returned using sret. */ |
| bool llvm_x86_should_return_vector_as_shadow(tree type, bool isBuiltin) { |
| if (TARGET_MACHO && !isBuiltin && !TARGET_64BIT && isa<VECTOR_TYPE>(type) && |
| TYPE_SIZE(type) && isa<INTEGER_CST>(TYPE_SIZE(type))) { |
| if (TREE_INT_CST_LOW(TYPE_SIZE(type)) == 64 && |
| TYPE_VECTOR_SUBPARTS(type) > 1) |
| return true; |
| } |
| if (TREE_INT_CST_LOW(TYPE_SIZE(type)) > 128) |
| return true; |
| return false; |
| } |
| |
| // llvm_x86_should_not_return_complex_in_memory - Return true if TYPE |
| // should be returned using multiple value return instruction. |
| bool llvm_x86_should_not_return_complex_in_memory(tree type) { |
| |
| if (!TARGET_64BIT) |
| return false; |
| |
| if (isa<COMPLEX_TYPE>(type) && TREE_INT_CST_LOW(TYPE_SIZE_UNIT(type)) == 32) |
| return true; |
| |
| return false; |
| } |
| |
| // llvm_suitable_multiple_ret_value_type - Return TRUE if return value |
| // of type TY should be returned using multiple value return instruction. |
| static bool llvm_suitable_multiple_ret_value_type(Type *Ty, tree TreeType) { |
| |
| if (!TARGET_64BIT) |
| return false; |
| |
| StructType *STy = llvm::dyn_cast<StructType>(Ty); |
| if (!STy) |
| return false; |
| |
| if (llvm_x86_should_not_return_complex_in_memory(TreeType)) |
| return true; |
| |
| // Let gcc specific routine answer the question. |
| enum x86_64_reg_class Class[MAX_CLASSES]; |
| enum machine_mode Mode = type_natural_mode(TreeType, NULL); |
| int NumClasses = classify_argument(Mode, TreeType, Class, 0); |
| if (NumClasses == 0) |
| return false; |
| |
| if (NumClasses == 1 && |
| (Class[0] == X86_64_INTEGERSI_CLASS || Class[0] == X86_64_INTEGER_CLASS)) |
| // This will fit in one i64 register. |
| return false; |
| |
| if (NumClasses == 2 && |
| (Class[0] == X86_64_NO_CLASS || Class[1] == X86_64_NO_CLASS)) |
| // One word is padding which is not passed at all; treat this as returning |
| // the scalar type of the other word. |
| return false; |
| |
| // Otherwise, use of multiple value return is OK. |
| return true; |
| } |
| |
| // llvm_x86_scalar_type_for_struct_return - Return LLVM type if TYPE |
| // can be returned as a scalar, otherwise return NULL. |
| Type *llvm_x86_scalar_type_for_struct_return(tree type, unsigned *Offset) { |
| *Offset = 0; |
| Type *Ty = ConvertType(type); |
| uint64_t Size = getDataLayout().getTypeAllocSize(Ty); |
| if (Size == 0) |
| return Type::getVoidTy(Context); |
| else if (Size == 1) |
| return Type::getInt8Ty(Context); |
| else if (Size == 2) |
| return Type::getInt16Ty(Context); |
| else if (Size <= 4) |
| return Type::getInt32Ty(Context); |
| |
| // Check if Ty should be returned using multiple value return instruction. |
| if (llvm_suitable_multiple_ret_value_type(Ty, type)) |
| return NULL; |
| |
| if (TARGET_64BIT) { |
| // This logic relies on llvm_suitable_multiple_ret_value_type to have |
| // removed anything not expected here. |
| enum x86_64_reg_class Class[MAX_CLASSES]; |
| enum machine_mode Mode = type_natural_mode(type, NULL); |
| int NumClasses = classify_argument(Mode, type, Class, 0); |
| if (NumClasses == 0) |
| return Type::getInt64Ty(Context); |
| |
| if (NumClasses == 1) { |
| if (Class[0] == X86_64_INTEGERSI_CLASS || |
| Class[0] == X86_64_INTEGER_CLASS) { |
| // one int register |
| HOST_WIDE_INT Bytes = (Mode == BLKmode) ? int_size_in_bytes(type) |
| : (int) GET_MODE_SIZE(Mode); |
| if (Bytes > 4) |
| return Type::getInt64Ty(Context); |
| else if (Bytes > 2) |
| return Type::getInt32Ty(Context); |
| else if (Bytes > 1) |
| return Type::getInt16Ty(Context); |
| else |
| return Type::getInt8Ty(Context); |
| } |
| llvm_unreachable("Unexpected type!"); |
| } |
| if (NumClasses == 2) { |
| if (Class[1] == X86_64_NO_CLASS) { |
| if (Class[0] == X86_64_INTEGER_CLASS || Class[0] == X86_64_NO_CLASS || |
| Class[0] == X86_64_INTEGERSI_CLASS) |
| return Type::getInt64Ty(Context); |
| else if (Class[0] == X86_64_SSE_CLASS || Class[0] == X86_64_SSEDF_CLASS) |
| return Type::getDoubleTy(Context); |
| else if (Class[0] == X86_64_SSESF_CLASS) |
| return Type::getFloatTy(Context); |
| llvm_unreachable("Unexpected type!"); |
| } |
| if (Class[0] == X86_64_NO_CLASS) { |
| *Offset = 8; |
| if (Class[1] == X86_64_INTEGERSI_CLASS || |
| Class[1] == X86_64_INTEGER_CLASS) |
| return Type::getInt64Ty(Context); |
| else if (Class[1] == X86_64_SSE_CLASS || Class[1] == X86_64_SSEDF_CLASS) |
| return Type::getDoubleTy(Context); |
| else if (Class[1] == X86_64_SSESF_CLASS) |
| return Type::getFloatTy(Context); |
| llvm_unreachable("Unexpected type!"); |
| } |
| llvm_unreachable("Unexpected type!"); |
| } |
| llvm_unreachable("Unexpected type!"); |
| } else { |
| if (Size <= 8) |
| return Type::getInt64Ty(Context); |
| else if (Size <= 16) |
| return IntegerType::get(Context, 128); |
| else if (Size <= 32) |
| return IntegerType::get(Context, 256); |
| } |
| return NULL; |
| } |
| |
| /// llvm_x86_64_get_multiple_return_reg_classes - Find register classes used |
| /// to return Ty. It is expected that Ty requires multiple return values. |
| /// This routine uses GCC implementation to find required register classes. |
| /// The original implementation of this routine is based on |
| /// llvm_x86_64_should_pass_aggregate_in_mixed_regs code. |
| static void llvm_x86_64_get_multiple_return_reg_classes( |
| tree TreeType, Type */*Ty*/, std::vector<Type *> &Elts) { |
| enum x86_64_reg_class Class[MAX_CLASSES]; |
| enum machine_mode Mode = type_natural_mode(TreeType, NULL); |
| HOST_WIDE_INT Bytes = (Mode == BLKmode) ? int_size_in_bytes(TreeType) : (int) |
| GET_MODE_SIZE(Mode); |
| int NumClasses = classify_argument(Mode, TreeType, Class, 0); |
| assert(NumClasses && "This type does not need multiple return registers!"); |
| |
| assert((NumClasses != 1 || Class[0] != X86_64_INTEGERSI_CLASS) && |
| "This will fit in one i32 register!"); |
| |
| assert((NumClasses != 1 || Class[0] != X86_64_INTEGER_CLASS) && |
| "This type does not need multiple return registers!"); |
| |
| // classify_argument uses a single X86_64_NO_CLASS as a special case for |
| // empty structs. Recognize it and don't add any return values in that |
| // case. |
| if (NumClasses == 1 && Class[0] == X86_64_NO_CLASS) |
| return; |
| |
| for (int i = 0; i < NumClasses; ++i) { |
| switch (Class[i]) { |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| Elts.push_back(Type::getInt64Ty(Context)); |
| Bytes -= 8; |
| break; |
| case X86_64_SSE_CLASS: |
| // If it's a SSE class argument, then one of the followings are possible: |
| // 1. 1 x SSE, size is 8: 1 x Double. |
| // 2. 1 x SSE, size is 4: 1 x Float. |
| // 3. 1 x SSE + 1 x SSEUP, size is 16: 1 x <4 x i32>, <4 x f32>, |
| // <2 x i64>, or <2 x f64>. |
| // 4. 1 x SSE + 1 x SSESF, size is 12: 1 x Double, 1 x Float. |
| // 5. 2 x SSE, size is 16: 2 x Double. |
| // 6. 1 x SSE, 1 x NO: Second is padding, pass as double. |
| if ((NumClasses - i) == 1) { |
| if (Bytes == 8) { |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Bytes -= 8; |
| } else if (Bytes == 4) { |
| Elts.push_back(Type::getFloatTy(Context)); |
| Bytes -= 4; |
| } else |
| llvm_unreachable("Not yet handled!"); |
| } else if ((NumClasses - i) == 2) { |
| if (Class[i + 1] == X86_64_SSEUP_CLASS) { |
| Type *Ty = ConvertType(TreeType); |
| if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) |
| // Look pass the struct wrapper. |
| if (STy->getNumElements() == 1) |
| Ty = STy->getElementType(0); |
| if (VectorType *VTy = llvm::dyn_cast<VectorType>(Ty)) { |
| if (VTy->getNumElements() == 2) { |
| if (VTy->getElementType()->isIntegerTy()) |
| Elts.push_back(VectorType::get(Type::getInt64Ty(Context), 2)); |
| else |
| Elts.push_back(VectorType::get(Type::getDoubleTy(Context), 2)); |
| Bytes -= 8; |
| } else { |
| assert(VTy->getNumElements() == 4); |
| if (VTy->getElementType()->isIntegerTy()) |
| Elts.push_back(VectorType::get(Type::getInt32Ty(Context), 4)); |
| else |
| Elts.push_back(VectorType::get(Type::getFloatTy(Context), 4)); |
| Bytes -= 4; |
| } |
| } else if (llvm_x86_is_all_integer_types(Ty)) { |
| Elts.push_back(VectorType::get(Type::getInt32Ty(Context), 4)); |
| Bytes -= 4; |
| } else { |
| Elts.push_back(VectorType::get(Type::getFloatTy(Context), 4)); |
| Bytes -= 4; |
| } |
| } else if (Class[i + 1] == X86_64_SSESF_CLASS) { |
| assert(Bytes == 12 && "Not yet handled!"); |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Elts.push_back(Type::getFloatTy(Context)); |
| Bytes -= 12; |
| } else if (Class[i + 1] == X86_64_SSE_CLASS) { |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Bytes -= 16; |
| } else if (Class[i + 1] == X86_64_SSEDF_CLASS && Bytes == 16) { |
| Elts.push_back(VectorType::get(Type::getFloatTy(Context), 2)); |
| Elts.push_back(Type::getDoubleTy(Context)); |
| } else if (Class[i + 1] == X86_64_INTEGER_CLASS) { |
| Elts.push_back(VectorType::get(Type::getFloatTy(Context), 2)); |
| Elts.push_back(Type::getInt64Ty(Context)); |
| } else if (Class[i + 1] == X86_64_NO_CLASS) { |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Bytes -= 16; |
| } else { |
| llvm_unreachable("Not yet handled!"); |
| } |
| ++i; // Already handled the next one. |
| } else |
| llvm_unreachable("Not yet handled!"); |
| break; |
| case X86_64_SSESF_CLASS: |
| Elts.push_back(Type::getFloatTy(Context)); |
| Bytes -= 4; |
| break; |
| case X86_64_SSEDF_CLASS: |
| Elts.push_back(Type::getDoubleTy(Context)); |
| Bytes -= 8; |
| break; |
| case X86_64_X87_CLASS: |
| case X86_64_X87UP_CLASS: |
| case X86_64_COMPLEX_X87_CLASS: |
| Elts.push_back(Type::getX86_FP80Ty(Context)); |
| break; |
| case X86_64_NO_CLASS: |
| // padding bytes. |
| Elts.push_back(Type::getInt64Ty(Context)); |
| break; |
| default: |
| llvm_unreachable("Unexpected register class!"); |
| } |
| } |
| } |
| |
| // Return LLVM Type if TYPE can be returned as an aggregate, |
| // otherwise return NULL. |
| Type *llvm_x86_aggr_type_for_struct_return(tree type) { |
| Type *Ty = ConvertType(type); |
| if (!llvm_suitable_multiple_ret_value_type(Ty, type)) |
| return NULL; |
| |
| StructType *STy = cast<StructType>(Ty); |
| std::vector<Type *> ElementTypes; |
| |
| // Special handling for _Complex. |
| if (llvm_x86_should_not_return_complex_in_memory(type)) { |
| ElementTypes.push_back(Type::getX86_FP80Ty(Context)); |
| ElementTypes.push_back(Type::getX86_FP80Ty(Context)); |
| return StructType::get(Context, ElementTypes, STy->isPacked()); |
| } |
| |
| std::vector<Type *> GCCElts; |
| llvm_x86_64_get_multiple_return_reg_classes(type, Ty, GCCElts); |
| return StructType::get(Context, GCCElts, false); |
| } |
| |
| // llvm_x86_extract_mrv_array_element - Helper function that help extract |
| // an array element from multiple return value. |
| // |
| // Here, SRC is returning multiple values. DEST's DESTFIELNO field is an array. |
| // Extract SRCFIELDNO's ELEMENO value and store it in DEST's FIELDNO field's |
| // ELEMENTNO. |
| // |
| static void llvm_x86_extract_mrv_array_element( |
| Value *Src, Value *Dest, unsigned SrcFieldNo, unsigned SrcElemNo, |
| unsigned DestFieldNo, unsigned DestElemNo, LLVMBuilder &Builder, |
| bool isVolatile) { |
| Value *EVI = Builder.CreateExtractValue(Src, SrcFieldNo, "mrv_gr"); |
| StructType *STy = cast<StructType>(Src->getType()); |
| Value *Idxs[3]; |
| Idxs[0] = ConstantInt::get(Type::getInt32Ty(Context), 0); |
| Idxs[1] = ConstantInt::get(Type::getInt32Ty(Context), DestFieldNo); |
| Idxs[2] = ConstantInt::get(Type::getInt32Ty(Context), DestElemNo); |
| Value *GEP = Builder.CreateGEP(Dest, Idxs, "mrv_gep"); |
| if (STy->getElementType(SrcFieldNo)->isVectorTy()) { |
| Value *ElemIndex = ConstantInt::get(Type::getInt32Ty(Context), SrcElemNo); |
| Value *EVIElem = Builder.CreateExtractElement(EVI, ElemIndex, "mrv"); |
| Builder.CreateAlignedStore(EVIElem, GEP, 1, isVolatile); |
| } else { |
| Builder.CreateAlignedStore(EVI, GEP, 1, isVolatile); |
| } |
| } |
| |
// llvm_x86_extract_multiple_return_value - Extract multiple values returned
// by SRC and store them in DEST. It is expected that the SRC and
// DEST types are StructType, but they may not match.
| void llvm_x86_extract_multiple_return_value( |
| Value *Src, Value *Dest, bool isVolatile, LLVMBuilder &Builder) { |
| |
| StructType *STy = cast<StructType>(Src->getType()); |
| unsigned NumElements = STy->getNumElements(); |
| |
| PointerType *PTy = cast<PointerType>(Dest->getType()); |
| StructType *DestTy = cast<StructType>(PTy->getElementType()); |
| |
| unsigned SNO = 0; |
| unsigned DNO = 0; |
| |
| if (DestTy->getNumElements() == 3 && |
| DestTy->getElementType(0)->getTypeID() == Type::FloatTyID && |
| DestTy->getElementType(1)->getTypeID() == Type::FloatTyID && |
| DestTy->getElementType(2)->getTypeID() == Type::FloatTyID) { |
| // DestTy is { float, float, float } |
| // STy is { <4 x float>, float > } |
| |
| Value *EVI = Builder.CreateExtractValue(Src, 0, "mrv_gr"); |
| |
| Value *E0Index = ConstantInt::get(Type::getInt32Ty(Context), 0); |
| Value *EVI0 = Builder.CreateExtractElement(EVI, E0Index, "mrv.v"); |
| Value *GEP0 = Builder.CreateStructGEP(Dest, 0, "mrv_gep"); |
| Builder.CreateAlignedStore(EVI0, GEP0, 1, isVolatile); |
| |
| Value *E1Index = ConstantInt::get(Type::getInt32Ty(Context), 1); |
| Value *EVI1 = Builder.CreateExtractElement(EVI, E1Index, "mrv.v"); |
| Value *GEP1 = Builder.CreateStructGEP(Dest, 1, "mrv_gep"); |
| Builder.CreateAlignedStore(EVI1, GEP1, 1, isVolatile); |
| |
| Value *GEP2 = Builder.CreateStructGEP(Dest, 2, "mrv_gep"); |
| Value *EVI2 = Builder.CreateExtractValue(Src, 1, "mrv_gr"); |
| Builder.CreateAlignedStore(EVI2, GEP2, 1, isVolatile); |
| return; |
| } |
| |
| while (SNO < NumElements) { |
| |
| Type *DestElemType = DestTy->getElementType(DNO); |
| |
| // Directly access first class values using getresult. |
| if (DestElemType->isSingleValueType()) { |
| Value *GEP = Builder.CreateStructGEP(Dest, DNO, "mrv_gep"); |
| Value *EVI = Builder.CreateExtractValue(Src, SNO, "mrv_gr"); |
| Builder.CreateAlignedStore(EVI, GEP, 1, isVolatile); |
| ++DNO; |
| ++SNO; |
| continue; |
| } |
| |
| // Special treatement for _Complex. |
| if (DestElemType->isStructTy()) { |
| Value *Idxs[3]; |
| Idxs[0] = ConstantInt::get(Type::getInt32Ty(Context), 0); |
| Idxs[1] = ConstantInt::get(Type::getInt32Ty(Context), DNO); |
| |
| Idxs[2] = ConstantInt::get(Type::getInt32Ty(Context), 0); |
| Value *GEP = Builder.CreateGEP(Dest, Idxs, "mrv_gep"); |
| Value *EVI = Builder.CreateExtractValue(Src, 0, "mrv_gr"); |
| Builder.CreateAlignedStore(EVI, GEP, 1, isVolatile); |
| ++SNO; |
| |
| Idxs[2] = ConstantInt::get(Type::getInt32Ty(Context), 1); |
| GEP = Builder.CreateGEP(Dest, Idxs, "mrv_gep"); |
| EVI = Builder.CreateExtractValue(Src, 1, "mrv_gr"); |
| Builder.CreateAlignedStore(EVI, GEP, 1, isVolatile); |
| ++DNO; |
| ++SNO; |
| continue; |
| } |
| |
| // Access array elements individually. Note, Src and Dest type may |
| // not match. For example { <2 x float>, float } and { float[3]; } |
| ArrayType *ATy = cast<ArrayType>(DestElemType); |
| unsigned ArraySize = ATy->getNumElements(); |
| unsigned DElemNo = 0; // DestTy's DNO field's element number |
| while (DElemNo < ArraySize) { |
| unsigned i = 0; |
| unsigned Size = 1; |
| |
| if (VectorType *SElemTy = |
| llvm::dyn_cast<VectorType>(STy->getElementType(SNO))) { |
| Size = SElemTy->getNumElements(); |
| if (SElemTy->getElementType()->getTypeID() == Type::FloatTyID && |
| Size == 4) |
| // Ignore last two <4 x float> elements. |
| Size = 2; |
| } |
| while (i < Size) { |
| llvm_x86_extract_mrv_array_element(Src, Dest, SNO, i++, DNO, DElemNo++, |
| Builder, isVolatile); |
| } |
| // Consumed this src field. Try next one. |
| ++SNO; |
| } |
| // Finished building current dest field. |
| ++DNO; |
| } |
| } |
| |
| /// llvm_x86_should_pass_aggregate_in_integer_regs - x86-32 is same as the |
| /// default. x86-64 detects the case where a type is 16 bytes long but |
| /// only 8 of them are passed, the rest being padding (*size is set to 8 |
| /// to identify this case). It also pads out the size to that of a full |
| /// register. This means we'll be loading bytes off the end of the object |
| /// in some cases. That's what gcc does, so it must be OK, right? Right? |
| bool llvm_x86_should_pass_aggregate_in_integer_regs(tree type, unsigned *size, |
| bool *DontCheckAlignment) { |
| *size = 0; |
| if (TARGET_64BIT) { |
| enum x86_64_reg_class Class[MAX_CLASSES]; |
| enum machine_mode Mode = type_natural_mode(type, NULL); |
| int NumClasses = classify_argument(Mode, type, Class, 0); |
| *DontCheckAlignment = true; |
| if (NumClasses == 1 && (Class[0] == X86_64_INTEGER_CLASS || |
| Class[0] == X86_64_INTEGERSI_CLASS)) { |
| // one int register |
| HOST_WIDE_INT Bytes = (Mode == BLKmode) ? int_size_in_bytes(type) : (int) |
| GET_MODE_SIZE(Mode); |
| if (Bytes > 4) |
| *size = 8; |
| else if (Bytes > 2) |
| *size = 4; |
| else |
| *size = Bytes; |
| return true; |
| } |
| if (NumClasses == 2 && (Class[0] == X86_64_INTEGERSI_CLASS || |
| Class[0] == X86_64_INTEGER_CLASS)) { |
| if (Class[1] == X86_64_INTEGER_CLASS) { |
| // 16 byte object, 2 int registers |
| *size = 16; |
| return true; |
| } |
| // IntegerSI can occur only as element 0. |
| if (Class[1] == X86_64_NO_CLASS) { |
| // 16 byte object, only 1st register has information |
| *size = 8; |
| return true; |
| } |
| } |
| return false; |
| } else |
| return !isSingleElementStructOrArray(type, false, true); |
| } |
| |
/// llvm_x86_override_target_environment - Return "gnux32" when TARGET_X32 is
/// defined and evaluates to true; otherwise return the empty string.
const char *llvm_x86_override_target_environment() {
  const char *Environment = "";
#ifdef TARGET_X32
  if (TARGET_X32)
    Environment = "gnux32";
#endif
  return Environment;
}
| |
| static void addFeature(llvm::SubtargetFeatures &F, const char *Feature, |
| bool Enabled) { |
| const char *Prefix = Enabled ? "+" : "-"; |
| F.AddFeature(std::string(Prefix) + Feature); |
| } |
| |
| void llvm_x86_set_subtarget_features(std::string &C, |
| llvm::SubtargetFeatures &F) { |
| if (TARGET_MACHO && !strcmp(ix86_arch_string, "apple")) |
| C = TARGET_64BIT ? "core2" : "yonah"; |
| else |
| C = ix86_arch_string; |
| |
| addFeature(F, "64bit", TARGET_64BIT); |
| addFeature(F, "3dnow", TARGET_3DNOW); |
| addFeature(F, "3dnowa", TARGET_3DNOW_A); |
| addFeature(F, "aes", TARGET_AES); |
| addFeature(F, "avx", TARGET_AVX); |
| addFeature(F, "cx16", TARGET_CMPXCHG16B); |
| addFeature(F, "fma", TARGET_FMA); |
| addFeature(F, "fma4", TARGET_FMA4); |
| addFeature(F, "mmx", TARGET_MMX); |
| addFeature(F, "popcnt", TARGET_POPCNT); |
| #ifdef TARGET_RDRND |
| addFeature(F, "rdrnd", TARGET_RDRND); |
| #endif |
| addFeature(F, "sse", TARGET_SSE); |
| addFeature(F, "sse2", TARGET_SSE2); |
| addFeature(F, "sse3", TARGET_SSE3); |
| addFeature(F, "sse4.1", TARGET_SSE4_1); |
| addFeature(F, "sse4.2", TARGET_SSE4_2); |
| addFeature(F, "sse4a", TARGET_SSE4A); |
| addFeature(F, "ssse3", TARGET_SSSE3); |
| } |