| //===---- CIRGenBuiltinAArch64.cpp - Emit CIR for AArch64 builtins --------===// |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This contains code to emit ARM64 Builtin calls as CIR or a function call |
| // to be later resolved. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "CIRGenBuilder.h" |
| #include "CIRGenFunction.h" |
| #include "clang/Basic/AArch64CodeGenUtils.h" |
| #include "clang/Basic/TargetBuiltins.h" |
| #include "clang/CIR/Dialect/IR/CIRTypes.h" |
| #include "clang/CIR/MissingFeatures.h" |
| |
| // TODO(cir): once all builtins are covered, decide whether we still |
| // need to use LLVM intrinsics or if there's a better approach to follow. Right |
| // now the intrinsics are reused to make it convenient to encode all thousands |
| // of them and passing down to LLVM lowering. |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsAArch64.h" |
| |
| #include "mlir/IR/BuiltinTypes.h" |
| #include "mlir/IR/Value.h" |
| #include "clang/AST/GlobalDecl.h" |
| #include "clang/Basic/Builtins.h" |
| |
| using namespace clang; |
| using namespace clang::CIRGen; |
| using namespace llvm; |
| using namespace clang::aarch64; |
| |
| // Generate vscale * scalingFactor |
| static mlir::Value genVscaleTimesFactor(mlir::Location loc, |
| CIRGenBuilderTy builder, |
| mlir::Type cirTy, |
| int32_t scalingFactor) { |
| mlir::Value vscale = builder.emitIntrinsicCallOp(loc, "vscale", cirTy); |
| return builder.createNUWAMul(loc, vscale, |
| builder.getUInt64(scalingFactor, loc)); |
| } |
| |
| #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ |
| {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ |
| TypeModifier} |
| |
| #define SVEMAP2(NameBase, TypeModifier) \ |
| {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier} |
| static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = { |
| #define GET_SVE_LLVM_INTRINSIC_MAP |
| #include "clang/Basic/arm_sve_builtin_cg.inc" |
| #undef GET_SVE_LLVM_INTRINSIC_MAP |
| }; |
| |
| static bool aarch64SIMDIntrinsicsProvenSorted = false; |
| static bool aarch64SISDIntrinsicsProvenSorted = false; |
| static bool aarch64SVEIntrinsicsProvenSorted = false; |
| |
| // Check if Builtin `builtinId` is present in `intrinsicMap`. If yes, returns |
| // the corresponding info struct. |
| static const ARMVectorIntrinsicInfo * |
| findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap, |
| unsigned builtinID, bool &mapProvenSorted) { |
| |
| #ifndef NDEBUG |
| if (!mapProvenSorted) { |
| assert(llvm::is_sorted(intrinsicMap)); |
| mapProvenSorted = true; |
| } |
| #endif |
| |
| const ARMVectorIntrinsicInfo *info = |
| llvm::lower_bound(intrinsicMap, builtinID); |
| |
| if (info != intrinsicMap.end() && info->BuiltinID == builtinID) |
| return info; |
| |
| return nullptr; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Generic helpers |
| //===----------------------------------------------------------------------===// |
| // Emit an intrinsic where all operands are of the same type as the result. |
| // Depending on mode, this may be a constrained floating-point intrinsic. |
| static mlir::Value |
| emitCallMaybeConstrainedBuiltin(CIRGenBuilderTy &builder, mlir::Location loc, |
| StringRef intrName, mlir::Type retTy, |
| llvm::SmallVector<mlir::Value> &ops) { |
| assert(!cir::MissingFeatures::emitConstrainedFPCall()); |
| |
| return builder.emitIntrinsicCallOp(loc, intrName, retTy, ops); |
| } |
| |
| static llvm::StringRef getLLVMIntrNameNoPrefix(llvm::Intrinsic::ID intrID) { |
| llvm::StringRef llvmIntrName = llvm::Intrinsic::getBaseName(intrID); |
| assert(llvmIntrName.starts_with("llvm.") && "Not an LLVM intrinsic!"); |
| return llvmIntrName.drop_front(/*strlen("llvm.")=*/5); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // NEON helpers |
| //===----------------------------------------------------------------------===// |
| /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra |
| /// argument that specifies the vector type. The additional argument is meant |
| /// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function |
| /// should be kept consistent with the logic in Sema. |
| /// TODO: Make this return false for SISD builtins. |
| /// TODO(cir): Share this with ARM.cpp |
| static bool hasExtraNeonArgument(unsigned builtinID) { |
| // Required by the headers included below, but not in this particular |
| // function. |
| [[maybe_unused]] int PtrArgNum = -1; |
| [[maybe_unused]] bool HasConstPtr = false; |
| |
| // The mask encodes the type. We don't care about the actual value. Instead, |
| // we just check whether its been set. |
| uint64_t mask = 0; |
| switch (builtinID) { |
| #define GET_NEON_OVERLOAD_CHECK |
| #include "clang/Basic/arm_fp16.inc" |
| #include "clang/Basic/arm_neon.inc" |
| #undef GET_NEON_OVERLOAD_CHECK |
| // Non-neon builtins for controling VFP that take extra argument for |
| // discriminating the type. |
| case ARM::BI__builtin_arm_vcvtr_f: |
| case ARM::BI__builtin_arm_vcvtr_d: |
| mask = 1; |
| } |
| switch (builtinID) { |
| default: |
| break; |
| } |
| |
| return mask != 0; |
| } |
| |
| static cir::VectorType getFloatNeonType(CIRGenFunction &cgf, |
| NeonTypeFlags intTypeFlags) { |
| int isQuad = intTypeFlags.isQuad(); |
| switch (intTypeFlags.getEltType()) { |
| case NeonTypeFlags::Int16: |
| return cir::VectorType::get(cgf.fP16Ty, (4 << isQuad)); |
| case NeonTypeFlags::Int32: |
| return cir::VectorType::get(cgf.floatTy, (2 << isQuad)); |
| case NeonTypeFlags::Int64: |
| return cir::VectorType::get(cgf.doubleTy, (1 << isQuad)); |
| default: |
| llvm_unreachable("Type can't be converted to floating-point!"); |
| } |
| } |
| |
| static int64_t getIntValueFromConstOp(mlir::Value val) { |
| return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue(); |
| } |
| |
| /// Build a constant shift amount vector of `vecTy` to shift a vector |
| /// Here `shiftVal` is a constant integer that will be broadcast into a |
| /// a const vector of `vecTy` which is the return value of this function |
| /// If `neg` is true, the shift amount is negated before splatting (used |
| /// when encoding a right shift as a left shift by a negative amount for |
| /// intrinsics like aarch64.neon.{s,u}rshl). |
| static mlir::Value emitNeonShiftVector(CIRGenBuilderTy &builder, |
| mlir::Value shiftVal, |
| cir::VectorType vecTy, |
| mlir::Location loc, bool neg) { |
| if (neg) { |
| int64_t shiftAmt = -getIntValueFromConstOp(shiftVal); |
| shiftVal = builder.getConstantInt(loc, vecTy.getElementType(), shiftAmt); |
| } |
| mlir::Type eltTy = vecTy.getElementType(); |
| if (shiftVal.getType() != eltTy) { |
| shiftVal = builder.createIntCast(shiftVal, eltTy); |
| } |
| return cir::VecSplatOp::create(builder, loc, vecTy, shiftVal); |
| } |
| |
| // TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone. |
| template <typename Operation> |
| static mlir::Value |
| emitNeonCallToOp(CIRGenModule &cgm, CIRGenBuilderTy &builder, |
| llvm::SmallVector<mlir::Type> argTypes, |
| llvm::SmallVectorImpl<mlir::Value> &args, |
| std::optional<llvm::StringRef> intrinsicName, |
| mlir::Type funcResTy, mlir::Location loc, |
| bool isConstrainedFPIntrinsic = false, unsigned shift = 0, |
| bool rightshift = false) { |
| // TODO(cir): Consider removing the following unreachable when we have |
| // emitConstrainedFPCall feature implemented |
| assert(!cir::MissingFeatures::emitConstrainedFPCall()); |
| if (isConstrainedFPIntrinsic) |
| cgm.errorNYI(loc, std::string("constrained FP intrinsic")); |
| |
| for (unsigned j = 0; j < argTypes.size(); ++j) { |
| if (isConstrainedFPIntrinsic) { |
| assert(!cir::MissingFeatures::emitConstrainedFPCall()); |
| } |
| if (shift > 0 && shift == j) { |
| args[j] = emitNeonShiftVector(builder, args[j], |
| mlir::cast<cir::VectorType>(argTypes[j]), |
| loc, rightshift); |
| } else { |
| args[j] = builder.createBitcast(args[j], argTypes[j]); |
| } |
| } |
| if (isConstrainedFPIntrinsic) { |
| assert(!cir::MissingFeatures::emitConstrainedFPCall()); |
| return nullptr; |
| } |
| if constexpr (std::is_same_v<Operation, cir::LLVMIntrinsicCallOp>) { |
| return Operation::create(builder, loc, |
| builder.getStringAttr(intrinsicName.value()), |
| funcResTy, args) |
| .getResult(); |
| } else { |
| return Operation::create(builder, loc, funcResTy, args).getResult(); |
| } |
| } |
| |
| // TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone. |
| static mlir::Value emitNeonCall(CIRGenModule &cgm, CIRGenBuilderTy &builder, |
| llvm::SmallVector<mlir::Type> argTypes, |
| llvm::SmallVectorImpl<mlir::Value> &args, |
| llvm::StringRef intrinsicName, |
| mlir::Type funcResTy, mlir::Location loc, |
| bool isConstrainedFPIntrinsic = false, |
| unsigned shift = 0, bool rightshift = false) { |
| return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>( |
| cgm, builder, std::move(argTypes), args, intrinsicName, funcResTy, loc, |
| isConstrainedFPIntrinsic, shift, rightshift); |
| } |
| |
| // Computes the input vector type for a NEON pairwise widening operation (e.g. |
| // vpaddl/vpadal). Given a result vector type, it derives the corresponding |
| // input type by halving the element bit width and doubling the number of lanes, |
| // while setting the signedness based on usgn. |
| static cir::VectorType getNeonPairwiseWidenInputType(cir::VectorType resType, |
| bool usgn) { |
| mlir::Type elemTy = resType.getElementType(); |
| uint64_t resLanes = resType.getSize(); |
| auto intTy = mlir::dyn_cast<cir::IntType>(elemTy); |
| assert(intTy && "vpaddl result type must be an integer vector"); |
| |
| unsigned resWidth = intTy.getWidth(); |
| assert((resWidth == 16 || resWidth == 32 || resWidth == 64) && |
| "unexpected vpaddl result element width"); |
| |
| unsigned argWidth = resWidth / 2; |
| unsigned argLanes = resLanes * 2; |
| cir::VectorType result = cir::VectorType::get( |
| cir::IntType::get(resType.getContext(), argWidth, /* is_signed */ !usgn), |
| argLanes); |
| return result; |
| } |
| |
| static mlir::Value emitCommonNeonSISDBuiltinExpr( |
| CIRGenFunction &cgf, const ARMVectorIntrinsicInfo &info, |
| llvm::SmallVectorImpl<mlir::Value> &ops, const CallExpr *expr) { |
| assert(info.LLVMIntrinsic && "Generic code assumes a valid intrinsic"); |
| |
| switch (info.BuiltinID) { |
| case NEON::BI__builtin_neon_vcled_s64: |
| case NEON::BI__builtin_neon_vcled_u64: |
| case NEON::BI__builtin_neon_vcles_f32: |
| case NEON::BI__builtin_neon_vcled_f64: |
| case NEON::BI__builtin_neon_vcltd_s64: |
| case NEON::BI__builtin_neon_vcltd_u64: |
| case NEON::BI__builtin_neon_vclts_f32: |
| case NEON::BI__builtin_neon_vcltd_f64: |
| case NEON::BI__builtin_neon_vcales_f32: |
| case NEON::BI__builtin_neon_vcaled_f64: |
| case NEON::BI__builtin_neon_vcalts_f32: |
| case NEON::BI__builtin_neon_vcaltd_f64: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| cgf.getContext().BuiltinInfo.getName(info.BuiltinID)); |
| break; |
| } |
| |
| llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix( |
| static_cast<llvm::Intrinsic::ID>(info.LLVMIntrinsic)); |
| mlir::Location loc = cgf.getLoc(expr->getExprLoc()); |
| |
| // The switch stmt is intended to help catch NYI cases and will be removed |
| // once the CIR implementation is complete. Avoid adding specialized |
| // code in cases - that should only be required for a handful of examples. |
| switch (info.BuiltinID) { |
| default: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| cgf.getContext().BuiltinInfo.getName(info.BuiltinID)); |
| break; |
| case NEON::BI__builtin_neon_vminv_s8: |
| case NEON::BI__builtin_neon_vminvq_s8: |
| case NEON::BI__builtin_neon_vminv_s16: |
| case NEON::BI__builtin_neon_vminvq_s16: |
| case NEON::BI__builtin_neon_vminv_s32: |
| case NEON::BI__builtin_neon_vminvq_s32: |
| case NEON::BI__builtin_neon_vminv_u8: |
| case NEON::BI__builtin_neon_vminvq_u8: |
| case NEON::BI__builtin_neon_vminv_u16: |
| case NEON::BI__builtin_neon_vminvq_u16: |
| case NEON::BI__builtin_neon_vminv_u32: |
| case NEON::BI__builtin_neon_vminvq_u32: |
| case NEON::BI__builtin_neon_vminv_f32: |
| case NEON::BI__builtin_neon_vminvq_f32: |
| case NEON::BI__builtin_neon_vminvq_f64: |
| case NEON::BI__builtin_neon_vminnmv_f32: |
| case NEON::BI__builtin_neon_vminnmvq_f32: |
| case NEON::BI__builtin_neon_vminnmvq_f64: |
| case NEON::BI__builtin_neon_vabdd_f64: |
| case NEON::BI__builtin_neon_vabds_f32: |
| case NEON::BI__builtin_neon_vshld_s64: |
| case NEON::BI__builtin_neon_vshld_u64: |
| case NEON::BI__builtin_neon_vpmins_f32: |
| case NEON::BI__builtin_neon_vpminqd_f64: |
| case NEON::BI__builtin_neon_vpminnms_f32: |
| case NEON::BI__builtin_neon_vpminnmqd_f64: |
| case NEON::BI__builtin_neon_vcvts_n_f32_s32: |
| case NEON::BI__builtin_neon_vcvts_n_f32_u32: |
| case NEON::BI__builtin_neon_vcvts_n_s32_f32: |
| case NEON::BI__builtin_neon_vcvts_n_u32_f32: |
| case NEON::BI__builtin_neon_vcvtd_n_f64_s64: |
| case NEON::BI__builtin_neon_vcvtd_n_f64_u64: |
| case NEON::BI__builtin_neon_vcvtd_n_s64_f64: |
| case NEON::BI__builtin_neon_vcvtd_n_u64_f64: |
| case NEON::BI__builtin_neon_vaddlv_s32: |
| case NEON::BI__builtin_neon_vaddlv_u32: |
| case NEON::BI__builtin_neon_vaddlvq_s32: |
| case NEON::BI__builtin_neon_vaddlvq_u32: |
| case NEON::BI__builtin_neon_vaddv_s8: |
| case NEON::BI__builtin_neon_vaddv_s16: |
| case NEON::BI__builtin_neon_vaddv_s32: |
| case NEON::BI__builtin_neon_vaddv_u8: |
| case NEON::BI__builtin_neon_vaddv_u16: |
| case NEON::BI__builtin_neon_vaddv_u32: |
| case NEON::BI__builtin_neon_vaddv_f32: |
| case NEON::BI__builtin_neon_vaddvq_s8: |
| case NEON::BI__builtin_neon_vaddvq_s16: |
| case NEON::BI__builtin_neon_vaddvq_s32: |
| case NEON::BI__builtin_neon_vaddvq_s64: |
| case NEON::BI__builtin_neon_vaddvq_u8: |
| case NEON::BI__builtin_neon_vaddvq_u16: |
| case NEON::BI__builtin_neon_vaddvq_u32: |
| case NEON::BI__builtin_neon_vaddvq_u64: |
| case NEON::BI__builtin_neon_vaddvq_f32: |
| case NEON::BI__builtin_neon_vaddvq_f64: |
| case NEON::BI__builtin_neon_vabdh_f16: |
| case NEON::BI__builtin_neon_vrecpeh_f16: |
| case NEON::BI__builtin_neon_vrecpxh_f16: |
| case NEON::BI__builtin_neon_vrsqrteh_f16: |
| case NEON::BI__builtin_neon_vrsqrtsh_f16: |
| return emitNeonCall(cgf.cgm, cgf.getBuilder(), |
| {cgf.convertType(expr->getArg(0)->getType())}, ops, |
| llvmIntrName, cgf.convertType(expr->getType()), loc); |
| } |
| |
| return nullptr; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Emit-helpers |
| //===----------------------------------------------------------------------===// |
| static mlir::Value |
| emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder, |
| mlir::Location loc, mlir::Value src, |
| mlir::Type retTy, const cir::CmpOpKind kind) { |
| |
| bool scalarCmp = !isa<cir::VectorType>(src.getType()); |
| if (!scalarCmp) { |
| assert(!cast<cir::VectorType>(retTy).getIsScalable() && |
| "This is only intended for fixed-width vectors"); |
| // Vector types are cast to i8 vectors. Recover original type. |
| src = builder.createBitcast(src, retTy); |
| } |
| |
| mlir::Value zero = builder.getNullValue(src.getType(), loc); |
| |
| if (!scalarCmp) |
| return builder.createVecCompare(loc, kind, src, zero); |
| |
| // For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare |
| // result is sign- rather zero-extended when casting to the output |
| // retType. |
| mlir::Value cmp = builder.createCast( |
| loc, cir::CastKind::bool_to_int, |
| builder.createCompare(loc, kind, src, zero), builder.getSIntNTy(1)); |
| |
| return builder.createCast(loc, cir::CastKind::integral, cmp, retTy); |
| } |
| |
| // TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone. |
| static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, |
| mlir::Location loc, |
| bool hasLegalHalfType = true, |
| bool v1Ty = false, |
| bool allowBFloatArgsAndRet = true) { |
| int isQuad = typeFlags.isQuad(); |
| switch (typeFlags.getEltType()) { |
| case NeonTypeFlags::Int8: |
| case NeonTypeFlags::Poly8: |
| return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty |
| : cgf->sInt8Ty, |
| v1Ty ? 1 : (8 << isQuad)); |
| case NeonTypeFlags::MFloat8: |
| return cir::VectorType::get(cgf->uInt8Ty, v1Ty ? 1 : (8 << isQuad)); |
| case NeonTypeFlags::Int16: |
| case NeonTypeFlags::Poly16: |
| return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty |
| : cgf->sInt16Ty, |
| v1Ty ? 1 : (4 << isQuad)); |
| case NeonTypeFlags::BFloat16: |
| if (allowBFloatArgsAndRet) |
| return cir::VectorType::get(cgf->getCIRGenModule().bFloat16Ty, |
| v1Ty ? 1 : (4 << isQuad)); |
| return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad)); |
| case NeonTypeFlags::Float16: |
| if (hasLegalHalfType) |
| return cir::VectorType::get(cgf->getCIRGenModule().fP16Ty, |
| v1Ty ? 1 : (4 << isQuad)); |
| return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad)); |
| case NeonTypeFlags::Int32: |
| return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty |
| : cgf->sInt32Ty, |
| v1Ty ? 1 : (2 << isQuad)); |
| case NeonTypeFlags::Int64: |
| case NeonTypeFlags::Poly64: |
| return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty |
| : cgf->sInt64Ty, |
| v1Ty ? 1 : (1 << isQuad)); |
| case NeonTypeFlags::Poly128: |
| // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. |
| // There is a lot of i128 and f128 API missing. |
| // so we use v16i8 to represent poly128 and get pattern matched. |
| cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128")); |
| [[fallthrough]]; |
| case NeonTypeFlags::Float32: |
| return cir::VectorType::get(cgf->getCIRGenModule().floatTy, |
| v1Ty ? 1 : (2 << isQuad)); |
| case NeonTypeFlags::Float64: |
| return cir::VectorType::get(cgf->getCIRGenModule().doubleTy, |
| v1Ty ? 1 : (1 << isQuad)); |
| } |
| llvm_unreachable("Unknown vector element type!"); |
| } |
| |
| static mlir::Value emitNeonSplat(CIRGenBuilderTy &builder, mlir::Location loc, |
| mlir::Value v, mlir::Value lane, |
| unsigned int resEltCnt) { |
| assert(isa<cir::ConstantOp>(lane.getDefiningOp()) && |
| "lane number is not a constant!"); |
| int64_t laneCst = getIntValueFromConstOp(lane); |
| llvm::SmallVector<int64_t, 4> shuffleMask(resEltCnt, laneCst); |
| return builder.createVecShuffle(loc, v, shuffleMask); |
| } |
| |
| /// Flip the signedness of `vecTy`'s element type, keeping the width and |
| /// number of lanes the same. Used when a NEON intrinsic takes a shift |
| /// amount vector that must be signed (e.g. aarch64.neon.urshl takes a |
| /// signed amount even though the data vector is unsigned). |
| static cir::VectorType getSignChangedVectorType(CIRGenBuilderTy &builder, |
| cir::VectorType vecTy) { |
| auto elemTy = mlir::cast<cir::IntType>(vecTy.getElementType()); |
| elemTy = elemTy.isSigned() ? builder.getUIntNTy(elemTy.getWidth()) |
| : builder.getSIntNTy(elemTy.getWidth()); |
| return cir::VectorType::get(elemTy, vecTy.getSize()); |
| } |
| |
| static mlir::Value emitCommonNeonShift(CIRGenBuilderTy &builder, |
| mlir::Location loc, |
| cir::VectorType resTy, |
| mlir::Value shifTgt, |
| mlir::Value shiftAmt, bool shiftLeft) { |
| shiftAmt = emitNeonShiftVector(builder, shiftAmt, resTy, loc, /*neg=*/false); |
| return cir::ShiftOp::create(builder, loc, resTy, |
| builder.createBitcast(shifTgt, resTy), shiftAmt, |
| shiftLeft); |
| } |
| |
| // Right-shift a vector by a constant. |
| static mlir::Value emitNeonRShiftImm(CIRGenFunction &cgf, mlir::Value shiftVec, |
| mlir::Value shiftVal, |
| cir::VectorType vecTy, bool usgn, |
| mlir::Location loc) { |
| CIRGenBuilderTy &builder = cgf.getBuilder(); |
| int64_t shiftAmt = getIntValueFromConstOp(shiftVal); |
| int eltSize = |
| cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy.getElementType()); |
| |
| shiftVec = builder.createBitcast(shiftVec, vecTy); |
| // lshr/ashr are undefined when the shift amount is equal to the vector |
| // element size. |
| if (shiftAmt == eltSize) { |
| if (usgn) { |
| // Right-shifting an unsigned value by its size yields 0. |
| return builder.getZero(loc, vecTy); |
| } |
| // Right-shifting a signed value by its size is equivalent |
| // to a shift of size-1. |
| --shiftAmt; |
| shiftVal = builder.getConstInt(loc, vecTy.getElementType(), shiftAmt); |
| } |
| return emitCommonNeonShift(builder, loc, vecTy, shiftVec, shiftVal, |
| /*shiftLeft=*/false); |
| } |
| |
| static cir::VectorType getIntVecFromVecTy(CIRGenBuilderTy &builder, |
| cir::VectorType vecTy) { |
| if (!cir::isAnyFloatingPointType(vecTy.getElementType())) |
| return vecTy; |
| |
| if (mlir::isa<cir::SingleType>(vecTy.getElementType())) |
| return cir::VectorType::get(builder.getSInt32Ty(), vecTy.getSize()); |
| if (mlir::isa<cir::DoubleType>(vecTy.getElementType())) |
| return cir::VectorType::get(builder.getSInt64Ty(), vecTy.getSize()); |
| llvm_unreachable( |
| "Unsupported element type in getVecOfIntTypeWithSameEltWidth"); |
| } |
| |
| static mlir::Value emitCommonNeonBuiltinExpr( |
| CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic, |
| unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier, |
| const CallExpr *expr, llvm::SmallVectorImpl<mlir::Value> &ops) { |
| mlir::Location loc = cgf.getLoc(expr->getExprLoc()); |
| clang::ASTContext &ctx = cgf.getContext(); |
| |
| // Extract the trailing immediate argument that encodes the type discriminator |
| // for this overloaded intrinsic. |
| // TODO: Move to the parent code that takes care of argument processing. |
| const clang::Expr *arg = expr->getArg(expr->getNumArgs() - 1); |
| std::optional<llvm::APSInt> neonTypeConst = arg->getIntegerConstantExpr(ctx); |
| if (!neonTypeConst) |
| return nullptr; |
| |
| // Determine the type of this overloaded NEON intrinsic. |
| NeonTypeFlags neonType(neonTypeConst->getZExtValue()); |
| const bool isUnsigned = neonType.isUnsigned(); |
| const bool hasLegalHalfType = cgf.getTarget().hasFastHalfType(); |
| const bool usgn = neonType.isUnsigned(); |
| |
| // The value of allowBFloatArgsAndRet is true for AArch64, but it should |
| // come from ABI info. |
| // TODO(cir): Use ABInfo to extract this information |
| const bool allowBFloatArgsAndRet = cgf.getTarget().hasFastHalfType(); |
| // FIXME |
| // getTargetHooks().getABIInfo().allowBFloatArgsAndRet(); |
| |
| cir::VectorType vTy = getNeonType(&cgf, neonType, loc, hasLegalHalfType, |
| false, allowBFloatArgsAndRet); |
| cir::VectorType ty = vTy; |
| if (!ty) |
| return nullptr; |
| |
| switch (builtinID) { |
| case NEON::BI__builtin_neon_splat_lane_v: |
| case NEON::BI__builtin_neon_splat_laneq_v: |
| case NEON::BI__builtin_neon_splatq_lane_v: |
| case NEON::BI__builtin_neon_splatq_laneq_v: { |
| uint64_t numElements = vTy.getSize(); |
| if (builtinID == NEON::BI__builtin_neon_splatq_lane_v) |
| numElements *= 2; |
| if (builtinID == NEON::BI__builtin_neon_splat_laneq_v) |
| numElements /= 2; |
| ops[0] = cgf.getBuilder().createBitcast(loc, ops[0], vTy); |
| return emitNeonSplat(cgf.getBuilder(), loc, ops[0], ops[1], numElements); |
| } |
| case NEON::BI__builtin_neon_vpadd_v: |
| case NEON::BI__builtin_neon_vpaddq_v: |
| case NEON::BI__builtin_neon_vabs_v: |
| case NEON::BI__builtin_neon_vabsq_v: |
| case NEON::BI__builtin_neon_vadd_v: |
| case NEON::BI__builtin_neon_vaddq_v: |
| case NEON::BI__builtin_neon_vaddhn_v: |
| case NEON::BI__builtin_neon_vcale_v: |
| case NEON::BI__builtin_neon_vcaleq_v: |
| case NEON::BI__builtin_neon_vcalt_v: |
| case NEON::BI__builtin_neon_vcaltq_v: |
| case NEON::BI__builtin_neon_vcage_v: |
| case NEON::BI__builtin_neon_vcageq_v: |
| case NEON::BI__builtin_neon_vcagt_v: |
| case NEON::BI__builtin_neon_vcagtq_v: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| ctx.BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vceqz_v: |
| case NEON::BI__builtin_neon_vceqzq_v: |
| return emitAArch64CompareBuiltinExpr(cgf, cgf.getBuilder(), loc, ops[0], |
| vTy, cir::CmpOpKind::eq); |
| case NEON::BI__builtin_neon_vcgez_v: |
| case NEON::BI__builtin_neon_vcgezq_v: |
| case NEON::BI__builtin_neon_vclez_v: |
| case NEON::BI__builtin_neon_vclezq_v: |
| case NEON::BI__builtin_neon_vcgtz_v: |
| case NEON::BI__builtin_neon_vcgtzq_v: |
| case NEON::BI__builtin_neon_vcltz_v: |
| case NEON::BI__builtin_neon_vcltzq_v: |
| case NEON::BI__builtin_neon_vclz_v: |
| case NEON::BI__builtin_neon_vclzq_v: |
| case NEON::BI__builtin_neon_vcvt_f32_v: |
| case NEON::BI__builtin_neon_vcvtq_f32_v: |
| case NEON::BI__builtin_neon_vcvt_f16_s16: |
| case NEON::BI__builtin_neon_vcvt_f16_u16: |
| case NEON::BI__builtin_neon_vcvtq_f16_s16: |
| case NEON::BI__builtin_neon_vcvtq_f16_u16: |
| case NEON::BI__builtin_neon_vcvt_n_f16_s16: |
| case NEON::BI__builtin_neon_vcvt_n_f16_u16: |
| case NEON::BI__builtin_neon_vcvtq_n_f16_s16: |
| case NEON::BI__builtin_neon_vcvtq_n_f16_u16: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| ctx.BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vcvt_n_f32_v: |
| case NEON::BI__builtin_neon_vcvt_n_f64_v: |
| case NEON::BI__builtin_neon_vcvtq_n_f32_v: |
| case NEON::BI__builtin_neon_vcvtq_n_f64_v: { |
| // The constant argument to an _n_ intrinsic always is Int32Ty. |
| mlir::Type cstIntTy = cgf.sInt32Ty; |
| llvm::StringRef llvmIntrName = |
| getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>( |
| usgn ? llvmIntrinsic : altLLVMIntrinsic)); |
| return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(), |
| /*argTypes=*/{vTy, cstIntTy}, ops, llvmIntrName, |
| /*funcResTy=*/getFloatNeonType(cgf, neonType), loc); |
| } |
| case NEON::BI__builtin_neon_vcvt_n_s16_f16: |
| case NEON::BI__builtin_neon_vcvt_n_s32_v: |
| case NEON::BI__builtin_neon_vcvt_n_u16_f16: |
| case NEON::BI__builtin_neon_vcvt_n_u32_v: |
| case NEON::BI__builtin_neon_vcvt_n_s64_v: |
| case NEON::BI__builtin_neon_vcvt_n_u64_v: |
| case NEON::BI__builtin_neon_vcvtq_n_s16_f16: |
| case NEON::BI__builtin_neon_vcvtq_n_s32_v: |
| case NEON::BI__builtin_neon_vcvtq_n_u16_f16: |
| case NEON::BI__builtin_neon_vcvtq_n_u32_v: |
| case NEON::BI__builtin_neon_vcvtq_n_s64_v: |
| case NEON::BI__builtin_neon_vcvtq_n_u64_v: { |
| // The constant argument to an _n_ intrinsic always is Int32Ty. |
| mlir::Type cstIntTy = cgf.sInt32Ty; |
| llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix( |
| static_cast<llvm::Intrinsic::ID>(llvmIntrinsic)); |
| return emitNeonCall( |
| cgf.getCIRGenModule(), cgf.getBuilder(), |
| /*argTypes=*/{getFloatNeonType(cgf, neonType), cstIntTy}, ops, |
| llvmIntrName, |
| /*funcResTy=*/vTy, loc); |
| } |
| case NEON::BI__builtin_neon_vcvt_s32_v: |
| case NEON::BI__builtin_neon_vcvt_u32_v: |
| case NEON::BI__builtin_neon_vcvt_s64_v: |
| case NEON::BI__builtin_neon_vcvt_u64_v: |
| case NEON::BI__builtin_neon_vcvt_s16_f16: |
| case NEON::BI__builtin_neon_vcvt_u16_f16: |
| case NEON::BI__builtin_neon_vcvtq_s32_v: |
| case NEON::BI__builtin_neon_vcvtq_u32_v: |
| case NEON::BI__builtin_neon_vcvtq_s64_v: |
| case NEON::BI__builtin_neon_vcvtq_u64_v: |
| case NEON::BI__builtin_neon_vcvtq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtq_u16_f16: |
| case NEON::BI__builtin_neon_vcvta_s16_f16: |
| case NEON::BI__builtin_neon_vcvta_s32_v: |
| case NEON::BI__builtin_neon_vcvta_s64_v: |
| case NEON::BI__builtin_neon_vcvta_u16_f16: |
| case NEON::BI__builtin_neon_vcvta_u32_v: |
| case NEON::BI__builtin_neon_vcvta_u64_v: |
| case NEON::BI__builtin_neon_vcvtaq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtaq_s32_v: |
| case NEON::BI__builtin_neon_vcvtaq_s64_v: |
| case NEON::BI__builtin_neon_vcvtaq_u16_f16: |
| case NEON::BI__builtin_neon_vcvtaq_u32_v: |
| case NEON::BI__builtin_neon_vcvtaq_u64_v: |
| case NEON::BI__builtin_neon_vcvtn_s16_f16: |
| case NEON::BI__builtin_neon_vcvtn_s32_v: |
| case NEON::BI__builtin_neon_vcvtn_s64_v: |
| case NEON::BI__builtin_neon_vcvtn_u16_f16: |
| case NEON::BI__builtin_neon_vcvtn_u32_v: |
| case NEON::BI__builtin_neon_vcvtn_u64_v: |
| case NEON::BI__builtin_neon_vcvtnq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtnq_s32_v: |
| case NEON::BI__builtin_neon_vcvtnq_s64_v: |
| case NEON::BI__builtin_neon_vcvtnq_u16_f16: |
| case NEON::BI__builtin_neon_vcvtnq_u32_v: |
| case NEON::BI__builtin_neon_vcvtnq_u64_v: |
| case NEON::BI__builtin_neon_vcvtp_s16_f16: |
| case NEON::BI__builtin_neon_vcvtp_s32_v: |
| case NEON::BI__builtin_neon_vcvtp_s64_v: |
| case NEON::BI__builtin_neon_vcvtp_u16_f16: |
| case NEON::BI__builtin_neon_vcvtp_u32_v: |
| case NEON::BI__builtin_neon_vcvtp_u64_v: |
| case NEON::BI__builtin_neon_vcvtpq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtpq_s32_v: |
| case NEON::BI__builtin_neon_vcvtpq_s64_v: |
| case NEON::BI__builtin_neon_vcvtpq_u16_f16: |
| case NEON::BI__builtin_neon_vcvtpq_u32_v: |
| case NEON::BI__builtin_neon_vcvtpq_u64_v: |
| case NEON::BI__builtin_neon_vcvtm_s16_f16: |
| case NEON::BI__builtin_neon_vcvtm_s32_v: |
| case NEON::BI__builtin_neon_vcvtm_s64_v: |
| case NEON::BI__builtin_neon_vcvtm_u16_f16: |
| case NEON::BI__builtin_neon_vcvtm_u32_v: |
| case NEON::BI__builtin_neon_vcvtm_u64_v: |
| case NEON::BI__builtin_neon_vcvtmq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtmq_s32_v: |
| case NEON::BI__builtin_neon_vcvtmq_s64_v: |
| case NEON::BI__builtin_neon_vcvtmq_u16_f16: |
| case NEON::BI__builtin_neon_vcvtmq_u32_v: |
| case NEON::BI__builtin_neon_vcvtmq_u64_v: |
| case NEON::BI__builtin_neon_vcvtx_f32_v: |
| case NEON::BI__builtin_neon_vext_v: |
| case NEON::BI__builtin_neon_vextq_v: |
| case NEON::BI__builtin_neon_vfma_v: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| ctx.BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vfmaq_v: { |
| // NEON intrinsic: vfmaq(accumulator, multiplicand1, multiplicand2) |
| // LLVM intrinsic: fma(multiplicand1, multiplicand2, accumulator) |
| // Reorder arguments to match LLVM fma signature |
| mlir::Value op0 = cgf.getBuilder().createBitcast(ops[0], ty); |
| mlir::Value op1 = cgf.getBuilder().createBitcast(ops[1], ty); |
| mlir::Value op2 = cgf.getBuilder().createBitcast(ops[2], ty); |
| llvm::SmallVector<mlir::Value> fmaOps = {op1, op2, op0}; |
| return emitCallMaybeConstrainedBuiltin(cgf.getBuilder(), loc, "fma", ty, |
| fmaOps); |
| } |
| case NEON::BI__builtin_neon_vld1_v: |
| case NEON::BI__builtin_neon_vld1q_v: |
| case NEON::BI__builtin_neon_vld1_x2_v: |
| case NEON::BI__builtin_neon_vld1q_x2_v: |
| case NEON::BI__builtin_neon_vld1_x3_v: |
| case NEON::BI__builtin_neon_vld1q_x3_v: |
| case NEON::BI__builtin_neon_vld1_x4_v: |
| case NEON::BI__builtin_neon_vld1q_x4_v: |
| case NEON::BI__builtin_neon_vld2_v: |
| case NEON::BI__builtin_neon_vld2q_v: |
| case NEON::BI__builtin_neon_vld3_v: |
| case NEON::BI__builtin_neon_vld3q_v: |
| case NEON::BI__builtin_neon_vld4_v: |
| case NEON::BI__builtin_neon_vld4q_v: |
| case NEON::BI__builtin_neon_vld2_dup_v: |
| case NEON::BI__builtin_neon_vld2q_dup_v: |
| case NEON::BI__builtin_neon_vld3_dup_v: |
| case NEON::BI__builtin_neon_vld3q_dup_v: |
| case NEON::BI__builtin_neon_vld4_dup_v: |
| case NEON::BI__builtin_neon_vld4q_dup_v: |
| case NEON::BI__builtin_neon_vld1_dup_v: |
| case NEON::BI__builtin_neon_vld1q_dup_v: |
| case NEON::BI__builtin_neon_vld2_lane_v: |
| case NEON::BI__builtin_neon_vld2q_lane_v: |
| case NEON::BI__builtin_neon_vld3_lane_v: |
| case NEON::BI__builtin_neon_vld3q_lane_v: |
| case NEON::BI__builtin_neon_vld4_lane_v: |
| case NEON::BI__builtin_neon_vld4q_lane_v: |
| case NEON::BI__builtin_neon_vmovl_v: |
| case NEON::BI__builtin_neon_vmovn_v: |
| case NEON::BI__builtin_neon_vmull_v: |
| case NEON::BI__builtin_neon_vpadal_v: |
| case NEON::BI__builtin_neon_vpadalq_v: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("Reached code-path for ARM builtin call ") + |
| ctx.BuiltinInfo.getName(builtinID) + |
| "(ARM builtins are not supported ATM)"); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vpaddl_v: |
| case NEON::BI__builtin_neon_vpaddlq_v: { |
| llvm::StringRef llvmIntrName = |
| getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>( |
| usgn ? llvmIntrinsic : altLLVMIntrinsic)); |
| return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(), |
| /*argTypes=*/{getNeonPairwiseWidenInputType(vTy, usgn)}, |
| ops, llvmIntrName, |
| /*funcResTy=*/vTy, loc); |
| } |
| case NEON::BI__builtin_neon_vqdmlal_v: |
| case NEON::BI__builtin_neon_vqdmlsl_v: |
| case NEON::BI__builtin_neon_vqdmulhq_lane_v: |
| case NEON::BI__builtin_neon_vqdmulh_lane_v: |
| case NEON::BI__builtin_neon_vqrdmulhq_lane_v: |
| case NEON::BI__builtin_neon_vqrdmulh_lane_v: |
| case NEON::BI__builtin_neon_vqdmulhq_laneq_v: |
| case NEON::BI__builtin_neon_vqdmulh_laneq_v: |
| case NEON::BI__builtin_neon_vqrdmulhq_laneq_v: |
| case NEON::BI__builtin_neon_vqrdmulh_laneq_v: |
| case NEON::BI__builtin_neon_vqshl_n_v: |
| case NEON::BI__builtin_neon_vqshlq_n_v: |
| case NEON::BI__builtin_neon_vqshlu_n_v: |
| case NEON::BI__builtin_neon_vqshluq_n_v: |
| case NEON::BI__builtin_neon_vrecpe_v: |
| case NEON::BI__builtin_neon_vrecpeq_v: |
| case NEON::BI__builtin_neon_vrsqrte_v: |
| case NEON::BI__builtin_neon_vrsqrteq_v: |
| case NEON::BI__builtin_neon_vrndi_v: |
| case NEON::BI__builtin_neon_vrndiq_v: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| ctx.BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vrshr_n_v: |
| case NEON::BI__builtin_neon_vrshrq_n_v: { |
| llvm::StringRef intrName = |
| usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl"; |
| return emitNeonCall( |
| cgf.cgm, cgf.getBuilder(), |
| {ty, usgn ? getSignChangedVectorType(cgf.getBuilder(), ty) : ty}, ops, |
| intrName, ty, loc, /*isConstrainedFPIntrinsic=*/false, |
| /*shift=*/1, |
| /*rightshift=*/true); |
| } |
| case NEON::BI__builtin_neon_vsha512hq_u64: |
| case NEON::BI__builtin_neon_vsha512h2q_u64: |
| case NEON::BI__builtin_neon_vsha512su0q_u64: |
| case NEON::BI__builtin_neon_vsha512su1q_u64: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| ctx.BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vshl_n_v: |
| case NEON::BI__builtin_neon_vshlq_n_v: |
| return emitCommonNeonShift(cgf.getBuilder(), loc, vTy, ops[0], ops[1], |
| /*shiftLeft=*/true); |
| case NEON::BI__builtin_neon_vshll_n_v: |
| case NEON::BI__builtin_neon_vshrn_n_v: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| ctx.BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vshr_n_v: |
| case NEON::BI__builtin_neon_vshrq_n_v: |
| return emitNeonRShiftImm(cgf, ops[0], ops[1], vTy, isUnsigned, loc); |
| case NEON::BI__builtin_neon_vst1_v: |
| case NEON::BI__builtin_neon_vst1q_v: |
| case NEON::BI__builtin_neon_vst2_v: |
| case NEON::BI__builtin_neon_vst2q_v: |
| case NEON::BI__builtin_neon_vst3_v: |
| case NEON::BI__builtin_neon_vst3q_v: |
| case NEON::BI__builtin_neon_vst4_v: |
| case NEON::BI__builtin_neon_vst4q_v: |
| case NEON::BI__builtin_neon_vst2_lane_v: |
| case NEON::BI__builtin_neon_vst2q_lane_v: |
| case NEON::BI__builtin_neon_vst3_lane_v: |
| case NEON::BI__builtin_neon_vst3q_lane_v: |
| case NEON::BI__builtin_neon_vst4_lane_v: |
| case NEON::BI__builtin_neon_vst4q_lane_v: |
| case NEON::BI__builtin_neon_vsm3partw1q_u32: |
| case NEON::BI__builtin_neon_vsm3partw2q_u32: |
| case NEON::BI__builtin_neon_vsm3ss1q_u32: |
| case NEON::BI__builtin_neon_vsm4ekeyq_u32: |
| case NEON::BI__builtin_neon_vsm4eq_u32: |
| case NEON::BI__builtin_neon_vsm3tt1aq_u32: |
| case NEON::BI__builtin_neon_vsm3tt1bq_u32: |
| case NEON::BI__builtin_neon_vsm3tt2aq_u32: |
| case NEON::BI__builtin_neon_vsm3tt2bq_u32: |
| case NEON::BI__builtin_neon_vst1_x2_v: |
| case NEON::BI__builtin_neon_vst1q_x2_v: |
| case NEON::BI__builtin_neon_vst1_x3_v: |
| case NEON::BI__builtin_neon_vst1q_x3_v: |
| case NEON::BI__builtin_neon_vst1_x4_v: |
| case NEON::BI__builtin_neon_vst1q_x4_v: |
| case NEON::BI__builtin_neon_vsubhn_v: |
| case NEON::BI__builtin_neon_vtrn_v: |
| case NEON::BI__builtin_neon_vtrnq_v: |
| case NEON::BI__builtin_neon_vtst_v: |
| case NEON::BI__builtin_neon_vtstq_v: |
| case NEON::BI__builtin_neon_vuzp_v: |
| case NEON::BI__builtin_neon_vuzpq_v: |
| case NEON::BI__builtin_neon_vxarq_u64: |
| case NEON::BI__builtin_neon_vzip_v: |
| case NEON::BI__builtin_neon_vzipq_v: |
| case NEON::BI__builtin_neon_vdot_s32: |
| case NEON::BI__builtin_neon_vdot_u32: |
| case NEON::BI__builtin_neon_vdotq_s32: |
| case NEON::BI__builtin_neon_vdotq_u32: |
| case NEON::BI__builtin_neon_vfmlal_low_f16: |
| case NEON::BI__builtin_neon_vfmlalq_low_f16: |
| case NEON::BI__builtin_neon_vfmlsl_low_f16: |
| case NEON::BI__builtin_neon_vfmlslq_low_f16: |
| case NEON::BI__builtin_neon_vfmlal_high_f16: |
| case NEON::BI__builtin_neon_vfmlalq_high_f16: |
| case NEON::BI__builtin_neon_vfmlsl_high_f16: |
| case NEON::BI__builtin_neon_vfmlslq_high_f16: |
| case NEON::BI__builtin_neon_vmmlaq_s32: |
| case NEON::BI__builtin_neon_vmmlaq_u32: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| ctx.BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vmul_v: |
| case NEON::BI__builtin_neon_vmulq_v: |
| return cgf.getBuilder().emitIntrinsicCallOp(loc, "aarch64.neon.pmul", vTy, |
| ops); |
| case NEON::BI__builtin_neon_vusmmlaq_s32: |
| case NEON::BI__builtin_neon_vusdot_s32: |
| case NEON::BI__builtin_neon_vusdotq_s32: |
| case NEON::BI__builtin_neon_vbfdot_f32: |
| case NEON::BI__builtin_neon_vbfdotq_f32: |
| case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| ctx.BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| // The switch stmt is intended to help catch NYI cases and will be removed |
| // once the CIR implementation is complete. Avoid adding specialized |
| // code in cases - that should only be required for a handful of examples. |
| switch (builtinID) { |
| default: |
| cgf.cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| cgf.getContext().BuiltinInfo.getName(builtinID)); |
| break; |
| case NEON::BI__builtin_neon_vshl_v: |
| case NEON::BI__builtin_neon_vshlq_v: { |
| llvm::StringRef llvmIntrName = |
| getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>( |
| usgn ? llvmIntrinsic : altLLVMIntrinsic)); |
| |
| mlir::Value result = |
| emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(), |
| /*argTypes=*/{vTy, vTy}, ops, llvmIntrName, |
| /*funcResTy=*/vTy, loc); |
| mlir::Type resultType = cgf.convertType(expr->getType()); |
| return cgf.getBuilder().createBitcast(result, resultType); |
| } |
| } |
| |
| // NYI |
| return nullptr; |
| } |
| |
| bool CIRGenFunction::getAArch64SVEProcessedOperands( |
| unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops, |
| SVETypeFlags typeFlags) { |
| // Find out if any arguments are required to be integer constant expressions. |
| unsigned iceArguments = 0; |
| ASTContext::GetBuiltinTypeError error; |
| getContext().GetBuiltinType(builtinID, error, &iceArguments); |
| assert(error == ASTContext::GE_None && "Should not codegen an error"); |
| |
| for (unsigned i = 0, e = expr->getNumArgs(); i != e; i++) { |
| bool isIce = iceArguments & (1 << i); |
| mlir::Value arg = emitScalarExpr(expr->getArg(i)); |
| |
| if (isIce) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| } |
| |
| // FIXME: Handle types like svint16x2_t, which are currently incorrectly |
| // converted to i32. These should be treated as structs and unpacked. |
| |
| ops.push_back(arg); |
| } |
| return true; |
| } |
| |
| // Reinterpret the input predicate so that it can be used to correctly isolate |
| // the elements of the specified datatype. |
| mlir::Value CIRGenFunction::emitSVEPredicateCast(mlir::Value pred, |
| unsigned minNumElts, |
| mlir::Location loc) { |
| |
| // TODO: Handle "aarch64.svcount" once we get round to supporting SME. |
| |
| auto retTy = cir::VectorType::get(builder.getUIntNTy(1), minNumElts, |
| /*is_scalable=*/true); |
| if (pred.getType() == retTy) |
| return pred; |
| |
| llvm::Intrinsic::ID intID; |
| switch (minNumElts) { |
| default: |
| llvm_unreachable("unsupported element count!"); |
| case 1: |
| case 2: |
| case 4: |
| case 8: |
| intID = Intrinsic::aarch64_sve_convert_from_svbool; |
| break; |
| case 16: |
| intID = Intrinsic::aarch64_sve_convert_to_svbool; |
| break; |
| } |
| |
| llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(intID); |
| auto call = builder.emitIntrinsicCallOp(loc, llvmIntrName, retTy, |
| mlir::ValueRange{pred}); |
| assert(call.getType() == retTy && "Unexpected return type!"); |
| return call; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SVE helpers |
| //===----------------------------------------------------------------------===// |
| // Get the minimum number of elements in an SVE vector for the given element |
| // type. The actual number of elements in the vector would be an integer (power |
| // of two) multiple of this value. |
| static unsigned getSVEMinEltCount(clang::SVETypeFlags::EltType sveType) { |
| switch (sveType) { |
| default: |
| llvm_unreachable("Invalid SVETypeFlag!"); |
| |
| case SVETypeFlags::EltTyInt8: |
| return 16; |
| case SVETypeFlags::EltTyInt16: |
| return 8; |
| case SVETypeFlags::EltTyInt32: |
| return 4; |
| case SVETypeFlags::EltTyInt64: |
| return 2; |
| |
| case SVETypeFlags::EltTyMFloat8: |
| return 16; |
| case SVETypeFlags::EltTyFloat16: |
| case SVETypeFlags::EltTyBFloat16: |
| return 8; |
| case SVETypeFlags::EltTyFloat32: |
| return 4; |
| case SVETypeFlags::EltTyFloat64: |
| return 2; |
| |
| case SVETypeFlags::EltTyBool8: |
| return 16; |
| case SVETypeFlags::EltTyBool16: |
| return 8; |
| case SVETypeFlags::EltTyBool32: |
| return 4; |
| case SVETypeFlags::EltTyBool64: |
| return 2; |
| } |
| } |
| |
| // TODO(cir): Share with OGCG |
| constexpr unsigned sveBitsPerBlock = 128; |
| |
| static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, |
| mlir::Type eltTy) { |
| unsigned numElts = |
| sveBitsPerBlock / cgm.getDataLayout().getTypeSizeInBits(eltTy); |
| return cir::VectorType::get(eltTy, numElts, /*is_scalable=*/true); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // SVE helpers |
| //===----------------------------------------------------------------------===// |
| std::optional<mlir::Value> |
| CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, |
| const CallExpr *expr) { |
| mlir::Type ty = convertType(expr->getType()); |
| |
| if (builtinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && |
| builtinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| assert(!cir::MissingFeatures::aarch64SVEIntrinsics()); |
| |
| auto *builtinIntrInfo = findARMVectorIntrinsicInMap( |
| aarch64SVEIntrinsicMap, builtinID, aarch64SVEIntrinsicsProvenSorted); |
| |
| // The operands of the builtin call |
| llvm::SmallVector<mlir::Value> ops; |
| |
| SVETypeFlags typeFlags(builtinIntrInfo->TypeModifier); |
| if (!CIRGenFunction::getAArch64SVEProcessedOperands(builtinID, expr, ops, |
| typeFlags)) |
| return mlir::Value{}; |
| |
| if (typeFlags.isLoad() || typeFlags.isStore() || typeFlags.isGatherLoad() || |
| typeFlags.isScatterStore() || typeFlags.isPrefetch() || |
| typeFlags.isGatherPrefetch() || typeFlags.isStructLoad() || |
| typeFlags.isStructStore() || typeFlags.isTupleSet() || |
| typeFlags.isTupleGet() || typeFlags.isTupleCreate() || |
| typeFlags.isUndef()) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| |
| mlir::Location loc = getLoc(expr->getExprLoc()); |
| |
| // Handle built-ins for which there is a corresponding LLVM Intrinsic. |
| // ------------------------------------------------------------------- |
| if (builtinIntrInfo->LLVMIntrinsic != 0) { |
| // Emit set FPMR for intrinsics that require it. |
| if (typeFlags.setsFPMR()) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| |
| // Zero-ing predication |
| if (typeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) { |
| auto null = builder.getNullValue(convertType(expr->getType()), |
| getLoc(expr->getExprLoc())); |
| ops.insert(ops.begin(), null); |
| } |
| |
| if (typeFlags.getMergeType() == SVETypeFlags::MergeAnyExp) |
| ops.insert(ops.begin(), |
| builder.getConstant(loc, cir::UndefAttr::get(ty))); |
| |
| // Some ACLE builtins leave out the argument to specify the predicate |
| // pattern, which is expected to be expanded to an SV_ALL pattern. |
| if (typeFlags.isAppendSVALL()) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| if (typeFlags.isInsertOp1SVALL()) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| |
| // Predicates must match the main datatype. |
| for (mlir::Value &op : ops) |
| if (auto predTy = dyn_cast<cir::VectorType>(op.getType())) |
| if (auto cirInt = dyn_cast<cir::IntType>(predTy.getElementType())) |
| if (cirInt.getWidth() == 1) |
| op = emitSVEPredicateCast( |
| op, getSVEMinEltCount(typeFlags.getEltType()), loc); |
| |
| // Splat scalar operand to vector (intrinsics with _n infix) |
| if (typeFlags.hasSplatOperand()) { |
| unsigned opNo = typeFlags.getSplatOperand(); |
| ops[opNo] = cir::VecSplatOp::create( |
| builder, loc, getSVEVectorForElementType(cgm, ops[opNo].getType()), |
| ops[opNo]); |
| } |
| |
| if (typeFlags.isReverseCompare()) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| if (typeFlags.isReverseUSDOT()) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| if (typeFlags.isReverseMergeAnyBinOp() && |
| typeFlags.getMergeType() == SVETypeFlags::MergeAny) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| if (typeFlags.isReverseMergeAnyAccOp() && |
| typeFlags.getMergeType() == SVETypeFlags::MergeAny) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| |
| // Predicated intrinsics with _z suffix. |
| if (typeFlags.getMergeType() == SVETypeFlags::MergeZero) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| } |
| |
| llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix( |
| static_cast<llvm::Intrinsic::ID>(builtinIntrInfo->LLVMIntrinsic)); |
| auto retTy = convertType(expr->getType()); |
| |
| auto call = builder.emitIntrinsicCallOp(loc, llvmIntrName, retTy, |
| mlir::ValueRange{ops}); |
| if (call.getType() == retTy) |
| return call; |
| |
| // Predicate results must be converted to svbool_t. |
| if (isa<mlir::VectorType>(retTy) && |
| cast<mlir::VectorType>(retTy).isScalable()) |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| // TODO Handle struct types, e.g. svint8x2_t (update the converter first). |
| |
| llvm_unreachable("unsupported element count!"); |
| } |
| |
| // Handle the remaining built-ins. |
| // ------------------------------- |
| switch (builtinID) { |
| default: |
| return std::nullopt; |
| |
| case SVE::BI__builtin_sve_svreinterpret_b: |
| case SVE::BI__builtin_sve_svreinterpret_c: |
| case SVE::BI__builtin_sve_svpsel_lane_b8: |
| case SVE::BI__builtin_sve_svpsel_lane_b16: |
| case SVE::BI__builtin_sve_svpsel_lane_b32: |
| case SVE::BI__builtin_sve_svpsel_lane_b64: |
| case SVE::BI__builtin_sve_svpsel_lane_c8: |
| case SVE::BI__builtin_sve_svpsel_lane_c16: |
| case SVE::BI__builtin_sve_svpsel_lane_c32: |
| case SVE::BI__builtin_sve_svpsel_lane_c64: |
| case SVE::BI__builtin_sve_svmov_b_z: |
| case SVE::BI__builtin_sve_svnot_b_z: |
| case SVE::BI__builtin_sve_svmovlb_u16: |
| case SVE::BI__builtin_sve_svmovlb_u32: |
| case SVE::BI__builtin_sve_svmovlb_u64: |
| case SVE::BI__builtin_sve_svmovlb_s16: |
| case SVE::BI__builtin_sve_svmovlb_s32: |
| case SVE::BI__builtin_sve_svmovlb_s64: |
| case SVE::BI__builtin_sve_svmovlt_u16: |
| case SVE::BI__builtin_sve_svmovlt_u32: |
| case SVE::BI__builtin_sve_svmovlt_u64: |
| case SVE::BI__builtin_sve_svmovlt_s16: |
| case SVE::BI__builtin_sve_svmovlt_s32: |
| case SVE::BI__builtin_sve_svmovlt_s64: |
| case SVE::BI__builtin_sve_svpmullt_u16: |
| case SVE::BI__builtin_sve_svpmullt_u64: |
| case SVE::BI__builtin_sve_svpmullt_n_u16: |
| case SVE::BI__builtin_sve_svpmullt_n_u64: |
| case SVE::BI__builtin_sve_svpmullb_u16: |
| case SVE::BI__builtin_sve_svpmullb_u64: |
| case SVE::BI__builtin_sve_svpmullb_n_u16: |
| case SVE::BI__builtin_sve_svpmullb_n_u64: |
| |
| case SVE::BI__builtin_sve_svdup_n_b8: |
| case SVE::BI__builtin_sve_svdup_n_b16: |
| case SVE::BI__builtin_sve_svdup_n_b32: |
| case SVE::BI__builtin_sve_svdup_n_b64: |
| |
| case SVE::BI__builtin_sve_svdupq_n_b8: |
| case SVE::BI__builtin_sve_svdupq_n_b16: |
| case SVE::BI__builtin_sve_svdupq_n_b32: |
| case SVE::BI__builtin_sve_svdupq_n_b64: |
| case SVE::BI__builtin_sve_svdupq_n_u8: |
| case SVE::BI__builtin_sve_svdupq_n_s8: |
| case SVE::BI__builtin_sve_svdupq_n_u64: |
| case SVE::BI__builtin_sve_svdupq_n_f64: |
| case SVE::BI__builtin_sve_svdupq_n_s64: |
| case SVE::BI__builtin_sve_svdupq_n_u16: |
| case SVE::BI__builtin_sve_svdupq_n_f16: |
| case SVE::BI__builtin_sve_svdupq_n_bf16: |
| case SVE::BI__builtin_sve_svdupq_n_s16: |
| case SVE::BI__builtin_sve_svdupq_n_u32: |
| case SVE::BI__builtin_sve_svdupq_n_f32: |
| case SVE::BI__builtin_sve_svdupq_n_s32: |
| case SVE::BI__builtin_sve_svpfalse_b: |
| case SVE::BI__builtin_sve_svpfalse_c: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| |
| case SVE::BI__builtin_sve_svlen_u8: |
| case SVE::BI__builtin_sve_svlen_s8: |
| return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 16); |
| |
| case SVE::BI__builtin_sve_svlen_u16: |
| case SVE::BI__builtin_sve_svlen_s16: |
| case SVE::BI__builtin_sve_svlen_f16: |
| case SVE::BI__builtin_sve_svlen_bf16: |
| return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 8); |
| |
| case SVE::BI__builtin_sve_svlen_u32: |
| case SVE::BI__builtin_sve_svlen_s32: |
| case SVE::BI__builtin_sve_svlen_f32: |
| return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 4); |
| |
| case SVE::BI__builtin_sve_svlen_u64: |
| case SVE::BI__builtin_sve_svlen_s64: |
| case SVE::BI__builtin_sve_svlen_f64: |
| return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 2); |
| |
| case SVE::BI__builtin_sve_svtbl2_u8: |
| case SVE::BI__builtin_sve_svtbl2_s8: |
| case SVE::BI__builtin_sve_svtbl2_u16: |
| case SVE::BI__builtin_sve_svtbl2_s16: |
| case SVE::BI__builtin_sve_svtbl2_u32: |
| case SVE::BI__builtin_sve_svtbl2_s32: |
| case SVE::BI__builtin_sve_svtbl2_u64: |
| case SVE::BI__builtin_sve_svtbl2_s64: |
| case SVE::BI__builtin_sve_svtbl2_f16: |
| case SVE::BI__builtin_sve_svtbl2_bf16: |
| case SVE::BI__builtin_sve_svtbl2_f32: |
| case SVE::BI__builtin_sve_svtbl2_f64: |
| case SVE::BI__builtin_sve_svset_neonq_s8: |
| case SVE::BI__builtin_sve_svset_neonq_s16: |
| case SVE::BI__builtin_sve_svset_neonq_s32: |
| case SVE::BI__builtin_sve_svset_neonq_s64: |
| case SVE::BI__builtin_sve_svset_neonq_u8: |
| case SVE::BI__builtin_sve_svset_neonq_u16: |
| case SVE::BI__builtin_sve_svset_neonq_u32: |
| case SVE::BI__builtin_sve_svset_neonq_u64: |
| case SVE::BI__builtin_sve_svset_neonq_f16: |
| case SVE::BI__builtin_sve_svset_neonq_f32: |
| case SVE::BI__builtin_sve_svset_neonq_f64: |
| case SVE::BI__builtin_sve_svset_neonq_bf16: |
| case SVE::BI__builtin_sve_svget_neonq_s8: |
| case SVE::BI__builtin_sve_svget_neonq_s16: |
| case SVE::BI__builtin_sve_svget_neonq_s32: |
| case SVE::BI__builtin_sve_svget_neonq_s64: |
| case SVE::BI__builtin_sve_svget_neonq_u8: |
| case SVE::BI__builtin_sve_svget_neonq_u16: |
| case SVE::BI__builtin_sve_svget_neonq_u32: |
| case SVE::BI__builtin_sve_svget_neonq_u64: |
| case SVE::BI__builtin_sve_svget_neonq_f16: |
| case SVE::BI__builtin_sve_svget_neonq_f32: |
| case SVE::BI__builtin_sve_svget_neonq_f64: |
| case SVE::BI__builtin_sve_svget_neonq_bf16: |
| case SVE::BI__builtin_sve_svdup_neonq_s8: |
| case SVE::BI__builtin_sve_svdup_neonq_s16: |
| case SVE::BI__builtin_sve_svdup_neonq_s32: |
| case SVE::BI__builtin_sve_svdup_neonq_s64: |
| case SVE::BI__builtin_sve_svdup_neonq_u8: |
| case SVE::BI__builtin_sve_svdup_neonq_u16: |
| case SVE::BI__builtin_sve_svdup_neonq_u32: |
| case SVE::BI__builtin_sve_svdup_neonq_u64: |
| case SVE::BI__builtin_sve_svdup_neonq_f16: |
| case SVE::BI__builtin_sve_svdup_neonq_f32: |
| case SVE::BI__builtin_sve_svdup_neonq_f64: |
| case SVE::BI__builtin_sve_svdup_neonq_bf16: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| // Unreachable: All cases in the switch above return. |
| } |
| |
| std::optional<mlir::Value> |
| CIRGenFunction::emitAArch64SMEBuiltinExpr(unsigned builtinID, |
| const CallExpr *expr) { |
| assert(!cir::MissingFeatures::aarch64SMEIntrinsics()); |
| |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| // Some intrinsics are equivalent for codegen. |
| static const std::pair<unsigned, unsigned> neonEquivalentIntrinsicMap[] = { |
| { |
| NEON::BI__builtin_neon_splat_lane_bf16, |
| NEON::BI__builtin_neon_splat_lane_v, |
| }, |
| { |
| NEON::BI__builtin_neon_splat_laneq_bf16, |
| NEON::BI__builtin_neon_splat_laneq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_splatq_lane_bf16, |
| NEON::BI__builtin_neon_splatq_lane_v, |
| }, |
| { |
| NEON::BI__builtin_neon_splatq_laneq_bf16, |
| NEON::BI__builtin_neon_splatq_laneq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vabd_f16, |
| NEON::BI__builtin_neon_vabd_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vabdq_f16, |
| NEON::BI__builtin_neon_vabdq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vabs_f16, |
| NEON::BI__builtin_neon_vabs_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vabsq_f16, |
| NEON::BI__builtin_neon_vabsq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcage_f16, |
| NEON::BI__builtin_neon_vcage_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcageq_f16, |
| NEON::BI__builtin_neon_vcageq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcagt_f16, |
| NEON::BI__builtin_neon_vcagt_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcagtq_f16, |
| NEON::BI__builtin_neon_vcagtq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcale_f16, |
| NEON::BI__builtin_neon_vcale_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcaleq_f16, |
| NEON::BI__builtin_neon_vcaleq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcalt_f16, |
| NEON::BI__builtin_neon_vcalt_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcaltq_f16, |
| NEON::BI__builtin_neon_vcaltq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vceqz_f16, |
| NEON::BI__builtin_neon_vceqz_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vceqzq_f16, |
| NEON::BI__builtin_neon_vceqzq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcgez_f16, |
| NEON::BI__builtin_neon_vcgez_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcgezq_f16, |
| NEON::BI__builtin_neon_vcgezq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcgtz_f16, |
| NEON::BI__builtin_neon_vcgtz_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcgtzq_f16, |
| NEON::BI__builtin_neon_vcgtzq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vclez_f16, |
| NEON::BI__builtin_neon_vclez_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vclezq_f16, |
| NEON::BI__builtin_neon_vclezq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcltz_f16, |
| NEON::BI__builtin_neon_vcltz_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vcltzq_f16, |
| NEON::BI__builtin_neon_vcltzq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vfma_f16, |
| NEON::BI__builtin_neon_vfma_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vfma_lane_f16, |
| NEON::BI__builtin_neon_vfma_lane_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vfma_laneq_f16, |
| NEON::BI__builtin_neon_vfma_laneq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vfmaq_f16, |
| NEON::BI__builtin_neon_vfmaq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vfmaq_lane_f16, |
| NEON::BI__builtin_neon_vfmaq_lane_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vfmaq_laneq_f16, |
| NEON::BI__builtin_neon_vfmaq_laneq_v, |
| }, |
| {NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v}, |
| {NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v}, |
| {NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v}, |
| {NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v}, |
| {NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v}, |
| {NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v}, |
| {NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v}, |
| {NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v}, |
| {NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v}, |
| {NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v}, |
| {NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v}, |
| {NEON::BI__builtin_neon_vld1q_lane_bf16, |
| NEON::BI__builtin_neon_vld1q_lane_v}, |
| {NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v}, |
| {NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v}, |
| {NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v}, |
| {NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v}, |
| {NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v}, |
| {NEON::BI__builtin_neon_vld2q_lane_bf16, |
| NEON::BI__builtin_neon_vld2q_lane_v}, |
| {NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v}, |
| {NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v}, |
| {NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v}, |
| {NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v}, |
| {NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v}, |
| {NEON::BI__builtin_neon_vld3q_lane_bf16, |
| NEON::BI__builtin_neon_vld3q_lane_v}, |
| {NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v}, |
| {NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v}, |
| {NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v}, |
| {NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v}, |
| {NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v}, |
| {NEON::BI__builtin_neon_vld4q_lane_bf16, |
| NEON::BI__builtin_neon_vld4q_lane_v}, |
| { |
| NEON::BI__builtin_neon_vmax_f16, |
| NEON::BI__builtin_neon_vmax_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vmaxnm_f16, |
| NEON::BI__builtin_neon_vmaxnm_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vmaxnmq_f16, |
| NEON::BI__builtin_neon_vmaxnmq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vmaxq_f16, |
| NEON::BI__builtin_neon_vmaxq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vmin_f16, |
| NEON::BI__builtin_neon_vmin_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vminnm_f16, |
| NEON::BI__builtin_neon_vminnm_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vminnmq_f16, |
| NEON::BI__builtin_neon_vminnmq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vminq_f16, |
| NEON::BI__builtin_neon_vminq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vmulx_f16, |
| NEON::BI__builtin_neon_vmulx_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vmulxq_f16, |
| NEON::BI__builtin_neon_vmulxq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpadd_f16, |
| NEON::BI__builtin_neon_vpadd_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpaddq_f16, |
| NEON::BI__builtin_neon_vpaddq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpmax_f16, |
| NEON::BI__builtin_neon_vpmax_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpmaxnm_f16, |
| NEON::BI__builtin_neon_vpmaxnm_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpmaxnmq_f16, |
| NEON::BI__builtin_neon_vpmaxnmq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpmaxq_f16, |
| NEON::BI__builtin_neon_vpmaxq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpmin_f16, |
| NEON::BI__builtin_neon_vpmin_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpminnm_f16, |
| NEON::BI__builtin_neon_vpminnm_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpminnmq_f16, |
| NEON::BI__builtin_neon_vpminnmq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vpminq_f16, |
| NEON::BI__builtin_neon_vpminq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrecpe_f16, |
| NEON::BI__builtin_neon_vrecpe_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrecpeq_f16, |
| NEON::BI__builtin_neon_vrecpeq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrecps_f16, |
| NEON::BI__builtin_neon_vrecps_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrecpsq_f16, |
| NEON::BI__builtin_neon_vrecpsq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrnd_f16, |
| NEON::BI__builtin_neon_vrnd_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrnda_f16, |
| NEON::BI__builtin_neon_vrnda_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndaq_f16, |
| NEON::BI__builtin_neon_vrndaq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndi_f16, |
| NEON::BI__builtin_neon_vrndi_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndiq_f16, |
| NEON::BI__builtin_neon_vrndiq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndm_f16, |
| NEON::BI__builtin_neon_vrndm_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndmq_f16, |
| NEON::BI__builtin_neon_vrndmq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndn_f16, |
| NEON::BI__builtin_neon_vrndn_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndnq_f16, |
| NEON::BI__builtin_neon_vrndnq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndp_f16, |
| NEON::BI__builtin_neon_vrndp_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndpq_f16, |
| NEON::BI__builtin_neon_vrndpq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndq_f16, |
| NEON::BI__builtin_neon_vrndq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndx_f16, |
| NEON::BI__builtin_neon_vrndx_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrndxq_f16, |
| NEON::BI__builtin_neon_vrndxq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrsqrte_f16, |
| NEON::BI__builtin_neon_vrsqrte_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrsqrteq_f16, |
| NEON::BI__builtin_neon_vrsqrteq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrsqrts_f16, |
| NEON::BI__builtin_neon_vrsqrts_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vrsqrtsq_f16, |
| NEON::BI__builtin_neon_vrsqrtsq_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vsqrt_f16, |
| NEON::BI__builtin_neon_vsqrt_v, |
| }, |
| { |
| NEON::BI__builtin_neon_vsqrtq_f16, |
| NEON::BI__builtin_neon_vsqrtq_v, |
| }, |
| {NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v}, |
| {NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v}, |
| {NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v}, |
| {NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v}, |
| {NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v}, |
| {NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v}, |
| {NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v}, |
| {NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v}, |
| {NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v}, |
| {NEON::BI__builtin_neon_vst1q_lane_bf16, |
| NEON::BI__builtin_neon_vst1q_lane_v}, |
| {NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v}, |
| {NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v}, |
| {NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v}, |
| {NEON::BI__builtin_neon_vst2q_lane_bf16, |
| NEON::BI__builtin_neon_vst2q_lane_v}, |
| {NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v}, |
| {NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v}, |
| {NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v}, |
| {NEON::BI__builtin_neon_vst3q_lane_bf16, |
| NEON::BI__builtin_neon_vst3q_lane_v}, |
| {NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v}, |
| {NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v}, |
| {NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v}, |
| {NEON::BI__builtin_neon_vst4q_lane_bf16, |
| NEON::BI__builtin_neon_vst4q_lane_v}, |
| // The mangling rules cause us to have one ID for each type for |
| // vldap1(q)_lane and vstl1(q)_lane, but codegen is equivalent for all of |
| // them. Choose an arbitrary one to be handled as tha canonical variation. |
| {NEON::BI__builtin_neon_vldap1_lane_u64, |
| NEON::BI__builtin_neon_vldap1_lane_s64}, |
| {NEON::BI__builtin_neon_vldap1_lane_f64, |
| NEON::BI__builtin_neon_vldap1_lane_s64}, |
| {NEON::BI__builtin_neon_vldap1_lane_p64, |
| NEON::BI__builtin_neon_vldap1_lane_s64}, |
| {NEON::BI__builtin_neon_vldap1q_lane_u64, |
| NEON::BI__builtin_neon_vldap1q_lane_s64}, |
| {NEON::BI__builtin_neon_vldap1q_lane_f64, |
| NEON::BI__builtin_neon_vldap1q_lane_s64}, |
| {NEON::BI__builtin_neon_vldap1q_lane_p64, |
| NEON::BI__builtin_neon_vldap1q_lane_s64}, |
| {NEON::BI__builtin_neon_vstl1_lane_u64, |
| NEON::BI__builtin_neon_vstl1_lane_s64}, |
| {NEON::BI__builtin_neon_vstl1_lane_f64, |
| NEON::BI__builtin_neon_vstl1_lane_s64}, |
| {NEON::BI__builtin_neon_vstl1_lane_p64, |
| NEON::BI__builtin_neon_vstl1_lane_s64}, |
| {NEON::BI__builtin_neon_vstl1q_lane_u64, |
| NEON::BI__builtin_neon_vstl1q_lane_s64}, |
| {NEON::BI__builtin_neon_vstl1q_lane_f64, |
| NEON::BI__builtin_neon_vstl1q_lane_s64}, |
| {NEON::BI__builtin_neon_vstl1q_lane_p64, |
| NEON::BI__builtin_neon_vstl1q_lane_s64}, |
| }; |
| |
| std::optional<mlir::Value> |
| CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, |
| ReturnValueSlot returnValue, |
| llvm::Triple::ArchType arch) { |
| if (builtinID >= clang::AArch64::FirstSVEBuiltin && |
| builtinID <= clang::AArch64::LastSVEBuiltin) |
| return emitAArch64SVEBuiltinExpr(builtinID, expr); |
| |
| if (builtinID >= clang::AArch64::FirstSMEBuiltin && |
| builtinID <= clang::AArch64::LastSMEBuiltin) |
| return emitAArch64SMEBuiltinExpr(builtinID, expr); |
| |
| if (builtinID == Builtin::BI__builtin_cpu_supports) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| switch (builtinID) { |
| default: |
| break; |
| case clang::AArch64::BI__builtin_arm_nop: |
| case clang::AArch64::BI__builtin_arm_yield: |
| case clang::AArch64::BI__yield: |
| case clang::AArch64::BI__builtin_arm_wfe: |
| case clang::AArch64::BI__wfe: |
| case clang::AArch64::BI__builtin_arm_wfi: |
| case clang::AArch64::BI__wfi: |
| case clang::AArch64::BI__builtin_arm_sev: |
| case clang::AArch64::BI__sev: |
| case clang::AArch64::BI__builtin_arm_sevl: |
| case clang::AArch64::BI__sevl: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_trap) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_get_sme_state) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_rbit) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| if (builtinID == clang::AArch64::BI__builtin_arm_rbit64) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_clz || |
| builtinID == clang::AArch64::BI__builtin_arm_clz64) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_cls) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| if (builtinID == clang::AArch64::BI__builtin_arm_cls64) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_rint32zf || |
| builtinID == clang::AArch64::BI__builtin_arm_rint32z) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_rint64zf || |
| builtinID == clang::AArch64::BI__builtin_arm_rint64z) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_rint32xf || |
| builtinID == clang::AArch64::BI__builtin_arm_rint32x) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_rint64xf || |
| builtinID == clang::AArch64::BI__builtin_arm_rint64x) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_jcvt) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_ld64b || |
| builtinID == clang::AArch64::BI__builtin_arm_st64b || |
| builtinID == clang::AArch64::BI__builtin_arm_st64bv || |
| builtinID == clang::AArch64::BI__builtin_arm_st64bv0) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_rndr || |
| builtinID == clang::AArch64::BI__builtin_arm_rndrrs) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__clear_cache) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if ((builtinID == clang::AArch64::BI__builtin_arm_ldrex || |
| builtinID == clang::AArch64::BI__builtin_arm_ldaex) && |
| getContext().getTypeSize(expr->getType()) == 128) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| if (builtinID == clang::AArch64::BI__builtin_arm_ldrex || |
| builtinID == clang::AArch64::BI__builtin_arm_ldaex) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if ((builtinID == clang::AArch64::BI__builtin_arm_strex || |
| builtinID == clang::AArch64::BI__builtin_arm_stlex) && |
| getContext().getTypeSize(expr->getArg(0)->getType()) == 128) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_strex || |
| builtinID == clang::AArch64::BI__builtin_arm_stlex) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__getReg) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__break) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_clrex) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI_ReadWriteBarrier) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| // CRC32 |
| Intrinsic::ID crcIntrinsicID = Intrinsic::not_intrinsic; |
| switch (builtinID) { |
| case clang::AArch64::BI__builtin_arm_crc32b: |
| crcIntrinsicID = Intrinsic::aarch64_crc32b; |
| break; |
| case clang::AArch64::BI__builtin_arm_crc32cb: |
| crcIntrinsicID = Intrinsic::aarch64_crc32cb; |
| break; |
| case clang::AArch64::BI__builtin_arm_crc32h: |
| crcIntrinsicID = Intrinsic::aarch64_crc32h; |
| break; |
| case clang::AArch64::BI__builtin_arm_crc32ch: |
| crcIntrinsicID = Intrinsic::aarch64_crc32ch; |
| break; |
| case clang::AArch64::BI__builtin_arm_crc32w: |
| crcIntrinsicID = Intrinsic::aarch64_crc32w; |
| break; |
| case clang::AArch64::BI__builtin_arm_crc32cw: |
| crcIntrinsicID = Intrinsic::aarch64_crc32cw; |
| break; |
| case clang::AArch64::BI__builtin_arm_crc32d: |
| crcIntrinsicID = Intrinsic::aarch64_crc32x; |
| break; |
| case clang::AArch64::BI__builtin_arm_crc32cd: |
| crcIntrinsicID = Intrinsic::aarch64_crc32cx; |
| break; |
| } |
| |
| if (crcIntrinsicID != Intrinsic::not_intrinsic) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| // Memory Operations (MOPS) |
| if (builtinID == AArch64::BI__builtin_arm_mops_memset_tag) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| // Memory Tagging Extensions (MTE) Intrinsics |
| Intrinsic::ID mteIntrinsicID = Intrinsic::not_intrinsic; |
| switch (builtinID) { |
| case clang::AArch64::BI__builtin_arm_irg: |
| mteIntrinsicID = Intrinsic::aarch64_irg; |
| break; |
| case clang::AArch64::BI__builtin_arm_addg: |
| mteIntrinsicID = Intrinsic::aarch64_addg; |
| break; |
| case clang::AArch64::BI__builtin_arm_gmi: |
| mteIntrinsicID = Intrinsic::aarch64_gmi; |
| break; |
| case clang::AArch64::BI__builtin_arm_ldg: |
| mteIntrinsicID = Intrinsic::aarch64_ldg; |
| break; |
| case clang::AArch64::BI__builtin_arm_stg: |
| mteIntrinsicID = Intrinsic::aarch64_stg; |
| break; |
| case clang::AArch64::BI__builtin_arm_subp: |
| mteIntrinsicID = Intrinsic::aarch64_subp; |
| break; |
| } |
| |
| if (mteIntrinsicID != Intrinsic::not_intrinsic) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_arm_rsr || |
| builtinID == clang::AArch64::BI__builtin_arm_rsr64 || |
| builtinID == clang::AArch64::BI__builtin_arm_rsr128 || |
| builtinID == clang::AArch64::BI__builtin_arm_rsrp || |
| builtinID == clang::AArch64::BI__builtin_arm_wsr || |
| builtinID == clang::AArch64::BI__builtin_arm_wsr64 || |
| builtinID == clang::AArch64::BI__builtin_arm_wsr128 || |
| builtinID == clang::AArch64::BI__builtin_arm_wsrp) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI_ReadStatusReg || |
| builtinID == clang::AArch64::BI_WriteStatusReg || |
| builtinID == clang::AArch64::BI__sys) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI_AddressOfReturnAddress) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__builtin_sponentry) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == clang::AArch64::BI__mulh || |
| builtinID == clang::AArch64::BI__umulh) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI__writex18byte || |
| builtinID == AArch64::BI__writex18word || |
| builtinID == AArch64::BI__writex18dword || |
| builtinID == AArch64::BI__writex18qword) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI__readx18byte || |
| builtinID == AArch64::BI__readx18word || |
| builtinID == AArch64::BI__readx18dword || |
| builtinID == AArch64::BI__readx18qword) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI__addx18byte || |
| builtinID == AArch64::BI__addx18word || |
| builtinID == AArch64::BI__addx18dword || |
| builtinID == AArch64::BI__addx18qword || |
| builtinID == AArch64::BI__incx18byte || |
| builtinID == AArch64::BI__incx18word || |
| builtinID == AArch64::BI__incx18dword || |
| builtinID == AArch64::BI__incx18qword) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI_CopyDoubleFromInt64 || |
| builtinID == AArch64::BI_CopyFloatFromInt32 || |
| builtinID == AArch64::BI_CopyInt32FromFloat || |
| builtinID == AArch64::BI_CopyInt64FromDouble) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI_CountLeadingOnes || |
| builtinID == AArch64::BI_CountLeadingOnes64 || |
| builtinID == AArch64::BI_CountLeadingZeros || |
| builtinID == AArch64::BI_CountLeadingZeros64) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI_CountLeadingSigns || |
| builtinID == AArch64::BI_CountLeadingSigns64) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI_CountOneBits || |
| builtinID == AArch64::BI_CountOneBits64) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI__prefetch) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == AArch64::BI__hlt) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| if (builtinID == NEON::BI__builtin_neon_vcvth_bf16_f32) { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| // Handle MSVC intrinsics before argument evaluation to prevent double |
| // evaluation. |
| assert(!cir::MissingFeatures::msvcBuiltins()); |
| |
| // Some intrinsics are equivalent - if they are use the base intrinsic ID. |
| auto it = llvm::find_if(neonEquivalentIntrinsicMap, [builtinID](auto &p) { |
| return p.first == builtinID; |
| }); |
| if (it != end(neonEquivalentIntrinsicMap)) |
| builtinID = it->second; |
| |
| // Find out if any arguments are required to be integer constant |
| // expressions. |
| assert(!cir::MissingFeatures::handleBuiltinICEArguments()); |
| unsigned iceArguments = 0; |
| ASTContext::GetBuiltinTypeError error; |
| getContext().GetBuiltinType(builtinID, error, &iceArguments); |
| assert(error == ASTContext::GE_None && "Should not codegen an error"); |
| llvm::SmallVector<mlir::Value> ops; |
| |
| // Skip extra arguments used to discriminate vector types and that are |
| // intended for Sema checking. |
| bool hasExtraArg = hasExtraNeonArgument(builtinID); |
| unsigned numArgs = expr->getNumArgs() - (hasExtraArg ? 1 : 0); |
| for (unsigned i = 0, e = numArgs; i != e; i++) { |
| if (i == 0) { |
| switch (builtinID) { |
| case NEON::BI__builtin_neon_vld1_v: |
| case NEON::BI__builtin_neon_vld1q_v: |
| case NEON::BI__builtin_neon_vld1_dup_v: |
| case NEON::BI__builtin_neon_vld1q_dup_v: |
| case NEON::BI__builtin_neon_vld1_lane_v: |
| case NEON::BI__builtin_neon_vld1q_lane_v: |
| case NEON::BI__builtin_neon_vst1_v: |
| case NEON::BI__builtin_neon_vst1q_v: |
| case NEON::BI__builtin_neon_vst1_lane_v: |
| case NEON::BI__builtin_neon_vst1q_lane_v: |
| case NEON::BI__builtin_neon_vldap1_lane_s64: |
| case NEON::BI__builtin_neon_vldap1q_lane_s64: |
| case NEON::BI__builtin_neon_vstl1_lane_s64: |
| case NEON::BI__builtin_neon_vstl1q_lane_s64: |
| // Get the alignment for the argument in addition to the value; |
| // we'll use it later. |
| cgm.errorNYI( |
| expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin argument handling ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| } |
| } |
| ops.push_back( |
| emitScalarOrConstFoldImmArg(iceArguments, i, expr->getArg(i))); |
| } |
| |
| const ARMVectorIntrinsicInfo *builtin = findARMVectorIntrinsicInMap( |
| AArch64SISDIntrinsicMap, builtinID, aarch64SISDIntrinsicsProvenSorted); |
| if (builtin) |
| return emitCommonNeonSISDBuiltinExpr(*this, *builtin, ops, expr); |
| |
| // Not all intrinsics handled by the common case work for AArch64 yet, so only |
| // defer to common code if it's been added to our special map. |
| assert(!cir::MissingFeatures::aarch64SIMDIntrinsics()); |
| |
| assert(!cir::MissingFeatures::aarch64TblBuiltinExpr()); |
| |
| const Expr *arg = expr->getArg(expr->getNumArgs() - 1); |
| NeonTypeFlags type(0); |
| // A trailing constant integer is used for discriminating overloaded builtin |
| // calls. Use it to determine the type of this overloaded NEON intrinsic. |
| if (std::optional<llvm::APSInt> result = |
| arg->getIntegerConstantExpr(getContext())) |
| type = NeonTypeFlags(result->getZExtValue()); |
| |
| bool usgn = type.isUnsigned(); |
| |
| mlir::Location loc = getLoc(expr->getExprLoc()); |
| |
| // Not all intrinsics handled by the common case work for AArch64 yet, so only |
| // defer to common code if it's been added to our special map. |
| builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID, |
| aarch64SIMDIntrinsicsProvenSorted); |
| if (builtin) |
| return emitCommonNeonBuiltinExpr( |
| *this, builtin->BuiltinID, builtin->LLVMIntrinsic, |
| builtin->AltLLVMIntrinsic, builtin->NameHint, builtin->TypeModifier, |
| expr, ops); |
| |
| // Handle non-overloaded intrinsics first. |
| switch (builtinID) { |
| default: |
| break; |
| case NEON::BI__builtin_neon_vabsh_f16: { |
| return cir::FAbsOp::create(builder, loc, ops); |
| } |
| case NEON::BI__builtin_neon_vaddq_p128: |
| case NEON::BI__builtin_neon_vldrq_p128: |
| case NEON::BI__builtin_neon_vstrq_p128: |
| case NEON::BI__builtin_neon_vcvts_f32_u32: |
| case NEON::BI__builtin_neon_vcvtd_f64_u64: |
| case NEON::BI__builtin_neon_vcvts_f32_s32: |
| case NEON::BI__builtin_neon_vcvtd_f64_s64: |
| case NEON::BI__builtin_neon_vcvth_f16_u16: |
| case NEON::BI__builtin_neon_vcvth_f16_u32: |
| case NEON::BI__builtin_neon_vcvth_f16_u64: |
| case NEON::BI__builtin_neon_vcvth_f16_s16: |
| case NEON::BI__builtin_neon_vcvth_f16_s32: |
| case NEON::BI__builtin_neon_vcvth_f16_s64: |
| case NEON::BI__builtin_neon_vcvtah_u16_f16: |
| case NEON::BI__builtin_neon_vcvtmh_u16_f16: |
| case NEON::BI__builtin_neon_vcvtnh_u16_f16: |
| case NEON::BI__builtin_neon_vcvtph_u16_f16: |
| case NEON::BI__builtin_neon_vcvth_u16_f16: |
| case NEON::BI__builtin_neon_vcvtah_s16_f16: |
| case NEON::BI__builtin_neon_vcvtmh_s16_f16: |
| case NEON::BI__builtin_neon_vcvtnh_s16_f16: |
| case NEON::BI__builtin_neon_vcvtph_s16_f16: |
| case NEON::BI__builtin_neon_vcvth_s16_f16: |
| case NEON::BI__builtin_neon_vcaleh_f16: |
| case NEON::BI__builtin_neon_vcalth_f16: |
| case NEON::BI__builtin_neon_vcageh_f16: |
| case NEON::BI__builtin_neon_vcagth_f16: |
| case NEON::BI__builtin_neon_vcvth_n_s16_f16: |
| case NEON::BI__builtin_neon_vcvth_n_u16_f16: |
| case NEON::BI__builtin_neon_vcvth_n_f16_s16: |
| case NEON::BI__builtin_neon_vcvth_n_f16_u16: |
| case NEON::BI__builtin_neon_vpaddd_s64: |
| case NEON::BI__builtin_neon_vpaddd_f64: |
| case NEON::BI__builtin_neon_vpadds_f32: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vceqzd_s64: |
| case NEON::BI__builtin_neon_vceqzd_f64: |
| case NEON::BI__builtin_neon_vceqzs_f32: |
| case NEON::BI__builtin_neon_vceqzh_f16: |
| return emitAArch64CompareBuiltinExpr( |
| *this, builder, loc, ops[0], |
| convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq); |
| case NEON::BI__builtin_neon_vcgezd_s64: |
| case NEON::BI__builtin_neon_vcgezd_f64: |
| case NEON::BI__builtin_neon_vcgezs_f32: |
| case NEON::BI__builtin_neon_vcgezh_f16: |
| case NEON::BI__builtin_neon_vclezd_s64: |
| case NEON::BI__builtin_neon_vclezd_f64: |
| case NEON::BI__builtin_neon_vclezs_f32: |
| case NEON::BI__builtin_neon_vclezh_f16: |
| case NEON::BI__builtin_neon_vcgtzd_s64: |
| case NEON::BI__builtin_neon_vcgtzd_f64: |
| case NEON::BI__builtin_neon_vcgtzs_f32: |
| case NEON::BI__builtin_neon_vcgtzh_f16: |
| case NEON::BI__builtin_neon_vcltzd_s64: |
| case NEON::BI__builtin_neon_vcltzd_f64: |
| case NEON::BI__builtin_neon_vcltzs_f32: |
| case NEON::BI__builtin_neon_vcltzh_f16: |
| case NEON::BI__builtin_neon_vceqzd_u64: { |
| return emitAArch64CompareBuiltinExpr( |
| *this, builder, loc, ops[0], |
| convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq); |
| } |
| case NEON::BI__builtin_neon_vceqd_f64: |
| case NEON::BI__builtin_neon_vcled_f64: |
| case NEON::BI__builtin_neon_vcltd_f64: |
| case NEON::BI__builtin_neon_vcged_f64: |
| case NEON::BI__builtin_neon_vcgtd_f64: |
| case NEON::BI__builtin_neon_vceqs_f32: |
| case NEON::BI__builtin_neon_vcles_f32: |
| case NEON::BI__builtin_neon_vclts_f32: |
| case NEON::BI__builtin_neon_vcges_f32: |
| case NEON::BI__builtin_neon_vcgts_f32: |
| case NEON::BI__builtin_neon_vceqh_f16: |
| case NEON::BI__builtin_neon_vcleh_f16: |
| case NEON::BI__builtin_neon_vclth_f16: |
| case NEON::BI__builtin_neon_vcgeh_f16: |
| case NEON::BI__builtin_neon_vcgth_f16: |
| case NEON::BI__builtin_neon_vceqd_s64: |
| case NEON::BI__builtin_neon_vceqd_u64: |
| case NEON::BI__builtin_neon_vcgtd_s64: |
| case NEON::BI__builtin_neon_vcgtd_u64: |
| case NEON::BI__builtin_neon_vcltd_s64: |
| case NEON::BI__builtin_neon_vcltd_u64: |
| case NEON::BI__builtin_neon_vcged_u64: |
| case NEON::BI__builtin_neon_vcged_s64: |
| case NEON::BI__builtin_neon_vcled_u64: |
| case NEON::BI__builtin_neon_vcled_s64: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vnegd_s64: { |
| return builder.createNeg(ops[0]); |
| } |
| case NEON::BI__builtin_neon_vnegh_f16: { |
| return builder.createFNeg(ops[0]); |
| } |
| case NEON::BI__builtin_neon_vtstd_s64: |
| case NEON::BI__builtin_neon_vtstd_u64: |
| case NEON::BI__builtin_neon_vset_lane_i8: |
| case NEON::BI__builtin_neon_vset_lane_i16: |
| case NEON::BI__builtin_neon_vset_lane_i32: |
| case NEON::BI__builtin_neon_vset_lane_i64: |
| case NEON::BI__builtin_neon_vset_lane_bf16: |
| case NEON::BI__builtin_neon_vset_lane_f32: |
| case NEON::BI__builtin_neon_vsetq_lane_i8: |
| case NEON::BI__builtin_neon_vsetq_lane_i16: |
| case NEON::BI__builtin_neon_vsetq_lane_i32: |
| case NEON::BI__builtin_neon_vsetq_lane_i64: |
| case NEON::BI__builtin_neon_vsetq_lane_bf16: |
| case NEON::BI__builtin_neon_vsetq_lane_f32: |
| case NEON::BI__builtin_neon_vset_lane_f64: |
| case NEON::BI__builtin_neon_vset_lane_mf8: |
| case NEON::BI__builtin_neon_vsetq_lane_mf8: |
| case NEON::BI__builtin_neon_vsetq_lane_f64: |
| case NEON::BI__builtin_neon_vget_lane_i8: |
| case NEON::BI__builtin_neon_vdupb_lane_i8: |
| case NEON::BI__builtin_neon_vgetq_lane_i8: |
| case NEON::BI__builtin_neon_vdupb_laneq_i8: |
| case NEON::BI__builtin_neon_vget_lane_mf8: |
| case NEON::BI__builtin_neon_vdupb_lane_mf8: |
| case NEON::BI__builtin_neon_vgetq_lane_mf8: |
| case NEON::BI__builtin_neon_vdupb_laneq_mf8: |
| case NEON::BI__builtin_neon_vget_lane_i16: |
| case NEON::BI__builtin_neon_vduph_lane_i16: |
| case NEON::BI__builtin_neon_vgetq_lane_i16: |
| case NEON::BI__builtin_neon_vduph_laneq_i16: |
| case NEON::BI__builtin_neon_vget_lane_i32: |
| case NEON::BI__builtin_neon_vdups_lane_i32: |
| case NEON::BI__builtin_neon_vdups_lane_f32: |
| case NEON::BI__builtin_neon_vgetq_lane_i32: |
| case NEON::BI__builtin_neon_vdups_laneq_i32: |
| case NEON::BI__builtin_neon_vget_lane_i64: |
| case NEON::BI__builtin_neon_vdupd_lane_i64: |
| case NEON::BI__builtin_neon_vdupd_lane_f64: |
| case NEON::BI__builtin_neon_vgetq_lane_i64: |
| case NEON::BI__builtin_neon_vdupd_laneq_i64: |
| case NEON::BI__builtin_neon_vget_lane_f32: |
| case NEON::BI__builtin_neon_vget_lane_f64: |
| case NEON::BI__builtin_neon_vgetq_lane_f32: |
| case NEON::BI__builtin_neon_vdups_laneq_f32: |
| case NEON::BI__builtin_neon_vgetq_lane_f64: |
| case NEON::BI__builtin_neon_vdupd_laneq_f64: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vaddh_f16: |
| return builder.createFAdd(loc, ops[0], ops[1]); |
| case NEON::BI__builtin_neon_vsubh_f16: |
| return builder.createFSub(loc, ops[0], ops[1]); |
| case NEON::BI__builtin_neon_vmulh_f16: |
| return builder.createFMul(loc, ops[0], ops[1]); |
| case NEON::BI__builtin_neon_vdivh_f16: |
| return builder.createFDiv(loc, ops[0], ops[1]); |
| case NEON::BI__builtin_neon_vfmah_f16: |
| // NEON intrinsic puts accumulator first, unlike the LLVM fma. |
| std::rotate(ops.begin(), ops.begin() + 1, ops.end()); |
| return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", |
| convertType(expr->getType()), ops); |
| break; |
| case NEON::BI__builtin_neon_vfmsh_f16: |
| // NEON intrinsic puts accumulator first, unlike the LLVM fma. |
| std::rotate(ops.begin(), ops.begin() + 1, ops.end()); |
| ops[0] = builder.createFNeg(ops[0]); |
| return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", |
| convertType(expr->getType()), ops); |
| case NEON::BI__builtin_neon_vaddd_s64: |
| case NEON::BI__builtin_neon_vaddd_u64: |
| case NEON::BI__builtin_neon_vsubd_s64: |
| case NEON::BI__builtin_neon_vsubd_u64: |
| case NEON::BI__builtin_neon_vqdmlalh_s16: |
| case NEON::BI__builtin_neon_vqdmlslh_s16: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vqshlud_n_s64: { |
| cir::IntType int64Type = builder.getSInt64Ty(); |
| ops[1] = builder.getSInt64(getZExtIntValueFromConstOp(ops[1]), loc); |
| return emitNeonCall(cgm, builder, {int64Type, int64Type}, ops, |
| "aarch64.neon.sqshlu", convertType(expr->getType()), |
| loc); |
| } |
| case NEON::BI__builtin_neon_vqshld_n_u64: |
| case NEON::BI__builtin_neon_vqshld_n_s64: { |
| cir::IntType int64Type = builtinID == NEON::BI__builtin_neon_vqshld_n_u64 |
| ? builder.getUInt64Ty() |
| : builder.getSInt64Ty(); |
| llvm::StringRef intrinsicName = |
| builtinID == NEON::BI__builtin_neon_vqshld_n_u64 ? "aarch64.neon.uqshl" |
| : "aarch64.neon.sqshl"; |
| ops[1] = builder.getSInt64(getZExtIntValueFromConstOp(ops[1]), loc); |
| return emitNeonCall(cgm, builder, {int64Type, int64Type}, ops, |
| intrinsicName, convertType(expr->getType()), loc); |
| } |
| case NEON::BI__builtin_neon_vrshrd_n_u64: |
| case NEON::BI__builtin_neon_vrshrd_n_s64: { |
| llvm::StringRef intrName = builtinID == NEON::BI__builtin_neon_vrshrd_n_s64 |
| ? "aarch64.neon.srshl" |
| : "aarch64.neon.urshl"; |
| cir::IntType int64Ty = builtinID == NEON::BI__builtin_neon_vqshld_n_u64 |
| ? builder.getUInt64Ty() |
| : builder.getSInt64Ty(); |
| int64_t sv = -cast<cir::IntAttr>( |
| cast<cir::ConstantOp>(ops[1].getDefiningOp()).getValue()) |
| .getSInt(); |
| ops[1] = builder.getSInt64(sv, loc); |
| return emitNeonCall(cgm, builder, {int64Ty, builder.getSInt64Ty()}, ops, |
| intrName, int64Ty, loc); |
| } |
| case NEON::BI__builtin_neon_vrsrad_n_u64: |
| case NEON::BI__builtin_neon_vrsrad_n_s64: { |
| cir::IntType int64Type = builtinID == NEON::BI__builtin_neon_vrsrad_n_u64 |
| ? builder.getUInt64Ty() |
| : builder.getSInt64Ty(); |
| ops[2] = builder.createNeg(ops[2]); |
| const StringRef intrName = builtinID == NEON::BI__builtin_neon_vrsrad_n_u64 |
| ? "aarch64.neon.urshl" |
| : "aarch64.neon.srshl"; |
| |
| llvm::SmallVector<mlir::Value, 2> args = { |
| ops[1], builder.createIntCast(ops[2], builder.getSInt64Ty())}; |
| ops[1] = builder.emitIntrinsicCallOp(loc, intrName, int64Type, args); |
| return builder.createAdd(loc, ops[0], |
| builder.createBitcast(ops[1], int64Type)); |
| } |
| case NEON::BI__builtin_neon_vshld_n_s64: |
| case NEON::BI__builtin_neon_vshld_n_u64: { |
| auto loc = getLoc(expr->getExprLoc()); |
| std::optional<llvm::APSInt> amt = |
| expr->getArg(1)->getIntegerConstantExpr(getContext()); |
| assert(amt && "Expected argument to be a constant"); |
| return builder.createShiftLeft(loc, ops[0], amt->getZExtValue()); |
| } |
| case NEON::BI__builtin_neon_vshrd_n_s64: { |
| std::optional<llvm::APSInt> amt = |
| expr->getArg(1)->getIntegerConstantExpr(getContext()); |
| assert(amt && "Expected argument to be a constant"); |
| return builder.createShiftRight( |
| loc, ops[0], std::min(static_cast<uint64_t>(63), amt->getZExtValue())); |
| } |
| case NEON::BI__builtin_neon_vshrd_n_u64: { |
| std::optional<llvm::APSInt> amt = |
| expr->getArg(1)->getIntegerConstantExpr(getContext()); |
| assert(amt && "Expected argument to be a constant"); |
| uint64_t shiftAmt = amt->getZExtValue(); |
| // Right-shifting an unsigned value by its size yields 0. |
| if (shiftAmt == 64) |
| return builder.getConstInt(loc, builder.getUInt64Ty(), 0); |
| return builder.createShiftRight(loc, ops[0], shiftAmt); |
| } |
| case NEON::BI__builtin_neon_vsrad_n_s64: |
| case NEON::BI__builtin_neon_vsrad_n_u64: |
| case NEON::BI__builtin_neon_vqdmlalh_lane_s16: |
| case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: |
| case NEON::BI__builtin_neon_vqdmlslh_lane_s16: |
| case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: |
| case NEON::BI__builtin_neon_vqdmlals_s32: |
| case NEON::BI__builtin_neon_vqdmlsls_s32: |
| case NEON::BI__builtin_neon_vqdmlals_lane_s32: |
| case NEON::BI__builtin_neon_vqdmlals_laneq_s32: |
| case NEON::BI__builtin_neon_vqdmlsls_lane_s32: |
| case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| case NEON::BI__builtin_neon_vget_lane_bf16: |
| case NEON::BI__builtin_neon_vduph_lane_bf16: |
| case NEON::BI__builtin_neon_vduph_lane_f16: |
| case NEON::BI__builtin_neon_vgetq_lane_bf16: |
| case NEON::BI__builtin_neon_vduph_laneq_bf16: |
| case NEON::BI__builtin_neon_vduph_laneq_f16: { |
| return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]); |
| } |
| case NEON::BI__builtin_neon_vcvt_bf16_f32: |
| case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: |
| case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: |
| case clang::AArch64::BI_InterlockedAdd: |
| case clang::AArch64::BI_InterlockedAdd_acq: |
| case clang::AArch64::BI_InterlockedAdd_rel: |
| case clang::AArch64::BI_InterlockedAdd_nf: |
| case clang::AArch64::BI_InterlockedAdd64: |
| case clang::AArch64::BI_InterlockedAdd64_acq: |
| case clang::AArch64::BI_InterlockedAdd64_rel: |
| case clang::AArch64::BI_InterlockedAdd64_nf: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| cir::VectorType ty = getNeonType(this, type, loc); |
| if (!ty) |
| return nullptr; |
| |
| llvm::StringRef intrName; |
| |
| switch (builtinID) { |
| default: |
| return std::nullopt; |
| case NEON::BI__builtin_neon_vbsl_v: |
| case NEON::BI__builtin_neon_vbslq_v: { |
| |
| cir::VectorType bitTy = getIntVecFromVecTy(builder, ty); |
| ops[0] = builder.createBitcast(ops[0], bitTy); |
| ops[1] = builder.createBitcast(ops[1], bitTy); |
| ops[2] = builder.createBitcast(ops[2], bitTy); |
| |
| ops[1] = builder.createAnd(loc, ops[0], ops[1]); |
| ops[2] = builder.createAnd(loc, builder.createNot(ops[0]), ops[2]); |
| ops[0] = builder.createOr(loc, ops[1], ops[2]); |
| return builder.createBitcast(ops[0], ty); |
| } |
| case NEON::BI__builtin_neon_vfma_lane_v: |
| case NEON::BI__builtin_neon_vfmaq_lane_v: |
| case NEON::BI__builtin_neon_vfma_laneq_v: |
| case NEON::BI__builtin_neon_vfmaq_laneq_v: |
| case NEON::BI__builtin_neon_vfmah_lane_f16: |
| case NEON::BI__builtin_neon_vfmas_lane_f32: |
| case NEON::BI__builtin_neon_vfmah_laneq_f16: |
| case NEON::BI__builtin_neon_vfmas_laneq_f32: |
| case NEON::BI__builtin_neon_vfmad_lane_f64: |
| case NEON::BI__builtin_neon_vfmad_laneq_f64: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vmull_v: { |
| intrName = usgn ? "aarch64.neon.umull" : "aarch64.neon.smull"; |
| if (type.isPoly()) |
| intrName = "aarch64.neon.pmull"; |
| cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType( |
| ty, /*isExtended*/ false, !usgn); |
| return emitNeonCall(cgm, builder, {argTy, argTy}, ops, intrName, ty, loc); |
| } |
| case NEON::BI__builtin_neon_vmax_v: |
| case NEON::BI__builtin_neon_vmaxq_v: |
| intrName = usgn ? "aarch64.neon.umax" : "aarch64.neon.smax"; |
| if (cir::isFPOrVectorOfFPType(ty)) |
| intrName = "aarch64.neon.fmax"; |
| return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); |
| case NEON::BI__builtin_neon_vmaxh_f16: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vmin_v: |
| case NEON::BI__builtin_neon_vminq_v: |
| intrName = usgn ? "aarch64.neon.umin" : "aarch64.neon.smin"; |
| if (cir::isFPOrVectorOfFPType(ty)) |
| intrName = "aarch64.neon.fmin"; |
| return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); |
| case NEON::BI__builtin_neon_vminh_f16: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vabd_v: |
| case NEON::BI__builtin_neon_vabdq_v: |
| intrName = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd"; |
| if (cir::isFPOrVectorOfFPType(ty)) |
| intrName = "aarch64.neon.fabd"; |
| return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); |
| case NEON::BI__builtin_neon_vpadal_v: |
| case NEON::BI__builtin_neon_vpadalq_v: { |
| intrName = usgn ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp"; |
| llvm::SmallVector<mlir::Value> inputs{ops[1]}; |
| mlir::Value pairwiseSum = |
| emitNeonCall(cgm, builder, {getNeonPairwiseWidenInputType(ty, usgn)}, |
| inputs, intrName, ty, loc); |
| mlir::Value accumValue = builder.createBitcast(loc, ops[0], ty); |
| return cir::AddOp::create(builder, loc, ty, pairwiseSum, accumValue); |
| } |
| case NEON::BI__builtin_neon_vpmin_v: |
| case NEON::BI__builtin_neon_vpminq_v: |
| intrName = usgn ? "aarch64.neon.uminp" : "aarch64.neon.sminp"; |
| if (cir::isFPOrVectorOfFPType(ty)) |
| intrName = "aarch64.neon.fminp"; |
| return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); |
| case NEON::BI__builtin_neon_vpmax_v: |
| case NEON::BI__builtin_neon_vpmaxq_v: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vminnm_v: |
| case NEON::BI__builtin_neon_vminnmq_v: |
| intrName = "aarch64.neon.fminnm"; |
| return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); |
| case NEON::BI__builtin_neon_vminnmh_f16: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vmaxnm_v: |
| case NEON::BI__builtin_neon_vmaxnmq_v: |
| intrName = "aarch64.neon.fmaxnm"; |
| return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); |
| case NEON::BI__builtin_neon_vmaxnmh_f16: |
| case NEON::BI__builtin_neon_vrecpss_f32: |
| case NEON::BI__builtin_neon_vrecpsd_f64: |
| case NEON::BI__builtin_neon_vrecpsh_f16: |
| case NEON::BI__builtin_neon_vqshrun_n_v: |
| case NEON::BI__builtin_neon_vqrshrun_n_v: |
| case NEON::BI__builtin_neon_vqshrn_n_v: |
| case NEON::BI__builtin_neon_vrshrn_n_v: |
| case NEON::BI__builtin_neon_vqrshrn_n_v: |
| case NEON::BI__builtin_neon_vrndah_f16: |
| case NEON::BI__builtin_neon_vrnda_v: |
| case NEON::BI__builtin_neon_vrndaq_v: |
| case NEON::BI__builtin_neon_vrndih_f16: |
| case NEON::BI__builtin_neon_vrndmh_f16: |
| case NEON::BI__builtin_neon_vrndm_v: |
| case NEON::BI__builtin_neon_vrndmq_v: |
| case NEON::BI__builtin_neon_vrndnh_f16: |
| case NEON::BI__builtin_neon_vrndn_v: |
| case NEON::BI__builtin_neon_vrndnq_v: |
| case NEON::BI__builtin_neon_vrndns_f32: |
| case NEON::BI__builtin_neon_vrndph_f16: |
| case NEON::BI__builtin_neon_vrndp_v: |
| case NEON::BI__builtin_neon_vrndpq_v: |
| case NEON::BI__builtin_neon_vrndxh_f16: |
| case NEON::BI__builtin_neon_vrndx_v: |
| case NEON::BI__builtin_neon_vrndxq_v: |
| case NEON::BI__builtin_neon_vrndh_f16: |
| case NEON::BI__builtin_neon_vrnd32x_f32: |
| case NEON::BI__builtin_neon_vrnd32xq_f32: |
| case NEON::BI__builtin_neon_vrnd32x_f64: |
| case NEON::BI__builtin_neon_vrnd32xq_f64: |
| case NEON::BI__builtin_neon_vrnd32z_f32: |
| case NEON::BI__builtin_neon_vrnd32zq_f32: |
| case NEON::BI__builtin_neon_vrnd32z_f64: |
| case NEON::BI__builtin_neon_vrnd32zq_f64: |
| case NEON::BI__builtin_neon_vrnd64x_f32: |
| case NEON::BI__builtin_neon_vrnd64xq_f32: |
| case NEON::BI__builtin_neon_vrnd64x_f64: |
| case NEON::BI__builtin_neon_vrnd64xq_f64: |
| case NEON::BI__builtin_neon_vrnd64z_f32: |
| case NEON::BI__builtin_neon_vrnd64zq_f32: |
| case NEON::BI__builtin_neon_vrnd64z_f64: |
| case NEON::BI__builtin_neon_vrnd64zq_f64: |
| case NEON::BI__builtin_neon_vrnd_v: |
| case NEON::BI__builtin_neon_vrndq_v: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vcvt_f64_v: |
| case NEON::BI__builtin_neon_vcvtq_f64_v: |
| ops[0] = builder.createBitcast(ops[0], ty); |
| ty = getNeonType( |
| this, NeonTypeFlags(NeonTypeFlags::Float64, false, type.isQuad()), loc); |
| return builder.createCast(loc, cir::CastKind::int_to_float, ops[0], ty); |
| case NEON::BI__builtin_neon_vcvt_f64_f32: |
| case NEON::BI__builtin_neon_vcvt_f32_f64: |
| case NEON::BI__builtin_neon_vcvt_s32_v: |
| case NEON::BI__builtin_neon_vcvt_u32_v: |
| case NEON::BI__builtin_neon_vcvt_s64_v: |
| case NEON::BI__builtin_neon_vcvt_u64_v: |
| case NEON::BI__builtin_neon_vcvt_s16_f16: |
| case NEON::BI__builtin_neon_vcvt_u16_f16: |
| case NEON::BI__builtin_neon_vcvtq_s32_v: |
| case NEON::BI__builtin_neon_vcvtq_u32_v: |
| case NEON::BI__builtin_neon_vcvtq_s64_v: |
| case NEON::BI__builtin_neon_vcvtq_u64_v: |
| case NEON::BI__builtin_neon_vcvtq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtq_u16_f16: |
| case NEON::BI__builtin_neon_vcvta_s16_f16: |
| case NEON::BI__builtin_neon_vcvta_u16_f16: |
| case NEON::BI__builtin_neon_vcvta_s32_v: |
| case NEON::BI__builtin_neon_vcvtaq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtaq_s32_v: |
| case NEON::BI__builtin_neon_vcvta_u32_v: |
| case NEON::BI__builtin_neon_vcvtaq_u16_f16: |
| case NEON::BI__builtin_neon_vcvtaq_u32_v: |
| case NEON::BI__builtin_neon_vcvta_s64_v: |
| case NEON::BI__builtin_neon_vcvtaq_s64_v: |
| case NEON::BI__builtin_neon_vcvta_u64_v: |
| case NEON::BI__builtin_neon_vcvtaq_u64_v: |
| case NEON::BI__builtin_neon_vcvtm_s16_f16: |
| case NEON::BI__builtin_neon_vcvtm_s32_v: |
| case NEON::BI__builtin_neon_vcvtmq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtmq_s32_v: |
| case NEON::BI__builtin_neon_vcvtm_u16_f16: |
| case NEON::BI__builtin_neon_vcvtm_u32_v: |
| case NEON::BI__builtin_neon_vcvtmq_u16_f16: |
| case NEON::BI__builtin_neon_vcvtmq_u32_v: |
| case NEON::BI__builtin_neon_vcvtm_s64_v: |
| case NEON::BI__builtin_neon_vcvtmq_s64_v: |
| case NEON::BI__builtin_neon_vcvtm_u64_v: |
| case NEON::BI__builtin_neon_vcvtmq_u64_v: |
| case NEON::BI__builtin_neon_vcvtn_s16_f16: |
| case NEON::BI__builtin_neon_vcvtn_s32_v: |
| case NEON::BI__builtin_neon_vcvtnq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtnq_s32_v: |
| case NEON::BI__builtin_neon_vcvtn_u16_f16: |
| case NEON::BI__builtin_neon_vcvtn_u32_v: |
| case NEON::BI__builtin_neon_vcvtnq_u16_f16: |
| case NEON::BI__builtin_neon_vcvtnq_u32_v: |
| case NEON::BI__builtin_neon_vcvtn_s64_v: |
| case NEON::BI__builtin_neon_vcvtnq_s64_v: |
| case NEON::BI__builtin_neon_vcvtn_u64_v: |
| case NEON::BI__builtin_neon_vcvtnq_u64_v: |
| case NEON::BI__builtin_neon_vcvtp_s16_f16: |
| case NEON::BI__builtin_neon_vcvtp_s32_v: |
| case NEON::BI__builtin_neon_vcvtpq_s16_f16: |
| case NEON::BI__builtin_neon_vcvtpq_s32_v: |
| case NEON::BI__builtin_neon_vcvtp_u16_f16: |
| case NEON::BI__builtin_neon_vcvtp_u32_v: |
| case NEON::BI__builtin_neon_vcvtpq_u16_f16: |
| case NEON::BI__builtin_neon_vcvtpq_u32_v: |
| case NEON::BI__builtin_neon_vcvtp_s64_v: |
| case NEON::BI__builtin_neon_vcvtpq_s64_v: |
| case NEON::BI__builtin_neon_vcvtp_u64_v: |
| case NEON::BI__builtin_neon_vcvtpq_u64_v: |
| case NEON::BI__builtin_neon_vmulx_v: |
| case NEON::BI__builtin_neon_vmulxq_v: |
| case NEON::BI__builtin_neon_vmulxh_lane_f16: |
| case NEON::BI__builtin_neon_vmulxh_laneq_f16: |
| case NEON::BI__builtin_neon_vmul_lane_v: |
| case NEON::BI__builtin_neon_vmul_laneq_v: |
| case NEON::BI__builtin_neon_vpmaxnm_v: |
| case NEON::BI__builtin_neon_vpmaxnmq_v: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vpminnm_v: |
| case NEON::BI__builtin_neon_vpminnmq_v: |
| intrName = "aarch64.neon.fminnmp"; |
| return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); |
| case NEON::BI__builtin_neon_vsqrth_f16: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vsqrt_v: |
| case NEON::BI__builtin_neon_vsqrtq_v: |
| assert(!cir::MissingFeatures::emitConstrainedFPCall()); |
| return emitNeonCall(cgm, builder, {ty}, ops, "sqrt", ty, loc); |
| case NEON::BI__builtin_neon_vrbit_v: |
| case NEON::BI__builtin_neon_vrbitq_v: |
| case NEON::BI__builtin_neon_vmaxv_f16: |
| case NEON::BI__builtin_neon_vmaxvq_f16: |
| case NEON::BI__builtin_neon_vminv_f16: |
| case NEON::BI__builtin_neon_vminvq_f16: |
| case NEON::BI__builtin_neon_vmaxnmv_f16: |
| case NEON::BI__builtin_neon_vmaxnmvq_f16: |
| case NEON::BI__builtin_neon_vminnmv_f16: |
| case NEON::BI__builtin_neon_vminnmvq_f16: |
| case NEON::BI__builtin_neon_vmul_n_f64: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vaddlv_u8: |
| case NEON::BI__builtin_neon_vaddlvq_u8: |
| case NEON::BI__builtin_neon_vaddlv_u16: |
| case NEON::BI__builtin_neon_vaddlvq_u16: |
| case NEON::BI__builtin_neon_vaddlv_s8: |
| case NEON::BI__builtin_neon_vaddlvq_s8: |
| case NEON::BI__builtin_neon_vaddlv_s16: |
| case NEON::BI__builtin_neon_vaddlvq_s16: { |
| mlir::Type argTy = convertType(expr->getArg(0)->getType()); |
| mlir::Type userRetTy = convertType(expr->getType()); |
| auto eltTy = mlir::cast<cir::IntType>( |
| mlir::cast<cir::VectorType>(argTy).getElementType()); |
| bool isUnsigned = !eltTy.isSigned(); |
| // These builtins only use 8 and 16-bit element vectors; the intrinsic |
| // always produces i32. The C result is i32 for 16-bit elements, but i16 |
| // for 8-bit elements, so we emit at i32 and narrow only in that case. |
| bool needsTrunc = eltTy.getWidth() == 8; |
| intrName = isUnsigned ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv"; |
| mlir::Type intrRetTy = userRetTy; |
| if (needsTrunc) |
| intrRetTy = isUnsigned ? builder.getUInt32Ty() : builder.getSInt32Ty(); |
| mlir::Value result = |
| emitNeonCall(cgm, builder, {argTy}, ops, intrName, intrRetTy, loc); |
| if (needsTrunc) |
| result = builder.createIntCast(result, userRetTy); |
| return result; |
| } |
| case NEON::BI__builtin_neon_vsri_n_v: |
| case NEON::BI__builtin_neon_vsriq_n_v: |
| case NEON::BI__builtin_neon_vsli_n_v: |
| case NEON::BI__builtin_neon_vsliq_n_v: |
| case NEON::BI__builtin_neon_vsra_n_v: |
| case NEON::BI__builtin_neon_vsraq_n_v: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vrsra_n_v: |
| case NEON::BI__builtin_neon_vrsraq_n_v: { |
| intrName = usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl"; |
| // The llvm intrinsic is expecting negative shift amount for right shift. |
| // Thus we have to make shift amount vec type to be signed. |
| cir::VectorType shiftAmtVecTy = |
| usgn ? getSignChangedVectorType(builder, ty) : ty; |
| llvm::SmallVector<mlir::Value, 2> tmpOps = {ops[1], ops[2]}; |
| mlir::Value tmp = emitNeonCall(cgm, builder, {ty, shiftAmtVecTy}, tmpOps, |
| intrName, ty, loc, |
| /*isConstrainedFPIntrinsic=*/false, |
| /*shift=*/1, /*rightshift=*/true); |
| ops[0] = builder.createBitcast(ops[0], ty); |
| return builder.createAdd(loc, ops[0], tmp); |
| } |
| case NEON::BI__builtin_neon_vld1_v: |
| case NEON::BI__builtin_neon_vld1q_v: |
| case NEON::BI__builtin_neon_vst1_v: |
| case NEON::BI__builtin_neon_vst1q_v: |
| case NEON::BI__builtin_neon_vld1_lane_v: |
| case NEON::BI__builtin_neon_vld1q_lane_v: |
| case NEON::BI__builtin_neon_vldap1_lane_s64: |
| case NEON::BI__builtin_neon_vldap1q_lane_s64: |
| case NEON::BI__builtin_neon_vld1_dup_v: |
| case NEON::BI__builtin_neon_vld1q_dup_v: |
| case NEON::BI__builtin_neon_vst1_lane_v: |
| case NEON::BI__builtin_neon_vst1q_lane_v: |
| case NEON::BI__builtin_neon_vstl1_lane_s64: |
| case NEON::BI__builtin_neon_vstl1q_lane_s64: |
| case NEON::BI__builtin_neon_vld2_v: |
| case NEON::BI__builtin_neon_vld2q_v: |
| case NEON::BI__builtin_neon_vld3_v: |
| case NEON::BI__builtin_neon_vld3q_v: |
| case NEON::BI__builtin_neon_vld4_v: |
| case NEON::BI__builtin_neon_vld4q_v: |
| case NEON::BI__builtin_neon_vld2_dup_v: |
| case NEON::BI__builtin_neon_vld2q_dup_v: |
| case NEON::BI__builtin_neon_vld3_dup_v: |
| case NEON::BI__builtin_neon_vld3q_dup_v: |
| case NEON::BI__builtin_neon_vld4_dup_v: |
| case NEON::BI__builtin_neon_vld4q_dup_v: |
| case NEON::BI__builtin_neon_vld2_lane_v: |
| case NEON::BI__builtin_neon_vld2q_lane_v: |
| case NEON::BI__builtin_neon_vld3_lane_v: |
| case NEON::BI__builtin_neon_vld3q_lane_v: |
| case NEON::BI__builtin_neon_vld4_lane_v: |
| case NEON::BI__builtin_neon_vld4q_lane_v: |
| case NEON::BI__builtin_neon_vst2_v: |
| case NEON::BI__builtin_neon_vst2q_v: |
| case NEON::BI__builtin_neon_vst2_lane_v: |
| case NEON::BI__builtin_neon_vst2q_lane_v: |
| case NEON::BI__builtin_neon_vst3_v: |
| case NEON::BI__builtin_neon_vst3q_v: |
| case NEON::BI__builtin_neon_vst3_lane_v: |
| case NEON::BI__builtin_neon_vst3q_lane_v: |
| case NEON::BI__builtin_neon_vst4_v: |
| case NEON::BI__builtin_neon_vst4q_v: |
| case NEON::BI__builtin_neon_vst4_lane_v: |
| case NEON::BI__builtin_neon_vst4q_lane_v: |
| case NEON::BI__builtin_neon_vtrn_v: |
| case NEON::BI__builtin_neon_vtrnq_v: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| case NEON::BI__builtin_neon_vuzp_v: |
| case NEON::BI__builtin_neon_vuzpq_v: { |
| ops[1] = builder.createBitcast(ops[1], ty); |
| ops[2] = builder.createBitcast(ops[2], ty); |
| // Adding a bitcast here as Ops[0] might be a void pointer. |
| mlir::Value baseAddr = |
| builder.createBitcast(ops[0], builder.getPointerTo(ty)); |
| mlir::Value sv; |
| for (unsigned vi = 0; vi != 2; ++vi) { |
| llvm::SmallVector<int64_t, 16> indices; |
| for (unsigned i = 0, e = ty.getSize(); i != e; ++i) { |
| indices.push_back(2 * i + vi); |
| } |
| cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi); |
| mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx); |
| sv = builder.createVecShuffle(loc, ops[1], ops[2], indices); |
| (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr); |
| } |
| return sv; |
| } |
| case NEON::BI__builtin_neon_vzip_v: |
| case NEON::BI__builtin_neon_vzipq_v: { |
| ops[1] = builder.createBitcast(ops[1], ty); |
| ops[2] = builder.createBitcast(ops[2], ty); |
| // Adding a bitcast here as Ops[0] might be a void pointer. |
| mlir::Value baseAddr = |
| builder.createBitcast(ops[0], builder.getPointerTo(ty)); |
| mlir::Value sv; |
| for (unsigned vi = 0; vi != 2; ++vi) { |
| llvm::SmallVector<int64_t, 16> indices; |
| for (unsigned i = 0, e = ty.getSize(); i != e; i += 2) { |
| indices.push_back((i + vi * e) >> 1); |
| indices.push_back(((i + vi * e) >> 1) + e); |
| } |
| cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi); |
| mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx); |
| sv = builder.createVecShuffle(loc, ops[1], ops[2], indices); |
| (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr); |
| } |
| return sv; |
| } |
| case NEON::BI__builtin_neon_vqtbl1q_v: |
| case NEON::BI__builtin_neon_vqtbl2q_v: |
| case NEON::BI__builtin_neon_vqtbl3q_v: |
| case NEON::BI__builtin_neon_vqtbl4q_v: |
| case NEON::BI__builtin_neon_vqtbx1q_v: |
| case NEON::BI__builtin_neon_vqtbx2q_v: |
| case NEON::BI__builtin_neon_vqtbx3q_v: |
| case NEON::BI__builtin_neon_vqtbx4q_v: |
| case NEON::BI__builtin_neon_vsqadd_v: |
| case NEON::BI__builtin_neon_vsqaddq_v: |
| case NEON::BI__builtin_neon_vuqadd_v: |
| case NEON::BI__builtin_neon_vuqaddq_v: |
| case NEON::BI__builtin_neon_vluti2_laneq_mf8: |
| case NEON::BI__builtin_neon_vluti2_laneq_bf16: |
| case NEON::BI__builtin_neon_vluti2_laneq_f16: |
| case NEON::BI__builtin_neon_vluti2_laneq_p16: |
| case NEON::BI__builtin_neon_vluti2_laneq_p8: |
| case NEON::BI__builtin_neon_vluti2_laneq_s16: |
| case NEON::BI__builtin_neon_vluti2_laneq_s8: |
| case NEON::BI__builtin_neon_vluti2_laneq_u16: |
| case NEON::BI__builtin_neon_vluti2_laneq_u8: |
| case NEON::BI__builtin_neon_vluti2q_laneq_mf8: |
| case NEON::BI__builtin_neon_vluti2q_laneq_bf16: |
| case NEON::BI__builtin_neon_vluti2q_laneq_f16: |
| case NEON::BI__builtin_neon_vluti2q_laneq_p16: |
| case NEON::BI__builtin_neon_vluti2q_laneq_p8: |
| case NEON::BI__builtin_neon_vluti2q_laneq_s16: |
| case NEON::BI__builtin_neon_vluti2q_laneq_s8: |
| case NEON::BI__builtin_neon_vluti2q_laneq_u16: |
| case NEON::BI__builtin_neon_vluti2q_laneq_u8: |
| case NEON::BI__builtin_neon_vluti2_lane_mf8: |
| case NEON::BI__builtin_neon_vluti2_lane_bf16: |
| case NEON::BI__builtin_neon_vluti2_lane_f16: |
| case NEON::BI__builtin_neon_vluti2_lane_p16: |
| case NEON::BI__builtin_neon_vluti2_lane_p8: |
| case NEON::BI__builtin_neon_vluti2_lane_s16: |
| case NEON::BI__builtin_neon_vluti2_lane_s8: |
| case NEON::BI__builtin_neon_vluti2_lane_u16: |
| case NEON::BI__builtin_neon_vluti2_lane_u8: |
| case NEON::BI__builtin_neon_vluti2q_lane_mf8: |
| case NEON::BI__builtin_neon_vluti2q_lane_bf16: |
| case NEON::BI__builtin_neon_vluti2q_lane_f16: |
| case NEON::BI__builtin_neon_vluti2q_lane_p16: |
| case NEON::BI__builtin_neon_vluti2q_lane_p8: |
| case NEON::BI__builtin_neon_vluti2q_lane_s16: |
| case NEON::BI__builtin_neon_vluti2q_lane_s8: |
| case NEON::BI__builtin_neon_vluti2q_lane_u16: |
| case NEON::BI__builtin_neon_vluti2q_lane_u8: |
| case NEON::BI__builtin_neon_vluti4q_lane_mf8: |
| case NEON::BI__builtin_neon_vluti4q_lane_p8: |
| case NEON::BI__builtin_neon_vluti4q_lane_s8: |
| case NEON::BI__builtin_neon_vluti4q_lane_u8: |
| case NEON::BI__builtin_neon_vluti4q_laneq_mf8: |
| case NEON::BI__builtin_neon_vluti4q_laneq_p8: |
| case NEON::BI__builtin_neon_vluti4q_laneq_s8: |
| case NEON::BI__builtin_neon_vluti4q_laneq_u8: |
| case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2: |
| case NEON::BI__builtin_neon_vluti4q_lane_f16_x2: |
| case NEON::BI__builtin_neon_vluti4q_lane_p16_x2: |
| case NEON::BI__builtin_neon_vluti4q_lane_s16_x2: |
| case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: |
| case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2: |
| case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2: |
| case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2: |
| case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2: |
| case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: |
| case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: |
| case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: |
| case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: |
| case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: |
| case NEON::BI__builtin_neon_vdot_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vdot_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm: |
| case NEON::BI__builtin_neon_vamin_f16: |
| case NEON::BI__builtin_neon_vaminq_f16: |
| case NEON::BI__builtin_neon_vamin_f32: |
| case NEON::BI__builtin_neon_vaminq_f32: |
| case NEON::BI__builtin_neon_vaminq_f64: |
| case NEON::BI__builtin_neon_vamax_f16: |
| case NEON::BI__builtin_neon_vamaxq_f16: |
| case NEON::BI__builtin_neon_vamax_f32: |
| case NEON::BI__builtin_neon_vamaxq_f32: |
| case NEON::BI__builtin_neon_vamaxq_f64: |
| case NEON::BI__builtin_neon_vscale_f16: |
| case NEON::BI__builtin_neon_vscaleq_f16: |
| case NEON::BI__builtin_neon_vscale_f32: |
| case NEON::BI__builtin_neon_vscaleq_f32: |
| case NEON::BI__builtin_neon_vscaleq_f64: |
| cgm.errorNYI(expr->getSourceRange(), |
| std::string("unimplemented AArch64 builtin call: ") + |
| getContext().BuiltinInfo.getName(builtinID)); |
| return mlir::Value{}; |
| } |
| |
| // Unreachable: All cases in the switch above return. |
| } |