//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>
using namespace llvm;
static cl::opt<bool>
DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
cl::desc("Disable autoupgrade of debug info"));
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
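// Illustrative sketch of the signature change handled here (old vs. new
// declarations):
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)  ; old
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)      ; new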
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
Function *&NewFn) {
// Check whether this is an old version of the function, which received
// v4f32 arguments.
Type *Arg0Type = F->getFunctionType()->getParamType(0);
if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
return false;
// Yes, it's old; replace it with the new version.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
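// Illustrative sketch, using insertps as an example (other affected
// intrinsics differ only in their operand types):
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) ; old
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)  ; new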
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
// Check that the last argument is an i32.
Type *LastArgType = F->getFunctionType()->getParamType(
F->getFunctionType()->getNumParams() - 1);
if (!LastArgType->isIntegerTy(32))
return false;
// Move this function aside and map down.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
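// Illustrative sketch: the 128-bit pd variant, for example, used to return a
// scalar integer mask, whereas the upgraded declaration returns <2 x i1>.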
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
// Check if the return type is a vector.
if (F->getReturnType()->isVectorTy())
return false;
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
static bool UpgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
if (F->getReturnType()->getScalarType()->isBFloatTy())
return false;
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
static bool UpgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
return false;
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// All of the intrinsic matches below should be marked with the LLVM version
// that started autoupgrading them. At some point in the future we would
// like to use this information to remove upgrade code for some older
// intrinsics. It is currently undecided how we will determine that future
// point.
if (Name == "addcarryx.u32" || // Added in 8.0
Name == "addcarryx.u64" || // Added in 8.0
Name == "addcarry.u32" || // Added in 8.0
Name == "addcarry.u64" || // Added in 8.0
Name == "subborrow.u32" || // Added in 8.0
Name == "subborrow.u64" || // Added in 8.0
Name.startswith("sse2.padds.") || // Added in 8.0
Name.startswith("sse2.psubs.") || // Added in 8.0
Name.startswith("sse2.paddus.") || // Added in 8.0
Name.startswith("sse2.psubus.") || // Added in 8.0
Name.startswith("avx2.padds.") || // Added in 8.0
Name.startswith("avx2.psubs.") || // Added in 8.0
Name.startswith("avx2.paddus.") || // Added in 8.0
Name.startswith("avx2.psubus.") || // Added in 8.0
Name.startswith("avx512.padds.") || // Added in 8.0
Name.startswith("avx512.psubs.") || // Added in 8.0
Name.startswith("avx512.mask.padds.") || // Added in 8.0
Name.startswith("avx512.mask.psubs.") || // Added in 8.0
Name.startswith("avx512.mask.paddus.") || // Added in 8.0
Name.startswith("avx512.mask.psubus.") || // Added in 8.0
Name=="ssse3.pabs.b.128" || // Added in 6.0
Name=="ssse3.pabs.w.128" || // Added in 6.0
Name=="ssse3.pabs.d.128" || // Added in 6.0
Name.startswith("fma4.vfmadd.s") || // Added in 7.0
Name.startswith("fma.vfmadd.") || // Added in 7.0
Name.startswith("fma.vfmsub.") || // Added in 7.0
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
Name.startswith("fma.vfnmadd.") || // Added in 7.0
Name.startswith("fma.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
Name.startswith("avx512.kunpck") || //added in 6.0
Name.startswith("avx2.pabs.") || // Added in 6.0
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
Name.startswith("avx512.broadcastm") || // Added in 6.0
Name == "sse.sqrt.ss" || // Added in 7.0
Name == "sse2.sqrt.sd" || // Added in 7.0
Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
Name.startswith("avx.sqrt.p") || // Added in 7.0
Name.startswith("sse2.sqrt.p") || // Added in 7.0
Name.startswith("sse.sqrt.p") || // Added in 7.0
Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
Name.startswith("sse2.pcmpeq.") || // Added in 3.1
Name.startswith("sse2.pcmpgt.") || // Added in 3.1
Name.startswith("avx2.pcmpeq.") || // Added in 3.1
Name.startswith("avx2.pcmpgt.") || // Added in 3.1
Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
Name.startswith("avx.vperm2f128.") || // Added in 6.0
Name == "avx2.vperm2i128" || // Added in 6.0
Name == "sse.add.ss" || // Added in 4.0
Name == "sse2.add.sd" || // Added in 4.0
Name == "sse.sub.ss" || // Added in 4.0
Name == "sse2.sub.sd" || // Added in 4.0
Name == "sse.mul.ss" || // Added in 4.0
Name == "sse2.mul.sd" || // Added in 4.0
Name == "sse.div.ss" || // Added in 4.0
Name == "sse2.div.sd" || // Added in 4.0
Name == "sse41.pmaxsb" || // Added in 3.9
Name == "sse2.pmaxs.w" || // Added in 3.9
Name == "sse41.pmaxsd" || // Added in 3.9
Name == "sse2.pmaxu.b" || // Added in 3.9
Name == "sse41.pmaxuw" || // Added in 3.9
Name == "sse41.pmaxud" || // Added in 3.9
Name == "sse41.pminsb" || // Added in 3.9
Name == "sse2.pmins.w" || // Added in 3.9
Name == "sse41.pminsd" || // Added in 3.9
Name == "sse2.pminu.b" || // Added in 3.9
Name == "sse41.pminuw" || // Added in 3.9
Name == "sse41.pminud" || // Added in 3.9
Name == "avx512.kand.w" || // Added in 7.0
Name == "avx512.kandn.w" || // Added in 7.0
Name == "avx512.knot.w" || // Added in 7.0
Name == "avx512.kor.w" || // Added in 7.0
Name == "avx512.kxor.w" || // Added in 7.0
Name == "avx512.kxnor.w" || // Added in 7.0
Name == "avx512.kortestc.w" || // Added in 7.0
Name == "avx512.kortestz.w" || // Added in 7.0
Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
Name.startswith("avx2.pmax") || // Added in 3.9
Name.startswith("avx2.pmin") || // Added in 3.9
Name.startswith("avx512.mask.pmax") || // Added in 4.0
Name.startswith("avx512.mask.pmin") || // Added in 4.0
Name.startswith("avx2.vbroadcast") || // Added in 3.8
Name.startswith("avx2.pbroadcast") || // Added in 3.8
Name.startswith("avx.vpermil.") || // Added in 3.1
Name.startswith("sse2.pshuf") || // Added in 3.9
Name.startswith("avx512.pbroadcast") || // Added in 3.9
Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
Name.startswith("avx512.mask.movddup") || // Added in 3.9
Name.startswith("avx512.mask.movshdup") || // Added in 3.9
Name.startswith("avx512.mask.movsldup") || // Added in 3.9
Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
Name.startswith("avx512.mask.punpckl") || // Added in 3.9
Name.startswith("avx512.mask.punpckh") || // Added in 3.9
Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
Name.startswith("avx512.mask.pand.") || // Added in 3.9
Name.startswith("avx512.mask.pandn.") || // Added in 3.9
Name.startswith("avx512.mask.por.") || // Added in 3.9
Name.startswith("avx512.mask.pxor.") || // Added in 3.9
Name.startswith("avx512.mask.and.") || // Added in 3.9
Name.startswith("avx512.mask.andn.") || // Added in 3.9
Name.startswith("avx512.mask.or.") || // Added in 3.9
Name.startswith("avx512.mask.xor.") || // Added in 3.9
Name.startswith("avx512.mask.padd.") || // Added in 4.0
Name.startswith("avx512.mask.psub.") || // Added in 4.0
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
Name == "avx512.cvtusi2sd" || // Added in 7.0
Name.startswith("avx512.mask.permvar.") || // Added in 7.0
Name == "sse2.pmulu.dq" || // Added in 7.0
Name == "sse41.pmuldq" || // Added in 7.0
Name == "avx2.pmulu.dq" || // Added in 7.0
Name == "avx2.pmul.dq" || // Added in 7.0
Name == "avx512.pmulu.dq.512" || // Added in 7.0
Name == "avx512.pmul.dq.512" || // Added in 7.0
Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
Name.startswith("avx512.cmp.p") || // Added in 12.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
Name.startswith("avx512.mask.psll.d") || // Added in 4.0
Name.startswith("avx512.mask.psll.q") || // Added in 4.0
Name.startswith("avx512.mask.psll.w") || // Added in 4.0
Name.startswith("avx512.mask.psra.d") || // Added in 4.0
Name.startswith("avx512.mask.psra.q") || // Added in 4.0
Name.startswith("avx512.mask.psra.w") || // Added in 4.0
Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
Name.startswith("avx512.mask.pslli") || // Added in 4.0
Name.startswith("avx512.mask.psrai") || // Added in 4.0
Name.startswith("avx512.mask.psrli") || // Added in 4.0
Name.startswith("avx512.mask.psllv") || // Added in 4.0
Name.startswith("avx512.mask.psrav") || // Added in 4.0
Name.startswith("avx512.mask.psrlv") || // Added in 4.0
Name.startswith("sse41.pmovsx") || // Added in 3.8
Name.startswith("sse41.pmovzx") || // Added in 3.9
Name.startswith("avx2.pmovsx") || // Added in 3.9
Name.startswith("avx2.pmovzx") || // Added in 3.9
Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
Name.startswith("avx512.vpshld.") || // Added in 8.0
Name.startswith("avx512.vpshrd.") || // Added in 8.0
Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
Name.startswith("avx512.mask.conflict.") || // Added in 9.0
Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
Name == "sse2.cvtsi642sd" || // Added in 7.0
Name == "sse2.cvtss2sd" || // Added in 7.0
Name == "sse2.cvtdq2pd" || // Added in 3.9
Name == "sse2.cvtdq2ps" || // Added in 7.0
Name == "sse2.cvtps2pd" || // Added in 3.9
Name == "avx.cvtdq2.pd.256" || // Added in 3.9
Name == "avx.cvtdq2.ps.256" || // Added in 7.0
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("vcvtph2ps.") || // Added in 11.0
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
Name.startswith("sse4a.movnt.") || // Added in 3.9
Name.startswith("avx.movnt.") || // Added in 3.2
Name.startswith("avx512.storent.") || // Added in 3.9
Name == "sse41.movntdqa" || // Added in 5.0
Name == "avx2.movntdqa" || // Added in 5.0
Name == "avx512.movntdqa" || // Added in 5.0
Name == "sse2.storel.dq" || // Added in 3.9
Name.startswith("sse.storeu.") || // Added in 3.9
Name.startswith("sse2.storeu.") || // Added in 3.9
Name.startswith("avx.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.store.p") || // Added in 3.9
Name.startswith("avx512.mask.store.b.") || // Added in 3.9
Name.startswith("avx512.mask.store.w.") || // Added in 3.9
Name.startswith("avx512.mask.store.d.") || // Added in 3.9
Name.startswith("avx512.mask.store.q.") || // Added in 3.9
Name == "avx512.mask.store.ss" || // Added in 7.0
Name.startswith("avx512.mask.loadu.") || // Added in 3.9
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
Name.startswith("avx512.mask.expand.b") || // Added in 9.0
Name.startswith("avx512.mask.expand.w") || // Added in 9.0
Name.startswith("avx512.mask.expand.d") || // Added in 9.0
Name.startswith("avx512.mask.expand.q") || // Added in 9.0
Name.startswith("avx512.mask.expand.p") || // Added in 9.0
Name.startswith("avx512.mask.compress.b") || // Added in 9.0
Name.startswith("avx512.mask.compress.w") || // Added in 9.0
Name.startswith("avx512.mask.compress.d") || // Added in 9.0
Name.startswith("avx512.mask.compress.q") || // Added in 9.0
Name.startswith("avx512.mask.compress.p") || // Added in 9.0
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
Name.startswith("avx512.mask.palignr.") || // Added in 3.9
Name.startswith("avx512.mask.valign.") || // Added in 4.0
Name.startswith("sse2.psll.dq") || // Added in 3.7
Name.startswith("sse2.psrl.dq") || // Added in 3.7
Name.startswith("avx2.psll.dq") || // Added in 3.7
Name.startswith("avx2.psrl.dq") || // Added in 3.7
Name.startswith("avx512.psll.dq") || // Added in 3.9
Name.startswith("avx512.psrl.dq") || // Added in 3.9
Name == "sse41.pblendw" || // Added in 3.7
Name.startswith("sse41.blendp") || // Added in 3.7
Name.startswith("avx.blend.p") || // Added in 3.7
Name == "avx2.pblendw" || // Added in 3.7
Name.startswith("avx2.pblendd.") || // Added in 3.7
Name.startswith("avx.vbroadcastf128") || // Added in 4.0
Name == "avx2.vbroadcasti128" || // Added in 3.7
Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
Name == "xop.vpcmov" || // Added in 3.8
Name == "xop.vpcmov.256" || // Added in 5.0
Name.startswith("avx512.mask.move.s") || // Added in 4.0
Name.startswith("avx512.cvtmask2") || // Added in 5.0
Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
Name.startswith("xop.vprot") || // Added in 8.0
Name.startswith("avx512.prol") || // Added in 8.0
Name.startswith("avx512.pror") || // Added in 8.0
Name.startswith("avx512.mask.prorv.") || // Added in 8.0
Name.startswith("avx512.mask.pror.") || // Added in 8.0
Name.startswith("avx512.mask.prolv.") || // Added in 8.0
Name.startswith("avx512.mask.prol.") || // Added in 8.0
Name.startswith("avx512.ptestm") || //Added in 6.0
Name.startswith("avx512.ptestnm") || //Added in 6.0
Name.startswith("avx512.mask.pavg")) // Added in 6.0
return true;
return false;
}
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
Function *&NewFn) {
// Only handle intrinsics that start with "x86.".
if (!Name.startswith("x86."))
return false;
// Remove "x86." prefix.
Name = Name.substr(4);
if (ShouldUpgradeX86Intrinsic(F, Name)) {
NewFn = nullptr;
return true;
}
if (Name == "rdtscp") { // Added in 8.0
// If this intrinsic has 0 operands, it's the new version.
if (F->getFunctionType()->getNumParams() == 0)
return false;
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_rdtscp);
return true;
}
// SSE4.1 ptest functions may have an old signature.
if (Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
if (Name.substr(11) == "z")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
if (Name.substr(11) == "nzc")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
// Several blend and other instructions with masks used the wrong number of
// bits.
if (Name == "sse41.insertps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
if (Name == "sse41.dppd") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
NewFn);
if (Name == "sse41.dpps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
NewFn);
if (Name == "sse41.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
if (Name == "avx.dp.ps.256") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
if (Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
NewFn);
if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
NewFn);
if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
NewFn);
if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
NewFn);
if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
NewFn);
if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
NewFn);
if (Name == "avx512bf16.cvtne2ps2bf16.128") // Added in 9.0
return UpgradeX86BF16Intrinsic(
F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128, NewFn);
if (Name == "avx512bf16.cvtne2ps2bf16.256") // Added in 9.0
return UpgradeX86BF16Intrinsic(
F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256, NewFn);
if (Name == "avx512bf16.cvtne2ps2bf16.512") // Added in 9.0
return UpgradeX86BF16Intrinsic(
F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512, NewFn);
if (Name == "avx512bf16.mask.cvtneps2bf16.128") // Added in 9.0
return UpgradeX86BF16Intrinsic(
F, Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128, NewFn);
if (Name == "avx512bf16.cvtneps2bf16.256") // Added in 9.0
return UpgradeX86BF16Intrinsic(
F, Intrinsic::x86_avx512bf16_cvtneps2bf16_256, NewFn);
if (Name == "avx512bf16.cvtneps2bf16.512") // Added in 9.0
return UpgradeX86BF16Intrinsic(
F, Intrinsic::x86_avx512bf16_cvtneps2bf16_512, NewFn);
if (Name == "avx512bf16.dpbf16ps.128") // Added in 9.0
return UpgradeX86BF16DPIntrinsic(
F, Intrinsic::x86_avx512bf16_dpbf16ps_128, NewFn);
if (Name == "avx512bf16.dpbf16ps.256") // Added in 9.0
return UpgradeX86BF16DPIntrinsic(
F, Intrinsic::x86_avx512bf16_dpbf16ps_256, NewFn);
if (Name == "avx512bf16.dpbf16ps.512") // Added in 9.0
return UpgradeX86BF16DPIntrinsic(
F, Intrinsic::x86_avx512bf16_dpbf16ps_512, NewFn);
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_ss);
return true;
}
if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_sd);
return true;
}
// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
if (Name.startswith("xop.vpermil2")) { // Added in 3.9
auto Idx = F->getFunctionType()->getParamType(2);
if (Idx->isFPOrFPVectorTy()) {
rename(F);
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
unsigned EltSize = Idx->getScalarSizeInBits();
Intrinsic::ID Permil2ID;
if (EltSize == 64 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2pd;
else if (EltSize == 32 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2ps;
else if (EltSize == 64 && IdxSize == 256)
Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
else
Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
return true;
}
}
if (Name == "seh.recoverfp") {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
return true;
}
return false;
}
static Intrinsic::ID ShouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
if (Name.consume_front("abs."))
return StringSwitch<Intrinsic::ID>(Name)
.Case("bf16", Intrinsic::nvvm_abs_bf16)
.Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
.Default(Intrinsic::not_intrinsic);
if (Name.consume_front("fma.rn."))
return StringSwitch<Intrinsic::ID>(Name)
.Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
.Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
.Case("ftz_bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
.Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
.Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
.Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
.Case("ftz_sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
.Case("ftz_sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
.Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
.Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
.Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
.Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
.Default(Intrinsic::not_intrinsic);
if (Name.consume_front("fmax."))
return StringSwitch<Intrinsic::ID>(Name)
.Case("bf16", Intrinsic::nvvm_fmax_bf16)
.Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
.Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
.Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
.Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
.Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
.Case("ftz.nan.xorsign.abs.bf16",
Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
.Case("ftz.nan.xorsign.abs.bf16x2",
Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
.Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
.Case("ftz.xorsign.abs.bf16x2",
Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
.Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
.Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
.Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
.Case("nan.xorsign.abs.bf16x2",
Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
.Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
.Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
.Default(Intrinsic::not_intrinsic);
if (Name.consume_front("fmin."))
return StringSwitch<Intrinsic::ID>(Name)
.Case("bf16", Intrinsic::nvvm_fmin_bf16)
.Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
.Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
.Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
.Case("ftz.nan_bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
.Case("ftz.nan_bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
.Case("ftz.nan.xorsign.abs.bf16",
Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
.Case("ftz.nan.xorsign.abs.bf16x2",
Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
.Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
.Case("ftz.xorsign.abs.bf16x2",
Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
.Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
.Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
.Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
.Case("nan.xorsign.abs.bf16x2",
Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
.Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
.Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
.Default(Intrinsic::not_intrinsic);
if (Name.consume_front("neg."))
return StringSwitch<Intrinsic::ID>(Name)
.Case("bf16", Intrinsic::nvvm_neg_bf16)
.Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
.Default(Intrinsic::not_intrinsic);
return Intrinsic::not_intrinsic;
}
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
// Quickly eliminate it, if it's not a candidate.
StringRef Name = F->getName();
if (Name.size() <= 7 || !Name.startswith("llvm."))
return false;
Name = Name.substr(5); // Strip off "llvm."
switch (Name[0]) {
default: break;
case 'a': {
if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.frintn")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
if (Name == "aarch64.sve.bfdot.lane") {
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::aarch64_sve_bfdot_lane_v2);
return true;
}
if (Name == "aarch64.sve.bfmlalb.lane") {
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::aarch64_sve_bfmlalb_lane_v2);
return true;
}
if (Name == "aarch64.sve.bfmlalt.lane") {
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::aarch64_sve_bfmlalt_lane_v2);
return true;
}
static const Regex LdRegex("^aarch64\\.sve\\.ld[234](.nxv[a-z0-9]+|$)");
if (LdRegex.match(Name)) {
Type *ScalarTy =
dyn_cast<VectorType>(F->getReturnType())->getElementType();
ElementCount EC =
dyn_cast<VectorType>(F->arg_begin()->getType())->getElementCount();
Type *Ty = VectorType::get(ScalarTy, EC);
Intrinsic::ID ID =
StringSwitch<Intrinsic::ID>(Name)
.StartsWith("aarch64.sve.ld2", Intrinsic::aarch64_sve_ld2_sret)
.StartsWith("aarch64.sve.ld3", Intrinsic::aarch64_sve_ld3_sret)
.StartsWith("aarch64.sve.ld4", Intrinsic::aarch64_sve_ld4_sret)
.Default(Intrinsic::not_intrinsic);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty);
return true;
}
if (Name.startswith("aarch64.sve.tuple.get")) {
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::vector_extract, Tys);
return true;
}
if (Name.startswith("aarch64.sve.tuple.set")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[0], Args[2], Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::vector_insert, Tys);
return true;
}
static const Regex CreateTupleRegex(
"^aarch64\\.sve\\.tuple\\.create[234](.nxv[a-z0-9]+|$)");
if (CreateTupleRegex.match(Name)) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = {F->getReturnType(), Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::vector_insert, Tys);
return true;
}
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),
Type::getInt1Ty(F->getContext())
};
// Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
// the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.*
FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
"llvm.ctlz." + Name.substr(14), F->getParent());
return true;
}
if (Name.startswith("arm.neon.vcnt")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
F->arg_begin()->getType());
return true;
}
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
static const Intrinsic::ID StoreLaneInts[] = {
Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
Intrinsic::arm_neon_vst4lane
};
auto fArgs = F->getFunctionType()->params();
Type *Tys[] = {fArgs[0], fArgs[1]};
if (!Name.contains("lane"))
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreInts[fArgs.size() - 3], Tys);
else
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreLaneInts[fArgs.size() - 5], Tys);
return true;
}
if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
if (Name.startswith("arm.neon.vqadds.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqaddu.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqsubs.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqsubu.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.addp")) {
if (F->arg_size() != 2)
break; // Invalid IR.
VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
if (Ty && Ty->getElementType()->isFloatingPointTy()) {
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::aarch64_neon_faddp, Ty);
return true;
}
}
// Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
// v16i8, respectively.
if ((Name.startswith("arm.neon.bfdot.") ||
Name.startswith("aarch64.neon.bfdot.")) &&
Name.endswith("i8")) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Cases("arm.neon.bfdot.v2f32.v8i8",
"arm.neon.bfdot.v4f32.v16i8",
Intrinsic::arm_neon_bfdot)
.Cases("aarch64.neon.bfdot.v2f32.v8i8",
"aarch64.neon.bfdot.v4f32.v16i8",
Intrinsic::aarch64_neon_bfdot)
.Default(Intrinsic::not_intrinsic);
if (IID == Intrinsic::not_intrinsic)
break;
size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
assert((OperandWidth == 64 || OperandWidth == 128) &&
"Unexpected operand width");
LLVMContext &Ctx = F->getParent()->getContext();
std::array<Type *, 2> Tys {{
F->getReturnType(),
FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
}};
NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
return true;
}
// Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
// and accept v8bf16 instead of v16i8
if ((Name.startswith("arm.neon.bfm") ||
Name.startswith("aarch64.neon.bfm")) &&
Name.endswith(".v4f32.v16i8")) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Case("arm.neon.bfmmla.v4f32.v16i8",
Intrinsic::arm_neon_bfmmla)
.Case("arm.neon.bfmlalb.v4f32.v16i8",
Intrinsic::arm_neon_bfmlalb)
.Case("arm.neon.bfmlalt.v4f32.v16i8",
Intrinsic::arm_neon_bfmlalt)
.Case("aarch64.neon.bfmmla.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmmla)
.Case("aarch64.neon.bfmlalb.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmlalb)
.Case("aarch64.neon.bfmlalt.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmlalt)
.Default(Intrinsic::not_intrinsic);
if (IID == Intrinsic::not_intrinsic)
break;
std::array<Type *, 0> Tys;
NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
return true;
}
if (Name == "arm.mve.vctp64" &&
cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
// A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
// function and deal with it below in UpgradeIntrinsicCall.
rename(F);
return true;
}
// These too are changed to accept a v2i1 instead of the old v4i1.
if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name ==
"arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name ==
"arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
return true;
if (Name.consume_front("amdgcn.")) {
if (Name == "alignbit") {
// Target specific intrinsic became redundant
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
{F->getReturnType()});
return true;
}
if (Name.startswith("atomic.inc") || Name.startswith("atomic.dec")) {
// This was replaced with atomicrmw uinc_wrap and udec_wrap, so there's no
// new declaration.
NewFn = nullptr;
return true;
}
if (Name.startswith("ldexp.")) {
// Target specific intrinsic became redundant
NewFn = Intrinsic::getDeclaration(
F->getParent(), Intrinsic::ldexp,
{F->getReturnType(), F->getArg(1)->getType()});
return true;
}
}
break;
}
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("cttz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
F->arg_begin()->getType());
return true;
}
if (Name.equals("coro.end") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
return true;
}
break;
}
case 'd':
if (Name.consume_front("dbg.")) {
if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
return true;
}
break; // No other 'dbg.*'.
}
break;
case 'e':
if (Name.consume_front("experimental.vector.")) {
Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
.StartsWith("extract.", Intrinsic::vector_extract)
.StartsWith("insert.", Intrinsic::vector_insert)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
const auto *FT = F->getFunctionType();
SmallVector<Type *, 2> Tys;
if (ID == Intrinsic::vector_extract)
// Extracting overloads the return type.
Tys.push_back(FT->getReturnType());
Tys.push_back(FT->getParamType(0));
if (ID == Intrinsic::vector_insert)
// Inserting overloads the inserted type.
Tys.push_back(FT->getParamType(1));
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return true;
}
if (Name.consume_front("reduce.")) {
SmallVector<StringRef, 2> Groups;
static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
if (R.match(Name, &Groups))
ID = StringSwitch<Intrinsic::ID>(Groups[1])
.Case("add", Intrinsic::vector_reduce_add)
.Case("mul", Intrinsic::vector_reduce_mul)
.Case("and", Intrinsic::vector_reduce_and)
.Case("or", Intrinsic::vector_reduce_or)
.Case("xor", Intrinsic::vector_reduce_xor)
.Case("smax", Intrinsic::vector_reduce_smax)
.Case("smin", Intrinsic::vector_reduce_smin)
.Case("umax", Intrinsic::vector_reduce_umax)
.Case("umin", Intrinsic::vector_reduce_umin)
.Case("fmax", Intrinsic::vector_reduce_fmax)
.Case("fmin", Intrinsic::vector_reduce_fmin)
.Default(Intrinsic::not_intrinsic);
bool V2 = false;
if (ID == Intrinsic::not_intrinsic) {
static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
Groups.clear();
V2 = true;
if (R2.match(Name, &Groups))
ID = StringSwitch<Intrinsic::ID>(Groups[1])
.Case("fadd", Intrinsic::vector_reduce_fadd)
.Case("fmul", Intrinsic::vector_reduce_fmul)
.Default(Intrinsic::not_intrinsic);
}
if (ID != Intrinsic::not_intrinsic) {
rename(F);
auto Args = F->getFunctionType()->params();
NewFn =
Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
return true;
}
break; // No other 'experimental.vector.reduce.*'.
}
break; // No other 'experimental.vector.*'.
}
break; // No other 'e*'.
case 'f':
if (Name.startswith("flt.rounds")) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
return true;
}
break;
case 'i':
if (Name.startswith("invariant.group.barrier")) {
// Rename invariant.group.barrier to launder.invariant.group
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[0]};
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::launder_invariant_group, ObjectPtr);
return true;
}
break;
case 'm': {
// Upgrade the memory intrinsics (memcpy/memmove/memset) that have an
// alignment parameter, embedding the alignment as an attribute of
// the pointer args instead.
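// Illustrative sketch of the change for memcpy (memmove/memset are
// analogous); the explicit i32 alignment operand is dropped in favor of
// align attributes on the pointer arguments:
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 16, i1 false)            ; old
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %d, i8* align 16 %s, i64 %n, i1 false)  ; new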
if (unsigned ID = StringSwitch<unsigned>(Name)
.StartsWith("memcpy.", Intrinsic::memcpy)
.StartsWith("memmove.", Intrinsic::memmove)
.Default(0)) {
if (F->arg_size() == 5) {
rename(F);
// Get the types of dest, src, and len
ArrayRef<Type *> ParamTypes =
F->getFunctionType()->params().slice(0, 3);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
return true;
}
}
if (Name.startswith("memset.") && F->arg_size() == 5) {
rename(F);
// Get the types of dest, and len
const auto *FT = F->getFunctionType();
Type *ParamTypes[2] = {
FT->getParamType(0), // Dest
FT->getParamType(2) // len
};
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
ParamTypes);
return true;
}
break;
}
case 'n': {
if (Name.consume_front("nvvm.")) {
// Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
if (F->arg_size() == 1) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Cases("brev32", "brev64", Intrinsic::bitreverse)
.Case("clz.i", Intrinsic::ctlz)
.Case("popc.i", Intrinsic::ctpop)
.Default(Intrinsic::not_intrinsic);
if (IID != Intrinsic::not_intrinsic) {
NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
{F->getReturnType()});
return true;
}
}
// Check for nvvm intrinsics that need a return type adjustment.
if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
if (IID != Intrinsic::not_intrinsic) {
NewFn = nullptr;
return true;
}
}
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
//
// TODO: We could add lohi.i2d.
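// For example (illustrative; the actual lowering is done in
// UpgradeIntrinsicCall), nvvm.abs.{i,ll} is expanded into a
// compare/negate/select sequence rather than a call to a replacement
// intrinsic.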
bool Expand = false;
if (Name.consume_front("abs."))
// nvvm.abs.{i,ll}
Expand = Name == "i" || Name == "ll";
else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
Expand = true;
else if (Name.consume_front("max.") || Name.consume_front("min."))
// nvvm.{min,max}.{i,ll,ui,ull}
Expand = Name == "i" || Name == "ll" || Name == "ui" || Name == "ull";
else if (Name.consume_front("atomic.load.add."))
// nvvm.atomic.load.add.{f32.p,f64.p}
Expand = Name.startswith("f32.p") || Name.startswith("f64.p");
else
Expand = false;
if (Expand) {
NewFn = nullptr;
return true;
}
break; // No other 'nvvm.*'.
}
break;
}
case 'o':
// We only need to change the name to match the mangling including the
// address space.
if (Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->arg_size() == 2 || F->arg_size() == 3 ||
F->getName() !=
Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
Tys);
return true;
}
}
break;
case 'p':
if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(
F->getParent(), Intrinsic::ptr_annotation,
{F->arg_begin()->getType(), F->getArg(1)->getType()});
return true;
}
break;
case 'r': {
if (Name.consume_front("riscv.")) {
Intrinsic::ID ID;
ID = StringSwitch<Intrinsic::ID>(Name)
.Case("aes32dsi", Intrinsic::riscv_aes32dsi)
.Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
.Case("aes32esi", Intrinsic::riscv_aes32esi)
.Case("aes32esmi", Intrinsic::riscv_aes32esmi)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
return true;
}
break; // No other applicable upgrades.
}
ID = StringSwitch<Intrinsic::ID>(Name)
.StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
.StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
return true;
}
break; // No other applicable upgrades.
}
ID = StringSwitch<Intrinsic::ID>(Name)
.StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
.StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
.StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
.StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
.StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
.StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
return true;
}
break; // No other applicable upgrades.
}
break; // No other 'riscv.*' intrinsics
}
} break;
case 's':
if (Name == "stackprotectorcheck") {
NewFn = nullptr;
return true;
}
break;
case 'v': {
if (Name == "var.annotation" && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(
F->getParent(), Intrinsic::var_annotation,
{{F->arg_begin()->getType(), F->getArg(1)->getType()}});
return true;
}
break;
}
case 'w':
if (Name.consume_front("wasm.")) {
Intrinsic::ID ID =
StringSwitch<Intrinsic::ID>(Name)
.StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
.StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
.StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
rename(F);
NewFn =
Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
return true;
}
if (Name.consume_front("dot.i8x16.i7x16.")) {
ID = StringSwitch<Intrinsic::ID>(Name)
.Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
.Case("add.signed",
Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
return true;
}
break; // No other 'wasm.dot.i8x16.i7x16.*'.
}
break; // No other 'wasm.*'.
}
break;
case 'x':
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
}
auto *ST = dyn_cast<StructType>(F->getReturnType());
if (ST && (!ST->isLiteral() || ST->isPacked())) {
// Replace return type with literal non-packed struct. Only do this for
// intrinsics declared to return a struct, not for intrinsics with
// overloaded return type, in which case the exact struct type will be
// mangled into the name.
SmallVector<Intrinsic::IITDescriptor> Desc;
Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
auto *FT = F->getFunctionType();
auto *NewST = StructType::get(ST->getContext(), ST->elements());
auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
std::string Name = F->getName().str();
rename(F);
NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
Name, F->getParent());
// The new function may also need remangling.
if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
NewFn = *Result;
return true;
}
}
// Remangle our intrinsic since we upgrade the mangling
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
if (Result != std::nullopt) {
NewFn = *Result;
return true;
}
// This may not belong here. This function is effectively being overloaded
// to both detect an intrinsic which needs upgrading, and to provide the
// upgraded form of the intrinsic. We should perhaps have two separate
// functions for this.
return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
NewFn = nullptr;
bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
assert(F != NewFn && "Intrinsic function upgraded to the same function");
// Upgrade intrinsic attributes. This does not change the function.
if (NewFn)
F = NewFn;
if (Intrinsic::ID id = F->getIntrinsicID())
F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
return Upgraded;
}
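// Upgrade llvm.global_ctors / llvm.global_dtors from the legacy two-field
// { priority, function } element type to the current three-field form by
// appending a null pointer as the third field of each entry.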
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
GV->getName() == "llvm.global_dtors")) ||
!GV->hasInitializer())
return nullptr;
ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
if (!ATy)
return nullptr;
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
if (!STy || STy->getNumElements() != 2)
return nullptr;
LLVMContext &C = GV->getContext();
IRBuilder<> IRB(C);
auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
IRB.getPtrTy());
Constant *Init = GV->getInitializer();
unsigned N = Init->getNumOperands();
std::vector<Constant *> NewCtors(N);
for (unsigned i = 0; i != N; ++i) {
auto Ctor = cast<Constant>(Init->getOperand(i));
NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
Ctor->getAggregateElement(1),
Constant::getNullValue(IRB.getPtrTy()));
}
Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
NewInit, GV->getName());
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
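// For example (illustrative; 128-bit vector, Shift == 4): bytes 0..3 of the
// result are zero and bytes 4..15 are bytes 0..11 of the input, which is
// exactly what pslldq by 4 produces.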
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
Value *Op, unsigned Shift) {
auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
int Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = NumElts + i - Shift;
if (Idx < NumElts)
Idx -= NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
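// For example (illustrative; 128-bit vector, Shift == 4): bytes 0..11 of the
// result are bytes 4..15 of the input and bytes 12..15 are zero, matching
// psrldq by 4.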
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
unsigned Shift) {
auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
int Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = i + Shift;
if (Idx >= 16)
Idx += NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
unsigned NumElts) {
assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
llvm::VectorType *MaskTy = FixedVectorType::get(
Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
// If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
// i8 and we need to extract down to the right number of elements.
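// For example, a 4-element mask arrives as an i8: it is bitcast to <8 x i1>
// above, and the shuffle below extracts the low 4 elements.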
if (NumElts <= 4) {
int Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
"extract");
}
return Mask;
}
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
// If the mask is all ones just emit the first operation.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
Mask = getX86MaskVec(Builder, Mask,
cast<FixedVectorType>(Op0->getType())->getNumElements());
return Builder.CreateSelect(Mask, Op0, Op1);
}
static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
// If the mask is all ones just emit the first operation.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
Mask->getType()->getIntegerBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting, while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
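// For example (illustrative; 128-bit PALIGNR with shift 4): the result is
// bytes 4..15 of Op1 followed by bytes 0..3 of Op0, i.e. the concatenation
// Op0:Op1 shifted right by 4 bytes.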
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
Value *Op1, Value *Shift,
Value *Passthru, Value *Mask,
bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
// Mask the immediate for VALIGN.
if (IsVALIGN)
ShiftVal &= (NumElts - 1);
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
if (ShiftVal >= 32)
return llvm::Constant::getNullValue(Op0->getType());
// If palignr is shifting the pair of input vectors more than one lane,
// but less than two lanes, convert to shifting in zeroes.
if (ShiftVal > 16) {
ShiftVal -= 16;
Op1 = Op0;
Op0 = llvm::Constant::getNullValue(Op0->getType());
}
int Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l < NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
}
Value *Align = Builder.CreateShuffleVector(
Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
return EmitX86Select(Builder, Mask, Align, Passthru);
}
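// Upgrade masked VPERMT2/VPERMI2 intrinsics to the unmasked vpermi2var
// intrinsics plus a select, swapping the first two operands when the call is
// not in index form.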
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
bool ZeroMask, bool IndexForm) {
Type *Ty = CI.getType();
unsigned VecWidth = Ty->getPrimitiveSizeInBits();
unsigned EltWidth = Ty->getScalarSizeInBits();
bool IsFloat = Ty->isFPOrFPVectorTy();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_128;
else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_128;
else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_256;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
CI.getArgOperand(2) };
// If this isn't index form we need to swap operands 0 and 1.
if (!IndexForm)
std::swap(Args[0], Args[1]);
Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
: Builder.CreateBitCast(CI.getArgOperand(1),
Ty);
return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
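// Upgrade a two-operand X86 intrinsic to a call to the generic intrinsic IID
// on the call's type, emitting a select when the masked form's pass-through
// and mask operands are present.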
static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
Intrinsic::ID IID) {
Type *Ty = CI.getType();
Value *Op0 = CI.getOperand(0);
Value *Op1 = CI.getOperand(1);
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
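// Upgrade X86 vector rotate intrinsics to the generic funnel shift
// intrinsics, using the source as both funnel inputs; masked forms get a
// select.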
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
bool IsRotateRight) {
Type *Ty = CI.getType();
Value *Src = CI.getArgOperand(0);
Value *Amt = CI.getArgOperand(1);
// Amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are treated as modulo and types are all power-of-2, so
// we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
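// Upgrade XOP vpcom/vpcomu intrinsics to an integer compare and sign-extend
// (or an all-zeros/all-ones constant) based on the immediate predicate.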
static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
bool IsSigned) {
Type *Ty = CI.getType();
Value *LHS = CI.getArgOperand(0);
Value *RHS = CI.getArgOperand(1);
CmpInst::Predicate Pred;
switch (Imm) {
case 0x0:
Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
break;
case 0x1:
Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
break;
case 0x2:
Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
break;
case 0x3:
Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
break;
case 0x4:
Pred = ICmpInst::ICMP_EQ;
break;
case 0x5:
Pred = ICmpInst::ICMP_NE;
break;
case 0x6:
return Constant::getNullValue(Ty); // FALSE
case 0x7:
return Constant::getAllOnesValue(Ty); // TRUE
default:
llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
}
Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
Value *Ext = Builder.CreateSExt(Cmp, Ty);
return Ext;
}
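// Upgrade X86 concatenate-and-shift intrinsics to the generic funnel shift
// intrinsics, handling both the masked and zero-masked forms.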
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
bool IsShiftRight, bool ZeroMask) {
Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
Value *Op1 = CI.getArgOperand(1);
Value *Amt = CI.getArgOperand(2);
if (IsShiftRight)
std::swap(Op0, Op1);
// Amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are treated as modulo and types are all power-of-2, so
// we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
unsigned NumArgs = CI.arg_size();
if (NumArgs >= 4) { // For masked intrinsics.
Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
CI.getArgOperand(0);
Value *Mask = CI.getOperand(NumArgs - 1);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
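// Upgrade an X86 masked store intrinsic to the generic masked.store
// intrinsic, or to a regular store when the mask is known to be all ones.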
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr,
llvm::PointerType::getUnqual(Data->getType()));
const Align Alignment =
Aligned
? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
: Align(1);
// If the mask is all ones just emit a regular store.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedStore(Data, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
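// Upgrade an X86 masked load intrinsic to the generic masked.load intrinsic,
// or to a regular load when the mask is known to be all ones.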
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
Value *Ptr, Value *Passthru, Value *Mask,
bool Aligned) {
Type *ValTy = Passthru->getType();
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
const Align Alignment =
Aligned
? Align(
Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
8)
: Align(1);
// If the mask is all ones just emit a regular load.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
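// Upgrade X86 vector absolute value intrinsics to the generic llvm.abs
// intrinsic, emitting a select for the masked forms.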
static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
if (CI.arg_size() == 3)
Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
return Res;
}
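// Upgrade PMULDQ/PMULUDQ intrinsics: reinterpret the vXi32 arguments as
// vXi64, sign- or zero-extend the low 32 bits of each element, and multiply.
// Masked forms get a select.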
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
Type *Ty = CI.getType();
// Arguments have a vXi32 type so cast to vXi64.
Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
if (IsSigned) {
// Shift left then arithmetic shift right.
Constant *ShiftAmt = ConstantInt::get(Ty, 32);
LHS = Builder.CreateShl(LHS, ShiftAmt);
LHS = Builder.CreateAShr(LHS, ShiftAmt);
RHS = Builder.CreateShl(RHS, ShiftAmt);
RHS = Builder.CreateAShr(RHS, ShiftAmt);
} else {
// Clear the upper bits.
Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
LHS = Builder.CreateAnd(LHS, Mask);
RHS = Builder.CreateAnd(RHS, Mask);
}
Value *Res = Builder.CreateMul(LHS, RHS);
if (CI.arg_size() == 4)
Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
return Res;
}
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
Value *Mask) {
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
if (Mask) {
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
}
if (NumElts < 8) {
int Indices[8];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i)
Indices[i] = NumElts + i % NumElts;
Vec = Builder.CreateShuffleVector(Vec,
Constant::getNullValue(Vec->getType()),
Indices);
}
return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
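// Upgrade masked integer compare intrinsics to an icmp (or an all-zeros /
// all-ones constant for condition codes 3 and 7) combined with the mask
// operand.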
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
Value *Cmp;
if (CC == 3) {
Cmp = Constant::getNullValue(
FixedVectorType::get(Builder.getInt1Ty(), NumElts));
} else if (CC == 7) {
Cmp = Constant::getAllOnesValue(
FixedVectorType::get(Builder.getInt1Ty(), NumElts));
} else {
ICmpInst::Predicate Pred;
switch (CC) {
default: llvm_unreachable("Unknown condition code");
case 0: Pred = ICmpInst::ICMP_EQ; break;
case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
case 4: Pred = ICmpInst::ICMP_NE; break;
case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
}
Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
}
Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
Intrinsic::ID IID) {
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
Value *Rep = Builder.CreateCall(Intrin,
{ CI.getArgOperand(0), CI.getArgOperand(1) });
return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
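// Upgrade masked scalar move intrinsics: based on the low bit of the mask,
// select element 0 of either B or Src and insert it into element 0 of A.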
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
Value* A = CI.getArgOperand(0);
Value* B = CI.getArgOperand(1);
Value* Src = CI.getArgOperand(2);
Value* Mask = CI.getArgOperand(3);
Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
Value* Cmp = Builder.CreateIsNotNull(AndNode);
Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
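// Upgrade mask-to-vector intrinsics: convert the integer mask operand to a
// vector of i1 and sign-extend it to the result vector type.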
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
Value* Op = CI.getArgOperand(0);
Type* ReturnOp = CI.getType();
unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
Value *Mask = getX86MaskVec(Builder, Op, NumElts);
return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
CallBase &CI, Value *&Rep) {
Name = Name.substr(12); // Remove avx512.mask.
unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI.getType()->getScalarSizeInBits();
Intrinsic::ID IID;
if (Name.startswith("max.p")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_max_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_max_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_max_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_max_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("min.p")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_min_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_min_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_min_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_min_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pshuf.b.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pshuf_b_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pshuf_b;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pshuf_b_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmul.hr.sw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmul_hr_sw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmulh.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmulh_w;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmulh_w;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmulh_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmulhu.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmulhu_w;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmulhu_w;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmulhu_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmaddw.d.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmadd_wd;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmadd_wd;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmaddw_d_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmaddubs.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmadd_ub_sw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmaddubs_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packsswb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packsswb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packsswb;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packsswb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packssdw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packssdw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packssdw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packssdw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packuswb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packuswb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packuswb;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packuswb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packusdw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse41_packusdw;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packusdw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packusdw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("vpermilvar.")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx_vpermilvar_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx_vpermilvar_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_vpermilvar_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_vpermilvar_pd_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name == "cvtpd2dq.256") {
IID = Intrinsic::x86_avx_cvt_pd2dq_256;
} else if (Name == "cvtpd2ps.256") {
IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
} else if (Name == "cvttpd2dq.256") {
IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
} else if (Name == "cvttps2dq.128") {
IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
} else if (Name.startswith("permvar.")) {
bool IsFloat = CI.getType()->isFPOrFPVectorTy();
if (VecWidth == 256 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx2_permps;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx2_permd;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_df_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_di_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_sf_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_si_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_df_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_di_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("dbpsadbw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_avx512_dbpsadbw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx512_dbpsadbw_256;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_dbpsadbw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmultishift.qb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_avx512_pmultishift_qb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx512_pmultishift_qb_256;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmultishift_qb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("conflict.")) {
if (Name[9] == 'd' && VecWidth == 128)
IID = Intrinsic::x86_avx512_conflict_d_128;
else if (Name[9] == 'd' && VecWidth == 256)
IID = Intrinsic::x86_avx512_conflict_d_256;
else if (Name[9] == 'd' && VecWidth == 512)
IID = Intrinsic::x86_avx512_conflict_d_512;
else if (Name[9] == 'q' && VecWidth == 128)
IID = Intrinsic::x86_avx512_conflict_q_128;
else if (Name[9] == 'q' && VecWidth == 256)
IID = Intrinsic::x86_avx512_conflict_q_256;
else if (Name[9] == 'q' && VecWidth == 512)
IID = Intrinsic::x86_avx512_conflict_q_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pavg.")) {
if (Name[5] == 'b' && VecWidth == 128)
IID = Intrinsic::x86_sse2_pavg_b;
else if (Name[5] == 'b' && VecWidth == 256)
IID = Intrinsic::x86_avx2_pavg_b;
else if (Name[5] == 'b' && VecWidth == 512)
IID = Intrinsic::x86_avx512_pavg_b_512;
else if (Name[5] == 'w' && VecWidth == 128)
IID = Intrinsic::x86_sse2_pavg_w;
else if (Name[5] == 'w' && VecWidth == 256)
IID = Intrinsic::x86_avx2_pavg_w;
else if (Name[5] == 'w' && VecWidth == 512)
IID = Intrinsic::x86_avx512_pavg_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else
return false;
SmallVector<Value *, 4> Args(CI.args());
Args.pop_back();
Args.pop_back();
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
Args);
unsigned NumArgs = CI.arg_size();
Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
CI.getArgOperand(NumArgs - 2));
return true;
}
/// Upgrade the comment in a call to inline asm that represents an ObjC
/// retain/release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
size_t Pos;
if (AsmStr->find("mov\tfp") == 0 &&
AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
(Pos = AsmStr->find("# marker")) != std::string::npos) {
AsmStr->replace(Pos, 1, ";");
}
}
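// Upgrade old ARM MVE and CDE intrinsic calls whose predicate type for
// 64-bit elements changed from v4i1 to v2i1, inserting predicate-cast calls
// where needed.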
static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
IRBuilder<> &Builder) {
if (Name == "mve.vctp64.old") {
// Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
// correct type.
Value *VCTP = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
CI->getArgOperand(0), CI->getName());
Value *C1 = Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_v2i,
{VectorType::get(Builder.getInt1Ty(), 2, false)}),
VCTP);
return Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_i2v,
{VectorType::get(Builder.getInt1Ty(), 4, false)}),
C1);
} else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name ==
"mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
std::vector<Type *> Tys;
unsigned ID = CI->getIntrinsicID();
Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
switch (ID) {
case Intrinsic::arm_mve_mull_int_predicated:
case Intrinsic::arm_mve_vqdmull_predicated:
case Intrinsic::arm_mve_vldr_gather_base_predicated:
Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
break;
case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
case Intrinsic::arm_mve_vstr_scatter_base_predicated:
case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
V2I1Ty};
break;
case Intrinsic::arm_mve_vldr_gather_offset_predicated:
Tys = {CI->getType(), CI->getOperand(0)->getType(),
CI->getOperand(1)->getType(), V2I1Ty};
break;
case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
CI->getOperand(2)->getType(), V2I1Ty};
break;
case Intrinsic::arm_cde_vcx1q_predicated:
case Intrinsic::arm_cde_vcx1qa_predicated:
case Intrinsic::arm_cde_vcx2q_predicated:
case Intrinsic::arm_cde_vcx2qa_predicated:
case Intrinsic::arm_cde_vcx3q_predicated:
case Intrinsic::arm_cde_vcx3qa_predicated:
Tys = {CI->getOperand(1)->getType(), V2I1Ty};
break;
default:
llvm_unreachable("Unhandled Intrinsic!");
}
std::vector<Value *> Ops;
for (Value *Op : CI->args()) {
Type *Ty = Op->getType();
if (Ty->getScalarSizeInBits() == 1) {
Value *C1 = Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_v2i,
{VectorType::get(Builder.getInt1Ty(), 4, false)}),
Op);
Op = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(),
Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
C1);
}
Ops.push_back(Op);
}
Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return Builder.CreateCall(Fn, Ops, CI->getName());
}
llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
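// Upgrade amdgcn atomic.inc/atomic.dec intrinsics to atomicrmw
// uinc_wrap/udec_wrap instructions, preserving the ordering and volatile
// arguments and using the conservative "agent" sync scope.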
static Value *UpgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
const bool IsInc = Name.startswith("atomic.inc.");
if (IsInc || Name.startswith("atomic.dec.")) {
if (CI->getNumOperands() != 6) // Malformed bitcode.
return nullptr;
AtomicRMWInst::BinOp RMWOp =
IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
Value *Ptr = CI->getArgOperand(0);
Value *Val = CI->getArgOperand(1);
ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
if (Order == AtomicOrdering::NotAtomic ||
Order == AtomicOrdering::Unordered)
Order = AtomicOrdering::SequentiallyConsistent;
// The scope argument never really worked correctly. Use agent as the most
// conservative option which should still always produce the instruction.
SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
AtomicRMWInst *RMW =
Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
if (!VolatileArg || !VolatileArg->isZero())
RMW->setVolatile(true);
return RMW;
}
llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
// Note that dyn_cast to Function is not quite the same as getCalledFunction,
// which checks that the callee's function type matches. It's likely we need
// to handle type changes here.
Function *F = dyn_cast<Function>(CI->getCalledOperand());
if (!F)
return;
LLVMContext &C = CI->getContext();
IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
if (!NewFn) {
// Get the Function's name.
StringRef Name = F->getName();
assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
Name = Name.substr(5);
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
bool IsNVVM = Name.startswith("nvvm.");
if (IsNVVM)
Name = Name.substr(5);
bool IsARM = Name.startswith("arm.");
if (IsARM)
Name = Name.substr(4);
bool IsAMDGCN = Name.startswith("amdgcn.");
if (IsAMDGCN)
Name = Name.substr(7);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Nontemporal (unaligned) store of the 0'th element of the float/double
// vector.
Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
Value *Extract =
Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
SI->setMetadata(LLVMContext::MD_nontemporal, Node);
// Remove intrinsic.
CI->eraseFromParent();