blob: 479ac90b7d526eac067e3941f1ca67cd9811757b [file] [log] [blame]
//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
// optimizer ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Basic infrastructure for optimizing intermediate conversion instructions
// generated while performing vector floating point operations.
// Currently run at the start of code generation for Hexagon; it cleans up
// redundant conversion instructions by replacing the uses of a conversion
// with the appropriate machine operand. Liveness is preserved after this pass.
//
// @note: The redundant conversion instructions are not eliminated in this pass.
// In this pass, we are only trying to replace the uses of conversion
// instructions with its appropriate QFP instruction. We are leaving the job to
// Dead instruction Elimination pass to remove redundant conversion
// instructions.
//
// Brief overview of working of this QFP optimizer.
// This version of the Hexagon QFP optimizer iterates over each instruction
// and checks whether it belongs to the Hexagon floating-point HVX arithmetic
// instruction category (Add, Sub, Mul). It then finds the unique definition
// of each machine operand of the instruction.
//
// Example:
// Let MachineInstr *MI be the HVX vadd instruction
// MI -> $v0 = V6_vadd_sf $v1, $v2
// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
//
// In the above example, DefMI1 and DefMI2 give the unique definitions
// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
//
// If neither of the definitions is a conversion instruction
// (V6_vconv_sf_qf32, V6_vconv_hf_qf16), then the pass skips the current
// instruction and moves on to the next instruction.
//
// If one of the definitions is a conversion instruction, then the pass
// replaces the arithmetic instruction with its corresponding mix variant.
// In the above example, if $v1 is conversion instruction
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
// After Transformation:
// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
//
// If both the definitions are conversion instructions then the instruction will
// be replaced with its qf variant
// In the above example, if $v1 and $v2 are conversion instructions
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
// After Transformation:
// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
// with $v4)
//
// Currently, in this pass, we are not handling the case when the definitions
// are PHI inst.
//
//===----------------------------------------------------------------------===//
#include <unordered_set>
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <vector>
#define DEBUG_TYPE "hexagon-qfp-optimizer"
using namespace llvm;
// Command-line switch "-disable-qfp-opt": when set, runOnMachineFunction()
// returns immediately without touching the function. Off by default.
cl::opt<bool> DisableQFOptimizer(
    "disable-qfp-opt",
    cl::desc("Disable optimization of Qfloat operations."),
    cl::init(false));
namespace {
// Opcode rewrite table used by the QFP optimizer.
//
// Each entry maps an opcode to the opcode that replaces it when ONE more of
// its source operands is taken directly in qf format:
//   IEEE form --> "mix" form --> full qf form.
// When both sources are conversions the table is therefore consulted twice
// (IEEE -> mix -> qf). V6_vmpy_qf32_sf has no mix form and maps straight to
// the full qf opcode.
const std::map<unsigned short, unsigned short> QFPInstMap{
    // Stage 1: IEEE (hf/sf) sources -> mix form (first source is qf).
    {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
    {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
    {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
    {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
    {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
    {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},

    // Stage 2: mix form -> both sources in qf format.
    {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
    {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
    {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
    {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
    {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
    {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},

    // Direct: sf multiply -> qf32 multiply (no intermediate mix opcode).
    {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
};
} // namespace
namespace llvm {
// Pass-registry hook; the definition is generated by INITIALIZE_PASS below.
void initializeHexagonQFPOptimizerPass(PassRegistry &);
// Factory for the QFP optimizer pass; defined later in this file.
FunctionPass *createHexagonQFPOptimizer();
} // namespace llvm
namespace {
/// Machine-function pass that rewrites HVX floating-point arithmetic so that
/// it consumes qf operands directly instead of the results of qf->IEEE
/// conversion instructions.
struct HexagonQFPOptimizer : MachineFunctionPass {
  /// Pass identification; its address is what identifies the pass.
  static char ID;

  HexagonQFPOptimizer() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return StringRef(HEXAGON_QFP_OPTIMIZER);
  }

  /// The pass only rewrites instructions in place, so the CFG is preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Walks every instruction of \p MF and tries optimizeQfp() on each
  /// candidate. Returns true if any instruction was replaced.
  bool runOnMachineFunction(MachineFunction &MF) override;

  /// Tries to insert a replacement for \p MI in \p MBB. Returns true if a
  /// replacement was inserted; the caller then erases \p MI.
  bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);

private:
  // Per-function state, set up at the start of runOnMachineFunction().
  const MachineRegisterInfo *MRI = nullptr;
  const HexagonInstrInfo *HII = nullptr;
  const HexagonSubtarget *HST = nullptr;
};

char HexagonQFPOptimizer::ID = 0;
} // namespace
// Registers the pass with the pass registry under the command-line name
// "hexagon-qfp-optimizer" and the description HEXAGON_QFP_OPTIMIZER. Per the
// INITIALIZE_PASS macro's parameter list the two trailing flags are
// "CFG-only" and "is analysis"; both are false here.
INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer",
HEXAGON_QFP_OPTIMIZER, false, false)
// Factory declared in namespace llvm above: returns a newly allocated
// HexagonQFPOptimizer. Ownership passes to the caller (presumably the pass
// manager that schedules it -- the call site is not visible in this file).
FunctionPass *llvm::createHexagonQFPOptimizer() {
return new HexagonQFPOptimizer();
}
bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
MachineBasicBlock *MBB) {
// Early exit:
// - if instruction is invalid or has too few operands (QFP ops need 2 sources
// + 1 dest),
// - or does not have a transformation mapping.
if (MI->getNumOperands() < 3)
return false;
auto It = QFPInstMap.find(MI->getOpcode());
if (It == QFPInstMap.end())
return false;
unsigned short InstTy = It->second;
unsigned Op0F = 0;
unsigned Op1F = 0;
// Get the reaching defs of MI, DefMI1 and DefMI2
MachineInstr *DefMI1 = nullptr;
MachineInstr *DefMI2 = nullptr;
if (MI->getOperand(1).isReg())
DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
if (MI->getOperand(2).isReg())
DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
if (!DefMI1 || !DefMI2)
return false;
MachineOperand &Res = MI->getOperand(0);
MachineInstr *Inst1 = nullptr;
MachineInstr *Inst2 = nullptr;
LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
DefMI2->dump());
// Get the reaching defs of DefMI
if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() &&
DefMI1->getOperand(1).getReg().isVirtual())
Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg());
if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() &&
DefMI2->getOperand(1).getReg().isVirtual())
Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg());
unsigned Def1OP = DefMI1->getOpcode();
unsigned Def2OP = DefMI2->getOpcode();
MachineInstrBuilder MIB;
// Case 1: Both reaching defs of MI are qf to sf/hf conversions
if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
Def2OP == Hexagon::V6_vconv_hf_qf16)) {
// If the reaching defs of DefMI are W register type, we return
if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(0).isReg() &&
MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
&Hexagon::HvxWRRegClass) ||
(Inst2 && Inst2->getNumOperands() > 0 && Inst2->getOperand(0).isReg() &&
MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
&Hexagon::HvxWRRegClass))
return false;
// Analyze the use operands of the conversion to get their KILL status
MachineOperand &Src1 = DefMI1->getOperand(1);
MachineOperand &Src2 = DefMI2->getOperand(1);
Op0F = getKillRegState(Src1.isKill());
Src1.setIsKill(false);
Op1F = getKillRegState(Src2.isKill());
Src2.setIsKill(false);
if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) {
auto OuterIt = QFPInstMap.find(MI->getOpcode());
if (OuterIt == QFPInstMap.end())
return false;
auto InnerIt = QFPInstMap.find(OuterIt->second);
if (InnerIt == QFPInstMap.end())
return false;
InstTy = InnerIt->second;
}
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
.addReg(Src1.getReg(), Op0F, Src1.getSubReg())
.addReg(Src2.getReg(), Op1F, Src2.getSubReg());
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
// Case 2: Left operand is conversion to sf/hf
} else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP != Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
!DefMI2->isPHI() &&
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
if (Inst1 && MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
&Hexagon::HvxWRRegClass)
return false;
MachineOperand &Src1 = DefMI1->getOperand(1);
MachineOperand &Src2 = MI->getOperand(2);
Op0F = getKillRegState(Src1.isKill());
Src1.setIsKill(false);
Op1F = getKillRegState(Src2.isKill());
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
.addReg(Src1.getReg(), Op0F, Src1.getSubReg())
.addReg(Src2.getReg(), Op1F, Src2.getSubReg());
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
// Case 2: Left operand is conversion to sf/hf
} else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP != Hexagon::V6_vconv_hf_qf16 &&
Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
!DefMI1->isPHI() &&
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
// The second operand of original instruction is converted.
// In "mix" instructions, "qf" operand is always the first operand.
// Caveat: vsub is not commutative w.r.t operands.
if (InstTy == Hexagon::V6_vsub_qf16_mix ||
InstTy == Hexagon::V6_vsub_qf32_mix)
return false;
if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
&Hexagon::HvxWRRegClass)
return false;
MachineOperand &Src1 = MI->getOperand(1);
MachineOperand &Src2 = DefMI2->getOperand(1);
Op1F = getKillRegState(Src2.isKill());
Src2.setIsKill(false);
Op0F = getKillRegState(Src1.isKill());
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
.addReg(Src2.getReg(), Op1F,
Src2.getSubReg()) // Notice the operands are flipped.
.addReg(Src1.getReg(), Op0F, Src1.getSubReg());
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
}
return false;
}
/// Pass entry point: visits every instruction of \p MF, hands candidates to
/// optimizeQfp(), and erases each instruction for which a replacement was
/// inserted.
///
/// Does nothing when the pass is disabled on the command line, when the
/// subtarget lacks HVX v68 operations or packet support, or when the
/// function is to be skipped.
///
/// \returns true if at least one instruction was replaced.
bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisableQFOptimizer)
    return false;

  HST = &MF.getSubtarget<HexagonSubtarget>();
  const bool SubtargetOk = HST->useHVXV68Ops() && HST->usePackets();
  if (!SubtargetOk || skipFunction(MF.getFunction()))
    return false;

  HII = HST->getInstrInfo();
  MRI = &MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName()
                    << " Optimize intermediate conversions ===\n");

  bool Modified = false;
  for (MachineFunction::iterator BlockIt = MF.begin(); BlockIt != MF.end();
       ++BlockIt) {
    MachineBasicBlock *Block = &*BlockIt;
    for (MachineBasicBlock::iterator Next = BlockIt->instr_begin();
         Next != BlockIt->instr_end();) {
      MachineInstr *Cur = &*Next;
      // Step past the current instruction first: it may be erased below.
      ++Next;

      const unsigned CurOpc = Cur->getOpcode();
      if (!QFPInstMap.count(CurOpc))
        continue;
      if (CurOpc == Hexagon::V6_vconv_sf_qf32 ||
          CurOpc == Hexagon::V6_vconv_hf_qf16)
        continue;

      LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; Cur->dump());
      if (!optimizeQfp(Cur, Block))
        continue;

      // A replacement was inserted ahead of Cur; the original goes away.
      Cur->eraseFromParent();
      LLVM_DEBUG(dbgs() << "\t....Removing....");
      Modified = true;
    }
  }
  return Modified;
}