blob: 0bfa897ecf40473ffd0ff5f32945fb3f1df94be8 [file] [log] [blame]
Tim Northover69fa84a2016-10-14 22:18:18 +00001//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
Tim Northover33b07d62016-07-22 20:03:43 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tim Northover33b07d62016-07-22 20:03:43 +00006//
7//===----------------------------------------------------------------------===//
8//
Tim Northover69fa84a2016-10-14 22:18:18 +00009/// \file This file implements the LegalizerHelper class to legalize
Tim Northover33b07d62016-07-22 20:03:43 +000010/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
Tim Northover69fa84a2016-10-14 22:18:18 +000015#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
Tim Northoveredb3c8c2016-08-29 19:07:16 +000016#include "llvm/CodeGen/GlobalISel/CallLowering.h"
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +000017#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
Jessica Delfc672b62023-02-21 09:40:07 +010018#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
serge-sans-pailleed98c1b2022-03-09 22:29:31 +010019#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
Tim Northover69fa84a2016-10-14 22:18:18 +000020#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
Jessica Paquette324af792021-05-25 16:54:20 -070021#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
Matt Arsenault0b7de792020-07-26 21:25:10 -040022#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
serge-sans-pailleed98c1b2022-03-09 22:29:31 +010023#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
Amara Emersona35c2c72021-02-21 14:17:03 -080024#include "llvm/CodeGen/GlobalISel/Utils.h"
Amara Emerson41ebbed2025-01-05 21:32:27 -080025#include "llvm/CodeGen/LowLevelTypeUtils.h"
Chen Zheng6ee2f772022-12-12 09:53:53 +000026#include "llvm/CodeGen/MachineConstantPool.h"
serge-sans-pailleed98c1b2022-03-09 22:29:31 +010027#include "llvm/CodeGen/MachineFrameInfo.h"
Tim Northover33b07d62016-07-22 20:03:43 +000028#include "llvm/CodeGen/MachineRegisterInfo.h"
Joseph Huber615b7ee2024-07-20 07:29:04 -050029#include "llvm/CodeGen/RuntimeLibcallUtil.h"
Amara Emersone20b91c2019-08-27 19:54:27 +000030#include "llvm/CodeGen/TargetFrameLowering.h"
Aditya Nandakumarc0333f72018-08-21 17:30:31 +000031#include "llvm/CodeGen/TargetInstrInfo.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000032#include "llvm/CodeGen/TargetLowering.h"
Amara Emerson9f39ba12021-05-19 21:35:05 -070033#include "llvm/CodeGen/TargetOpcodes.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000034#include "llvm/CodeGen/TargetSubtargetInfo.h"
Amara Emerson9f39ba12021-05-19 21:35:05 -070035#include "llvm/IR/Instructions.h"
Tim Northover33b07d62016-07-22 20:03:43 +000036#include "llvm/Support/Debug.h"
Aditya Nandakumarc0333f72018-08-21 17:30:31 +000037#include "llvm/Support/MathExtras.h"
Tim Northover33b07d62016-07-22 20:03:43 +000038#include "llvm/Support/raw_ostream.h"
Mirko Brkusanin36527cb2021-09-07 11:30:11 +020039#include "llvm/Target/TargetMachine.h"
Kazu Hirata267f21a2022-08-28 10:41:51 -070040#include <numeric>
Kazu Hirata3ccbfc32022-11-26 14:44:54 -080041#include <optional>
Tim Northover33b07d62016-07-22 20:03:43 +000042
Daniel Sanders5377fb32017-04-20 15:46:12 +000043#define DEBUG_TYPE "legalizer"
Tim Northover33b07d62016-07-22 20:03:43 +000044
45using namespace llvm;
Daniel Sanders9ade5592018-01-29 17:37:29 +000046using namespace LegalizeActions;
Matt Arsenault0b7de792020-07-26 21:25:10 -040047using namespace MIPatternMatch;
Tim Northover33b07d62016-07-22 20:03:43 +000048
Matt Arsenaultc83b8232019-02-07 17:38:00 +000049/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
Matt Arsenaultd3093c22019-02-28 00:16:32 +000054/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
Matt Arsenaultc83b8232019-02-07 17:38:00 +000058 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
Matt Arsenaultd3093c22019-02-28 00:16:32 +000067 return {NumParts, 0};
Matt Arsenaultc83b8232019-02-07 17:38:00 +000068
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
Matt Arsenaultd3093c22019-02-28 00:16:32 +000072 return {-1, -1};
David Green34de2152024-05-13 21:58:41 +010073 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
Matt Arsenaultc83b8232019-02-07 17:38:00 +000076 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
Matt Arsenaultd3093c22019-02-28 00:16:32 +000080 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
Matt Arsenaultc83b8232019-02-07 17:38:00 +000082}
83
Konstantin Schwarz76986bd2020-02-06 10:01:57 -080084static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
Matt Arsenault0da582d2020-07-19 09:56:15 -040096 case 80:
97 return Type::getX86_FP80Ty(Ctx);
Konstantin Schwarz76986bd2020-02-06 10:01:57 -080098 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
// Construct a helper for \p MF, taking the LegalizerInfo from the function's
// subtarget. No known-bits analysis is attached (KB is null).
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
Tim Northover33b07d62016-07-22 20:03:43 +0000111
// Construct a helper with an explicitly supplied LegalizerInfo and an optional
// known-bits analysis \p KB.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B, GISelKnownBits *KB)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
Matt Arsenaultd55d5922020-08-19 10:46:59 -0400117
/// Perform a single legalization step on \p MI: query the LegalizerInfo for
/// the action to take and dispatch to the matching transform. Returns
/// AlreadyLegal when no change is needed, Legalized on success, or
/// UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  // Point the builder at MI so new instructions inherit its position and
  // debug location.
  MIRBuilder.setInstrAndDebugLoc(MI);

  // Intrinsics bypass the action table and go straight to target hooks.
  if (isa<GIntrinsic>(MI))
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
                                                     : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
162
/// Assemble \p DstReg (of type \p ResultTy) from \p PartRegs, each of type
/// \p PartTy, plus optional \p LeftoverRegs of type \p LeftoverTy covering the
/// remainder when \p PartTy does not evenly divide \p ResultTy.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  // Easy case: the parts evenly cover the result, so a single merge /
  // concat / build-vector suffices.
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  // Merge sub-vectors with different number of elements and insert into DstReg.
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs(PartRegs.begin(), PartRegs.end());
    AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  // Scalar result with leftover: split everything down to the common GCD
  // type, re-merge up to the LCM type, then extract the result bits.
  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
198
Petar Avramovic29f88b92021-12-23 14:09:51 +0100199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
chuongg3fcfe1b62024-01-15 16:40:39 +0000203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
Petar Avramovic29f88b92021-12-23 14:09:51 +0100205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 8> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
David Green34de2152024-05-13 21:58:41 +0100216 if (!MRI.getType(Leftover).isVector())
Petar Avramovic29f88b92021-12-23 14:09:51 +0100217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
Diana Picusf95a5fb2023-01-09 11:59:00 +0100221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
Petar Avramovic29f88b92021-12-23 14:09:51 +0100222}
223
Matt Arsenault31adc282020-08-03 14:13:38 -0400224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
Matt Arsenault31adc282020-08-03 14:13:38 -0400229 const int StartIdx = Regs.size();
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500230 const int NumResults = MI.getNumOperands() - 1;
Matt Arsenault31adc282020-08-03 14:13:38 -0400231 Regs.resize(Regs.size() + NumResults);
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500232 for (int I = 0; I != NumResults; ++I)
Matt Arsenault31adc282020-08-03 14:13:38 -0400233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500234}
235
Matt Arsenault31adc282020-08-03 14:13:38 -0400236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500238 LLT SrcTy = MRI.getType(SrcReg);
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
Matt Arsenault31adc282020-08-03 14:13:38 -0400248}
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500249
Matt Arsenault31adc282020-08-03 14:13:38 -0400250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500255 return GCDTy;
256}
257
/// Rewrite \p VRegs (pieces of type \p GCDTy) in place into pieces of type
/// \p NarrowTy that cover the LCM of \p DstTy and \p NarrowTy, padding with
/// values chosen by \p PadStrategy (G_ZEXT, G_ANYEXT or G_SEXT) when the
/// sources do not cover the LCM. Returns the LCM type.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  // How many NarrowTy pieces make up the LCM, and how many GCDTy pieces make
  // up each NarrowTy piece.
  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  // Hand the NarrowTy-sized pieces back to the caller through VRegs.
  VRegs = std::move(Remerge);
  return LCMTy;
}
348
/// Merge \p RemergeRegs (which together form a value of type \p LCMTy) and
/// write the low \p DstReg-sized portion of the result into \p DstReg.
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  // Exact-size match: merge straight into the destination.
  if (DstTy == LCMTy) {
    MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    // Scalar-to-scalar: just truncate the wide merge.
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    // Unmerge the wide value; the first unmerge result is the destination and
    // the rest are dead scratch registers.
    // NOTE(review): this path builds a second, identical merge below, leaving
    // the `Remerge` created above unused here — possibly a redundant
    // instruction; confirm before changing.
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}
381
/// Map a generic opcode plus scalar size in bits to the corresponding runtime
/// library call enumerator. \p Size selects among the 32/64/(80/)128-bit
/// variants; unsupported sizes or opcodes hit llvm_unreachable.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// Integer libcalls: only 32/64/128-bit variants exist.
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

// Floating-point libcalls: additionally cover the 80-bit x87 type.
#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FEXP10:
    RTLIBCASE(EXP10_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FPOWI:
    RTLIBCASE(POWI_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FTAN:
    RTLIBCASE(TAN_F);
  case TargetOpcode::G_FASIN:
    RTLIBCASE(ASIN_F);
  case TargetOpcode::G_FACOS:
    RTLIBCASE(ACOS_F);
  case TargetOpcode::G_FATAN:
    RTLIBCASE(ATAN_F);
  case TargetOpcode::G_FATAN2:
    RTLIBCASE(ATAN2_F);
  case TargetOpcode::G_FSINH:
    RTLIBCASE(SINH_F);
  case TargetOpcode::G_FCOSH:
    RTLIBCASE(COSH_F);
  case TargetOpcode::G_FTANH:
    RTLIBCASE(TANH_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FLDEXP:
    RTLIBCASE(LDEXP_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    RTLIBCASE(TRUNC_F);
  case TargetOpcode::G_INTRINSIC_ROUND:
    RTLIBCASE(ROUND_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  case TargetOpcode::G_INTRINSIC_LRINT:
    RTLIBCASE(LRINT_F);
  case TargetOpcode::G_INTRINSIC_LLRINT:
    RTLIBCASE(LLRINT_F);
  }
  llvm_unreachable("Unknown libcall function");
#undef RTLIBCASE_INT
#undef RTLIBCASE
}
505
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
/// NOTE(review): \p Result is not inspected in this body; only \p MI's
/// position and the caller's return attributes are checked.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
                                    MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  // G_MEMCPY %0, %1, %2
  // $x0 = COPY %0
  // RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    // G_BZERO returns nothing, so a following COPY cannot be of its result.
    if (MI.getOpcode() == TargetOpcode::G_BZERO)
      return false;

    // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
    // memcpy/etc routines return the same parameter. For others it will be the
    // returned value.
    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    // The return must consume exactly the physical register the COPY defined.
    if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
571
/// Emit a call to the external symbol \p Name with result \p Result and
/// arguments \p Args, using calling convention \p CC. If \p MI is non-null,
/// the call may be lowered as a tail call when legal; in that case the
/// now-redundant return (and any intervening copy/debug instructions)
/// following \p MI are deleted.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
                    MachineInstr *MI) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  // Only attempt a tail call when the libcall's return type matches the
  // caller's (or is void) and MI sits in tail position.
  if (MI)
    Info.IsTailCall =
        (Result.Ty->isVoidTy() ||
         Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
        isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
                                *MIRBuilder.getMRI());

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI->getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI->getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }
  return LegalizerHelper::Legalized;
}
618
Dominik Montada9fedb692020-03-26 13:59:08 +0100619LegalizerHelper::LegalizeResult
620llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
621 const CallLowering::ArgInfo &Result,
David Greend659bd12024-01-03 07:59:36 +0000622 ArrayRef<CallLowering::ArgInfo> Args,
623 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
Dominik Montada9fedb692020-03-26 13:59:08 +0100624 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
625 const char *Name = TLI.getLibcallName(Libcall);
David Green47c65cf2024-02-17 08:57:14 +0000626 if (!Name)
627 return LegalizerHelper::UnableToLegalize;
Dominik Montada9fedb692020-03-26 13:59:08 +0100628 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
David Greend659bd12024-01-03 07:59:36 +0000629 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
Dominik Montada9fedb692020-03-26 13:59:08 +0100630}
631
Diana Picus65ed3642018-01-17 13:34:10 +0000632// Useful for libcalls where all operands have the same type.
Diana Picus02e11012017-06-15 10:53:31 +0000633static LegalizerHelper::LegalizeResult
634simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
David Greend659bd12024-01-03 07:59:36 +0000635 Type *OpType, LostDebugLocObserver &LocObserver) {
Diana Picus02e11012017-06-15 10:53:31 +0000636 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
Diana Picuse74243d2018-01-12 11:30:45 +0000637
Matt Arsenault9b057f62021-07-08 11:26:30 -0400638 // FIXME: What does the original arg index mean here?
Diana Picuse74243d2018-01-12 11:30:45 +0000639 SmallVector<CallLowering::ArgInfo, 3> Args;
Kazu Hirata259cd6f2021-11-25 22:17:10 -0800640 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
641 Args.push_back({MO.getReg(), OpType, 0});
Matt Arsenault9b057f62021-07-08 11:26:30 -0400642 return createLibcall(MIRBuilder, Libcall,
David Greend659bd12024-01-03 07:59:36 +0000643 {MI.getOperand(0).getReg(), OpType, 0}, Args,
644 LocObserver, &MI);
Diana Picus02e11012017-06-15 10:53:31 +0000645}
646
/// Lower G_MEMCPY/G_MEMMOVE/G_MEMSET/G_BZERO into a call to the corresponding
/// C runtime routine. The last operand of \p MI is an immediate 'tail' flag;
/// when it is set and the call is in tail position, the call is lowered as a
/// tail call and the now-dead return following \p MI is deleted.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    // Note: no 'returned' flag here — bzero does not return its argument.
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    // memcpy returns its destination pointer argument.
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    // memmove returns its destination pointer argument.
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    // memset returns its destination pointer argument.
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  // The call is modeled as returning void; the real result is conveyed via
  // the 'returned' argument flag set above.
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall =
      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
      isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}
735
/// Map an atomic G_* instruction onto the matching outline-atomic helper
/// libcall. The concrete entry is selected by the instruction's merged memory
/// ordering and its access size in bytes; vector memory types are rejected.
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto &AtomicMI = cast<GMemOperation>(MI);
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();
  uint64_t MemSize = MemType.getSizeInBytes();
  if (MemType.isVector())
    return RTLIB::UNKNOWN_LIBCALL;

// LCALLS expands to the four memory-ordering variants (relaxed, acquire,
// release, acq_rel) of one sized entry point; LCALL5 covers the supported
// sizes 1/2/4/8/16 bytes. Together they build the [size][ordering] tables
// consumed by getOutlineAtomicHelper.
#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A)                                                              \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    // SUB shares LDADD: the caller negates the value first.
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    // AND is expressed through LDCLR: the caller inverts the mask first.
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
#undef LCALLS
#undef LCALL5
}
782
/// Lower an atomic G_* instruction into a call to the outline-atomic helper
/// chosen by getOutlineAtomicLibcall(). The operands are repacked into the
/// helper's (values..., pointer) argument order; AND and SUB are rewritten in
/// terms of the LDCLR/LDADD helpers by inverting resp. negating the value.
static LegalizerHelper::LegalizeResult
createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  Type *RetTy;
  SmallVector<Register> RetRegs;
  SmallVector<CallLowering::ArgInfo, 3> Args;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register Success;
    LLT SuccessLLT;
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      // The _WITH_SUCCESS form defines an extra success flag, shifting every
      // operand; re-read them and return a {value, success} struct instead.
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();
      RetRegs.push_back(Success);
      RetTy = StructType::get(
          Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
    }
    Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
    Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    // and(mem, v) is implemented as ldclr(mem, ~v).
    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
      Val =
          MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
              .getReg(0);
    // sub(mem, v) is implemented as ldadd(mem, -v).
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
      Val =
          MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
              .getReg(0);
    Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  default:
    llvm_unreachable("unsupported opcode");
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}
860
Diana Picus65ed3642018-01-17 13:34:10 +0000861static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
862 Type *FromType) {
863 auto ToMVT = MVT::getVT(ToType);
864 auto FromMVT = MVT::getVT(FromType);
865
866 switch (Opcode) {
867 case TargetOpcode::G_FPEXT:
868 return RTLIB::getFPEXT(FromMVT, ToMVT);
869 case TargetOpcode::G_FPTRUNC:
870 return RTLIB::getFPROUND(FromMVT, ToMVT);
Diana Picus4ed0ee72018-01-30 07:54:52 +0000871 case TargetOpcode::G_FPTOSI:
872 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
873 case TargetOpcode::G_FPTOUI:
874 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
Diana Picus517531e2018-01-30 09:15:17 +0000875 case TargetOpcode::G_SITOFP:
876 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
877 case TargetOpcode::G_UITOFP:
878 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
Diana Picus65ed3642018-01-17 13:34:10 +0000879 }
880 llvm_unreachable("Unsupported libcall function");
881}
882
883static LegalizerHelper::LegalizeResult
884conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
Craig Topper4cf2cf12024-12-04 10:42:49 -0800885 Type *FromType, LostDebugLocObserver &LocObserver,
886 const TargetLowering &TLI, bool IsSigned = false) {
887 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
888 if (FromType->isIntegerTy()) {
889 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
890 Arg.Flags[0].setSExt();
891 else
892 Arg.Flags[0].setZExt();
893 }
894
Diana Picus65ed3642018-01-17 13:34:10 +0000895 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
Craig Topper4cf2cf12024-12-04 10:42:49 -0800896 return createLibcall(MIRBuilder, Libcall,
897 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
898 &MI);
Diana Picus65ed3642018-01-17 13:34:10 +0000899}
900
Serge Pavlov462d5832023-10-09 21:13:39 +0700901static RTLIB::Libcall
902getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
903 RTLIB::Libcall RTLibcall;
904 switch (MI.getOpcode()) {
Serge Pavlov7fc7ef12024-01-10 14:18:00 +0700905 case TargetOpcode::G_GET_FPENV:
906 RTLibcall = RTLIB::FEGETENV;
907 break;
908 case TargetOpcode::G_SET_FPENV:
909 case TargetOpcode::G_RESET_FPENV:
910 RTLibcall = RTLIB::FESETENV;
911 break;
Serge Pavlov462d5832023-10-09 21:13:39 +0700912 case TargetOpcode::G_GET_FPMODE:
913 RTLibcall = RTLIB::FEGETMODE;
914 break;
915 case TargetOpcode::G_SET_FPMODE:
916 case TargetOpcode::G_RESET_FPMODE:
917 RTLibcall = RTLIB::FESETMODE;
918 break;
919 default:
920 llvm_unreachable("Unexpected opcode");
921 }
922 return RTLibcall;
923}
924
// Some library functions that read FP state (fegetmode, fegetenv) write the
// state into a region in memory. IR intrinsics that do the same operations
// (get_fpmode, get_fpenv) return the state as integer value. To implement these
// intrinsics via the library functions, we need to use temporary variable,
// for example:
//
// %0:_(s32) = G_GET_FPMODE
//
// is transformed to:
//
// %1:_(p0) = G_FRAME_INDEX %stack.0
// BL &fegetmode
// %0:_(s32) = G_LOAD % 1
//
LegalizerHelper::LegalizeResult
LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
                                       MachineInstr &MI,
                                       LostDebugLocObserver &LocObserver) {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  auto &MF = MIRBuilder.getMF();
  auto &MRI = *MIRBuilder.getMRI();
  auto &Ctx = MF.getFunction().getContext();

  // Create temporary, where library function will put the read state.
  Register Dst = MI.getOperand(0).getReg();
  LLT StateTy = MRI.getType(Dst);
  TypeSize StateSize = StateTy.getSizeInBytes();
  // The same alignment is used for the stack slot and the load below.
  Align TempAlign = getStackTemporaryAlignment(StateTy);
  MachinePointerInfo TempPtrInfo;
  auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);

  // Create a call to library function, with the temporary as an argument.
  // MI is passed as null: the libcall returns void, so there is no tail-call
  // cleanup to perform here.
  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
  Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
  RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
  auto Res =
      createLibcall(MIRBuilder, RTLibcall,
                    CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
                    CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
                    LocObserver, nullptr);
  if (Res != LegalizerHelper::Legalized)
    return Res;

  // Create a load from the temporary.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
  MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);

  return LegalizerHelper::Legalized;
}
975
// Similar to `createGetStateLibcall` the function calls a library function
// using transient space in stack. In this case the library function reads
// content of memory region: the new state is first stored to a stack
// temporary, whose address is then passed to the fesetenv/fesetmode call.
LegalizerHelper::LegalizeResult
LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
                                       MachineInstr &MI,
                                       LostDebugLocObserver &LocObserver) {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  auto &MF = MIRBuilder.getMF();
  auto &MRI = *MIRBuilder.getMRI();
  auto &Ctx = MF.getFunction().getContext();

  // Create temporary, where library function will get the new state.
  Register Src = MI.getOperand(0).getReg();
  LLT StateTy = MRI.getType(Src);
  TypeSize StateSize = StateTy.getSizeInBytes();
  Align TempAlign = getStackTemporaryAlignment(StateTy);
  MachinePointerInfo TempPtrInfo;
  auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);

  // Put the new state into the temporary.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
  MIRBuilder.buildStore(Src, Temp, *MMO);

  // Create a call to library function, with the temporary as an argument.
  // MI is passed as null: the libcall returns void, so no tail-call cleanup
  // is needed.
  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
  Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
  RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
  return createLibcall(MIRBuilder, RTLibcall,
                       CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
                       CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
                       LocObserver, nullptr);
}
1010
/// Returns the corresponding libcall for the given Pred and
/// the ICMP predicate that should be generated to compare with #0
/// after the libcall.
/// The comparison libcalls return an i32, so each entry pairs the sized
/// libcall with the signed integer predicate to apply against zero.
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
// Select the 32/64/128-bit variant of the given libcall family by Size.
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)                                 \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return {RTLIB::LibcallPrefix##32, ICmpPred};                             \
    case 64:                                                                   \
      return {RTLIB::LibcallPrefix##64, ICmpPred};                             \
    case 128:                                                                  \
      return {RTLIB::LibcallPrefix##128, ICmpPred};                            \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Pred) {
  case CmpInst::FCMP_OEQ:
    RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
  case CmpInst::FCMP_UNE:
    RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
  case CmpInst::FCMP_OGE:
    RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
  case CmpInst::FCMP_OLT:
    RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
  case CmpInst::FCMP_OLE:
    RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
  case CmpInst::FCMP_OGT:
    RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
  case CmpInst::FCMP_UNO:
    RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
  default:
    // No single libcall implements this predicate; the caller synthesizes it
    // from multiple (or inverted) libcalls.
    return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
  }
}
1049
/// Lower a G_FCMP by calling the soft-float comparison libcalls and comparing
/// their i32 result against zero. Predicates without a direct libcall are
/// synthesized from two libcalls (UEQ, ONE) or from the inverse of a
/// supported predicate (ULT/UGE/UGT/ULE/ORD).
LegalizerHelper::LegalizeResult
LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
                                   MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  auto &MF = MIRBuilder.getMF();
  auto &Ctx = MF.getFunction().getContext();
  const GFCmp *Cmp = cast<GFCmp>(&MI);

  LLT OpLLT = MRI.getType(Cmp->getLHSReg());
  unsigned Size = OpLLT.getSizeInBits();
  // Only 32/64/128-bit operands are supported, and both sides must have the
  // same type.
  if ((Size != 32 && Size != 64 && Size != 128) ||
      OpLLT != MRI.getType(Cmp->getRHSReg()))
    return UnableToLegalize;

  Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);

  // DstReg type is s32
  const Register DstReg = Cmp->getReg(0);
  LLT DstTy = MRI.getType(DstReg);
  const auto Cond = Cmp->getCond();

  // Reference:
  // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
  // Generates a libcall followed by ICMP.
  const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
                                const CmpInst::Predicate ICmpPred,
                                const DstOp &Res) -> Register {
    // FCMP libcall always returns an i32, and needs an ICMP with #0.
    constexpr LLT TempLLT = LLT::scalar(32);
    Register Temp = MRI.createGenericVirtualRegister(TempLLT);
    // Generate libcall, holding result in Temp
    const auto Status = createLibcall(
        MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
        {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
        LocObserver, &MI);
    if (!Status)
      return {};

    // Compare temp with #0 to get the final result.
    return MIRBuilder
        .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
        .getReg(0);
  };

  // Simple case if we have a direct mapping from predicate to libcall
  if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
      Libcall != RTLIB::UNKNOWN_LIBCALL &&
      ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
    if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
      return Legalized;
    }
    return UnableToLegalize;
  }

  // No direct mapping found, should be generated as combination of libcalls.

  switch (Cond) {
  case CmpInst::FCMP_UEQ: {
    // FCMP_UEQ: unordered or equal
    // Convert into (FCMP_OEQ || FCMP_UNO).

    const auto [OeqLibcall, OeqPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);

    const auto [UnoLibcall, UnoPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
    const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
    if (Oeq && Uno)
      MIRBuilder.buildOr(DstReg, Oeq, Uno);
    else
      return UnableToLegalize;

    break;
  }
  case CmpInst::FCMP_ONE: {
    // FCMP_ONE: ordered and operands are unequal
    // Convert into (!FCMP_OEQ && !FCMP_UNO).

    // We inverse the predicate instead of generating a NOT
    // to save one instruction.
    // On AArch64 isel can even select two cmp into a single ccmp.
    const auto [OeqLibcall, OeqPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
    const auto NotOeq =
        BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);

    const auto [UnoLibcall, UnoPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
    const auto NotUno =
        BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);

    if (NotOeq && NotUno)
      MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
    else
      return UnableToLegalize;

    break;
  }
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_ORD: {
    // Convert into: !(inverse(Pred))
    // E.g. FCMP_ULT becomes !FCMP_OGE
    // This is equivalent to the following, but saves some instructions.
    //   MIRBuilder.buildNot(
    //       PredTy,
    //       MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
    //                            Op1, Op2));
    const auto [InversedLibcall, InversedPred] =
        getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
    if (!BuildLibcall(InversedLibcall,
                      CmpInst::getInversePredicate(InversedPred), DstReg))
      return UnableToLegalize;
    break;
  }
  default:
    return UnableToLegalize;
  }

  return Legalized;
}
1174
// The function is used to legalize operations that set default environment
// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
// On most targets supported in glibc FE_DFL_MODE is defined as
// `((const femode_t *) -1)`. Such assumption is used here. If for some target
// it is not true, the target must provide custom lowering.
LegalizerHelper::LegalizeResult
LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
                                         MachineInstr &MI,
                                         LostDebugLocObserver &LocObserver) {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  auto &MF = MIRBuilder.getMF();
  auto &Ctx = MF.getFunction().getContext();

  // Create an argument for the library function.
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
  unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
  LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
  // Materialize the all-ones pointer (the assumed FE_DFL_MODE/FE_DFL_ENV
  // value) as an integer constant converted to a pointer.
  auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
  DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
  MIRBuilder.buildIntToPtr(Dest, DefValue);

  RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
  return createLibcall(MIRBuilder, RTLibcall,
                       CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
                       CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
                       LocObserver, &MI);
}
1203
Tim Northover69fa84a2016-10-14 22:18:18 +00001204LegalizerHelper::LegalizeResult
Jessica Paquette324af792021-05-25 16:54:20 -07001205LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
Matthias Braunf1caa282017-12-15 22:22:58 +00001206 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001207
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001208 switch (MI.getOpcode()) {
1209 default:
1210 return UnableToLegalize;
Kai Nackeb3837532022-08-02 13:12:38 -04001211 case TargetOpcode::G_MUL:
Diana Picuse97822e2017-04-24 07:22:31 +00001212 case TargetOpcode::G_SDIV:
Diana Picus02e11012017-06-15 10:53:31 +00001213 case TargetOpcode::G_UDIV:
1214 case TargetOpcode::G_SREM:
Diana Picus0528e2c2018-11-26 11:07:02 +00001215 case TargetOpcode::G_UREM:
1216 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
Serge Pavlov462d5832023-10-09 21:13:39 +07001217 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1218 unsigned Size = LLTy.getSizeInBits();
Petar Avramovic0a5e4eb2018-12-18 15:59:51 +00001219 Type *HLTy = IntegerType::get(Ctx, Size);
David Greend659bd12024-01-03 07:59:36 +00001220 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
Diana Picusfc1675e2017-07-05 12:57:24 +00001221 if (Status != Legalized)
1222 return Status;
1223 break;
Diana Picuse97822e2017-04-24 07:22:31 +00001224 }
Diana Picus1314a282017-04-11 10:52:34 +00001225 case TargetOpcode::G_FADD:
Javed Absar5cde1cc2017-10-30 13:51:56 +00001226 case TargetOpcode::G_FSUB:
Diana Picus9faa09b2017-11-23 12:44:20 +00001227 case TargetOpcode::G_FMUL:
Diana Picusc01f7f12017-11-23 13:26:07 +00001228 case TargetOpcode::G_FDIV:
Diana Picuse74243d2018-01-12 11:30:45 +00001229 case TargetOpcode::G_FMA:
Tim Northovere0418412017-02-08 23:23:39 +00001230 case TargetOpcode::G_FPOW:
Jessica Paquette7db82d72019-01-28 18:34:18 +00001231 case TargetOpcode::G_FREM:
1232 case TargetOpcode::G_FCOS:
Jessica Paquettec49428a2019-01-28 19:53:14 +00001233 case TargetOpcode::G_FSIN:
Farzon Lotfi1d874332024-06-05 15:01:33 -04001234 case TargetOpcode::G_FTAN:
Farzon Lotfi0b58f342024-07-11 15:58:43 -04001235 case TargetOpcode::G_FACOS:
1236 case TargetOpcode::G_FASIN:
1237 case TargetOpcode::G_FATAN:
Tex Riddellc03d09c2024-10-24 17:53:12 -07001238 case TargetOpcode::G_FATAN2:
Farzon Lotfi0b58f342024-07-11 15:58:43 -04001239 case TargetOpcode::G_FCOSH:
1240 case TargetOpcode::G_FSINH:
1241 case TargetOpcode::G_FTANH:
Jessica Paquette2d73ecd2019-01-28 21:27:23 +00001242 case TargetOpcode::G_FLOG10:
Jessica Paquette0154bd12019-01-30 21:16:04 +00001243 case TargetOpcode::G_FLOG:
Jessica Paquette84bedac2019-01-30 23:46:15 +00001244 case TargetOpcode::G_FLOG2:
Jessica Paquettee7941212019-04-03 16:58:32 +00001245 case TargetOpcode::G_FEXP:
Petar Avramovicfaaa2b5d2019-06-06 09:02:24 +00001246 case TargetOpcode::G_FEXP2:
Matt Arsenaultb14e83d2023-08-12 07:20:00 -04001247 case TargetOpcode::G_FEXP10:
Petar Avramovicfaaa2b5d2019-06-06 09:02:24 +00001248 case TargetOpcode::G_FCEIL:
Dominik Montadafeb20a12020-03-02 16:28:17 +01001249 case TargetOpcode::G_FFLOOR:
1250 case TargetOpcode::G_FMINNUM:
1251 case TargetOpcode::G_FMAXNUM:
1252 case TargetOpcode::G_FSQRT:
1253 case TargetOpcode::G_FRINT:
Matt Arsenault0da582d2020-07-19 09:56:15 -04001254 case TargetOpcode::G_FNEARBYINT:
Craig Topperd5d14172024-09-18 12:07:44 -07001255 case TargetOpcode::G_INTRINSIC_TRUNC:
1256 case TargetOpcode::G_INTRINSIC_ROUND:
Matt Arsenault0da582d2020-07-19 09:56:15 -04001257 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
Serge Pavlov462d5832023-10-09 21:13:39 +07001258 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1259 unsigned Size = LLTy.getSizeInBits();
Konstantin Schwarz76986bd2020-02-06 10:01:57 -08001260 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
Matt Arsenault0da582d2020-07-19 09:56:15 -04001261 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1262 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
Jessica Paquette7db82d72019-01-28 18:34:18 +00001263 return UnableToLegalize;
1264 }
David Greend659bd12024-01-03 07:59:36 +00001265 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
Diana Picusfc1675e2017-07-05 12:57:24 +00001266 if (Status != Legalized)
1267 return Status;
1268 break;
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001269 }
David Green8d49ce12024-04-17 18:38:24 +01001270 case TargetOpcode::G_INTRINSIC_LRINT:
1271 case TargetOpcode::G_INTRINSIC_LLRINT: {
David Green28d28d52024-04-15 09:41:08 +01001272 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1273 unsigned Size = LLTy.getSizeInBits();
1274 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1275 Type *ITy = IntegerType::get(
1276 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1277 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1278 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1279 return UnableToLegalize;
1280 }
1281 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1282 LegalizeResult Status =
1283 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1284 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1285 if (Status != Legalized)
1286 return Status;
1287 MI.eraseFromParent();
1288 return Legalized;
1289 }
Craig Toppera15400d2024-12-02 13:30:46 -08001290 case TargetOpcode::G_FPOWI:
1291 case TargetOpcode::G_FLDEXP: {
David Green5550e9c2024-01-04 07:26:23 +00001292 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1293 unsigned Size = LLTy.getSizeInBits();
1294 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1295 Type *ITy = IntegerType::get(
1296 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1297 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1298 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1299 return UnableToLegalize;
1300 }
1301 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
Craig Topperbee33b52024-12-02 09:06:38 -08001302 SmallVector<CallLowering::ArgInfo, 2> Args = {
David Green5550e9c2024-01-04 07:26:23 +00001303 {MI.getOperand(1).getReg(), HLTy, 0},
1304 {MI.getOperand(2).getReg(), ITy, 1}};
Craig Topperbee33b52024-12-02 09:06:38 -08001305 Args[1].Flags[0].setSExt();
David Green5550e9c2024-01-04 07:26:23 +00001306 LegalizeResult Status =
1307 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1308 Args, LocObserver, &MI);
1309 if (Status != Legalized)
1310 return Status;
1311 break;
1312 }
Konstantin Schwarz76986bd2020-02-06 10:01:57 -08001313 case TargetOpcode::G_FPEXT:
Diana Picus65ed3642018-01-17 13:34:10 +00001314 case TargetOpcode::G_FPTRUNC: {
Konstantin Schwarz76986bd2020-02-06 10:01:57 -08001315 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1316 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1317 if (!FromTy || !ToTy)
Diana Picus65ed3642018-01-17 13:34:10 +00001318 return UnableToLegalize;
David Greend659bd12024-01-03 07:59:36 +00001319 LegalizeResult Status =
Craig Topper4cf2cf12024-12-04 10:42:49 -08001320 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
Diana Picus65ed3642018-01-17 13:34:10 +00001321 if (Status != Legalized)
1322 return Status;
1323 break;
1324 }
Him188ba461f82024-07-25 11:07:31 +01001325 case TargetOpcode::G_FCMP: {
1326 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1327 if (Status != Legalized)
1328 return Status;
1329 MI.eraseFromParent();
1330 return Status;
1331 }
Diana Picus4ed0ee72018-01-30 07:54:52 +00001332 case TargetOpcode::G_FPTOSI:
1333 case TargetOpcode::G_FPTOUI: {
1334 // FIXME: Support other types
David Greene8876242024-06-21 10:24:57 +01001335 Type *FromTy =
1336 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
Diana Picus4ed0ee72018-01-30 07:54:52 +00001337 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
David Greene8876242024-06-21 10:24:57 +01001338 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
Diana Picus4ed0ee72018-01-30 07:54:52 +00001339 return UnableToLegalize;
1340 LegalizeResult Status = conversionLibcall(
Craig Topper4cf2cf12024-12-04 10:42:49 -08001341 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
Diana Picus4ed0ee72018-01-30 07:54:52 +00001342 if (Status != Legalized)
1343 return Status;
1344 break;
1345 }
Diana Picus517531e2018-01-30 09:15:17 +00001346 case TargetOpcode::G_SITOFP:
1347 case TargetOpcode::G_UITOFP: {
Diana Picus517531e2018-01-30 09:15:17 +00001348 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
Him188365f5b42024-07-15 16:24:24 +01001349 Type *ToTy =
1350 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1351 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
Diana Picus517531e2018-01-30 09:15:17 +00001352 return UnableToLegalize;
Craig Topper4cf2cf12024-12-04 10:42:49 -08001353 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1354 LegalizeResult Status =
1355 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1356 LocObserver, TLI, IsSigned);
Diana Picus517531e2018-01-30 09:15:17 +00001357 if (Status != Legalized)
1358 return Status;
1359 break;
1360 }
Thomas Preud'hommece61b0e2024-01-04 10:15:16 +00001361 case TargetOpcode::G_ATOMICRMW_XCHG:
1362 case TargetOpcode::G_ATOMICRMW_ADD:
1363 case TargetOpcode::G_ATOMICRMW_SUB:
1364 case TargetOpcode::G_ATOMICRMW_AND:
1365 case TargetOpcode::G_ATOMICRMW_OR:
1366 case TargetOpcode::G_ATOMICRMW_XOR:
1367 case TargetOpcode::G_ATOMIC_CMPXCHG:
1368 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1369 auto Status = createAtomicLibcall(MIRBuilder, MI);
1370 if (Status != Legalized)
1371 return Status;
1372 break;
1373 }
Jessica Paquette23f657c2021-03-24 23:45:36 -07001374 case TargetOpcode::G_BZERO:
Matt Arsenault0b7f6cc2020-08-03 09:00:24 -04001375 case TargetOpcode::G_MEMCPY:
1376 case TargetOpcode::G_MEMMOVE:
1377 case TargetOpcode::G_MEMSET: {
Jessica Paquette23f657c2021-03-24 23:45:36 -07001378 LegalizeResult Result =
Jessica Paquette324af792021-05-25 16:54:20 -07001379 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
Jessica Paquette23f657c2021-03-24 23:45:36 -07001380 if (Result != Legalized)
1381 return Result;
Matt Arsenault0b7f6cc2020-08-03 09:00:24 -04001382 MI.eraseFromParent();
1383 return Result;
1384 }
Serge Pavlov7fc7ef12024-01-10 14:18:00 +07001385 case TargetOpcode::G_GET_FPENV:
Serge Pavlov462d5832023-10-09 21:13:39 +07001386 case TargetOpcode::G_GET_FPMODE: {
David Greend659bd12024-01-03 07:59:36 +00001387 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
Serge Pavlov462d5832023-10-09 21:13:39 +07001388 if (Result != Legalized)
1389 return Result;
1390 break;
1391 }
Serge Pavlov7fc7ef12024-01-10 14:18:00 +07001392 case TargetOpcode::G_SET_FPENV:
Serge Pavlov462d5832023-10-09 21:13:39 +07001393 case TargetOpcode::G_SET_FPMODE: {
David Greend659bd12024-01-03 07:59:36 +00001394 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
Serge Pavlov462d5832023-10-09 21:13:39 +07001395 if (Result != Legalized)
1396 return Result;
1397 break;
1398 }
Serge Pavlov7fc7ef12024-01-10 14:18:00 +07001399 case TargetOpcode::G_RESET_FPENV:
Serge Pavlov462d5832023-10-09 21:13:39 +07001400 case TargetOpcode::G_RESET_FPMODE: {
David Greend659bd12024-01-03 07:59:36 +00001401 LegalizeResult Result =
1402 createResetStateLibcall(MIRBuilder, MI, LocObserver);
Serge Pavlov462d5832023-10-09 21:13:39 +07001403 if (Result != Legalized)
1404 return Result;
1405 break;
1406 }
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001407 }
Diana Picusfc1675e2017-07-05 12:57:24 +00001408
1409 MI.eraseFromParent();
1410 return Legalized;
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001411}
1412
Tim Northover69fa84a2016-10-14 22:18:18 +00001413LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1414 unsigned TypeIdx,
1415 LLT NarrowTy) {
Daniel Sanders27fe8a52018-04-27 19:48:53 +00001416 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1417 uint64_t NarrowSize = NarrowTy.getSizeInBits();
Kristof Beylsaf9814a2017-11-07 10:34:34 +00001418
Tim Northover9656f142016-08-04 20:54:13 +00001419 switch (MI.getOpcode()) {
1420 default:
1421 return UnableToLegalize;
Tim Northoverff5e7e12017-06-30 20:27:36 +00001422 case TargetOpcode::G_IMPLICIT_DEF: {
Dominik Montada35950fe2020-03-23 12:30:55 +01001423 Register DstReg = MI.getOperand(0).getReg();
1424 LLT DstTy = MRI.getType(DstReg);
1425
1426 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1427 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1428 // FIXME: Although this would also be legal for the general case, it causes
1429 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1430 // combines not being hit). This seems to be a problem related to the
1431 // artifact combiner.
1432 if (SizeOp0 % NarrowSize != 0) {
1433 LLT ImplicitTy = NarrowTy;
1434 if (DstTy.isVector())
Sander de Smalend5e14ba2021-06-24 09:58:21 +01001435 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
Dominik Montada35950fe2020-03-23 12:30:55 +01001436
1437 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1438 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1439
1440 MI.eraseFromParent();
1441 return Legalized;
1442 }
1443
Kristof Beylsaf9814a2017-11-07 10:34:34 +00001444 int NumParts = SizeOp0 / NarrowSize;
Tim Northoverff5e7e12017-06-30 20:27:36 +00001445
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00001446 SmallVector<Register, 2> DstRegs;
Volkan Keles02bb1742018-02-14 19:58:36 +00001447 for (int i = 0; i < NumParts; ++i)
Dominik Montada35950fe2020-03-23 12:30:55 +01001448 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
Amara Emerson5ec146042018-12-10 18:44:58 +00001449
Dominik Montada35950fe2020-03-23 12:30:55 +01001450 if (DstTy.isVector())
Amara Emerson5ec146042018-12-10 18:44:58 +00001451 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1452 else
Diana Picusf95a5fb2023-01-09 11:59:00 +01001453 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
Tim Northoverff5e7e12017-06-30 20:27:36 +00001454 MI.eraseFromParent();
1455 return Legalized;
1456 }
Matt Arsenault71872722019-04-10 17:27:53 +00001457 case TargetOpcode::G_CONSTANT: {
1458 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1459 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1460 unsigned TotalSize = Ty.getSizeInBits();
1461 unsigned NarrowSize = NarrowTy.getSizeInBits();
1462 int NumParts = TotalSize / NarrowSize;
1463
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00001464 SmallVector<Register, 4> PartRegs;
Matt Arsenault71872722019-04-10 17:27:53 +00001465 for (int I = 0; I != NumParts; ++I) {
1466 unsigned Offset = I * NarrowSize;
1467 auto K = MIRBuilder.buildConstant(NarrowTy,
1468 Val.lshr(Offset).trunc(NarrowSize));
1469 PartRegs.push_back(K.getReg(0));
1470 }
1471
1472 LLT LeftoverTy;
1473 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00001474 SmallVector<Register, 1> LeftoverRegs;
Matt Arsenault71872722019-04-10 17:27:53 +00001475 if (LeftoverBits != 0) {
1476 LeftoverTy = LLT::scalar(LeftoverBits);
1477 auto K = MIRBuilder.buildConstant(
1478 LeftoverTy,
1479 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1480 LeftoverRegs.push_back(K.getReg(0));
1481 }
1482
1483 insertParts(MI.getOperand(0).getReg(),
1484 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1485
1486 MI.eraseFromParent();
1487 return Legalized;
1488 }
Matt Arsenault25e99382020-01-10 10:07:24 -05001489 case TargetOpcode::G_SEXT:
Matt Arsenault917156172020-01-10 09:47:17 -05001490 case TargetOpcode::G_ZEXT:
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05001491 case TargetOpcode::G_ANYEXT:
1492 return narrowScalarExt(MI, TypeIdx, NarrowTy);
Petar Avramovic5b4c5c22019-08-21 09:26:39 +00001493 case TargetOpcode::G_TRUNC: {
1494 if (TypeIdx != 1)
1495 return UnableToLegalize;
1496
1497 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1498 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1499 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1500 return UnableToLegalize;
1501 }
1502
Jay Foad63f73542020-01-16 12:37:00 +00001503 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1504 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
Petar Avramovic5b4c5c22019-08-21 09:26:39 +00001505 MI.eraseFromParent();
1506 return Legalized;
1507 }
Yingwei Zheng821bcba2024-05-22 23:35:37 +08001508 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
Petar Avramovic29f88b92021-12-23 14:09:51 +01001509 case TargetOpcode::G_FREEZE: {
1510 if (TypeIdx != 0)
1511 return UnableToLegalize;
1512
1513 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1514 // Should widen scalar first
1515 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1516 return UnableToLegalize;
1517
1518 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1519 SmallVector<Register, 8> Parts;
1520 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1521 Parts.push_back(
Yingwei Zheng821bcba2024-05-22 23:35:37 +08001522 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1523 .getReg(0));
Petar Avramovic29f88b92021-12-23 14:09:51 +01001524 }
1525
Diana Picusf95a5fb2023-01-09 11:59:00 +01001526 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
Petar Avramovic29f88b92021-12-23 14:09:51 +01001527 MI.eraseFromParent();
1528 return Legalized;
1529 }
Justin Bogner62ce4b02021-02-02 17:02:52 -08001530 case TargetOpcode::G_ADD:
Cassie Jones362463882021-02-14 14:37:55 -05001531 case TargetOpcode::G_SUB:
Cassie Jonese1532642021-02-22 17:11:23 -05001532 case TargetOpcode::G_SADDO:
1533 case TargetOpcode::G_SSUBO:
Cassie Jones8f956a52021-02-22 17:11:35 -05001534 case TargetOpcode::G_SADDE:
1535 case TargetOpcode::G_SSUBE:
Cassie Jonesc63b33b2021-02-22 17:10:58 -05001536 case TargetOpcode::G_UADDO:
1537 case TargetOpcode::G_USUBO:
Cassie Jones8f956a52021-02-22 17:11:35 -05001538 case TargetOpcode::G_UADDE:
1539 case TargetOpcode::G_USUBE:
Cassie Jones362463882021-02-14 14:37:55 -05001540 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
Matt Arsenault211e89d2019-01-27 00:52:51 +00001541 case TargetOpcode::G_MUL:
Petar Avramovic5229f472019-03-11 10:08:44 +00001542 case TargetOpcode::G_UMULH:
Petar Avramovic0b17e592019-03-11 10:00:17 +00001543 return narrowScalarMul(MI, NarrowTy);
Matt Arsenault1cf713662019-02-12 14:54:52 +00001544 case TargetOpcode::G_EXTRACT:
1545 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1546 case TargetOpcode::G_INSERT:
1547 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
Justin Bognerd09c3ce2017-01-19 01:05:48 +00001548 case TargetOpcode::G_LOAD: {
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001549 auto &LoadMI = cast<GLoad>(MI);
1550 Register DstReg = LoadMI.getDstReg();
Matt Arsenault18619af2019-01-29 18:13:02 +00001551 LLT DstTy = MRI.getType(DstReg);
Matt Arsenault7f09fd62019-02-05 00:26:12 +00001552 if (DstTy.isVector())
Matt Arsenault045bc9a2019-01-30 02:35:38 +00001553 return UnableToLegalize;
Matt Arsenault18619af2019-01-29 18:13:02 +00001554
David Green601e1022024-03-17 18:15:56 +00001555 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001556 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001557 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
Matt Arsenault18619af2019-01-29 18:13:02 +00001558 MIRBuilder.buildAnyExt(DstReg, TmpReg);
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001559 LoadMI.eraseFromParent();
Matt Arsenault18619af2019-01-29 18:13:02 +00001560 return Legalized;
1561 }
1562
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001563 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
Justin Bognerd09c3ce2017-01-19 01:05:48 +00001564 }
Matt Arsenault6614f852019-01-22 19:02:10 +00001565 case TargetOpcode::G_ZEXTLOAD:
1566 case TargetOpcode::G_SEXTLOAD: {
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001567 auto &LoadMI = cast<GExtLoad>(MI);
1568 Register DstReg = LoadMI.getDstReg();
1569 Register PtrReg = LoadMI.getPointerReg();
Matt Arsenault6614f852019-01-22 19:02:10 +00001570
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001571 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001572 auto &MMO = LoadMI.getMMO();
David Green601e1022024-03-17 18:15:56 +00001573 unsigned MemSize = MMO.getSizeInBits().getValue();
Matt Arsenault2cbbc6e2021-01-05 23:25:18 -05001574
1575 if (MemSize == NarrowSize) {
Matt Arsenault6614f852019-01-22 19:02:10 +00001576 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
Matt Arsenault2cbbc6e2021-01-05 23:25:18 -05001577 } else if (MemSize < NarrowSize) {
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001578 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
Matt Arsenault2cbbc6e2021-01-05 23:25:18 -05001579 } else if (MemSize > NarrowSize) {
1580 // FIXME: Need to split the load.
1581 return UnableToLegalize;
Matt Arsenault6614f852019-01-22 19:02:10 +00001582 }
1583
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001584 if (isa<GZExtLoad>(LoadMI))
Matt Arsenault6614f852019-01-22 19:02:10 +00001585 MIRBuilder.buildZExt(DstReg, TmpReg);
1586 else
1587 MIRBuilder.buildSExt(DstReg, TmpReg);
1588
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001589 LoadMI.eraseFromParent();
Matt Arsenault6614f852019-01-22 19:02:10 +00001590 return Legalized;
1591 }
Justin Bognerfde01042017-01-18 17:29:54 +00001592 case TargetOpcode::G_STORE: {
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001593 auto &StoreMI = cast<GStore>(MI);
Matt Arsenault18619af2019-01-29 18:13:02 +00001594
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001595 Register SrcReg = StoreMI.getValueReg();
Matt Arsenault18619af2019-01-29 18:13:02 +00001596 LLT SrcTy = MRI.getType(SrcReg);
Matt Arsenault7f09fd62019-02-05 00:26:12 +00001597 if (SrcTy.isVector())
1598 return UnableToLegalize;
1599
1600 int NumParts = SizeOp0 / NarrowSize;
1601 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1602 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1603 if (SrcTy.isVector() && LeftoverBits != 0)
1604 return UnableToLegalize;
Matt Arsenault18619af2019-01-29 18:13:02 +00001605
David Green601e1022024-03-17 18:15:56 +00001606 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001607 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
Matt Arsenault18619af2019-01-29 18:13:02 +00001608 MIRBuilder.buildTrunc(TmpReg, SrcReg);
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001609 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1610 StoreMI.eraseFromParent();
Matt Arsenault18619af2019-01-29 18:13:02 +00001611 return Legalized;
1612 }
1613
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001614 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
Justin Bognerfde01042017-01-18 17:29:54 +00001615 }
Matt Arsenault81511e52019-02-05 00:13:44 +00001616 case TargetOpcode::G_SELECT:
1617 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
Petar Avramovic150fd432018-12-18 11:36:14 +00001618 case TargetOpcode::G_AND:
1619 case TargetOpcode::G_OR:
1620 case TargetOpcode::G_XOR: {
Quentin Colombetc2f3cea2017-10-03 04:53:56 +00001621 // Legalize bitwise operation:
1622 // A = BinOp<Ty> B, C
1623 // into:
1624 // B1, ..., BN = G_UNMERGE_VALUES B
1625 // C1, ..., CN = G_UNMERGE_VALUES C
1626 // A1 = BinOp<Ty/N> B1, C2
1627 // ...
1628 // AN = BinOp<Ty/N> BN, CN
1629 // A = G_MERGE_VALUES A1, ..., AN
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00001630 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
Quentin Colombetc2f3cea2017-10-03 04:53:56 +00001631 }
Matt Arsenault30989e42019-01-22 21:42:11 +00001632 case TargetOpcode::G_SHL:
1633 case TargetOpcode::G_LSHR:
Matt Arsenaultfbec8fe2019-02-07 19:37:44 +00001634 case TargetOpcode::G_ASHR:
1635 return narrowScalarShift(MI, TypeIdx, NarrowTy);
Matt Arsenaultd5684f72019-01-31 02:09:57 +00001636 case TargetOpcode::G_CTLZ:
1637 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1638 case TargetOpcode::G_CTTZ:
1639 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1640 case TargetOpcode::G_CTPOP:
Petar Avramovic2b66d322020-01-27 09:43:38 +01001641 if (TypeIdx == 1)
1642 switch (MI.getOpcode()) {
1643 case TargetOpcode::G_CTLZ:
Matt Arsenault312a9d12020-02-07 12:24:15 -05001644 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
Petar Avramovic2b66d322020-01-27 09:43:38 +01001645 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
Petar Avramovic8bc7ba52020-01-27 09:51:06 +01001646 case TargetOpcode::G_CTTZ:
Matt Arsenault312a9d12020-02-07 12:24:15 -05001647 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
Petar Avramovic8bc7ba52020-01-27 09:51:06 +01001648 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01001649 case TargetOpcode::G_CTPOP:
1650 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
Petar Avramovic2b66d322020-01-27 09:43:38 +01001651 default:
1652 return UnableToLegalize;
1653 }
Matt Arsenaultd5684f72019-01-31 02:09:57 +00001654
1655 Observer.changingInstr(MI);
1656 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1657 Observer.changedInstr(MI);
1658 return Legalized;
Matt Arsenaultcbaada62019-02-02 23:29:55 +00001659 case TargetOpcode::G_INTTOPTR:
1660 if (TypeIdx != 1)
1661 return UnableToLegalize;
1662
1663 Observer.changingInstr(MI);
1664 narrowScalarSrc(MI, NarrowTy, 1);
1665 Observer.changedInstr(MI);
1666 return Legalized;
1667 case TargetOpcode::G_PTRTOINT:
1668 if (TypeIdx != 0)
1669 return UnableToLegalize;
1670
1671 Observer.changingInstr(MI);
1672 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1673 Observer.changedInstr(MI);
1674 return Legalized;
Petar Avramovicbe20e362019-07-09 14:36:17 +00001675 case TargetOpcode::G_PHI: {
Nikita Popovc35761d2021-03-01 21:37:26 +01001676 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1677 // NarrowSize.
1678 if (SizeOp0 % NarrowSize != 0)
1679 return UnableToLegalize;
1680
Petar Avramovicbe20e362019-07-09 14:36:17 +00001681 unsigned NumParts = SizeOp0 / NarrowSize;
Matt Arsenaultde8451f2020-02-04 10:34:22 -05001682 SmallVector<Register, 2> DstRegs(NumParts);
1683 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
Petar Avramovicbe20e362019-07-09 14:36:17 +00001684 Observer.changingInstr(MI);
1685 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1686 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
Amara Emerson53445f52022-11-13 01:43:04 -08001687 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
Petar Avramovicbe20e362019-07-09 14:36:17 +00001688 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
chuongg3fcfe1b62024-01-15 16:40:39 +00001689 SrcRegs[i / 2], MIRBuilder, MRI);
Petar Avramovicbe20e362019-07-09 14:36:17 +00001690 }
1691 MachineBasicBlock &MBB = *MI.getParent();
1692 MIRBuilder.setInsertPt(MBB, MI);
1693 for (unsigned i = 0; i < NumParts; ++i) {
1694 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1695 MachineInstrBuilder MIB =
1696 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1697 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1698 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1699 }
Amara Emerson02bcc862019-09-13 21:49:24 +00001700 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
Diana Picusf95a5fb2023-01-09 11:59:00 +01001701 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
Petar Avramovicbe20e362019-07-09 14:36:17 +00001702 Observer.changedInstr(MI);
1703 MI.eraseFromParent();
1704 return Legalized;
1705 }
Matt Arsenault434d6642019-07-15 19:37:34 +00001706 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1707 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1708 if (TypeIdx != 2)
1709 return UnableToLegalize;
1710
1711 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1712 Observer.changingInstr(MI);
1713 narrowScalarSrc(MI, NarrowTy, OpIdx);
1714 Observer.changedInstr(MI);
1715 return Legalized;
1716 }
Petar Avramovic1e626352019-07-17 12:08:01 +00001717 case TargetOpcode::G_ICMP: {
Jessica Paquette47d07802021-06-29 17:01:28 -07001718 Register LHS = MI.getOperand(2).getReg();
1719 LLT SrcTy = MRI.getType(LHS);
Petar Avramovic1e626352019-07-17 12:08:01 +00001720 CmpInst::Predicate Pred =
1721 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1722
Jessica Paquette47d07802021-06-29 17:01:28 -07001723 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1724 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1725 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
chuongg3fcfe1b62024-01-15 16:40:39 +00001726 LHSLeftoverRegs, MIRBuilder, MRI))
Jessica Paquette47d07802021-06-29 17:01:28 -07001727 return UnableToLegalize;
1728
1729 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1730 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1731 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
chuongg3fcfe1b62024-01-15 16:40:39 +00001732 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
Jessica Paquette47d07802021-06-29 17:01:28 -07001733 return UnableToLegalize;
1734
1735 // We now have the LHS and RHS of the compare split into narrow-type
1736 // registers, plus potentially some leftover type.
1737 Register Dst = MI.getOperand(0).getReg();
1738 LLT ResTy = MRI.getType(Dst);
1739 if (ICmpInst::isEquality(Pred)) {
1740 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1741 // them together. For each equal part, the result should be all 0s. For
1742 // each non-equal part, we'll get at least one 1.
1743 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1744 SmallVector<Register, 4> Xors;
1745 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1746 auto LHS = std::get<0>(LHSAndRHS);
1747 auto RHS = std::get<1>(LHSAndRHS);
1748 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1749 Xors.push_back(Xor);
1750 }
1751
1752 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1753 // to the desired narrow type so that we can OR them together later.
1754 SmallVector<Register, 4> WidenedXors;
1755 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1756 auto LHS = std::get<0>(LHSAndRHS);
1757 auto RHS = std::get<1>(LHSAndRHS);
1758 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1759 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1760 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1761 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1762 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1763 }
1764
1765 // Now, for each part we broke up, we know if they are equal/not equal
1766 // based off the G_XOR. We can OR these all together and compare against
1767 // 0 to get the result.
1768 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1769 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1770 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1771 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1772 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
Petar Avramovic1e626352019-07-17 12:08:01 +00001773 } else {
Craig Topper7ece5602024-12-12 09:50:26 -08001774 Register CmpIn;
1775 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1776 Register CmpOut;
1777 CmpInst::Predicate PartPred;
1778
1779 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1780 PartPred = Pred;
1781 CmpOut = Dst;
1782 } else {
1783 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1784 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1785 }
1786
1787 if (!CmpIn) {
1788 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1789 RHSPartRegs[I]);
1790 } else {
1791 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1792 RHSPartRegs[I]);
1793 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1794 LHSPartRegs[I], RHSPartRegs[I]);
1795 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1796 }
1797
1798 CmpIn = CmpOut;
1799 }
1800
1801 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1802 Register CmpOut;
1803 CmpInst::Predicate PartPred;
1804
1805 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1806 PartPred = Pred;
1807 CmpOut = Dst;
1808 } else {
1809 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1810 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1811 }
1812
1813 if (!CmpIn) {
1814 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1815 RHSLeftoverRegs[I]);
1816 } else {
1817 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1818 RHSLeftoverRegs[I]);
1819 auto CmpEq =
1820 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1821 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1822 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1823 }
1824
1825 CmpIn = CmpOut;
1826 }
Petar Avramovic1e626352019-07-17 12:08:01 +00001827 }
Petar Avramovic1e626352019-07-17 12:08:01 +00001828 MI.eraseFromParent();
1829 return Legalized;
1830 }
David Greenf297d0b2024-01-28 15:42:36 +00001831 case TargetOpcode::G_FCMP:
1832 if (TypeIdx != 0)
1833 return UnableToLegalize;
1834
1835 Observer.changingInstr(MI);
1836 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1837 Observer.changedInstr(MI);
1838 return Legalized;
1839
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001840 case TargetOpcode::G_SEXT_INREG: {
1841 if (TypeIdx != 0)
1842 return UnableToLegalize;
1843
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001844 int64_t SizeInBits = MI.getOperand(2).getImm();
1845
1846 // So long as the new type has more bits than the bits we're extending we
1847 // don't need to break it apart.
Craig Topper5d501b12023-11-24 08:39:38 -08001848 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001849 Observer.changingInstr(MI);
1850 // We don't lose any non-extension bits by truncating the src and
1851 // sign-extending the dst.
1852 MachineOperand &MO1 = MI.getOperand(1);
Jay Foad63f73542020-01-16 12:37:00 +00001853 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
Jay Foadb482e1b2020-01-23 11:51:35 +00001854 MO1.setReg(TruncMIB.getReg(0));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001855
1856 MachineOperand &MO2 = MI.getOperand(0);
1857 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1858 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
Jay Foad63f73542020-01-16 12:37:00 +00001859 MIRBuilder.buildSExt(MO2, DstExt);
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001860 MO2.setReg(DstExt);
1861 Observer.changedInstr(MI);
1862 return Legalized;
1863 }
1864
1865 // Break it apart. Components below the extension point are unmodified. The
1866 // component containing the extension point becomes a narrower SEXT_INREG.
1867 // Components above it are ashr'd from the component containing the
1868 // extension point.
1869 if (SizeOp0 % NarrowSize != 0)
1870 return UnableToLegalize;
1871 int NumParts = SizeOp0 / NarrowSize;
1872
1873 // List the registers where the destination will be scattered.
1874 SmallVector<Register, 2> DstRegs;
1875 // List the registers where the source will be split.
1876 SmallVector<Register, 2> SrcRegs;
1877
1878 // Create all the temporary registers.
1879 for (int i = 0; i < NumParts; ++i) {
1880 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1881
1882 SrcRegs.push_back(SrcReg);
1883 }
1884
1885 // Explode the big arguments into smaller chunks.
Jay Foad63f73542020-01-16 12:37:00 +00001886 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001887
1888 Register AshrCstReg =
1889 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
Jay Foadb482e1b2020-01-23 11:51:35 +00001890 .getReg(0);
Craig Topper5d501b12023-11-24 08:39:38 -08001891 Register FullExtensionReg;
1892 Register PartialExtensionReg;
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001893
1894 // Do the operation on each small part.
1895 for (int i = 0; i < NumParts; ++i) {
Craig Topper5d501b12023-11-24 08:39:38 -08001896 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001897 DstRegs.push_back(SrcRegs[i]);
Craig Topper5d501b12023-11-24 08:39:38 -08001898 PartialExtensionReg = DstRegs.back();
1899 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001900 assert(PartialExtensionReg &&
1901 "Expected to visit partial extension before full");
1902 if (FullExtensionReg) {
1903 DstRegs.push_back(FullExtensionReg);
1904 continue;
1905 }
Jay Foad28bb43b2020-01-16 12:09:48 +00001906 DstRegs.push_back(
1907 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
Jay Foadb482e1b2020-01-23 11:51:35 +00001908 .getReg(0));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001909 FullExtensionReg = DstRegs.back();
1910 } else {
1911 DstRegs.push_back(
1912 MIRBuilder
1913 .buildInstr(
1914 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1915 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
Jay Foadb482e1b2020-01-23 11:51:35 +00001916 .getReg(0));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001917 PartialExtensionReg = DstRegs.back();
1918 }
1919 }
1920
1921 // Gather the destination registers into the final destination.
1922 Register DstReg = MI.getOperand(0).getReg();
Diana Picusf95a5fb2023-01-09 11:59:00 +01001923 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001924 MI.eraseFromParent();
1925 return Legalized;
1926 }
Petar Avramovic98f72a52019-12-30 18:06:29 +01001927 case TargetOpcode::G_BSWAP:
1928 case TargetOpcode::G_BITREVERSE: {
Petar Avramovic94a24e72019-12-30 11:13:22 +01001929 if (SizeOp0 % NarrowSize != 0)
1930 return UnableToLegalize;
1931
1932 Observer.changingInstr(MI);
1933 SmallVector<Register, 2> SrcRegs, DstRegs;
1934 unsigned NumParts = SizeOp0 / NarrowSize;
chuongg3fcfe1b62024-01-15 16:40:39 +00001935 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1936 MIRBuilder, MRI);
Petar Avramovic94a24e72019-12-30 11:13:22 +01001937
1938 for (unsigned i = 0; i < NumParts; ++i) {
1939 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1940 {SrcRegs[NumParts - 1 - i]});
1941 DstRegs.push_back(DstPart.getReg(0));
1942 }
1943
Diana Picusf95a5fb2023-01-09 11:59:00 +01001944 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
Petar Avramovic94a24e72019-12-30 11:13:22 +01001945
1946 Observer.changedInstr(MI);
1947 MI.eraseFromParent();
1948 return Legalized;
1949 }
Matt Arsenaultf6176f82020-07-25 11:00:35 -04001950 case TargetOpcode::G_PTR_ADD:
Matt Arsenaultef3e83122020-05-23 18:10:34 -04001951 case TargetOpcode::G_PTRMASK: {
1952 if (TypeIdx != 1)
1953 return UnableToLegalize;
1954 Observer.changingInstr(MI);
1955 narrowScalarSrc(MI, NarrowTy, 2);
1956 Observer.changedInstr(MI);
1957 return Legalized;
1958 }
Matt Arsenault83a25a12021-03-26 17:29:36 -04001959 case TargetOpcode::G_FPTOUI:
1960 case TargetOpcode::G_FPTOSI:
David Greenfeac7612024-09-16 10:33:59 +01001961 case TargetOpcode::G_FPTOUI_SAT:
1962 case TargetOpcode::G_FPTOSI_SAT:
Matt Arsenault83a25a12021-03-26 17:29:36 -04001963 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
Petar Avramovic6a1030a2020-07-20 16:12:19 +02001964 case TargetOpcode::G_FPEXT:
1965 if (TypeIdx != 0)
1966 return UnableToLegalize;
1967 Observer.changingInstr(MI);
1968 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1969 Observer.changedInstr(MI);
1970 return Legalized;
Matt Arsenaulteece6ba2023-04-26 22:02:42 -04001971 case TargetOpcode::G_FLDEXP:
1972 case TargetOpcode::G_STRICT_FLDEXP:
1973 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
Michael Maitland54a9f0e2024-03-26 20:17:22 -04001974 case TargetOpcode::G_VSCALE: {
1975 Register Dst = MI.getOperand(0).getReg();
1976 LLT Ty = MRI.getType(Dst);
1977
1978 // Assume VSCALE(1) fits into a legal integer
1979 const APInt One(NarrowTy.getSizeInBits(), 1);
1980 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1981 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1982 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1983 MIRBuilder.buildMul(Dst, ZExt, C);
1984
1985 MI.eraseFromParent();
1986 return Legalized;
1987 }
Tim Northover9656f142016-08-04 20:54:13 +00001988 }
Tim Northover33b07d62016-07-22 20:03:43 +00001989}
1990
Matt Arsenault3af85fa2020-03-29 18:04:53 -04001991Register LegalizerHelper::coerceToScalar(Register Val) {
1992 LLT Ty = MRI.getType(Val);
1993 if (Ty.isScalar())
1994 return Val;
1995
1996 const DataLayout &DL = MIRBuilder.getDataLayout();
1997 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1998 if (Ty.isPointer()) {
1999 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2000 return Register();
2001 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2002 }
2003
2004 Register NewVal = Val;
2005
2006 assert(Ty.isVector());
Jay Foadd57515bd2024-02-13 08:21:35 +00002007 if (Ty.isPointerVector())
Matt Arsenault3af85fa2020-03-29 18:04:53 -04002008 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2009 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2010}
2011
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002012void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2013 unsigned OpIdx, unsigned ExtOpcode) {
2014 MachineOperand &MO = MI.getOperand(OpIdx);
Jay Foad63f73542020-01-16 12:37:00 +00002015 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
Jay Foadb482e1b2020-01-23 11:51:35 +00002016 MO.setReg(ExtB.getReg(0));
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002017}
2018
Matt Arsenault30989e42019-01-22 21:42:11 +00002019void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2020 unsigned OpIdx) {
2021 MachineOperand &MO = MI.getOperand(OpIdx);
Jay Foad63f73542020-01-16 12:37:00 +00002022 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
Jay Foadb482e1b2020-01-23 11:51:35 +00002023 MO.setReg(ExtB.getReg(0));
Matt Arsenault30989e42019-01-22 21:42:11 +00002024}
2025
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002026void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2027 unsigned OpIdx, unsigned TruncOpcode) {
2028 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002029 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002030 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
Jay Foad63f73542020-01-16 12:37:00 +00002031 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002032 MO.setReg(DstExt);
2033}
2034
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002035void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2036 unsigned OpIdx, unsigned ExtOpcode) {
2037 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002038 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002039 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
Jay Foad63f73542020-01-16 12:37:00 +00002040 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002041 MO.setReg(DstTrunc);
2042}
2043
Matt Arsenault18ec3822019-02-11 22:00:39 +00002044void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2045 unsigned OpIdx) {
2046 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault18ec3822019-02-11 22:00:39 +00002047 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
Petar Avramovic29f88b92021-12-23 14:09:51 +01002048 Register Dst = MO.getReg();
2049 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2050 MO.setReg(DstExt);
2051 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
Matt Arsenault18ec3822019-02-11 22:00:39 +00002052}
2053
Matt Arsenault26b7e852019-02-19 16:30:19 +00002054void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2055 unsigned OpIdx) {
2056 MachineOperand &MO = MI.getOperand(OpIdx);
Petar Avramovic29f88b92021-12-23 14:09:51 +01002057 SmallVector<Register, 8> Regs;
2058 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
Matt Arsenault26b7e852019-02-19 16:30:19 +00002059}
2060
Matt Arsenault39c55ce2020-02-13 15:52:32 -05002061void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2062 MachineOperand &Op = MI.getOperand(OpIdx);
2063 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2064}
2065
2066void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2067 MachineOperand &MO = MI.getOperand(OpIdx);
2068 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2069 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2070 MIRBuilder.buildBitcast(MO, CastDst);
2071 MO.setReg(CastDst);
2072}
2073
// Widen the scalar source type (TypeIdx 1) of a merge-values instruction.
// If the requested wide type covers the whole destination, the sources are
// packed directly with zext/shl/or; otherwise the sources are re-sliced
// through the GCD of the source and wide sizes and re-merged.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
  if (DstTy.isVector())
    return UnableToLegalize;

  LLT SrcTy = MRI.getType(Src1Reg);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy pieces needed to cover the destination (rounded up).
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  // Size of each original source piece, in bits.
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    // Start from the zero-extended first source; each subsequent source is
    // zext'd, shifted into position, and OR'd in.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // The last OR can write straight to DstReg when no final trunc/cast
      // is needed.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    // Fix up the result width/kind if the wide accumulator doesn't match the
    // destination exactly.
    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = std::gcd(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    Register SrcReg = MO.getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  // Number of GCD-sized pieces per WideTy piece.
  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge =
        MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
2194
// Widen the destination type (TypeIdx 0) of G_UNMERGE_VALUES. When WideTy
// covers the whole source, the pieces are extracted directly with
// lshr+trunc; otherwise the source is any-extended to the LCM type,
// unmerged to WideTy, and the results re-sliced back to the original
// destination type (padding with dead defs where the widening over-covers).
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  // The source is the last operand; all earlier operands are defs.
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      // Non-integral pointers cannot be reinterpreted as integers.
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    // Result I lives at bit offset DstSize * I: shift it down and truncate.
    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead defs
  // to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible.
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    // Unpack everything to GCD-sized pieces, then regroup PartsPerRemerge
    // pieces per original destination.
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
2323
// Widen G_EXTRACT. For TypeIdx 0 (the scalar result) the extract is lowered
// to a shift-and-truncate of the (possibly int-cast) source. For TypeIdx 1
// the source is any-extended: scalars directly; vectors additionally scale
// the bit offset and widen the destination element type.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // Bit offset of the extracted value within the source.
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    // Shift the extracted bits down to bit 0 and truncate to the result.
    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Only element extracts from the vector are supported here.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  // The offset must be element-aligned so it can be rescaled below.
  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the bit offset to account for the wider element type.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
2397
2398LegalizerHelper::LegalizeResult
2399LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2400 LLT WideTy) {
Matt Arsenault5cbd4e42020-07-18 12:27:16 -04002401 if (TypeIdx != 0 || WideTy.isVector())
Matt Arsenault1cf713662019-02-12 14:54:52 +00002402 return UnableToLegalize;
2403 Observer.changingInstr(MI);
2404 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2405 widenScalarDst(MI, WideTy);
2406 Observer.changedInstr(MI);
2407 return Legalized;
2408}
2409
2410LegalizerHelper::LegalizeResult
Cassie Jonesf22f4552021-01-28 13:20:35 -05002411LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2412 LLT WideTy) {
Cassie Jonesf22f4552021-01-28 13:20:35 -05002413 unsigned Opcode;
2414 unsigned ExtOpcode;
Kazu Hirata3ccbfc32022-11-26 14:44:54 -08002415 std::optional<Register> CarryIn;
Cassie Jonesf22f4552021-01-28 13:20:35 -05002416 switch (MI.getOpcode()) {
2417 default:
2418 llvm_unreachable("Unexpected opcode!");
2419 case TargetOpcode::G_SADDO:
2420 Opcode = TargetOpcode::G_ADD;
2421 ExtOpcode = TargetOpcode::G_SEXT;
2422 break;
2423 case TargetOpcode::G_SSUBO:
2424 Opcode = TargetOpcode::G_SUB;
2425 ExtOpcode = TargetOpcode::G_SEXT;
2426 break;
2427 case TargetOpcode::G_UADDO:
2428 Opcode = TargetOpcode::G_ADD;
2429 ExtOpcode = TargetOpcode::G_ZEXT;
2430 break;
2431 case TargetOpcode::G_USUBO:
2432 Opcode = TargetOpcode::G_SUB;
2433 ExtOpcode = TargetOpcode::G_ZEXT;
2434 break;
2435 case TargetOpcode::G_SADDE:
2436 Opcode = TargetOpcode::G_UADDE;
2437 ExtOpcode = TargetOpcode::G_SEXT;
2438 CarryIn = MI.getOperand(4).getReg();
2439 break;
2440 case TargetOpcode::G_SSUBE:
2441 Opcode = TargetOpcode::G_USUBE;
2442 ExtOpcode = TargetOpcode::G_SEXT;
2443 CarryIn = MI.getOperand(4).getReg();
2444 break;
2445 case TargetOpcode::G_UADDE:
2446 Opcode = TargetOpcode::G_UADDE;
2447 ExtOpcode = TargetOpcode::G_ZEXT;
2448 CarryIn = MI.getOperand(4).getReg();
2449 break;
2450 case TargetOpcode::G_USUBE:
2451 Opcode = TargetOpcode::G_USUBE;
2452 ExtOpcode = TargetOpcode::G_ZEXT;
2453 CarryIn = MI.getOperand(4).getReg();
2454 break;
2455 }
2456
Matt Arsenault0e489922022-04-12 11:49:22 -04002457 if (TypeIdx == 1) {
2458 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2459
2460 Observer.changingInstr(MI);
Matt Arsenault0e489922022-04-12 11:49:22 -04002461 if (CarryIn)
2462 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
Tomas Matheson9a390d62022-08-23 17:01:53 +01002463 widenScalarDst(MI, WideTy, 1);
Matt Arsenault0e489922022-04-12 11:49:22 -04002464
2465 Observer.changedInstr(MI);
2466 return Legalized;
2467 }
2468
Mitch Phillipsc9466ed2021-01-22 14:25:31 -08002469 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2470 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2471 // Do the arithmetic in the larger type.
Cassie Jonesf22f4552021-01-28 13:20:35 -05002472 Register NewOp;
2473 if (CarryIn) {
2474 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2475 NewOp = MIRBuilder
2476 .buildInstr(Opcode, {WideTy, CarryOutTy},
2477 {LHSExt, RHSExt, *CarryIn})
2478 .getReg(0);
2479 } else {
2480 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2481 }
Mitch Phillipsc9466ed2021-01-22 14:25:31 -08002482 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2483 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2484 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2485 // There is no overflow if the ExtOp is the same as NewOp.
2486 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2487 // Now trunc the NewOp to the original result.
2488 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2489 MI.eraseFromParent();
2490 return Legalized;
2491}
2492
2493LegalizerHelper::LegalizeResult
Bevin Hansson5de6c562020-07-16 17:02:04 +02002494LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2495 LLT WideTy) {
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002496 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
Bevin Hansson5de6c562020-07-16 17:02:04 +02002497 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2498 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2499 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2500 MI.getOpcode() == TargetOpcode::G_USHLSAT;
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002501 // We can convert this to:
2502 // 1. Any extend iN to iM
2503 // 2. SHL by M-N
Bevin Hansson5de6c562020-07-16 17:02:04 +02002504 // 3. [US][ADD|SUB|SHL]SAT
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002505 // 4. L/ASHR by M-N
2506 //
2507 // It may be more efficient to lower this to a min and a max operation in
2508 // the higher precision arithmetic if the promoted operation isn't legal,
2509 // but this decision is up to the target's lowering request.
2510 Register DstReg = MI.getOperand(0).getReg();
2511
2512 unsigned NewBits = WideTy.getScalarSizeInBits();
2513 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2514
Bevin Hansson5de6c562020-07-16 17:02:04 +02002515 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2516 // must not left shift the RHS to preserve the shift amount.
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002517 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
Bevin Hansson5de6c562020-07-16 17:02:04 +02002518 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2519 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002520 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2521 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
Bevin Hansson5de6c562020-07-16 17:02:04 +02002522 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002523
2524 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2525 {ShiftL, ShiftR}, MI.getFlags());
2526
2527 // Use a shift that will preserve the number of sign bits when the trunc is
2528 // folded away.
2529 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2530 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2531
2532 MIRBuilder.buildTrunc(DstReg, Result);
2533 MI.eraseFromParent();
2534 return Legalized;
2535}
2536
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  // Widen a G_SMULO / G_UMULO (multiply with overflow flag).
  //
  // TypeIdx == 1 widens only the boolean overflow result; TypeIdx == 0 widens
  // the multiply itself by performing it in WideTy and recomputing the
  // overflow bit for the original (narrower) width.
  if (TypeIdx == 1) {
    // Only the overflow output changes type; the multiply operands and result
    // are untouched.
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  }

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  // Operands of G_[SU]MULO: (Result, Overflow) = LHS * RHS.
  auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
  LLT SrcTy = MRI.getType(LHS);
  LLT OverflowTy = MRI.getType(OriginalOverflow);
  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();

  // To determine if the result overflowed in the larger type, we extend the
  // input to the larger type, do the multiply (checking if it overflows),
  // then also check the high bits of the result to see if overflow happened
  // there.
  //
  // Signed inputs are sign-extended, unsigned inputs zero-extended, so the
  // wide product equals the mathematically exact product whenever the wide
  // multiply itself does not overflow.
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});

  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
  // so we don't need to check the overflow result of larger type Mulo.
  bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;

  // If the wide multiply can itself overflow we must use the overflow-checking
  // opcode again at the wider type; otherwise a plain G_MUL suffices.
  unsigned MulOpc =
      WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;

  MachineInstrBuilder Mulo;
  if (WideMulCanOverflow)
    // Two results: the wide product and the wide-multiply overflow flag.
    Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
                                 {LeftOperand, RightOperand});
  else
    Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});

  // The narrow result is simply the truncated wide product.
  auto Mul = Mulo->getOperand(0);
  MIRBuilder.buildTrunc(Result, Mul);

  MachineInstrBuilder ExtResult;
  // Overflow occurred if it occurred in the larger type, or if the high part
  // of the result does not zero/sign-extend the low part. Check this second
  // possibility first.
  if (IsSigned) {
    // For signed, overflow occurred when the high part does not sign-extend
    // the low part.
    ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
  } else {
    // Unsigned overflow occurred when the high part does not zero-extend the
    // low part.
    ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
  }

  if (WideMulCanOverflow) {
    // Narrow-width overflow: the wide product does not fit the narrow type.
    auto Overflow =
        MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
    // Finally check if the multiplication in the larger type itself overflowed.
    MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
  } else {
    // The wide multiply was exact, so the only possible overflow is the
    // high-bits check computed above.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
  }
  MI.eraseFromParent();
  return Legalized;
}
2603
2604LegalizerHelper::LegalizeResult
Tim Northover69fa84a2016-10-14 22:18:18 +00002605LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
David Green10fe5312024-08-18 11:08:49 +01002606 unsigned Opcode = MI.getOpcode();
2607 switch (Opcode) {
Tim Northover32335812016-08-04 18:35:11 +00002608 default:
2609 return UnableToLegalize;
Tim Northover291e0da2021-07-21 09:05:56 +01002610 case TargetOpcode::G_ATOMICRMW_XCHG:
2611 case TargetOpcode::G_ATOMICRMW_ADD:
2612 case TargetOpcode::G_ATOMICRMW_SUB:
2613 case TargetOpcode::G_ATOMICRMW_AND:
2614 case TargetOpcode::G_ATOMICRMW_OR:
2615 case TargetOpcode::G_ATOMICRMW_XOR:
2616 case TargetOpcode::G_ATOMICRMW_MIN:
2617 case TargetOpcode::G_ATOMICRMW_MAX:
2618 case TargetOpcode::G_ATOMICRMW_UMIN:
2619 case TargetOpcode::G_ATOMICRMW_UMAX:
2620 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2621 Observer.changingInstr(MI);
2622 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2623 widenScalarDst(MI, WideTy, 0);
2624 Observer.changedInstr(MI);
2625 return Legalized;
2626 case TargetOpcode::G_ATOMIC_CMPXCHG:
2627 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2628 Observer.changingInstr(MI);
2629 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2630 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2631 widenScalarDst(MI, WideTy, 0);
2632 Observer.changedInstr(MI);
2633 return Legalized;
2634 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2635 if (TypeIdx == 0) {
2636 Observer.changingInstr(MI);
2637 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2638 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2639 widenScalarDst(MI, WideTy, 0);
2640 Observer.changedInstr(MI);
2641 return Legalized;
2642 }
2643 assert(TypeIdx == 1 &&
2644 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2645 Observer.changingInstr(MI);
2646 widenScalarDst(MI, WideTy, 1);
2647 Observer.changedInstr(MI);
2648 return Legalized;
Matt Arsenault1cf713662019-02-12 14:54:52 +00002649 case TargetOpcode::G_EXTRACT:
2650 return widenScalarExtract(MI, TypeIdx, WideTy);
2651 case TargetOpcode::G_INSERT:
2652 return widenScalarInsert(MI, TypeIdx, WideTy);
Matt Arsenault888aa5d2019-02-03 00:07:33 +00002653 case TargetOpcode::G_MERGE_VALUES:
2654 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2655 case TargetOpcode::G_UNMERGE_VALUES:
2656 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
Cassie Jonesaa8f3672021-01-25 16:57:20 -05002657 case TargetOpcode::G_SADDO:
Mitch Phillipsc9466ed2021-01-22 14:25:31 -08002658 case TargetOpcode::G_SSUBO:
Aditya Nandakumar6d47a412018-08-29 03:17:08 +00002659 case TargetOpcode::G_UADDO:
Mitch Phillipsc9466ed2021-01-22 14:25:31 -08002660 case TargetOpcode::G_USUBO:
Cassie Jonesf22f4552021-01-28 13:20:35 -05002661 case TargetOpcode::G_SADDE:
2662 case TargetOpcode::G_SSUBE:
2663 case TargetOpcode::G_UADDE:
2664 case TargetOpcode::G_USUBE:
2665 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
Pushpinder Singhd0e54222021-03-09 06:10:00 +00002666 case TargetOpcode::G_UMULO:
2667 case TargetOpcode::G_SMULO:
2668 return widenScalarMulo(MI, TypeIdx, WideTy);
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002669 case TargetOpcode::G_SADDSAT:
2670 case TargetOpcode::G_SSUBSAT:
Bevin Hansson5de6c562020-07-16 17:02:04 +02002671 case TargetOpcode::G_SSHLSAT:
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002672 case TargetOpcode::G_UADDSAT:
2673 case TargetOpcode::G_USUBSAT:
Bevin Hansson5de6c562020-07-16 17:02:04 +02002674 case TargetOpcode::G_USHLSAT:
2675 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002676 case TargetOpcode::G_CTTZ:
2677 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2678 case TargetOpcode::G_CTLZ:
2679 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2680 case TargetOpcode::G_CTPOP: {
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002681 if (TypeIdx == 0) {
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002682 Observer.changingInstr(MI);
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002683 widenScalarDst(MI, WideTy, 0);
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002684 Observer.changedInstr(MI);
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002685 return Legalized;
2686 }
2687
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002688 Register SrcReg = MI.getOperand(1).getReg();
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002689
Jay Foad57b91072021-08-06 11:05:42 +01002690 // First extend the input.
David Green10fe5312024-08-18 11:08:49 +01002691 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2692 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
Jay Foad57b91072021-08-06 11:05:42 +01002693 ? TargetOpcode::G_ANYEXT
2694 : TargetOpcode::G_ZEXT;
2695 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002696 LLT CurTy = MRI.getType(SrcReg);
David Green10fe5312024-08-18 11:08:49 +01002697 unsigned NewOpc = Opcode;
Jay Foadcd2594e2021-08-04 14:37:45 +01002698 if (NewOpc == TargetOpcode::G_CTTZ) {
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002699 // The count is the same in the larger type except if the original
2700 // value was zero. This can be handled by setting the bit just off
2701 // the top of the original type.
2702 auto TopBit =
2703 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002704 MIBSrc = MIRBuilder.buildOr(
2705 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
Jay Foadcd2594e2021-08-04 14:37:45 +01002706 // Now we know the operand is non-zero, use the more relaxed opcode.
2707 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002708 }
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002709
Manish Kausik H69192e02024-07-08 18:31:32 +05302710 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2711
David Green10fe5312024-08-18 11:08:49 +01002712 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
Manish Kausik H69192e02024-07-08 18:31:32 +05302713 // An optimization where the result is the CTLZ after the left shift by
2714 // (Difference in widety and current ty), that is,
2715 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2716 // Result = ctlz MIBSrc
2717 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2718 MIRBuilder.buildConstant(WideTy, SizeDiff));
2719 }
2720
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002721 // Perform the operation at the larger size.
Jay Foadcd2594e2021-08-04 14:37:45 +01002722 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002723 // This is already the correct result for CTPOP and CTTZs
David Green10fe5312024-08-18 11:08:49 +01002724 if (Opcode == TargetOpcode::G_CTLZ) {
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002725 // The correct result is NewOp - (Difference in widety and current ty).
Jay Foad28bb43b2020-01-16 12:09:48 +00002726 MIBNewOp = MIRBuilder.buildSub(
2727 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002728 }
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002729
2730 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2731 MI.eraseFromParent();
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002732 return Legalized;
2733 }
Matt Arsenaultd1bfc8d2019-01-31 02:34:03 +00002734 case TargetOpcode::G_BSWAP: {
2735 Observer.changingInstr(MI);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002736 Register DstReg = MI.getOperand(0).getReg();
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002737
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002738 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2739 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2740 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
Matt Arsenaultd1bfc8d2019-01-31 02:34:03 +00002741 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2742
2743 MI.getOperand(0).setReg(DstExt);
2744
2745 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2746
2747 LLT Ty = MRI.getType(DstReg);
2748 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2749 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
Jay Foad28bb43b2020-01-16 12:09:48 +00002750 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
Matt Arsenaultd1bfc8d2019-01-31 02:34:03 +00002751
2752 MIRBuilder.buildTrunc(DstReg, ShrReg);
2753 Observer.changedInstr(MI);
2754 return Legalized;
2755 }
Matt Arsenault5ff310e2019-09-04 20:46:15 +00002756 case TargetOpcode::G_BITREVERSE: {
2757 Observer.changingInstr(MI);
2758
2759 Register DstReg = MI.getOperand(0).getReg();
2760 LLT Ty = MRI.getType(DstReg);
2761 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2762
2763 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2764 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2765 MI.getOperand(0).setReg(DstExt);
2766 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2767
2768 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2769 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2770 MIRBuilder.buildTrunc(DstReg, Shift);
2771 Observer.changedInstr(MI);
2772 return Legalized;
2773 }
Dominik Montada55e3a7c2020-04-14 11:25:05 +02002774 case TargetOpcode::G_FREEZE:
Yingwei Zheng821bcba2024-05-22 23:35:37 +08002775 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
Dominik Montada55e3a7c2020-04-14 11:25:05 +02002776 Observer.changingInstr(MI);
2777 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2778 widenScalarDst(MI, WideTy);
2779 Observer.changedInstr(MI);
2780 return Legalized;
2781
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02002782 case TargetOpcode::G_ABS:
2783 Observer.changingInstr(MI);
2784 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2785 widenScalarDst(MI, WideTy);
2786 Observer.changedInstr(MI);
2787 return Legalized;
2788
Tim Northover61c16142016-08-04 21:39:49 +00002789 case TargetOpcode::G_ADD:
2790 case TargetOpcode::G_AND:
2791 case TargetOpcode::G_MUL:
2792 case TargetOpcode::G_OR:
2793 case TargetOpcode::G_XOR:
Justin Bognerddb80ae2017-01-19 07:51:17 +00002794 case TargetOpcode::G_SUB:
Tuan Chuong Goh13a78fd2024-03-04 14:27:21 +00002795 case TargetOpcode::G_SHUFFLE_VECTOR:
Matt Arsenault1cf713662019-02-12 14:54:52 +00002796 // Perform operation at larger width (any extension is fine here, high bits
Tim Northover32335812016-08-04 18:35:11 +00002797 // don't affect the result) and then truncate the result back to the
2798 // original type.
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002799 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002800 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2801 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2802 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002803 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002804 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002805
Brendon Cahoonf9f5d412021-04-30 09:57:44 -04002806 case TargetOpcode::G_SBFX:
2807 case TargetOpcode::G_UBFX:
2808 Observer.changingInstr(MI);
2809
2810 if (TypeIdx == 0) {
2811 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2812 widenScalarDst(MI, WideTy);
2813 } else {
2814 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2815 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2816 }
2817
2818 Observer.changedInstr(MI);
2819 return Legalized;
2820
Roman Tereshin6d266382018-05-09 21:43:30 +00002821 case TargetOpcode::G_SHL:
Matt Arsenault012ecbb2019-05-16 04:08:46 +00002822 Observer.changingInstr(MI);
Matt Arsenault30989e42019-01-22 21:42:11 +00002823
2824 if (TypeIdx == 0) {
2825 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2826 widenScalarDst(MI, WideTy);
2827 } else {
2828 assert(TypeIdx == 1);
2829 // The "number of bits to shift" operand must preserve its value as an
2830 // unsigned integer:
2831 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2832 }
2833
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002834 Observer.changedInstr(MI);
Roman Tereshin6d266382018-05-09 21:43:30 +00002835 return Legalized;
2836
Craig Topperd605d9d2023-12-04 13:00:34 -08002837 case TargetOpcode::G_ROTR:
2838 case TargetOpcode::G_ROTL:
2839 if (TypeIdx != 1)
2840 return UnableToLegalize;
2841
2842 Observer.changingInstr(MI);
2843 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2844 Observer.changedInstr(MI);
2845 return Legalized;
2846
Tim Northover7a753d92016-08-26 17:46:06 +00002847 case TargetOpcode::G_SDIV:
Roman Tereshin27bba442018-05-09 01:43:12 +00002848 case TargetOpcode::G_SREM:
Matt Arsenault0f3ba442019-05-23 17:58:48 +00002849 case TargetOpcode::G_SMIN:
2850 case TargetOpcode::G_SMAX:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002851 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002852 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2853 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2854 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002855 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002856 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002857
Christudasan Devadasan90d78402021-04-12 15:49:47 +05302858 case TargetOpcode::G_SDIVREM:
2859 Observer.changingInstr(MI);
2860 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2861 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2862 widenScalarDst(MI, WideTy);
2863 widenScalarDst(MI, WideTy, 1);
2864 Observer.changedInstr(MI);
2865 return Legalized;
2866
Roman Tereshin6d266382018-05-09 21:43:30 +00002867 case TargetOpcode::G_ASHR:
Matt Arsenault30989e42019-01-22 21:42:11 +00002868 case TargetOpcode::G_LSHR:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002869 Observer.changingInstr(MI);
Matt Arsenault30989e42019-01-22 21:42:11 +00002870
2871 if (TypeIdx == 0) {
David Green10fe5312024-08-18 11:08:49 +01002872 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2873 : TargetOpcode::G_ZEXT;
Matt Arsenault30989e42019-01-22 21:42:11 +00002874
2875 widenScalarSrc(MI, WideTy, 1, CvtOp);
2876 widenScalarDst(MI, WideTy);
2877 } else {
2878 assert(TypeIdx == 1);
2879 // The "number of bits to shift" operand must preserve its value as an
2880 // unsigned integer:
2881 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2882 }
2883
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002884 Observer.changedInstr(MI);
Roman Tereshin6d266382018-05-09 21:43:30 +00002885 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002886 case TargetOpcode::G_UDIV:
2887 case TargetOpcode::G_UREM:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002888 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002889 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2890 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2891 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002892 Observer.changedInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002893 return Legalized;
Christudasan Devadasan90d78402021-04-12 15:49:47 +05302894 case TargetOpcode::G_UDIVREM:
2895 Observer.changingInstr(MI);
2896 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2897 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2898 widenScalarDst(MI, WideTy);
2899 widenScalarDst(MI, WideTy, 1);
2900 Observer.changedInstr(MI);
2901 return Legalized;
Craig Topper54dac272024-12-15 23:16:58 -08002902 case TargetOpcode::G_UMIN:
2903 case TargetOpcode::G_UMAX: {
2904 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2905
2906 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2907 unsigned ExtOpc =
2908 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2909 getApproximateEVTForLLT(WideTy, Ctx))
2910 ? TargetOpcode::G_SEXT
2911 : TargetOpcode::G_ZEXT;
2912
2913 Observer.changingInstr(MI);
2914 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2915 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2916 widenScalarDst(MI, WideTy);
2917 Observer.changedInstr(MI);
2918 return Legalized;
2919 }
Christudasan Devadasan90d78402021-04-12 15:49:47 +05302920
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002921 case TargetOpcode::G_SELECT:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002922 Observer.changingInstr(MI);
Petar Avramovic09dff332018-12-25 14:42:30 +00002923 if (TypeIdx == 0) {
2924 // Perform operation at larger width (any extension is fine here, high
2925 // bits don't affect the result) and then truncate the result back to the
2926 // original type.
2927 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2928 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2929 widenScalarDst(MI, WideTy);
2930 } else {
Matt Arsenault6d8e1b42019-01-30 02:57:43 +00002931 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
Petar Avramovic09dff332018-12-25 14:42:30 +00002932 // Explicit extension is required here since high bits affect the result.
Matt Arsenault6d8e1b42019-01-30 02:57:43 +00002933 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
Petar Avramovic09dff332018-12-25 14:42:30 +00002934 }
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002935 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002936 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002937
Ahmed Bougachab6137062017-01-23 21:10:14 +00002938 case TargetOpcode::G_FPTOSI:
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002939 case TargetOpcode::G_FPTOUI:
David Green28d28d52024-04-15 09:41:08 +01002940 case TargetOpcode::G_INTRINSIC_LRINT:
David Green8d49ce12024-04-17 18:38:24 +01002941 case TargetOpcode::G_INTRINSIC_LLRINT:
Min-Yih Hsu7c3c8a12023-11-22 16:43:20 -08002942 case TargetOpcode::G_IS_FPCLASS:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002943 Observer.changingInstr(MI);
Matt Arsenaulted85b0c2019-10-01 01:06:48 +00002944
2945 if (TypeIdx == 0)
2946 widenScalarDst(MI, WideTy);
2947 else
2948 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2949
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002950 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002951 return Legalized;
Ahmed Bougachad2948232017-01-20 01:37:24 +00002952 case TargetOpcode::G_SITOFP:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002953 Observer.changingInstr(MI);
Petar Avramovic68500332020-07-16 16:31:57 +02002954
2955 if (TypeIdx == 0)
2956 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2957 else
2958 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2959
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002960 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002961 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002962 case TargetOpcode::G_UITOFP:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002963 Observer.changingInstr(MI);
Petar Avramovic68500332020-07-16 16:31:57 +02002964
2965 if (TypeIdx == 0)
2966 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2967 else
2968 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2969
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002970 Observer.changedInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002971 return Legalized;
David Greenfeac7612024-09-16 10:33:59 +01002972 case TargetOpcode::G_FPTOSI_SAT:
2973 case TargetOpcode::G_FPTOUI_SAT:
2974 Observer.changingInstr(MI);
2975
2976 if (TypeIdx == 0) {
2977 Register OldDst = MI.getOperand(0).getReg();
2978 LLT Ty = MRI.getType(OldDst);
2979 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
2980 Register NewDst;
2981 MI.getOperand(0).setReg(ExtReg);
2982 uint64_t ShortBits = Ty.getScalarSizeInBits();
2983 uint64_t WideBits = WideTy.getScalarSizeInBits();
2984 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2985 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2986 // z = i16 fptosi_sat(a)
2987 // ->
2988 // x = i32 fptosi_sat(a)
2989 // y = smin(x, 32767)
2990 // z = smax(y, -32768)
2991 auto MaxVal = MIRBuilder.buildConstant(
2992 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
2993 auto MinVal = MIRBuilder.buildConstant(
2994 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
2995 Register MidReg =
2996 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
2997 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
2998 } else {
2999 // z = i16 fptoui_sat(a)
3000 // ->
3001 // x = i32 fptoui_sat(a)
3002 // y = smin(x, 65535)
3003 auto MaxVal = MIRBuilder.buildConstant(
3004 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3005 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3006 }
3007 MIRBuilder.buildTrunc(OldDst, NewDst);
3008 } else
3009 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3010
3011 Observer.changedInstr(MI);
3012 return Legalized;
Daniel Sanders5eb9f582018-04-28 18:14:50 +00003013 case TargetOpcode::G_LOAD:
Daniel Sanders5eb9f582018-04-28 18:14:50 +00003014 case TargetOpcode::G_SEXTLOAD:
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003015 case TargetOpcode::G_ZEXTLOAD:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003016 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003017 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003018 Observer.changedInstr(MI);
Tim Northover3c73e362016-08-23 18:20:09 +00003019 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003020
Tim Northover3c73e362016-08-23 18:20:09 +00003021 case TargetOpcode::G_STORE: {
Matt Arsenault92c50012019-01-30 02:04:31 +00003022 if (TypeIdx != 0)
3023 return UnableToLegalize;
3024
3025 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
Amara Emerson7e3180a2025-01-05 21:31:34 -08003026 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3027 if (!Ty.isScalar()) {
3028 // We need to widen the vector element type.
3029 Observer.changingInstr(MI);
3030 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3031 // We also need to adjust the MMO to turn this into a truncating store.
3032 MachineMemOperand &MMO = **MI.memoperands_begin();
3033 MachineFunction &MF = MIRBuilder.getMF();
3034 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3035 MI.setMemRefs(MF, {NewMMO});
3036 Observer.changedInstr(MI);
3037 return Legalized;
3038 }
Tim Northover548feee2017-03-21 22:22:05 +00003039
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003040 Observer.changingInstr(MI);
Matt Arsenault92c50012019-01-30 02:04:31 +00003041
3042 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3043 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3044 widenScalarSrc(MI, WideTy, 0, ExtType);
3045
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003046 Observer.changedInstr(MI);
Tim Northover3c73e362016-08-23 18:20:09 +00003047 return Legalized;
3048 }
Tim Northoverea904f92016-08-19 22:40:00 +00003049 case TargetOpcode::G_CONSTANT: {
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003050 MachineOperand &SrcMO = MI.getOperand(1);
3051 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
Aditya Nandakumar6da7dbb2019-12-03 10:40:03 -08003052 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3053 MRI.getType(MI.getOperand(0).getReg()));
3054 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3055 ExtOpc == TargetOpcode::G_ANYEXT) &&
3056 "Illegal Extend");
3057 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3058 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3059 ? SrcVal.sext(WideTy.getSizeInBits())
3060 : SrcVal.zext(WideTy.getSizeInBits());
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003061 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003062 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3063
3064 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003065 Observer.changedInstr(MI);
Tim Northoverea904f92016-08-19 22:40:00 +00003066 return Legalized;
3067 }
Tim Northovera11be042016-08-19 22:40:08 +00003068 case TargetOpcode::G_FCONSTANT: {
Amara Emersond4f84df2022-07-14 00:53:59 -07003069 // To avoid changing the bits of the constant due to extension to a larger
3070 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003071 MachineOperand &SrcMO = MI.getOperand(1);
Amara Emersond4f84df2022-07-14 00:53:59 -07003072 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3073 MIRBuilder.setInstrAndDebugLoc(MI);
3074 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3075 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3076 MI.eraseFromParent();
Roman Tereshin25cbfe62018-05-08 22:53:09 +00003077 return Legalized;
Roman Tereshin27bba442018-05-09 01:43:12 +00003078 }
Matt Arsenaultbefee402019-01-09 07:34:14 +00003079 case TargetOpcode::G_IMPLICIT_DEF: {
3080 Observer.changingInstr(MI);
3081 widenScalarDst(MI, WideTy);
3082 Observer.changedInstr(MI);
3083 return Legalized;
3084 }
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003085 case TargetOpcode::G_BRCOND:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003086 Observer.changingInstr(MI);
Petar Avramovic5d9b8ee2019-02-14 11:39:53 +00003087 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003088 Observer.changedInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003089 return Legalized;
3090
3091 case TargetOpcode::G_FCMP:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003092 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003093 if (TypeIdx == 0)
3094 widenScalarDst(MI, WideTy);
3095 else {
3096 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3097 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
Roman Tereshin27bba442018-05-09 01:43:12 +00003098 }
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003099 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00003100 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003101
3102 case TargetOpcode::G_ICMP:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003103 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003104 if (TypeIdx == 0)
3105 widenScalarDst(MI, WideTy);
3106 else {
Craig Topper11587292024-12-15 22:55:58 -08003107 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3108 CmpInst::Predicate Pred =
3109 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3110
3111 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3112 unsigned ExtOpcode =
3113 (CmpInst::isSigned(Pred) ||
3114 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3115 getApproximateEVTForLLT(WideTy, Ctx)))
3116 ? TargetOpcode::G_SEXT
3117 : TargetOpcode::G_ZEXT;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003118 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3119 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3120 }
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003121 Observer.changedInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003122 return Legalized;
3123
Daniel Sanderse74c5b92019-11-01 13:18:00 -07003124 case TargetOpcode::G_PTR_ADD:
3125 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003126 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003127 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003128 Observer.changedInstr(MI);
Tim Northover22d82cf2016-09-15 11:02:19 +00003129 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003130
Aditya Nandakumar892979e2017-08-25 04:57:27 +00003131 case TargetOpcode::G_PHI: {
3132 assert(TypeIdx == 0 && "Expecting only Idx 0");
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003133
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003134 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003135 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3136 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
Amara Emerson53445f52022-11-13 01:43:04 -08003137 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003138 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
Aditya Nandakumar892979e2017-08-25 04:57:27 +00003139 }
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003140
3141 MachineBasicBlock &MBB = *MI.getParent();
Amara Emerson9d647212019-09-16 23:46:03 +00003142 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003143 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003144 Observer.changedInstr(MI);
Aditya Nandakumar892979e2017-08-25 04:57:27 +00003145 return Legalized;
3146 }
Matt Arsenault63786292019-01-22 20:38:15 +00003147 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3148 if (TypeIdx == 0) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003149 Register VecReg = MI.getOperand(1).getReg();
Matt Arsenault63786292019-01-22 20:38:15 +00003150 LLT VecTy = MRI.getType(VecReg);
3151 Observer.changingInstr(MI);
3152
Sander de Smalend5e14ba2021-06-24 09:58:21 +01003153 widenScalarSrc(
3154 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
Amara Emersondafcbfd2021-09-24 22:52:30 -07003155 TargetOpcode::G_ANYEXT);
Matt Arsenault63786292019-01-22 20:38:15 +00003156
3157 widenScalarDst(MI, WideTy, 0);
3158 Observer.changedInstr(MI);
3159 return Legalized;
3160 }
3161
Amara Emersoncbd86d82018-10-25 14:04:54 +00003162 if (TypeIdx != 2)
3163 return UnableToLegalize;
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003164 Observer.changingInstr(MI);
Matt Arsenault1a276d12019-10-01 15:51:37 -04003165 // TODO: Probably should be zext
Amara Emersoncbd86d82018-10-25 14:04:54 +00003166 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003167 Observer.changedInstr(MI);
Amara Emersoncbd86d82018-10-25 14:04:54 +00003168 return Legalized;
Matt Arsenault63786292019-01-22 20:38:15 +00003169 }
Matt Arsenault1a276d12019-10-01 15:51:37 -04003170 case TargetOpcode::G_INSERT_VECTOR_ELT: {
Alleneaf23b22023-09-12 21:15:01 +08003171 if (TypeIdx == 0) {
3172 Observer.changingInstr(MI);
3173 const LLT WideEltTy = WideTy.getElementType();
3174
3175 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3176 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3177 widenScalarDst(MI, WideTy, 0);
3178 Observer.changedInstr(MI);
3179 return Legalized;
3180 }
3181
Matt Arsenault1a276d12019-10-01 15:51:37 -04003182 if (TypeIdx == 1) {
3183 Observer.changingInstr(MI);
3184
3185 Register VecReg = MI.getOperand(1).getReg();
3186 LLT VecTy = MRI.getType(VecReg);
Sander de Smalend5e14ba2021-06-24 09:58:21 +01003187 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
Matt Arsenault1a276d12019-10-01 15:51:37 -04003188
3189 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3190 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3191 widenScalarDst(MI, WideVecTy, 0);
3192 Observer.changedInstr(MI);
3193 return Legalized;
3194 }
3195
3196 if (TypeIdx == 2) {
3197 Observer.changingInstr(MI);
3198 // TODO: Probably should be zext
3199 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3200 Observer.changedInstr(MI);
Matt Arsenaulte4f19d12020-06-16 11:39:44 -04003201 return Legalized;
Matt Arsenault1a276d12019-10-01 15:51:37 -04003202 }
3203
Matt Arsenaulte4f19d12020-06-16 11:39:44 -04003204 return UnableToLegalize;
Matt Arsenault1a276d12019-10-01 15:51:37 -04003205 }
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003206 case TargetOpcode::G_FADD:
3207 case TargetOpcode::G_FMUL:
3208 case TargetOpcode::G_FSUB:
3209 case TargetOpcode::G_FMA:
Matt Arsenaultcf103722019-09-06 20:49:10 +00003210 case TargetOpcode::G_FMAD:
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003211 case TargetOpcode::G_FNEG:
3212 case TargetOpcode::G_FABS:
Matt Arsenault9dba67f2019-02-11 17:05:20 +00003213 case TargetOpcode::G_FCANONICALIZE:
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00003214 case TargetOpcode::G_FMINNUM:
3215 case TargetOpcode::G_FMAXNUM:
3216 case TargetOpcode::G_FMINNUM_IEEE:
3217 case TargetOpcode::G_FMAXNUM_IEEE:
3218 case TargetOpcode::G_FMINIMUM:
3219 case TargetOpcode::G_FMAXIMUM:
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003220 case TargetOpcode::G_FDIV:
3221 case TargetOpcode::G_FREM:
Jessica Paquette453ab1d2018-12-21 17:05:26 +00003222 case TargetOpcode::G_FCEIL:
Jessica Paquetteebdb0212019-02-11 17:22:58 +00003223 case TargetOpcode::G_FFLOOR:
Jessica Paquette7db82d72019-01-28 18:34:18 +00003224 case TargetOpcode::G_FCOS:
3225 case TargetOpcode::G_FSIN:
Farzon Lotfi1d874332024-06-05 15:01:33 -04003226 case TargetOpcode::G_FTAN:
Farzon Lotfi0b58f342024-07-11 15:58:43 -04003227 case TargetOpcode::G_FACOS:
3228 case TargetOpcode::G_FASIN:
3229 case TargetOpcode::G_FATAN:
Tex Riddellc03d09c2024-10-24 17:53:12 -07003230 case TargetOpcode::G_FATAN2:
Farzon Lotfi0b58f342024-07-11 15:58:43 -04003231 case TargetOpcode::G_FCOSH:
3232 case TargetOpcode::G_FSINH:
3233 case TargetOpcode::G_FTANH:
Jessica Paquettec49428a2019-01-28 19:53:14 +00003234 case TargetOpcode::G_FLOG10:
Jessica Paquette2d73ecd2019-01-28 21:27:23 +00003235 case TargetOpcode::G_FLOG:
Jessica Paquette0154bd12019-01-30 21:16:04 +00003236 case TargetOpcode::G_FLOG2:
Jessica Paquetted5c69e02019-04-19 23:41:52 +00003237 case TargetOpcode::G_FRINT:
Jessica Paquetteba557672019-04-25 16:44:40 +00003238 case TargetOpcode::G_FNEARBYINT:
Jessica Paquette22457f82019-01-30 21:03:52 +00003239 case TargetOpcode::G_FSQRT:
Jessica Paquette84bedac2019-01-30 23:46:15 +00003240 case TargetOpcode::G_FEXP:
Jessica Paquettee7941212019-04-03 16:58:32 +00003241 case TargetOpcode::G_FEXP2:
Matt Arsenaultb14e83d2023-08-12 07:20:00 -04003242 case TargetOpcode::G_FEXP10:
Jessica Paquettedfd87f62019-04-19 16:28:08 +00003243 case TargetOpcode::G_FPOW:
Jessica Paquette56342642019-04-23 18:20:44 +00003244 case TargetOpcode::G_INTRINSIC_TRUNC:
Jessica Paquette3cc6d1f2019-04-23 21:11:57 +00003245 case TargetOpcode::G_INTRINSIC_ROUND:
Matt Arsenault0da582d2020-07-19 09:56:15 -04003246 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003247 assert(TypeIdx == 0);
Jessica Paquette453ab1d2018-12-21 17:05:26 +00003248 Observer.changingInstr(MI);
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003249
3250 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3251 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3252
Jessica Paquette453ab1d2018-12-21 17:05:26 +00003253 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3254 Observer.changedInstr(MI);
3255 return Legalized;
Matt Arsenaulteece6ba2023-04-26 22:02:42 -04003256 case TargetOpcode::G_FPOWI:
3257 case TargetOpcode::G_FLDEXP:
3258 case TargetOpcode::G_STRICT_FLDEXP: {
3259 if (TypeIdx == 0) {
David Green10fe5312024-08-18 11:08:49 +01003260 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
Matt Arsenaulteece6ba2023-04-26 22:02:42 -04003261 return UnableToLegalize;
3262
3263 Observer.changingInstr(MI);
3264 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3265 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3266 Observer.changedInstr(MI);
3267 return Legalized;
3268 }
3269
3270 if (TypeIdx == 1) {
3271 // For some reason SelectionDAG tries to promote to a libcall without
3272 // actually changing the integer type for promotion.
3273 Observer.changingInstr(MI);
3274 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3275 Observer.changedInstr(MI);
3276 return Legalized;
3277 }
3278
3279 return UnableToLegalize;
Matt Arsenault7cd8a022020-07-17 11:01:15 -04003280 }
Matt Arsenault003b58f2023-04-26 21:57:10 -04003281 case TargetOpcode::G_FFREXP: {
3282 Observer.changingInstr(MI);
3283
3284 if (TypeIdx == 0) {
3285 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3286 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3287 } else {
3288 widenScalarDst(MI, WideTy, 1);
3289 }
3290
3291 Observer.changedInstr(MI);
3292 return Legalized;
3293 }
Matt Arsenaultcbaada62019-02-02 23:29:55 +00003294 case TargetOpcode::G_INTTOPTR:
3295 if (TypeIdx != 1)
3296 return UnableToLegalize;
3297
3298 Observer.changingInstr(MI);
3299 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3300 Observer.changedInstr(MI);
3301 return Legalized;
3302 case TargetOpcode::G_PTRTOINT:
3303 if (TypeIdx != 0)
3304 return UnableToLegalize;
3305
3306 Observer.changingInstr(MI);
3307 widenScalarDst(MI, WideTy, 0);
3308 Observer.changedInstr(MI);
3309 return Legalized;
Matt Arsenaultbd791b52019-07-08 13:48:06 +00003310 case TargetOpcode::G_BUILD_VECTOR: {
3311 Observer.changingInstr(MI);
3312
3313 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3314 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3315 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3316
3317 // Avoid changing the result vector type if the source element type was
3318 // requested.
3319 if (TypeIdx == 1) {
Matt Arsenaulta679f272020-07-19 12:29:48 -04003320 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
Matt Arsenaultbd791b52019-07-08 13:48:06 +00003321 } else {
3322 widenScalarDst(MI, WideTy, 0);
3323 }
3324
3325 Observer.changedInstr(MI);
3326 return Legalized;
3327 }
Daniel Sanderse9a57c22019-08-09 21:11:20 +00003328 case TargetOpcode::G_SEXT_INREG:
3329 if (TypeIdx != 0)
3330 return UnableToLegalize;
3331
3332 Observer.changingInstr(MI);
3333 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3334 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3335 Observer.changedInstr(MI);
3336 return Legalized;
Matt Arsenaultef3e83122020-05-23 18:10:34 -04003337 case TargetOpcode::G_PTRMASK: {
3338 if (TypeIdx != 1)
3339 return UnableToLegalize;
3340 Observer.changingInstr(MI);
3341 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3342 Observer.changedInstr(MI);
3343 return Legalized;
3344 }
David Green295edaa2023-11-27 08:20:54 +00003345 case TargetOpcode::G_VECREDUCE_FADD:
David Green5b5614c2024-01-03 07:49:20 +00003346 case TargetOpcode::G_VECREDUCE_FMUL:
David Greend199478a2023-08-14 09:19:47 +01003347 case TargetOpcode::G_VECREDUCE_FMIN:
3348 case TargetOpcode::G_VECREDUCE_FMAX:
David Greena3f27512023-08-14 10:03:25 +01003349 case TargetOpcode::G_VECREDUCE_FMINIMUM:
Nikita Popovf2f18452024-06-21 08:33:40 +02003350 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
David Greend199478a2023-08-14 09:19:47 +01003351 if (TypeIdx != 0)
3352 return UnableToLegalize;
3353 Observer.changingInstr(MI);
3354 Register VecReg = MI.getOperand(1).getReg();
3355 LLT VecTy = MRI.getType(VecReg);
3356 LLT WideVecTy = VecTy.isVector()
3357 ? LLT::vector(VecTy.getElementCount(), WideTy)
3358 : WideTy;
3359 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3360 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3361 Observer.changedInstr(MI);
3362 return Legalized;
Tim Northover32335812016-08-04 18:35:11 +00003363 }
Michael Maitland54a9f0e2024-03-26 20:17:22 -04003364 case TargetOpcode::G_VSCALE: {
3365 MachineOperand &SrcMO = MI.getOperand(1);
3366 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3367 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3368 // The CImm is always a signed value
3369 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3370 Observer.changingInstr(MI);
3371 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3372 widenScalarDst(MI, WideTy);
3373 Observer.changedInstr(MI);
3374 return Legalized;
3375 }
Michael Maitland8aa3a772024-03-07 13:40:30 -08003376 case TargetOpcode::G_SPLAT_VECTOR: {
3377 if (TypeIdx != 1)
3378 return UnableToLegalize;
3379
3380 Observer.changingInstr(MI);
3381 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3382 Observer.changedInstr(MI);
3383 return Legalized;
3384 }
Michael Maitland6bac4142024-10-21 08:49:13 -04003385 case TargetOpcode::G_INSERT_SUBVECTOR: {
3386 if (TypeIdx != 0)
3387 return UnableToLegalize;
3388
3389 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3390 Register BigVec = IS.getBigVec();
3391 Register SubVec = IS.getSubVec();
3392
3393 LLT SubVecTy = MRI.getType(SubVec);
3394 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3395
3396 // Widen the G_INSERT_SUBVECTOR
3397 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3398 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3399 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3400 IS.getIndexImm());
3401
3402 // Truncate back down
3403 auto SplatZero = MIRBuilder.buildSplatVector(
3404 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3405 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3406 SplatZero);
3407
3408 MI.eraseFromParent();
3409
3410 return Legalized;
3411 }
Michael Maitland54a9f0e2024-03-26 20:17:22 -04003412 }
Tim Northover33b07d62016-07-22 20:03:43 +00003413}
3414
Matt Arsenault936483f2020-01-09 21:53:28 -05003415static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3416 MachineIRBuilder &B, Register Src, LLT Ty) {
3417 auto Unmerge = B.buildUnmerge(Ty, Src);
3418 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3419 Pieces.push_back(Unmerge.getReg(I));
3420}
3421
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003422static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3423 MachineIRBuilder &MIRBuilder) {
3424 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
Chen Zheng6ee2f772022-12-12 09:53:53 +00003425 MachineFunction &MF = MIRBuilder.getMF();
3426 const DataLayout &DL = MIRBuilder.getDataLayout();
Chen Zheng6ee2f772022-12-12 09:53:53 +00003427 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3428 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003429 LLT DstLLT = MRI.getType(DstReg);
3430
3431 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
Chen Zheng6ee2f772022-12-12 09:53:53 +00003432
3433 auto Addr = MIRBuilder.buildConstantPool(
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003434 AddrPtrTy,
3435 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
Chen Zheng6ee2f772022-12-12 09:53:53 +00003436
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003437 MachineMemOperand *MMO =
3438 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3439 MachineMemOperand::MOLoad, DstLLT, Alignment);
Chen Zheng6ee2f772022-12-12 09:53:53 +00003440
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003441 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3442}
3443
3444LegalizerHelper::LegalizeResult
3445LegalizerHelper::lowerConstant(MachineInstr &MI) {
3446 const MachineOperand &ConstOperand = MI.getOperand(1);
3447 const Constant *ConstantVal = ConstOperand.getCImm();
3448
3449 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3450 MI.eraseFromParent();
3451
3452 return Legalized;
3453}
3454
3455LegalizerHelper::LegalizeResult
3456LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3457 const MachineOperand &ConstOperand = MI.getOperand(1);
3458 const Constant *ConstantVal = ConstOperand.getFPImm();
3459
3460 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
Chen Zheng6ee2f772022-12-12 09:53:53 +00003461 MI.eraseFromParent();
3462
3463 return Legalized;
3464}
3465
/// Lower a G_BITCAST by splitting the source into pieces with
/// G_UNMERGE_VALUES and reassembling them into the destination type with a
/// merge-like instruction. Inserts intermediate per-piece bitcasts when the
/// source and destination element sizes differ.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to match
      // the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      // Cast each unmerged piece to the intermediate type in place.
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    // Scalar source: unmerge directly into destination elements.
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  // Scalar-to-scalar bitcasts are not handled here.
  return UnableToLegalize;
}
3529
Matt Arsenaulte2f1b482020-06-15 21:35:15 -04003530/// Figure out the bit offset into a register when coercing a vector index for
3531/// the wide element type. This is only for the case when promoting vector to
3532/// one with larger elements.
3533//
3534///
3535/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3536/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3537static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3538 Register Idx,
3539 unsigned NewEltSize,
3540 unsigned OldEltSize) {
3541 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3542 LLT IdxTy = B.getMRI()->getType(Idx);
3543
3544 // Now figure out the amount we need to shift to get the target bits.
3545 auto OffsetMask = B.buildConstant(
Chris Lattner735f4672021-09-08 22:13:13 -07003546 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
Matt Arsenaulte2f1b482020-06-15 21:35:15 -04003547 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3548 return B.buildShl(IdxTy, OffsetIdx,
3549 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3550}
3551
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  // Only the source-vector type index (1) is supported.
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  LLT SrcEltTy = SrcVecTy.getElementType();
  // A scalar CastTy is treated as a one-element vector.
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    // Base index of the first narrow element covering the requested element.
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    // Extract each narrow piece of the requested wide element.
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  // Remaining case: NewNumElts == OldNumElts, which this expansion does not
  // handle.
  return UnableToLegalize;
}
3660
Matt Arsenaulte2f1b482020-06-15 21:35:15 -04003661/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3662/// TargetReg, while preserving other bits in \p TargetReg.
3663///
3664/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3665static Register buildBitFieldInsert(MachineIRBuilder &B,
3666 Register TargetReg, Register InsertReg,
3667 Register OffsetBits) {
3668 LLT TargetTy = B.getMRI()->getType(TargetReg);
3669 LLT InsertTy = B.getMRI()->getType(InsertReg);
3670 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3671 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3672
3673 // Produce a bitmask of the value to insert
3674 auto EltMask = B.buildConstant(
3675 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3676 InsertTy.getSizeInBits()));
3677 // Shift it into position
3678 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3679 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3680
3681 // Clear out the bits in the wide element
3682 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3683
3684 // The value to insert has all zeros already, so stick it into the masked
3685 // wide element.
3686 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3687}
3688
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  // Only the destination-vector type index (0) is supported.
  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();
  LLT VecTy = DstTy;

  LLT VecEltTy = VecTy.getElementType();
  // A scalar CastTy is treated as a one-element vector.
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // Read the wide element containing the target position.
    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Splice the new value into the wide element, then write it back.
    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Only widening the element size (fewer, larger elements) is handled.
  return UnableToLegalize;
}
3753
chuongg30d5db4e2024-07-15 12:00:47 +01003754// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3755// those that have smaller than legal operands.
3756//
3757// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3758//
3759// ===>
3760//
3761// s32 = G_BITCAST <4 x s8>
3762// s32 = G_BITCAST <4 x s8>
3763// s32 = G_BITCAST <4 x s8>
3764// s32 = G_BITCAST <4 x s8>
3765// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3766// <16 x s8> = G_BITCAST <4 x s32>
3767LegalizerHelper::LegalizeResult
3768LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3769 LLT CastTy) {
3770 // Convert it to CONCAT instruction
3771 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3772 if (!ConcatMI) {
3773 return UnableToLegalize;
3774 }
3775
3776 // Check if bitcast is Legal
3777 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3778 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3779
3780 // Check if the build vector is Legal
3781 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3782 return UnableToLegalize;
3783 }
3784
3785 // Bitcast the sources
3786 SmallVector<Register> BitcastRegs;
3787 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3788 BitcastRegs.push_back(
3789 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3790 .getReg(0));
3791 }
3792
3793 // Build the scalar values into a vector
3794 Register BuildReg =
3795 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3796 MIRBuilder.buildBitcast(DstReg, BuildReg);
3797
3798 MI.eraseFromParent();
3799 return Legalized;
3800}
3801
// This bitcasts a shuffle vector to a different type currently of the same
// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
// will be used instead.
//
// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
// ===>
// <4 x s64> = G_PTRTOINT <4 x p0>
// <4 x s64> = G_PTRTOINT <4 x p0>
// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
// <16 x p0> = G_INTTOPTR <16 x s64>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT CastTy) {
  auto ShuffleMI = cast<GShuffleVector>(&MI);
  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));

  // We currently only handle vectors of the same size.
  if (TypeIdx != 0 ||
      CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
      CastTy.getElementCount() != DstTy.getElementCount())
    return UnableToLegalize;

  LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());

  // buildCast picks the appropriate cast opcode for the source/result types.
  auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
  auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
  auto Shuf =
      MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
  MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);

  MI.eraseFromParent();
  return Legalized;
}
3836
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
///
/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
///
/// ===>
///
/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  auto ES = cast<GExtractSubvector>(&MI);

  if (!CastTy.isVector())
    return UnableToLegalize;

  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register Src = ES->getSrcVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount SrcTyEC = SrcTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto SrcTyMinElts = SrcTyEC.getKnownMinValue();

  // Already the requested type; nothing to emit.
  if (DstTy == CastTy)
    return Legalized;

  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  // Only casting to wider (or equal) elements is supported.
  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  // AdjustAmt original elements pack into one cast element; the index and the
  // (minimum) element counts must divide evenly for the repacking to be exact.
  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      SrcTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  Idx /= AdjustAmt;
  SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
  auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedES);

  ES->eraseFromParent();
  return Legalized;
}
3895
/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
///
/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
///                                         <vscale x 8 x i1>,
///                                         N
///
/// ===>
///
/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
///                                        <vscale x 1 x i8>, N / 8
/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  auto ES = cast<GInsertSubvector>(&MI);

  if (!CastTy.isVector())
    return UnableToLegalize;

  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register BigVec = ES->getBigVec();
  Register SubVec = ES->getSubVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT BigVecTy = MRI.getType(BigVec);
  LLT SubVecTy = MRI.getType(SubVec);

  // Already the requested type; nothing to emit.
  if (DstTy == CastTy)
    return Legalized;

  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount BigVecTyEC = BigVecTy.getElementCount();
  ElementCount SubVecTyEC = SubVecTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
  auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();

  // Only casting to wider (or equal) elements is supported.
  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  // AdjustAmt original elements pack into one cast element; the index and all
  // (minimum) element counts must divide evenly for the repacking to be exact.
  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  Idx /= AdjustAmt;
  BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
  auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
  auto PromotedIS =
      MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedIS);

  ES->eraseFromParent();
  return Legalized;
}
3966
/// Lower a G_LOAD/G_SEXTLOAD/G_ZEXTLOAD the target rejected.
///
/// Two cases are handled:
///  * The memory type is not an integral number of bytes: promote the access
///    to a byte-sized load and re-establish the in-register value with an
///    extend/truncate.
///  * Otherwise split the access into two smaller power-of-2 loads and
///    recombine the pieces with shift+or. Big-endian layouts are not
///    implemented and return UnableToLegalize.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = LoadMI.getDstReg();
  Register PtrReg = LoadMI.getPointerReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineMemOperand &MMO = LoadMI.getMMO();
  LLT MemTy = MMO.getMemoryType();
  MachineFunction &MF = MIRBuilder.getMF();

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();

  // Case 1: the memory type is not byte-sized (e.g. an i20 load).
  if (MemSizeInBits != MemStoreSizeInBits) {
    if (MemTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized load if not loading an integral number of
    // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);

    Register LoadReg = DstReg;
    LLT LoadTy = DstTy;

    // If this wasn't already an extending load, we need to widen the result
    // register to avoid creating a load with a narrower result than the source.
    if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
      LoadTy = WideMemTy;
      LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
    }

    // Re-establish the extension semantics of the original load on the
    // widened result.
    if (isa<GSExtLoad>(LoadMI)) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      // The extra bits are guaranteed to be zero, since we stored them that
      // way.  A zext load from Wide thus automatically gives zext from MemVT.
      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
    } else {
      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
    }

    if (DstTy != LoadTy)
      MIRBuilder.buildTrunc(DstReg, LoadReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }

  // Big endian lowering not implemented.
  if (MIRBuilder.getDataLayout().isBigEndian())
    return UnableToLegalize;

  // This load needs splitting into power of 2 sized loads.
  //
  // Our strategy here is to generate anyextending loads for the smaller
  // types up to next power-2 result type, and then combine the two larger
  // result values together, before truncating back down to the non-pow-2
  // type.
  // E.g. v1 = i24 load =>
  // v2 = i32 zextload (2 byte)
  // v3 = i32 load (1 byte)
  // v4 = i32 shl v3, 16
  // v5 = i32 or v4, v2
  // v1 = i24 trunc v5
  // By doing this we generate the correct truncate which should get
  // combined away as an artifact with a matching extend.

  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // This load needs splitting into power of 2 sized loads.
    LargeSplitSize = llvm::bit_floor(MemSizeInBits);
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
  } else {
    // This is already a power of 2, but we still need to split this in half.
    //
    // Assume we're being asked to decompose an unaligned load.
    // TODO: If this requires multiple splits, handle them all at once.
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize; // The access is fine as-is; nothing to lower.

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  if (MemTy.isVector()) {
    // TODO: Handle vector extloads
    if (MemTy != DstTy)
      return UnableToLegalize;

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
  }

  // Split the original memory operand into the low (large) and high (small)
  // parts, offsetting the small access past the large one.
  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);

  LLT PtrTy = MRI.getType(PtrReg);
  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
  LLT AnyExtTy = LLT::scalar(AnyExtSize);
  // Low half must be zero-extended so the high half can simply be OR-ed in.
  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
                                             PtrReg, *LargeMMO);

  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
                                            LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
  // The high half keeps the original load's extension kind.
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);

  // Shift the high half into position and combine (little-endian layout).
  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);

  if (AnyExtTy == DstTy)
    MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
    MIRBuilder.buildTrunc(DstReg, {Or});
  } else {
    assert(DstTy.isPointer() && "expected pointer");
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);

    // FIXME: We currently consider this to be illegal for non-integral address
    // spaces, but we need still need a way to reinterpret the bits.
    MIRBuilder.buildIntToPtr(DstReg, Or);
  }

  LoadMI.eraseFromParent();
  return Legalized;
}
4103
/// Lower a G_STORE the target rejected.
///
/// Mirrors lowerLoad: non-byte-sized memory types are promoted to a
/// byte-sized store with the upper bits zeroed; otherwise the store is split
/// into two smaller power-of-2 truncating stores. Truncating vector stores
/// are routed to scalarizeVectorBooleanStore.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();

  unsigned StoreWidth = MemTy.getSizeInBits();
  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();

  // Case 1: the memory type is not byte-sized (e.g. an i1 store).
  if (StoreWidth != StoreSizeInBits) {
    if (SrcTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized store with upper bits zero if not
    // storing an integral number of bytes.  For example, promote
    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
    LLT WideTy = LLT::scalar(StoreSizeInBits);

    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
      // Avoid creating a store with a narrower source than result.
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
      SrcTy = WideTy;
    }

    // Zero the padding bits so the widened store has a defined value.
    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);

    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  if (MemTy.isVector()) {
    // A truncating vector store (memory type narrower than the value type)
    // is handled by packing the elements into a scalar.
    if (MemTy != SrcTy)
      return scalarizeVectorBooleanStore(StoreMI);

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
  }

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // Non-power-of-2 store: split at the largest power-of-2 boundary.
    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  } else {
    // Power-of-2 store: assume we were asked to split an unaligned access.
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize; // Don't know what we're being asked to do.

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  // Extend to the next pow-2. If this store was itself the result of lowering,
  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
  // that's wider than the stored size.
  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
  const LLT NewSrcTy = LLT::scalar(AnyExtSize);

  if (SrcTy.isPointer()) {
    // Pointers can't be shifted; reinterpret as an integer first.
    const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
    SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
  }

  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
    LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  auto SmallPtr =
    MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);

  MachineMemOperand *LargeMMO =
    MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
    MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  // Both stores truncate from NewSrcTy down to their memory-operand width.
  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
  StoreMI.eraseFromParent();
  return Legalized;
}
4201
/// Lower a truncating vector G_STORE whose element memory type is not
/// byte-sized (e.g. storing <N x s1>) by packing every truncated element
/// into one scalar integer of the full memory width and storing that.
///
/// Elements are placed at bit offsets according to the data layout's
/// endianness. Byte-sized element memory types are not handled yet and
/// return UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();
  LLT MemScalarTy = MemTy.getElementType();
  MachineFunction &MF = MIRBuilder.getMF();

  assert(SrcTy.isVector() && "Expect a vector store type");

  if (!MemScalarTy.isByteSized()) {
    // We need to build an integer scalar of the vector bit pattern.
    // It's not legal for us to add padding when storing a vector.
    unsigned NumBits = MemTy.getSizeInBits();
    LLT IntTy = LLT::scalar(NumBits);
    // Accumulator starts at zero; each element is OR-ed into place below.
    auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
    LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout()));

    for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
      // Extract element I, truncate it to the memory element width, and
      // widen back to the packed-scalar width with the upper bits zeroed.
      auto Elt = MIRBuilder.buildExtractVectorElement(
          SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
      auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
      auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
      // Big-endian layouts place element 0 in the most significant slot.
      unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
                                  ? (MemTy.getNumElements() - 1) - I
                                  : I;
      auto ShiftAmt = MIRBuilder.buildConstant(
          IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
      auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
      CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
    }
    // Store the packed scalar with a memory operand of the scalar type.
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
    MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  // TODO: implement simple scalarization.
  return UnableToLegalize;
}
4245
/// Legalize \p MI by bitcasting the type at index \p TypeIdx to \p CastTy,
/// rewriting operands/results in place or delegating to an opcode-specific
/// bitcast helper. Unsupported opcodes return UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of an extending load.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    MMO.setType(CastTy);
    // The range metadata is no longer valid when reinterpreted as a different
    // type.
    MMO.clearRanges();
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of a truncating store.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    MMO.setType(CastTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    // Operand 1 is the condition; a vector condition would need per-lane
    // handling, which is not implemented.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    // Cast both select values and the result.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Bitwise ops are insensitive to the value interpretation; cast all
    // operands and the result uniformly.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_CONCAT_VECTORS:
    return bitcastConcatVector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return bitcastShuffleVector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
    return bitcastExtractSubvector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_SUBVECTOR:
    return bitcastInsertSubvector(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
}
4326
Matt Arsenault0da582d2020-07-19 09:56:15 -04004327// Legalize an instruction by changing the opcode in place.
4328void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4329 Observer.changingInstr(MI);
4330 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4331 Observer.changedInstr(MI);
4332}
4333
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004334LegalizerHelper::LegalizeResult
Matt Arsenaulta1282922020-07-15 11:10:54 -04004335LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
Tim Northovercecee562016-08-26 17:46:13 +00004336 using namespace TargetOpcode;
Tim Northovercecee562016-08-26 17:46:13 +00004337
4338 switch(MI.getOpcode()) {
4339 default:
4340 return UnableToLegalize;
Chen Zheng6ee2f772022-12-12 09:53:53 +00004341 case TargetOpcode::G_FCONSTANT:
4342 return lowerFConstant(MI);
Matt Arsenault936483f2020-01-09 21:53:28 -05004343 case TargetOpcode::G_BITCAST:
4344 return lowerBitcast(MI);
Tim Northovercecee562016-08-26 17:46:13 +00004345 case TargetOpcode::G_SREM:
4346 case TargetOpcode::G_UREM: {
Matt Arsenaulta1282922020-07-15 11:10:54 -04004347 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004348 auto Quot =
4349 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4350 {MI.getOperand(1), MI.getOperand(2)});
Tim Northovercecee562016-08-26 17:46:13 +00004351
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004352 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4353 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
Tim Northovercecee562016-08-26 17:46:13 +00004354 MI.eraseFromParent();
4355 return Legalized;
4356 }
Matt Arsenault34ed76e2019-10-16 20:46:32 +00004357 case TargetOpcode::G_SADDO:
4358 case TargetOpcode::G_SSUBO:
4359 return lowerSADDO_SSUBO(MI);
Pushpinder Singh41d66692020-08-10 05:47:50 -04004360 case TargetOpcode::G_UMULH:
4361 case TargetOpcode::G_SMULH:
4362 return lowerSMULH_UMULH(MI);
Tim Northover0a9b2792017-02-08 21:22:15 +00004363 case TargetOpcode::G_SMULO:
4364 case TargetOpcode::G_UMULO: {
4365 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4366 // result.
Amara Emerson719024a2023-02-23 16:35:39 -08004367 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
Matt Arsenaulta1282922020-07-15 11:10:54 -04004368 LLT Ty = MRI.getType(Res);
Tim Northover0a9b2792017-02-08 21:22:15 +00004369
Tim Northover0a9b2792017-02-08 21:22:15 +00004370 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4371 ? TargetOpcode::G_SMULH
4372 : TargetOpcode::G_UMULH;
4373
Jay Foadf465b1a2020-01-16 14:46:36 +00004374 Observer.changingInstr(MI);
4375 const auto &TII = MIRBuilder.getTII();
4376 MI.setDesc(TII.get(TargetOpcode::G_MUL));
Shengchen Kan37b37832022-03-16 20:21:25 +08004377 MI.removeOperand(1);
Jay Foadf465b1a2020-01-16 14:46:36 +00004378 Observer.changedInstr(MI);
4379
Jay Foadf465b1a2020-01-16 14:46:36 +00004380 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004381 auto Zero = MIRBuilder.buildConstant(Ty, 0);
Amara Emerson9de62132018-01-03 04:56:56 +00004382
Amara Emerson1d54e752020-09-29 14:39:54 -07004383 // Move insert point forward so we can use the Res register if needed.
4384 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4385
Amara Emerson9de62132018-01-03 04:56:56 +00004386 // For *signed* multiply, overflow is detected by checking:
4387 // (hi != (lo >> bitwidth-1))
4388 if (Opcode == TargetOpcode::G_SMULH) {
Jay Foadf465b1a2020-01-16 14:46:36 +00004389 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4390 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
Amara Emerson9de62132018-01-03 04:56:56 +00004391 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4392 } else {
4393 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4394 }
Tim Northover0a9b2792017-02-08 21:22:15 +00004395 return Legalized;
4396 }
Volkan Keles5698b2a2017-03-08 18:09:14 +00004397 case TargetOpcode::G_FNEG: {
Amara Emerson719024a2023-02-23 16:35:39 -08004398 auto [Res, SubByReg] = MI.getFirst2Regs();
Matt Arsenaulta1282922020-07-15 11:10:54 -04004399 LLT Ty = MRI.getType(Res);
4400
David Green9f255d82024-09-27 07:43:58 +01004401 auto SignMask = MIRBuilder.buildConstant(
4402 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
Eli Friedman3f739f72020-09-23 14:10:33 -07004403 MIRBuilder.buildXor(Res, SubByReg, SignMask);
Volkan Keles5698b2a2017-03-08 18:09:14 +00004404 MI.eraseFromParent();
4405 return Legalized;
4406 }
Matt Arsenault1fe12992022-11-17 23:03:23 -08004407 case TargetOpcode::G_FSUB:
4408 case TargetOpcode::G_STRICT_FSUB: {
Amara Emerson719024a2023-02-23 16:35:39 -08004409 auto [Res, LHS, RHS] = MI.getFirst3Regs();
Matt Arsenaulta1282922020-07-15 11:10:54 -04004410 LLT Ty = MRI.getType(Res);
4411
Volkan Keles225921a2017-03-10 21:25:09 +00004412 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
Matt Arsenault1fe12992022-11-17 23:03:23 -08004413 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4414
4415 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4416 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4417 else
4418 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4419
Volkan Keles225921a2017-03-10 21:25:09 +00004420 MI.eraseFromParent();
4421 return Legalized;
4422 }
Matt Arsenault4d339182019-09-13 00:44:35 +00004423 case TargetOpcode::G_FMAD:
4424 return lowerFMad(MI);
Matt Arsenault19a03502020-03-14 14:52:48 -04004425 case TargetOpcode::G_FFLOOR:
4426 return lowerFFloor(MI);
Sumanth Gundapanenifc832d52024-07-23 11:34:34 -05004427 case TargetOpcode::G_LROUND:
4428 case TargetOpcode::G_LLROUND: {
4429 Register DstReg = MI.getOperand(0).getReg();
4430 Register SrcReg = MI.getOperand(1).getReg();
4431 LLT SrcTy = MRI.getType(SrcReg);
4432 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4433 {SrcReg});
4434 MIRBuilder.buildFPTOSI(DstReg, Round);
4435 MI.eraseFromParent();
4436 return Legalized;
4437 }
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05004438 case TargetOpcode::G_INTRINSIC_ROUND:
4439 return lowerIntrinsicRound(MI);
Acim-Maravicf3138522023-11-14 18:49:21 +01004440 case TargetOpcode::G_FRINT: {
Matt Arsenault0da582d2020-07-19 09:56:15 -04004441 // Since round even is the assumed rounding mode for unconstrained FP
4442 // operations, rint and roundeven are the same operation.
Acim-Maravicf3138522023-11-14 18:49:21 +01004443 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
Matt Arsenault0da582d2020-07-19 09:56:15 -04004444 return Legalized;
4445 }
Sumanth Gundapaneni0ee32c42024-07-24 14:34:31 -05004446 case TargetOpcode::G_INTRINSIC_LRINT:
4447 case TargetOpcode::G_INTRINSIC_LLRINT: {
4448 Register DstReg = MI.getOperand(0).getReg();
4449 Register SrcReg = MI.getOperand(1).getReg();
4450 LLT SrcTy = MRI.getType(SrcReg);
4451 auto Round =
4452 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4453 MIRBuilder.buildFPTOSI(DstReg, Round);
4454 MI.eraseFromParent();
4455 return Legalized;
4456 }
Daniel Sandersaef1dfc2017-11-30 20:11:42 +00004457 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Amara Emerson719024a2023-02-23 16:35:39 -08004458 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
Shilei Tian3a106e52024-03-29 15:59:50 -04004459 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4460 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
Daniel Sandersaef1dfc2017-11-30 20:11:42 +00004461 **MI.memoperands_begin());
Shilei Tian3a106e52024-03-29 15:59:50 -04004462 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4463 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
Daniel Sandersaef1dfc2017-11-30 20:11:42 +00004464 MI.eraseFromParent();
4465 return Legalized;
4466 }
Daniel Sanders5eb9f582018-04-28 18:14:50 +00004467 case TargetOpcode::G_LOAD:
4468 case TargetOpcode::G_SEXTLOAD:
Matt Arsenault54615ec2020-07-31 10:09:00 -04004469 case TargetOpcode::G_ZEXTLOAD:
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07004470 return lowerLoad(cast<GAnyLoad>(MI));
Matt Arsenault54615ec2020-07-31 10:09:00 -04004471 case TargetOpcode::G_STORE:
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07004472 return lowerStore(cast<GStore>(MI));
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00004473 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4474 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4475 case TargetOpcode::G_CTLZ:
4476 case TargetOpcode::G_CTTZ:
4477 case TargetOpcode::G_CTPOP:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004478 return lowerBitCount(MI);
Petar Avramovicbd395692019-02-26 17:22:42 +00004479 case G_UADDO: {
Amara Emerson719024a2023-02-23 16:35:39 -08004480 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
Petar Avramovicbd395692019-02-26 17:22:42 +00004481
Shilei Tian3a106e52024-03-29 15:59:50 -04004482 Register NewRes = MRI.cloneVirtualRegister(Res);
4483
4484 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4485 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4486
4487 MIRBuilder.buildCopy(Res, NewRes);
Petar Avramovicbd395692019-02-26 17:22:42 +00004488
4489 MI.eraseFromParent();
4490 return Legalized;
4491 }
Petar Avramovicb8276f22018-12-17 12:31:07 +00004492 case G_UADDE: {
Amara Emerson719024a2023-02-23 16:35:39 -08004493 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
Craig Topperebb2e5e2023-08-17 14:27:45 -07004494 const LLT CondTy = MRI.getType(CarryOut);
4495 const LLT Ty = MRI.getType(Res);
Petar Avramovicb8276f22018-12-17 12:31:07 +00004496
Shilei Tian3a106e52024-03-29 15:59:50 -04004497 Register NewRes = MRI.cloneVirtualRegister(Res);
4498
Craig Topperc6dee692023-08-17 20:32:37 -07004499 // Initial add of the two operands.
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004500 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
Craig Topperc6dee692023-08-17 20:32:37 -07004501
4502 // Initial check for carry.
4503 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4504
4505 // Add the sum and the carry.
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004506 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
Shilei Tian3a106e52024-03-29 15:59:50 -04004507 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
Craig Topperebb2e5e2023-08-17 14:27:45 -07004508
Craig Topperc6dee692023-08-17 20:32:37 -07004509 // Second check for carry. We can only carry if the initial sum is all 1s
4510 // and the carry is set, resulting in a new sum of 0.
4511 auto Zero = MIRBuilder.buildConstant(Ty, 0);
Shilei Tian3a106e52024-03-29 15:59:50 -04004512 auto ResEqZero =
4513 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
Craig Topperc6dee692023-08-17 20:32:37 -07004514 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4515 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
Petar Avramovicb8276f22018-12-17 12:31:07 +00004516
Shilei Tian3a106e52024-03-29 15:59:50 -04004517 MIRBuilder.buildCopy(Res, NewRes);
4518
Petar Avramovicb8276f22018-12-17 12:31:07 +00004519 MI.eraseFromParent();
4520 return Legalized;
4521 }
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004522 case G_USUBO: {
Amara Emerson719024a2023-02-23 16:35:39 -08004523 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004524
4525 MIRBuilder.buildSub(Res, LHS, RHS);
4526 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4527
4528 MI.eraseFromParent();
4529 return Legalized;
4530 }
4531 case G_USUBE: {
Amara Emerson719024a2023-02-23 16:35:39 -08004532 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
Matt Arsenault6fc0d002020-02-26 17:21:10 -05004533 const LLT CondTy = MRI.getType(BorrowOut);
4534 const LLT Ty = MRI.getType(Res);
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004535
Craig Topperc6dee692023-08-17 20:32:37 -07004536 // Initial subtract of the two operands.
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004537 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
Craig Topperc6dee692023-08-17 20:32:37 -07004538
4539 // Initial check for borrow.
4540 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4541
4542 // Subtract the borrow from the first subtract.
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004543 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004544 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004545
Craig Topperc6dee692023-08-17 20:32:37 -07004546 // Second check for borrow. We can only borrow if the initial difference is
4547 // 0 and the borrow is set, resulting in a new difference of all 1s.
4548 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4549 auto TmpResEqZero =
4550 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4551 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4552 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004553
4554 MI.eraseFromParent();
4555 return Legalized;
4556 }
Matt Arsenault02b5ca82019-05-17 23:05:13 +00004557 case G_UITOFP:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004558 return lowerUITOFP(MI);
Matt Arsenault02b5ca82019-05-17 23:05:13 +00004559 case G_SITOFP:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004560 return lowerSITOFP(MI);
Petar Avramovic6412b562019-08-30 05:44:02 +00004561 case G_FPTOUI:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004562 return lowerFPTOUI(MI);
Matt Arsenaultea956682020-01-04 17:09:48 -05004563 case G_FPTOSI:
4564 return lowerFPTOSI(MI);
David Greenfeac7612024-09-16 10:33:59 +01004565 case G_FPTOUI_SAT:
4566 case G_FPTOSI_SAT:
4567 return lowerFPTOINT_SAT(MI);
Matt Arsenaultbfbfa182020-01-18 10:08:11 -05004568 case G_FPTRUNC:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004569 return lowerFPTRUNC(MI);
Matt Arsenault7cd8a022020-07-17 11:01:15 -04004570 case G_FPOWI:
4571 return lowerFPOWI(MI);
Matt Arsenault6f74f552019-07-01 17:18:03 +00004572 case G_SMIN:
4573 case G_SMAX:
4574 case G_UMIN:
4575 case G_UMAX:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004576 return lowerMinMax(MI);
Thorsten Schütt2d2d6852024-07-23 10:12:28 +02004577 case G_SCMP:
4578 case G_UCMP:
4579 return lowerThreewayCompare(MI);
Matt Arsenaultb1843e12019-07-09 23:34:29 +00004580 case G_FCOPYSIGN:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004581 return lowerFCopySign(MI);
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00004582 case G_FMINNUM:
4583 case G_FMAXNUM:
4584 return lowerFMinNumMaxNum(MI);
Matt Arsenault69999602020-03-29 15:51:54 -04004585 case G_MERGE_VALUES:
4586 return lowerMergeValues(MI);
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00004587 case G_UNMERGE_VALUES:
4588 return lowerUnmergeValues(MI);
Daniel Sanderse9a57c22019-08-09 21:11:20 +00004589 case TargetOpcode::G_SEXT_INREG: {
4590 assert(MI.getOperand(2).isImm() && "Expected immediate");
4591 int64_t SizeInBits = MI.getOperand(2).getImm();
4592
Amara Emerson719024a2023-02-23 16:35:39 -08004593 auto [DstReg, SrcReg] = MI.getFirst2Regs();
Daniel Sanderse9a57c22019-08-09 21:11:20 +00004594 LLT DstTy = MRI.getType(DstReg);
4595 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4596
4597 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
Jay Foad63f73542020-01-16 12:37:00 +00004598 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4599 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00004600 MI.eraseFromParent();
4601 return Legalized;
4602 }
Matt Arsenault0b7de792020-07-26 21:25:10 -04004603 case G_EXTRACT_VECTOR_ELT:
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04004604 case G_INSERT_VECTOR_ELT:
4605 return lowerExtractInsertVectorElt(MI);
Matt Arsenault690645b2019-08-13 16:09:07 +00004606 case G_SHUFFLE_VECTOR:
4607 return lowerShuffleVector(MI);
Lawrence Benson177ce192024-07-17 14:24:24 +02004608 case G_VECTOR_COMPRESS:
4609 return lowerVECTOR_COMPRESS(MI);
Amara Emersone20b91c2019-08-27 19:54:27 +00004610 case G_DYN_STACKALLOC:
4611 return lowerDynStackAlloc(MI);
Matt Arsenault1ca08082023-07-29 19:12:24 -04004612 case G_STACKSAVE:
4613 return lowerStackSave(MI);
4614 case G_STACKRESTORE:
4615 return lowerStackRestore(MI);
Matt Arsenaulta5b9c752019-10-06 01:37:35 +00004616 case G_EXTRACT:
4617 return lowerExtract(MI);
Matt Arsenault4bcdcad2019-10-07 19:13:27 +00004618 case G_INSERT:
4619 return lowerInsert(MI);
Petar Avramovic94a24e72019-12-30 11:13:22 +01004620 case G_BSWAP:
4621 return lowerBswap(MI);
Petar Avramovic98f72a52019-12-30 18:06:29 +01004622 case G_BITREVERSE:
4623 return lowerBitreverse(MI);
Matt Arsenault0ea3c722019-12-27 19:26:51 -05004624 case G_READ_REGISTER:
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05004625 case G_WRITE_REGISTER:
4626 return lowerReadWriteRegister(MI);
Jay Foadb35833b2020-07-12 14:18:45 -04004627 case G_UADDSAT:
4628 case G_USUBSAT: {
4629 // Try to make a reasonable guess about which lowering strategy to use. The
4630 // target can override this with custom lowering and calling the
4631 // implementation functions.
4632 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
AtariDreamsd5829582024-05-28 12:25:43 -04004633 if (LI.isLegalOrCustom({G_UMIN, Ty}))
Jay Foadb35833b2020-07-12 14:18:45 -04004634 return lowerAddSubSatToMinMax(MI);
4635 return lowerAddSubSatToAddoSubo(MI);
4636 }
4637 case G_SADDSAT:
4638 case G_SSUBSAT: {
4639 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4640
4641 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4642 // since it's a shorter expansion. However, we would need to figure out the
4643 // preferred boolean type for the carry out for the query.
4644 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4645 return lowerAddSubSatToMinMax(MI);
4646 return lowerAddSubSatToAddoSubo(MI);
4647 }
Bevin Hansson5de6c562020-07-16 17:02:04 +02004648 case G_SSHLSAT:
4649 case G_USHLSAT:
4650 return lowerShlSat(MI);
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02004651 case G_ABS:
4652 return lowerAbsToAddXor(MI);
Him1880748f422024-09-03 12:47:26 +01004653 case G_FABS:
4654 return lowerFAbs(MI);
Amara Emerson08232192020-09-26 10:02:39 -07004655 case G_SELECT:
4656 return lowerSelect(MI);
Janek van Oirschot587747d2022-12-06 20:36:07 +00004657 case G_IS_FPCLASS:
4658 return lowerISFPCLASS(MI);
Christudasan Devadasan4c6ab482021-03-10 18:03:10 +05304659 case G_SDIVREM:
4660 case G_UDIVREM:
4661 return lowerDIVREM(MI);
Matt Arsenaultb24436a2020-03-19 22:48:13 -04004662 case G_FSHL:
4663 case G_FSHR:
4664 return lowerFunnelShift(MI);
Amara Emersonf5e9be62021-03-26 15:27:15 -07004665 case G_ROTL:
4666 case G_ROTR:
4667 return lowerRotate(MI);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02004668 case G_MEMSET:
4669 case G_MEMCPY:
4670 case G_MEMMOVE:
4671 return lowerMemCpyFamily(MI);
4672 case G_MEMCPY_INLINE:
4673 return lowerMemcpyInline(MI);
Tuan Chuong Goha40c9842023-08-17 16:31:54 +01004674 case G_ZEXT:
4675 case G_SEXT:
4676 case G_ANYEXT:
4677 return lowerEXT(MI);
chuongg3d88d9832023-10-11 16:05:25 +01004678 case G_TRUNC:
4679 return lowerTRUNC(MI);
Amara Emerson95ac3d12021-08-18 00:19:58 -07004680 GISEL_VECREDUCE_CASES_NONSEQ
4681 return lowerVectorReduction(MI);
Michael Maitland6f9cb9a72023-12-08 13:24:27 -05004682 case G_VAARG:
4683 return lowerVAArg(MI);
Tim Northovercecee562016-08-26 17:46:13 +00004684 }
4685}
4686
Matt Arsenault0b7de792020-07-26 21:25:10 -04004687Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4688 Align MinAlign) const {
4689 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4690 // datalayout for the preferred alignment. Also there should be a target hook
4691 // for this to allow targets to reduce the alignment and ignore the
4692 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4693 // the type.
4694 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4695}
4696
4697MachineInstrBuilder
4698LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4699 MachinePointerInfo &PtrInfo) {
4700 MachineFunction &MF = MIRBuilder.getMF();
4701 const DataLayout &DL = MIRBuilder.getDataLayout();
4702 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4703
4704 unsigned AddrSpace = DL.getAllocaAddrSpace();
4705 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4706
4707 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4708 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4709}
4710
Amara Emerson41ebbed2025-01-05 21:32:27 -08004711MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4712 const SrcOp &Val) {
4713 LLT SrcTy = Val.getLLTTy(MRI);
4714 Align StackTypeAlign =
4715 std::max(getStackTemporaryAlignment(SrcTy),
4716 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4717 MachinePointerInfo PtrInfo;
4718 auto StackTemp =
4719 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4720
4721 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4722 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4723}
4724
Owen Anderson44b717d2024-02-21 00:42:22 -05004725static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4726 LLT VecTy) {
Matt Arsenault0b7de792020-07-26 21:25:10 -04004727 LLT IdxTy = B.getMRI()->getType(IdxReg);
4728 unsigned NElts = VecTy.getNumElements();
Owen Anderson44b717d2024-02-21 00:42:22 -05004729
4730 int64_t IdxVal;
4731 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4732 if (IdxVal < VecTy.getNumElements())
4733 return IdxReg;
4734 // If a constant index would be out of bounds, clamp it as well.
4735 }
4736
Matt Arsenault0b7de792020-07-26 21:25:10 -04004737 if (isPowerOf2_32(NElts)) {
4738 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4739 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4740 }
4741
4742 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4743 .getReg(0);
4744}
4745
4746Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4747 Register Index) {
4748 LLT EltTy = VecTy.getElementType();
4749
4750 // Calculate the element offset and add it to the pointer.
4751 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4752 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4753 "Converting bits to bytes lost precision");
4754
Owen Anderson44b717d2024-02-21 00:42:22 -05004755 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
Matt Arsenault0b7de792020-07-26 21:25:10 -04004756
Jay Foadfd3eaf72024-03-09 09:07:22 +00004757 // Convert index to the correct size for the address space.
4758 const DataLayout &DL = MIRBuilder.getDataLayout();
4759 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4760 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4761 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4762 if (IdxTy != MRI.getType(Index))
4763 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4764
Matt Arsenault0b7de792020-07-26 21:25:10 -04004765 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4766 MIRBuilder.buildConstant(IdxTy, EltSize));
4767
4768 LLT PtrTy = MRI.getType(VecPtr);
4769 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4770}
4771
Fangrui Songea2d4c52021-12-24 00:55:54 -08004772#ifndef NDEBUG
Petar Avramovic29f88b92021-12-23 14:09:51 +01004773/// Check that all vector operands have same number of elements. Other operands
4774/// should be listed in NonVecOp.
4775static bool hasSameNumEltsOnAllVectorOperands(
4776 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4777 std::initializer_list<unsigned> NonVecOpIndices) {
4778 if (MI.getNumMemOperands() != 0)
4779 return false;
Matt Arsenault816c9b3e2019-01-27 21:53:09 +00004780
Petar Avramovic29f88b92021-12-23 14:09:51 +01004781 LLT VecTy = MRI.getType(MI.getReg(0));
4782 if (!VecTy.isVector())
4783 return false;
4784 unsigned NumElts = VecTy.getNumElements();
Matt Arsenault816c9b3e2019-01-27 21:53:09 +00004785
Petar Avramovic29f88b92021-12-23 14:09:51 +01004786 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4787 MachineOperand &Op = MI.getOperand(OpIdx);
4788 if (!Op.isReg()) {
4789 if (!is_contained(NonVecOpIndices, OpIdx))
4790 return false;
4791 continue;
4792 }
Matt Arsenault816c9b3e2019-01-27 21:53:09 +00004793
Petar Avramovic29f88b92021-12-23 14:09:51 +01004794 LLT Ty = MRI.getType(Op.getReg());
4795 if (!Ty.isVector()) {
4796 if (!is_contained(NonVecOpIndices, OpIdx))
4797 return false;
Petar Avramovic29f88b92021-12-23 14:09:51 +01004798 continue;
4799 }
4800
4801 if (Ty.getNumElements() != NumElts)
4802 return false;
4803 }
4804
4805 return true;
4806}
Fangrui Songea2d4c52021-12-24 00:55:54 -08004807#endif
Petar Avramovic29f88b92021-12-23 14:09:51 +01004808
4809/// Fill \p DstOps with DstOps that have same number of elements combined as
4810/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4811/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
4812/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
4813static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4814 unsigned NumElts) {
4815 LLT LeftoverTy;
4816 assert(Ty.isVector() && "Expected vector type");
4817 LLT EltTy = Ty.getElementType();
4818 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4819 int NumParts, NumLeftover;
4820 std::tie(NumParts, NumLeftover) =
4821 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4822
4823 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4824 for (int i = 0; i < NumParts; ++i) {
4825 DstOps.push_back(NarrowTy);
4826 }
4827
4828 if (LeftoverTy.isValid()) {
4829 assert(NumLeftover == 1 && "expected exactly one leftover");
4830 DstOps.push_back(LeftoverTy);
4831 }
4832}
4833
4834/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4835/// made from \p Op depending on operand type.
4836static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4837 MachineOperand &Op) {
4838 for (unsigned i = 0; i < N; ++i) {
4839 if (Op.isReg())
4840 Ops.push_back(Op.getReg());
4841 else if (Op.isImm())
4842 Ops.push_back(Op.getImm());
4843 else if (Op.isPredicate())
4844 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4845 else
4846 llvm_unreachable("Unsupported type");
4847 }
Matt Arsenault816c9b3e2019-01-27 21:53:09 +00004848}
4849
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004850// Handle splitting vector operations which need to have the same number of
4851// elements in each type index, but each type index may have a different element
4852// type.
4853//
4854// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4855// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4856// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4857//
4858// Also handles some irregular breakdown cases, e.g.
4859// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4860// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4861// s64 = G_SHL s64, s32
4862LegalizerHelper::LegalizeResult
4863LegalizerHelper::fewerElementsVectorMultiEltType(
Petar Avramovic29f88b92021-12-23 14:09:51 +01004864 GenericMachineInstr &MI, unsigned NumElts,
4865 std::initializer_list<unsigned> NonVecOpIndices) {
4866 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4867 "Non-compatible opcode or not specified non-vector operands");
4868 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004869
Petar Avramovic29f88b92021-12-23 14:09:51 +01004870 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4871 unsigned NumDefs = MI.getNumDefs();
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004872
Petar Avramovic29f88b92021-12-23 14:09:51 +01004873 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4874 // Build instructions with DstOps to use instruction found by CSE directly.
4875 // CSE copies found instruction into given vreg when building with vreg dest.
4876 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4877 // Output registers will be taken from created instructions.
4878 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4879 for (unsigned i = 0; i < NumDefs; ++i) {
4880 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4881 }
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004882
Petar Avramovic29f88b92021-12-23 14:09:51 +01004883 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4884 // Operands listed in NonVecOpIndices will be used as is without splitting;
4885 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4886 // scalar condition (op 1), immediate in sext_inreg (op 2).
4887 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4888 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4889 ++UseIdx, ++UseNo) {
4890 if (is_contained(NonVecOpIndices, UseIdx)) {
4891 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4892 MI.getOperand(UseIdx));
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004893 } else {
Petar Avramovic29f88b92021-12-23 14:09:51 +01004894 SmallVector<Register, 8> SplitPieces;
chuongg3fcfe1b62024-01-15 16:40:39 +00004895 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4896 MRI);
Petar Avramovic29f88b92021-12-23 14:09:51 +01004897 for (auto Reg : SplitPieces)
4898 InputOpsPieces[UseNo].push_back(Reg);
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004899 }
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004900 }
4901
Petar Avramovic29f88b92021-12-23 14:09:51 +01004902 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004903
Petar Avramovic29f88b92021-12-23 14:09:51 +01004904 // Take i-th piece of each input operand split and build sub-vector/scalar
4905 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4906 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4907 SmallVector<DstOp, 2> Defs;
4908 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4909 Defs.push_back(OutputOpsPieces[DstNo][i]);
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004910
Petar Avramovic29f88b92021-12-23 14:09:51 +01004911 SmallVector<SrcOp, 3> Uses;
4912 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4913 Uses.push_back(InputOpsPieces[InputNo][i]);
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004914
Petar Avramovic29f88b92021-12-23 14:09:51 +01004915 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4916 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4917 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4918 }
Matt Arsenaultca676342019-01-25 02:36:32 +00004919
Petar Avramovic29f88b92021-12-23 14:09:51 +01004920 // Merge small outputs into MI's output for each def operand.
4921 if (NumLeftovers) {
4922 for (unsigned i = 0; i < NumDefs; ++i)
4923 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
Matt Arsenaultcbaada62019-02-02 23:29:55 +00004924 } else {
Petar Avramovic29f88b92021-12-23 14:09:51 +01004925 for (unsigned i = 0; i < NumDefs; ++i)
Diana Picusf95a5fb2023-01-09 11:59:00 +01004926 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
Matt Arsenaultca676342019-01-25 02:36:32 +00004927 }
4928
Matt Arsenault1b1e6852019-01-25 02:59:34 +00004929 MI.eraseFromParent();
4930 return Legalized;
4931}
4932
// Split a G_PHI over a wide vector into several G_PHIs over narrower pieces
// of NumElts elements (plus one leftover piece when NumElts does not evenly
// divide the original element count).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
                                        unsigned NumElts) {
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  // A G_PHI has one def followed by (value, predecessor-block) operand pairs,
  // so NumInputs counts both halves of each pair.
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  // Breakdown of the single def into NumElts-wide pieces (+ leftover).
  SmallVector<DstOp, 8> OutputOpsPieces;
  SmallVector<Register, 8> OutputRegs;
  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);

  // Instructions that perform register split will be inserted in basic block
  // where register is defined (basic block is in the next operand).
  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
    // Split each incoming value in its own predecessor block, before that
    // block's terminator.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
                       MIRBuilder, MRI);
  }

  // Build PHIs with fewer elements.
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  MIRBuilder.setInsertPt(*MI.getParent(), MI);
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
    Phi.addDef(
        MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
    OutputRegs.push_back(Phi.getReg(0));

    // Each new PHI takes the i-th piece of every incoming value, paired with
    // the original predecessor-block operand.
    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }

  // Set the insert point after the existing PHIs, so the merge instructions
  // built below do not land between PHIs.
  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());

  // Merge small outputs into MI's def.
  if (NumLeftovers) {
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  } else {
    MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
4985
4986LegalizerHelper::LegalizeResult
Matt Arsenault28215ca2019-08-13 16:26:28 +00004987LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4988 unsigned TypeIdx,
4989 LLT NarrowTy) {
Matt Arsenault28215ca2019-08-13 16:26:28 +00004990 const int NumDst = MI.getNumOperands() - 1;
4991 const Register SrcReg = MI.getOperand(NumDst).getReg();
Petar Avramovic29f88b92021-12-23 14:09:51 +01004992 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
Matt Arsenault28215ca2019-08-13 16:26:28 +00004993 LLT SrcTy = MRI.getType(SrcReg);
4994
Petar Avramovic29f88b92021-12-23 14:09:51 +01004995 if (TypeIdx != 1 || NarrowTy == DstTy)
Matt Arsenault28215ca2019-08-13 16:26:28 +00004996 return UnableToLegalize;
4997
Petar Avramovic29f88b92021-12-23 14:09:51 +01004998 // Requires compatible types. Otherwise SrcReg should have been defined by
4999 // merge-like instruction that would get artifact combined. Most likely
5000 // instruction that defines SrcReg has to perform more/fewer elements
5001 // legalization compatible with NarrowTy.
5002 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5003 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
Matt Arsenault28215ca2019-08-13 16:26:28 +00005004
Petar Avramovic29f88b92021-12-23 14:09:51 +01005005 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5006 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5007 return UnableToLegalize;
5008
5009 // This is most likely DstTy (smaller then register size) packed in SrcTy
5010 // (larger then register size) and since unmerge was not combined it will be
5011 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5012 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5013
5014 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5015 //
5016 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5017 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5018 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5019 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
Matt Arsenault28215ca2019-08-13 16:26:28 +00005020 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5021 const int PartsPerUnmerge = NumDst / NumUnmerge;
5022
5023 for (int I = 0; I != NumUnmerge; ++I) {
5024 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5025
5026 for (int J = 0; J != PartsPerUnmerge; ++J)
5027 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5028 MIB.addUse(Unmerge.getReg(I));
5029 }
5030
5031 MI.eraseFromParent();
5032 return Legalized;
5033}
5034
Pushpinder Singhd0e54222021-03-09 06:10:00 +00005035LegalizerHelper::LegalizeResult
Matt Arsenault901e3312020-08-03 18:37:29 -04005036LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5037 LLT NarrowTy) {
Amara Emerson719024a2023-02-23 16:35:39 -08005038 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
Petar Avramovic29f88b92021-12-23 14:09:51 +01005039 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
5040 // that should have been artifact combined. Most likely instruction that uses
5041 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5042 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5043 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5044 if (NarrowTy == SrcTy)
5045 return UnableToLegalize;
Matt Arsenault31adc282020-08-03 14:13:38 -04005046
Petar Avramovic29f88b92021-12-23 14:09:51 +01005047 // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
5048 // is for old mir tests. Since the changes to more/fewer elements it should no
5049 // longer be possible to generate MIR like this when starting from llvm-ir
5050 // because LCMTy approach was replaced with merge/unmerge to vector elements.
5051 if (TypeIdx == 1) {
5052 assert(SrcTy.isVector() && "Expected vector types");
5053 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5054 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5055 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5056 return UnableToLegalize;
5057 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5058 //
5059 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5060 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5061 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5062 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5063 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5064 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
Matt Arsenault31adc282020-08-03 14:13:38 -04005065
Petar Avramovic29f88b92021-12-23 14:09:51 +01005066 SmallVector<Register, 8> Elts;
5067 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5068 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5069 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5070 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5071 Elts.push_back(Unmerge.getReg(j));
5072 }
Matt Arsenault31adc282020-08-03 14:13:38 -04005073
Petar Avramovic29f88b92021-12-23 14:09:51 +01005074 SmallVector<Register, 8> NarrowTyElts;
5075 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5076 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5077 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5078 ++i, Offset += NumNarrowTyElts) {
5079 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
Diana Picusf95a5fb2023-01-09 11:59:00 +01005080 NarrowTyElts.push_back(
5081 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
Petar Avramovic29f88b92021-12-23 14:09:51 +01005082 }
Matt Arsenault31adc282020-08-03 14:13:38 -04005083
Diana Picusf95a5fb2023-01-09 11:59:00 +01005084 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
Petar Avramovic29f88b92021-12-23 14:09:51 +01005085 MI.eraseFromParent();
5086 return Legalized;
5087 }
5088
5089 assert(TypeIdx == 0 && "Bad type index");
5090 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5091 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5092 return UnableToLegalize;
5093
5094 // This is most likely SrcTy (smaller then register size) packed in DstTy
5095 // (larger then register size) and since merge was not combined it will be
5096 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5097 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5098
5099 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5100 //
5101 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5102 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5103 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5104 SmallVector<Register, 8> NarrowTyElts;
5105 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5106 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5107 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5108 for (unsigned i = 0; i < NumParts; ++i) {
5109 SmallVector<Register, 8> Sources;
5110 for (unsigned j = 0; j < NumElts; ++j)
5111 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
Diana Picusf95a5fb2023-01-09 11:59:00 +01005112 NarrowTyElts.push_back(
5113 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
Petar Avramovic29f88b92021-12-23 14:09:51 +01005114 }
5115
Diana Picusf95a5fb2023-01-09 11:59:00 +01005116 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
Matt Arsenault31adc282020-08-03 14:13:38 -04005117 MI.eraseFromParent();
5118 return Legalized;
5119}
5120
// Narrow G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT to operate on
// NarrowVecTy-sized pieces of the source vector. Only works when the element
// index is a known constant; a variable index falls back to full expansion
// via the stack (lowerExtractInsertVectorElt).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  auto [DstReg, SrcVec] = MI.getFirst2Regs();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  // For inserts the narrowed type is the result (TypeIdx 0); for extracts it
  // is the source vector (TypeIdx 1).
  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // The index is always the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    // Rebase the index onto the piece that contains it: PartIdx selects the
    // piece, NewIdx is the offset within that piece.
    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}
5194
// Break a non-atomic, non-extending/truncating load or store into a sequence
// of NarrowTy-sized memory accesses (plus a leftover-typed access when the
// value size is not a multiple of NarrowTy), walking the address with
// materialized pointer adds.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (LdStMI.isAtomic())
    return UnableToLegalize;

  bool IsLoad = isa<GLoad>(LdStMI);
  Register ValReg = LdStMI.getReg(0);
  Register AddrReg = LdStMI.getPointerReg();
  LLT ValTy = MRI.getType(ValReg);

  // Reject extending loads / truncating stores: the register size must match
  // the memory size exactly.
  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  // For loads we only compute the breakdown; the piece registers are created
  // while emitting. For stores we must first split the value being stored.
  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs, MIRBuilder, MRI)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  // NumParts stays -1 when the breakdown/split failed.
  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned NumParts, unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    // On big-endian targets Offset walks downward from the top of the value;
    // on little-endian it walks upward from 0.
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         ++Idx) {
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a piece-sized memory operand at the right offset from the
      // original one.
      MachineMemOperand *NewMMO =
          MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
      Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
    }

    return Offset;
  };

  unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

  // For loads, reassemble the loaded pieces into the original wide value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  LdStMI.eraseFromParent();
  return Legalized;
}
5289
// Top-level dispatch for the FewerElements legalize action: route each opcode
// to the narrowing strategy that knows how to split it.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;
  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
  // A scalar NarrowTy means full scalarization (one element per piece).
  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  switch (MI.getOpcode()) {
  // Elementwise operations: every operand and the result split the same way,
  // so the generic multi-element-type narrowing applies directly.
  case G_IMPLICIT_DEF:
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FEXP10:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FLDEXP:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_LRINT:
  case G_INTRINSIC_LLRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_LROUND:
  case G_LLROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FTAN:
  case G_FACOS:
  case G_FASIN:
  case G_FATAN:
  case G_FATAN2:
  case G_FCOSH:
  case G_FSINH:
  case G_FTANH:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SDIVREM:
  case G_UDIVREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_ABS:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_ROTL:
  case G_ROTR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
  case G_UMULO:
  case G_SMULO:
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_FPTOSI_SAT:
  case G_FPTOUI_SAT:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
  case G_UADDO:
  case G_USUBO:
  case G_UADDE:
  case G_USUBE:
  case G_SADDO:
  case G_SSUBO:
  case G_SADDE:
  case G_SSUBE:
  case G_STRICT_FADD:
  case G_STRICT_FSUB:
  case G_STRICT_FMUL:
  case G_STRICT_FMA:
  case G_STRICT_FLDEXP:
  case G_FFREXP:
    return fewerElementsVectorMultiEltType(GMI, NumElts);
  // Operations with non-vector operands that must be copied to every piece
  // are narrowed with those operand indices listed explicitly.
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
  case G_IS_FPCLASS:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
  case G_SELECT:
    // A vector condition splits like any other operand; a scalar condition is
    // shared by all pieces.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return fewerElementsVectorMultiEltType(GMI, NumElts);
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
  case G_PHI:
    return fewerElementsVectorPhi(GMI, NumElts);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
  GISEL_VECREDUCE_CASES_NONSEQ
    return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  case G_FPOWI:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
  case G_BITCAST:
    return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
  case G_INTRINSIC_FPTRUNC_ROUND:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
  default:
    return UnableToLegalize;
  }
}
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00005464
chuongg30fb3d422024-02-21 13:24:45 +00005465LegalizerHelper::LegalizeResult
5466LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
5467 LLT NarrowTy) {
5468 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5469 "Not a bitcast operation");
5470
5471 if (TypeIdx != 0)
5472 return UnableToLegalize;
5473
5474 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5475
Tim Gymnich2db2dc8a2024-12-12 18:47:46 +01005476 unsigned NewElemCount =
5477 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5478 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
chuongg30fb3d422024-02-21 13:24:45 +00005479
5480 // Split the Src and Dst Reg into smaller registers
5481 SmallVector<Register> SrcVRegs, BitcastVRegs;
5482 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5483 return UnableToLegalize;
5484
5485 // Build new smaller bitcast instructions
5486 // Not supporting Leftover types for now but will have to
5487 for (unsigned i = 0; i < SrcVRegs.size(); i++)
5488 BitcastVRegs.push_back(
5489 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
5490
5491 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5492 MI.eraseFromParent();
5493 return Legalized;
5494}
5495
Amara Emerson9f39ba12021-05-19 21:35:05 -07005496LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5497 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5498 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5499 if (TypeIdx != 0)
5500 return UnableToLegalize;
5501
Amara Emerson719024a2023-02-23 16:35:39 -08005502 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5503 MI.getFirst3RegLLTs();
Amara Emerson9f39ba12021-05-19 21:35:05 -07005504 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
Amara Emerson9f39ba12021-05-19 21:35:05 -07005505 // The shuffle should be canonicalized by now.
5506 if (DstTy != Src1Ty)
5507 return UnableToLegalize;
5508 if (DstTy != Src2Ty)
5509 return UnableToLegalize;
5510
5511 if (!isPowerOf2_32(DstTy.getNumElements()))
5512 return UnableToLegalize;
5513
5514 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5515 // Further legalization attempts will be needed to do split further.
Sander de Smalenc9acd2f2021-06-25 11:27:41 +01005516 NarrowTy =
5517 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
David Green4c8c1302024-12-15 10:44:40 +00005518 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
Amara Emerson9f39ba12021-05-19 21:35:05 -07005519
5520 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
chuongg3fcfe1b62024-01-15 16:40:39 +00005521 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5522 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
Amara Emerson9f39ba12021-05-19 21:35:05 -07005523 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5524 SplitSrc2Regs[1]};
5525
5526 Register Hi, Lo;
5527
5528 // If Lo or Hi uses elements from at most two of the four input vectors, then
5529 // express it as a vector shuffle of those two inputs. Otherwise extract the
5530 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5531 SmallVector<int, 16> Ops;
5532 for (unsigned High = 0; High < 2; ++High) {
5533 Register &Output = High ? Hi : Lo;
5534
5535 // Build a shuffle mask for the output, discovering on the fly which
5536 // input vectors to use as shuffle operands (recorded in InputUsed).
5537 // If building a suitable shuffle vector proves too hard, then bail
5538 // out with useBuildVector set.
5539 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5540 unsigned FirstMaskIdx = High * NewElts;
5541 bool UseBuildVector = false;
5542 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5543 // The mask element. This indexes into the input.
5544 int Idx = Mask[FirstMaskIdx + MaskOffset];
5545
5546 // The input vector this mask element indexes into.
5547 unsigned Input = (unsigned)Idx / NewElts;
5548
Joe Loser5e96cea2022-09-06 18:06:58 -06005549 if (Input >= std::size(Inputs)) {
Amara Emerson9f39ba12021-05-19 21:35:05 -07005550 // The mask element does not index into any input vector.
5551 Ops.push_back(-1);
5552 continue;
5553 }
5554
5555 // Turn the index into an offset from the start of the input vector.
5556 Idx -= Input * NewElts;
5557
5558 // Find or create a shuffle vector operand to hold this input.
5559 unsigned OpNo;
Joe Loser5e96cea2022-09-06 18:06:58 -06005560 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
Amara Emerson9f39ba12021-05-19 21:35:05 -07005561 if (InputUsed[OpNo] == Input) {
5562 // This input vector is already an operand.
5563 break;
5564 } else if (InputUsed[OpNo] == -1U) {
5565 // Create a new operand for this input vector.
5566 InputUsed[OpNo] = Input;
5567 break;
5568 }
5569 }
5570
Joe Loser5e96cea2022-09-06 18:06:58 -06005571 if (OpNo >= std::size(InputUsed)) {
Amara Emerson9f39ba12021-05-19 21:35:05 -07005572 // More than two input vectors used! Give up on trying to create a
5573 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5574 UseBuildVector = true;
5575 break;
5576 }
5577
5578 // Add the mask index for the new shuffle vector.
5579 Ops.push_back(Idx + OpNo * NewElts);
5580 }
5581
5582 if (UseBuildVector) {
5583 LLT EltTy = NarrowTy.getElementType();
5584 SmallVector<Register, 16> SVOps;
5585
5586 // Extract the input elements by hand.
5587 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5588 // The mask element. This indexes into the input.
5589 int Idx = Mask[FirstMaskIdx + MaskOffset];
5590
5591 // The input vector this mask element indexes into.
5592 unsigned Input = (unsigned)Idx / NewElts;
5593
Joe Loser5e96cea2022-09-06 18:06:58 -06005594 if (Input >= std::size(Inputs)) {
Amara Emerson9f39ba12021-05-19 21:35:05 -07005595 // The mask element is "undef" or indexes off the end of the input.
5596 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5597 continue;
5598 }
5599
5600 // Turn the index into an offset from the start of the input vector.
5601 Idx -= Input * NewElts;
5602
5603 // Extract the vector element by hand.
5604 SVOps.push_back(MIRBuilder
5605 .buildExtractVectorElement(
5606 EltTy, Inputs[Input],
5607 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5608 .getReg(0));
5609 }
5610
5611 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5612 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5613 } else if (InputUsed[0] == -1U) {
5614 // No input vectors were used! The result is undefined.
5615 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5616 } else {
5617 Register Op0 = Inputs[InputUsed[0]];
5618 // If only one input was used, use an undefined vector for the other.
5619 Register Op1 = InputUsed[1] == -1U
5620 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5621 : Inputs[InputUsed[1]];
5622 // At least one input vector was used. Create a new shuffle vector.
5623 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5624 }
5625
5626 Ops.clear();
5627 }
5628
David Green4c8c1302024-12-15 10:44:40 +00005629 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
Amara Emerson9f39ba12021-05-19 21:35:05 -07005630 MI.eraseFromParent();
5631 return Legalized;
5632}
5633
Amara Emerson95ac3d12021-08-18 00:19:58 -07005634LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5635 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
Amara Emersonb9669782023-08-12 13:55:08 -07005636 auto &RdxMI = cast<GVecReduce>(MI);
Amara Emerson95ac3d12021-08-18 00:19:58 -07005637
5638 if (TypeIdx != 1)
5639 return UnableToLegalize;
5640
5641 // The semantics of the normal non-sequential reductions allow us to freely
5642 // re-associate the operation.
Amara Emersonb9669782023-08-12 13:55:08 -07005643 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
Amara Emerson95ac3d12021-08-18 00:19:58 -07005644
5645 if (NarrowTy.isVector() &&
5646 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5647 return UnableToLegalize;
5648
Amara Emersonb9669782023-08-12 13:55:08 -07005649 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
Amara Emerson95ac3d12021-08-18 00:19:58 -07005650 SmallVector<Register> SplitSrcs;
5651 // If NarrowTy is a scalar then we're being asked to scalarize.
5652 const unsigned NumParts =
5653 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5654 : SrcTy.getNumElements();
5655
chuongg3fcfe1b62024-01-15 16:40:39 +00005656 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
Amara Emerson95ac3d12021-08-18 00:19:58 -07005657 if (NarrowTy.isScalar()) {
5658 if (DstTy != NarrowTy)
5659 return UnableToLegalize; // FIXME: handle implicit extensions.
5660
5661 if (isPowerOf2_32(NumParts)) {
5662 // Generate a tree of scalar operations to reduce the critical path.
5663 SmallVector<Register> PartialResults;
5664 unsigned NumPartsLeft = NumParts;
5665 while (NumPartsLeft > 1) {
5666 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5667 PartialResults.emplace_back(
5668 MIRBuilder
5669 .buildInstr(ScalarOpc, {NarrowTy},
5670 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5671 .getReg(0));
5672 }
5673 SplitSrcs = PartialResults;
5674 PartialResults.clear();
5675 NumPartsLeft = SplitSrcs.size();
5676 }
5677 assert(SplitSrcs.size() == 1);
5678 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5679 MI.eraseFromParent();
5680 return Legalized;
5681 }
5682 // If we can't generate a tree, then just do sequential operations.
5683 Register Acc = SplitSrcs[0];
5684 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5685 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5686 .getReg(0);
5687 MIRBuilder.buildCopy(DstReg, Acc);
5688 MI.eraseFromParent();
5689 return Legalized;
5690 }
5691 SmallVector<Register> PartialReductions;
5692 for (unsigned Part = 0; Part < NumParts; ++Part) {
5693 PartialReductions.push_back(
Amara Emersonb9669782023-08-12 13:55:08 -07005694 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5695 .getReg(0));
Amara Emerson95ac3d12021-08-18 00:19:58 -07005696 }
5697
Amara Emersona35c2c72021-02-21 14:17:03 -08005698 // If the types involved are powers of 2, we can generate intermediate vector
5699 // ops, before generating a final reduction operation.
5700 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5701 isPowerOf2_32(NarrowTy.getNumElements())) {
5702 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5703 }
5704
5705 Register Acc = PartialReductions[0];
5706 for (unsigned Part = 1; Part < NumParts; ++Part) {
5707 if (Part == NumParts - 1) {
5708 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5709 {Acc, PartialReductions[Part]});
5710 } else {
5711 Acc = MIRBuilder
5712 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5713 .getReg(0);
5714 }
5715 }
5716 MI.eraseFromParent();
5717 return Legalized;
5718}
5719
5720LegalizerHelper::LegalizeResult
David Green77b124c2024-01-05 08:11:44 +00005721LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5722 unsigned int TypeIdx,
5723 LLT NarrowTy) {
5724 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5725 MI.getFirst3RegLLTs();
5726 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5727 DstTy != NarrowTy)
5728 return UnableToLegalize;
5729
5730 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5731 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5732 "Unexpected vecreduce opcode");
5733 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5734 ? TargetOpcode::G_FADD
5735 : TargetOpcode::G_FMUL;
5736
5737 SmallVector<Register> SplitSrcs;
5738 unsigned NumParts = SrcTy.getNumElements();
chuongg3fcfe1b62024-01-15 16:40:39 +00005739 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
David Green77b124c2024-01-05 08:11:44 +00005740 Register Acc = ScalarReg;
5741 for (unsigned i = 0; i < NumParts; i++)
5742 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5743 .getReg(0);
5744
5745 MIRBuilder.buildCopy(DstReg, Acc);
5746 MI.eraseFromParent();
5747 return Legalized;
5748}
5749
5750LegalizerHelper::LegalizeResult
Amara Emersona35c2c72021-02-21 14:17:03 -08005751LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5752 LLT SrcTy, LLT NarrowTy,
5753 unsigned ScalarOpc) {
5754 SmallVector<Register> SplitSrcs;
5755 // Split the sources into NarrowTy size pieces.
5756 extractParts(SrcReg, NarrowTy,
chuongg3fcfe1b62024-01-15 16:40:39 +00005757 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5758 MIRBuilder, MRI);
Amara Emersona35c2c72021-02-21 14:17:03 -08005759 // We're going to do a tree reduction using vector operations until we have
5760 // one NarrowTy size value left.
5761 while (SplitSrcs.size() > 1) {
5762 SmallVector<Register> PartialRdxs;
5763 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5764 Register LHS = SplitSrcs[Idx];
5765 Register RHS = SplitSrcs[Idx + 1];
5766 // Create the intermediate vector op.
5767 Register Res =
5768 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5769 PartialRdxs.push_back(Res);
5770 }
5771 SplitSrcs = std::move(PartialRdxs);
5772 }
5773 // Finally generate the requested NarrowTy based reduction.
5774 Observer.changingInstr(MI);
5775 MI.getOperand(1).setReg(SplitSrcs[0]);
5776 Observer.changedInstr(MI);
5777 return Legalized;
5778}
5779
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00005780LegalizerHelper::LegalizeResult
Matt Arsenaultfbec8fe2019-02-07 19:37:44 +00005781LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5782 const LLT HalfTy, const LLT AmtTy) {
5783
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00005784 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5785 Register InH = MRI.createGenericVirtualRegister(HalfTy);
Jay Foad63f73542020-01-16 12:37:00 +00005786 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
Matt Arsenaultfbec8fe2019-02-07 19:37:44 +00005787
Jay Foada9bceb22021-09-30 09:54:57 +01005788 if (Amt.isZero()) {
Diana Picusf95a5fb2023-01-09 11:59:00 +01005789 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
Matt Arsenaultfbec8fe2019-02-07 19:37:44 +00005790 MI.eraseFromParent();
5791 return Legalized;
5792 }
5793
5794 LLT NVT = HalfTy;
5795 unsigned NVTBits = HalfTy.getSizeInBits();
5796 unsigned VTBits = 2 * NVTBits;
5797
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00005798 SrcOp Lo(Register(0)), Hi(Register(0));
Matt Arsenaultfbec8fe2019-02-07 19:37:44 +00005799 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5800 if (Amt.ugt(VTBits)) {
5801 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5802 } else if (Amt.ugt(NVTBits)) {
5803 Lo = MIRBuilder.buildConstant(NVT, 0);
5804 Hi = MIRBuilder.buildShl(NVT, InL,
5805 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5806 } else if (Amt == NVTBits) {
5807 Lo = MIRBuilder.buildConstant(NVT, 0);
5808 Hi = InL;
5809 } else {
5810 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
Matt Arsenaulte98cab12019-02-07 20:44:08 +00005811 auto OrLHS =
5812 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5813 auto OrRHS = MIRBuilder.buildLShr(
5814 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5815 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
Matt Arsenaultfbec8fe2019-02-07 19:37:44 +00005816 }
5817 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5818 if (Amt.ugt(VTBits)) {
5819 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5820 } else if (Amt.ugt(NVTBits)) {
5821 Lo = MIRBuilder.buildLShr(NVT, InH,
5822 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5823 Hi = MIRBuilder.buildConstant(NVT, 0);
5824 } else if (Amt == NVTBits) {
5825 Lo = InH;
5826 Hi = MIRBuilder.buildConstant(NVT, 0);
5827 } else {
5828 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5829
5830 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5831 auto OrRHS = MIRBuilder.buildShl(
5832 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5833
5834 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5835 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5836 }
5837 } else {
5838 if (Amt.ugt(VTBits)) {
5839 Hi = Lo = MIRBuilder.buildAShr(
5840 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5841 } else if (Amt.ugt(NVTBits)) {
5842 Lo = MIRBuilder.buildAShr(NVT, InH,
5843 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5844 Hi = MIRBuilder.buildAShr(NVT, InH,
5845 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5846 } else if (Amt == NVTBits) {
5847 Lo = InH;
5848 Hi = MIRBuilder.buildAShr(NVT, InH,
5849 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5850 } else {
5851 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5852
5853 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5854 auto OrRHS = MIRBuilder.buildShl(
5855 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5856
5857 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5858 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5859 }
5860 }
5861
Diana Picusf95a5fb2023-01-09 11:59:00 +01005862 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
Matt Arsenaultfbec8fe2019-02-07 19:37:44 +00005863 MI.eraseFromParent();
5864
5865 return Legalized;
5866}
5867
// TODO: Optimize if constant shift amount.
// Narrow a scalar G_SHL/G_LSHR/G_ASHR to operate on two half-width pieces.
// A constant shift amount is delegated to narrowScalarShiftByConstant;
// otherwise the fully general expansion selects between the "short"
// (Amt < NewBitSize) and "long" (Amt >= NewBitSize) forms at runtime.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // TypeIdx == 1 only narrows the shift-amount operand.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  // An odd bit width cannot be split into two equal halves.
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A known-constant shift amount gets the cheaper, branch-free expansion.
  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
    return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
                                       ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  // Split the source into low (InL) and high (InH) halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // AmtExcess = Amt - NewBitSize (effective amount for "long" shifts);
  // AmtLack = NewBitSize - Amt (bits carried between halves for "short" ones).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    // IsZero guards the Hi select because AmtLack == NewBitSize when Amt == 0,
    // and a half-width shift by NewBitSize would be out of range.
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess}); // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  // Recombine the two halves into the full-width destination.
  MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
5977
5978LegalizerHelper::LegalizeResult
Matt Arsenault72bcf152019-02-28 00:01:05 +00005979LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5980 LLT MoreTy) {
5981 assert(TypeIdx == 0 && "Expecting only Idx 0");
5982
5983 Observer.changingInstr(MI);
5984 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5985 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5986 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
5987 moreElementsVectorSrc(MI, MoreTy, I);
5988 }
5989
5990 MachineBasicBlock &MBB = *MI.getParent();
Amara Emerson9d647212019-09-16 23:46:03 +00005991 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
Matt Arsenault72bcf152019-02-28 00:01:05 +00005992 moreElementsVectorDst(MI, MoreTy, 0);
5993 Observer.changedInstr(MI);
5994 return Legalized;
5995}
5996
Dhruv Chawla (work)2c9b6c12024-02-27 15:57:46 +05305997MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5998 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5999 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6000
6001 switch (Opcode) {
6002 default:
6003 llvm_unreachable(
6004 "getNeutralElementForVecReduce called with invalid opcode!");
6005 case TargetOpcode::G_VECREDUCE_ADD:
6006 case TargetOpcode::G_VECREDUCE_OR:
6007 case TargetOpcode::G_VECREDUCE_XOR:
6008 case TargetOpcode::G_VECREDUCE_UMAX:
6009 return MIRBuilder.buildConstant(Ty, 0);
6010 case TargetOpcode::G_VECREDUCE_MUL:
6011 return MIRBuilder.buildConstant(Ty, 1);
6012 case TargetOpcode::G_VECREDUCE_AND:
6013 case TargetOpcode::G_VECREDUCE_UMIN:
6014 return MIRBuilder.buildConstant(
6015 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6016 case TargetOpcode::G_VECREDUCE_SMAX:
6017 return MIRBuilder.buildConstant(
6018 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6019 case TargetOpcode::G_VECREDUCE_SMIN:
6020 return MIRBuilder.buildConstant(
6021 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6022 case TargetOpcode::G_VECREDUCE_FADD:
6023 return MIRBuilder.buildFConstant(Ty, -0.0);
6024 case TargetOpcode::G_VECREDUCE_FMUL:
6025 return MIRBuilder.buildFConstant(Ty, 1.0);
6026 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6027 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6028 assert(false && "getNeutralElementForVecReduce unimplemented for "
Nikita Popovf2f18452024-06-21 08:33:40 +02006029 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
Dhruv Chawla (work)2c9b6c12024-02-27 15:57:46 +05306030 }
6031 llvm_unreachable("switch expected to return!");
6032}
6033
// Legalize an operation by widening its vector type(s) to MoreTy (more
// elements). Extra source lanes are padded (with undef, or with a neutral
// element for reductions) and the widened result is trimmed back down via the
// moreElementsVectorSrc/Dst helpers.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // Only the result type (TypeIdx 0) can be widened here.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    // Widen the stored value (operand 0 of G_STORE).
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  // Binary ops: widen both sources and the destination.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCOPYSIGN:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_STRICT_FSUB:
  case TargetOpcode::G_STRICT_FMUL:
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  // Ternary ops: widen all three sources and the destination.
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FSHR:
  case TargetOpcode::G_FSHL: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  // Extract-like ops: only the source vector (TypeIdx 1) is widened; the
  // extracted result is unchanged.
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  // Ops where operand 1 and the result share the widened type. For G_INSERT /
  // G_INSERT_VECTOR_ELT only the big vector operand (1) is widened; the
  // inserted piece keeps its type.
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_ABS:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT: {
    auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
    if (TypeIdx == 1) {
      if (!CondTy.isScalar() ||
          DstTy.getElementCount() != MoreTy.getElementCount())
        return UnableToLegalize;

      // This is turning a scalar select of vectors into a vector
      // select. Broadcast the select condition.
      auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ShufSplat.getReg(0));
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Vector conditions with TypeIdx 0 are not handled here.
    if (CondTy.isVector())
      return UnableToLegalize;

    // Scalar condition: widen both select operands and the result.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_UNMERGE_VALUES:
    return UnableToLegalize;
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_BUILD_VECTOR: {
    // Pad the element list with undefs, build the wider vector, then delete
    // the trailing elements to recover the original destination type.
    SmallVector<SrcOp, 8> Elts;
    for (auto Op : MI.uses()) {
      Elts.push_back(Op.getReg());
    }

    for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
      Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
    }

    MIRBuilder.buildDeleteTrailingVectorElements(
        MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
    MI.eraseFromParent();
    return Legalized;
  }
  // Conversions: the source and destination have different element types, so
  // widen each side with its own element type but the same element count.
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    Observer.changingInstr(MI);
    LLT SrcExtTy;
    LLT DstExtTy;
    if (TypeIdx == 0) {
      DstExtTy = MoreTy;
      SrcExtTy = LLT::fixed_vector(
          MoreTy.getNumElements(),
          MRI.getType(MI.getOperand(1).getReg()).getElementType());
    } else {
      DstExtTy = LLT::fixed_vector(
          MoreTy.getNumElements(),
          MRI.getType(MI.getOperand(0).getReg()).getElementType());
      SrcExtTy = MoreTy;
    }
    moreElementsVectorSrc(MI, SrcExtTy, 1);
    moreElementsVectorDst(MI, DstExtTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Widen both compare operands; the result keeps its own (boolean-like)
    // element type at the new element count.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    LLT CondTy = LLT::fixed_vector(
        MoreTy.getNumElements(),
        MRI.getType(MI.getOperand(0).getReg()).getElementType());
    moreElementsVectorDst(MI, CondTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITCAST: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

    // Scale the source element count so that the widened source and widened
    // destination still have the same total size; bail out if the ratio is
    // not exact.
    unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
    if (coefficient % DstTy.getNumElements() != 0)
      return UnableToLegalize;

    coefficient = coefficient / DstTy.getNumElements();

    LLT NewTy = SrcTy.changeElementCount(
        ElementCount::get(coefficient, MoreTy.isScalable()));
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, NewTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN: {
    // Pad the reduction input with undef, then overwrite the padded lanes
    // with the reduction's neutral element so the result is unchanged.
    LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
    MachineOperand &MO = MI.getOperand(1);
    auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
    auto NeutralElement = getNeutralElementForVecReduce(
        MI.getOpcode(), MIRBuilder, MoreTy.getElementType());

    LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
    for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
         i != e; i++) {
      auto Idx = MIRBuilder.buildConstant(IdxTy, i);
      NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
                                                   NeutralElement, Idx);
    }

    Observer.changingInstr(MI);
    MO.setReg(NewVec.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }

  default:
    return UnableToLegalize;
  }
}
6286
// Rewrite a G_SHUFFLE_VECTOR whose result length differs from its source
// length into one whose result and sources have equal lengths, either by
// widening the destination (mask shorter than sources) or by concatenating
// the sources with undef vectors (mask longer than sources).
LegalizerHelper::LegalizeResult
LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  unsigned MaskNumElts = Mask.size();
  unsigned SrcNumElts = SrcTy.getNumElements();
  LLT DestEltTy = DstTy.getElementType();

  // Already equal lengths; nothing to do.
  if (MaskNumElts == SrcNumElts)
    return Legalized;

  if (MaskNumElts < SrcNumElts) {
    // Extend mask to match new destination vector size with
    // undef values.
    SmallVector<int, 16> NewMask(SrcNumElts, -1);
    llvm::copy(Mask, NewMask.begin());

    // Widen the destination to the source length and rebuild the shuffle
    // with the padded mask.
    moreElementsVectorDst(MI, SrcTy, 0);
    MIRBuilder.setInstrAndDebugLoc(MI);
    MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                  MI.getOperand(1).getReg(),
                                  MI.getOperand(2).getReg(), NewMask);
    MI.eraseFromParent();

    return Legalized;
  }

  // Mask is longer than the sources: pad each source up to the next multiple
  // of its own length that covers the mask.
  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
  LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);

  // Create new source vectors by concatenating the initial
  // source vectors with undefined vectors of the same size.
  auto Undef = MIRBuilder.buildUndef(SrcTy);
  SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
  SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
  MOps1[0] = MI.getOperand(1).getReg();
  MOps2[0] = MI.getOperand(2).getReg();

  auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
  auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);

  // Readjust mask for new input vector length. Indices that referred to the
  // second source must be shifted, since that source now starts at
  // PaddedMaskNumElts rather than SrcNumElts.
  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
  for (unsigned I = 0; I != MaskNumElts; ++I) {
    int Idx = Mask[I];
    if (Idx >= static_cast<int>(SrcNumElts))
      Idx += PaddedMaskNumElts - SrcNumElts;
    MappedOps[I] = Idx;
  }

  // If we got more elements than required, extract subvector.
  if (MaskNumElts != PaddedMaskNumElts) {
    auto Shuffle =
        MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);

    // Pull out the leading MaskNumElts elements one at a time and rebuild
    // the original-length destination.
    SmallVector<Register, 16> Elts(MaskNumElts);
    for (unsigned I = 0; I < MaskNumElts; ++I) {
      Elts[I] =
          MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
              .getReg(0);
    }
    MIRBuilder.buildBuildVector(DstReg, Elts);
  } else {
    MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
  }

  MI.eraseFromParent();
  return LegalizerHelper::LegalizeResult::Legalized;
}
6357
Amara Emerson97c42632021-07-09 23:11:22 -07006358LegalizerHelper::LegalizeResult
6359LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6360 unsigned int TypeIdx, LLT MoreTy) {
Amara Emerson719024a2023-02-23 16:35:39 -08006361 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
Amara Emerson97c42632021-07-09 23:11:22 -07006362 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
Amara Emerson97c42632021-07-09 23:11:22 -07006363 unsigned NumElts = DstTy.getNumElements();
6364 unsigned WidenNumElts = MoreTy.getNumElements();
6365
Kevin Atheyec7cffc2022-12-15 11:19:24 -08006366 if (DstTy.isVector() && Src1Ty.isVector() &&
Vladislav Dzhidzhoev3a51eed2023-02-07 21:32:50 +01006367 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6368 return equalizeVectorShuffleLengths(MI);
Kevin Atheyec7cffc2022-12-15 11:19:24 -08006369 }
6370
6371 if (TypeIdx != 0)
6372 return UnableToLegalize;
6373
Amara Emerson97c42632021-07-09 23:11:22 -07006374 // Expect a canonicalized shuffle.
6375 if (DstTy != Src1Ty || DstTy != Src2Ty)
6376 return UnableToLegalize;
6377
6378 moreElementsVectorSrc(MI, MoreTy, 1);
6379 moreElementsVectorSrc(MI, MoreTy, 2);
6380
6381 // Adjust mask based on new input vector length.
Craig Topper5797ed62024-12-10 22:18:46 -08006382 SmallVector<int, 16> NewMask(WidenNumElts, -1);
Amara Emerson97c42632021-07-09 23:11:22 -07006383 for (unsigned I = 0; I != NumElts; ++I) {
6384 int Idx = Mask[I];
6385 if (Idx < static_cast<int>(NumElts))
Craig Topper5797ed62024-12-10 22:18:46 -08006386 NewMask[I] = Idx;
Amara Emerson97c42632021-07-09 23:11:22 -07006387 else
Craig Topper5797ed62024-12-10 22:18:46 -08006388 NewMask[I] = Idx - NumElts + WidenNumElts;
Amara Emerson97c42632021-07-09 23:11:22 -07006389 }
Amara Emerson97c42632021-07-09 23:11:22 -07006390 moreElementsVectorDst(MI, MoreTy, 0);
6391 MIRBuilder.setInstrAndDebugLoc(MI);
6392 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6393 MI.getOperand(1).getReg(),
6394 MI.getOperand(2).getReg(), NewMask);
6395 MI.eraseFromParent();
6396 return Legalized;
6397}
6398
// Emit a schoolbook (long) multiplication of multi-part integers. Src1Regs
// and Src2Regs hold the operands split into NarrowTy-sized parts, least
// significant first; DstRegs receives the product parts. Only DstRegs.size()
// low parts of the full product are produced, so the result is implicitly
// truncated to that width.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  // Each result part DstIdx is the sum of all partial products whose part
  // indices add up to DstIdx, plus the high halves of the previous column's
  // products and the carries accumulated there.
  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      // Use overflow-reporting adds so carries can propagate to the next
      // column; zero-extend each carry bit before summing.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    // CarrySumPrevDstIdx is only read on iterations that computed CarrySum;
    // on the final iteration the stored value is never used again.
    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
6461
Matt Arsenault18ec3822019-02-11 22:00:39 +00006462LegalizerHelper::LegalizeResult
Cassie Jones362463882021-02-14 14:37:55 -05006463LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6464 LLT NarrowTy) {
6465 if (TypeIdx != 0)
6466 return UnableToLegalize;
6467
Cassie Jones97a1cdb2021-02-14 14:42:46 -05006468 Register DstReg = MI.getOperand(0).getReg();
6469 LLT DstType = MRI.getType(DstReg);
6470 // FIXME: add support for vector types
6471 if (DstType.isVector())
6472 return UnableToLegalize;
6473
Cassie Jonese1532642021-02-22 17:11:23 -05006474 unsigned Opcode = MI.getOpcode();
6475 unsigned OpO, OpE, OpF;
6476 switch (Opcode) {
6477 case TargetOpcode::G_SADDO:
Cassie Jones8f956a52021-02-22 17:11:35 -05006478 case TargetOpcode::G_SADDE:
Cassie Jonesc63b33b2021-02-22 17:10:58 -05006479 case TargetOpcode::G_UADDO:
Cassie Jones8f956a52021-02-22 17:11:35 -05006480 case TargetOpcode::G_UADDE:
Cassie Jones362463882021-02-14 14:37:55 -05006481 case TargetOpcode::G_ADD:
6482 OpO = TargetOpcode::G_UADDO;
6483 OpE = TargetOpcode::G_UADDE;
Cassie Jonese1532642021-02-22 17:11:23 -05006484 OpF = TargetOpcode::G_UADDE;
Cassie Jones8f956a52021-02-22 17:11:35 -05006485 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
Cassie Jonese1532642021-02-22 17:11:23 -05006486 OpF = TargetOpcode::G_SADDE;
Cassie Jones362463882021-02-14 14:37:55 -05006487 break;
Cassie Jonese1532642021-02-22 17:11:23 -05006488 case TargetOpcode::G_SSUBO:
Cassie Jones8f956a52021-02-22 17:11:35 -05006489 case TargetOpcode::G_SSUBE:
Cassie Jonesc63b33b2021-02-22 17:10:58 -05006490 case TargetOpcode::G_USUBO:
Cassie Jones8f956a52021-02-22 17:11:35 -05006491 case TargetOpcode::G_USUBE:
Cassie Jones362463882021-02-14 14:37:55 -05006492 case TargetOpcode::G_SUB:
6493 OpO = TargetOpcode::G_USUBO;
6494 OpE = TargetOpcode::G_USUBE;
Cassie Jonese1532642021-02-22 17:11:23 -05006495 OpF = TargetOpcode::G_USUBE;
Cassie Jones8f956a52021-02-22 17:11:35 -05006496 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
Cassie Jonese1532642021-02-22 17:11:23 -05006497 OpF = TargetOpcode::G_SSUBE;
Cassie Jones362463882021-02-14 14:37:55 -05006498 break;
6499 default:
6500 llvm_unreachable("Unexpected add/sub opcode!");
6501 }
6502
Cassie Jonesc63b33b2021-02-22 17:10:58 -05006503 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
6504 unsigned NumDefs = MI.getNumExplicitDefs();
6505 Register Src1 = MI.getOperand(NumDefs).getReg();
6506 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
Justin Bogner4271e1d2021-03-02 14:46:03 -08006507 Register CarryDst, CarryIn;
Cassie Jonesc63b33b2021-02-22 17:10:58 -05006508 if (NumDefs == 2)
6509 CarryDst = MI.getOperand(1).getReg();
Cassie Jones8f956a52021-02-22 17:11:35 -05006510 if (MI.getNumOperands() == NumDefs + 3)
6511 CarryIn = MI.getOperand(NumDefs + 2).getReg();
Cassie Jonesc63b33b2021-02-22 17:10:58 -05006512
Justin Bogner4271e1d2021-03-02 14:46:03 -08006513 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6514 LLT LeftoverTy, DummyTy;
6515 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
chuongg3fcfe1b62024-01-15 16:40:39 +00006516 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6517 MIRBuilder, MRI);
6518 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
6519 MRI);
Cassie Jones362463882021-02-14 14:37:55 -05006520
Justin Bogner4271e1d2021-03-02 14:46:03 -08006521 int NarrowParts = Src1Regs.size();
Craig Toppere3284d82024-12-10 07:18:20 -08006522 Src1Regs.append(Src1Left);
6523 Src2Regs.append(Src2Left);
Justin Bogner4271e1d2021-03-02 14:46:03 -08006524 DstRegs.reserve(Src1Regs.size());
6525
6526 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
6527 Register DstReg =
6528 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
Craig Topper7c124182024-12-09 20:23:24 -08006529 Register CarryOut;
Cassie Jonesc63b33b2021-02-22 17:10:58 -05006530 // Forward the final carry-out to the destination register
Justin Bogner4271e1d2021-03-02 14:46:03 -08006531 if (i == e - 1 && CarryDst)
Cassie Jonesc63b33b2021-02-22 17:10:58 -05006532 CarryOut = CarryDst;
Craig Topper7c124182024-12-09 20:23:24 -08006533 else
6534 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
Cassie Jones362463882021-02-14 14:37:55 -05006535
Cassie Jones8f956a52021-02-22 17:11:35 -05006536 if (!CarryIn) {
Cassie Jones362463882021-02-14 14:37:55 -05006537 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
6538 {Src1Regs[i], Src2Regs[i]});
Justin Bogner4271e1d2021-03-02 14:46:03 -08006539 } else if (i == e - 1) {
Cassie Jonese1532642021-02-22 17:11:23 -05006540 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
6541 {Src1Regs[i], Src2Regs[i], CarryIn});
6542 } else {
Cassie Jones362463882021-02-14 14:37:55 -05006543 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
6544 {Src1Regs[i], Src2Regs[i], CarryIn});
6545 }
6546
6547 DstRegs.push_back(DstReg);
6548 CarryIn = CarryOut;
6549 }
Justin Bogner4271e1d2021-03-02 14:46:03 -08006550 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
serge-sans-paille38818b62023-01-04 08:28:45 +01006551 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
6552 ArrayRef(DstRegs).drop_front(NarrowParts));
Justin Bogner4271e1d2021-03-02 14:46:03 -08006553
Cassie Jones362463882021-02-14 14:37:55 -05006554 MI.eraseFromParent();
6555 return Legalized;
6556}
6557
6558LegalizerHelper::LegalizeResult
Petar Avramovic0b17e592019-03-11 10:00:17 +00006559LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
Amara Emerson719024a2023-02-23 16:35:39 -08006560 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
Petar Avramovic0b17e592019-03-11 10:00:17 +00006561
Matt Arsenault211e89d2019-01-27 00:52:51 +00006562 LLT Ty = MRI.getType(DstReg);
Jay Foad24688f82021-10-04 20:25:42 +01006563 if (Ty.isVector())
Matt Arsenault211e89d2019-01-27 00:52:51 +00006564 return UnableToLegalize;
6565
Jay Foad0a031f52021-10-05 10:47:54 +01006566 unsigned Size = Ty.getSizeInBits();
Jay Foad24688f82021-10-04 20:25:42 +01006567 unsigned NarrowSize = NarrowTy.getSizeInBits();
Jay Foad0a031f52021-10-05 10:47:54 +01006568 if (Size % NarrowSize != 0)
Jay Foad24688f82021-10-04 20:25:42 +01006569 return UnableToLegalize;
6570
Jay Foad0a031f52021-10-05 10:47:54 +01006571 unsigned NumParts = Size / NarrowSize;
Petar Avramovic5229f472019-03-11 10:08:44 +00006572 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
Jay Foad0a031f52021-10-05 10:47:54 +01006573 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
Matt Arsenault211e89d2019-01-27 00:52:51 +00006574
Matt Arsenaultde8451f2020-02-04 10:34:22 -05006575 SmallVector<Register, 2> Src1Parts, Src2Parts;
6576 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
chuongg3fcfe1b62024-01-15 16:40:39 +00006577 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6578 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
Petar Avramovic5229f472019-03-11 10:08:44 +00006579 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
Matt Arsenault211e89d2019-01-27 00:52:51 +00006580
Petar Avramovic5229f472019-03-11 10:08:44 +00006581 // Take only high half of registers if this is high mul.
Jay Foad0a031f52021-10-05 10:47:54 +01006582 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
Diana Picusf95a5fb2023-01-09 11:59:00 +01006583 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
Matt Arsenault211e89d2019-01-27 00:52:51 +00006584 MI.eraseFromParent();
6585 return Legalized;
6586}
6587
Matt Arsenault1cf713662019-02-12 14:54:52 +00006588LegalizerHelper::LegalizeResult
Matt Arsenault83a25a12021-03-26 17:29:36 -04006589LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
6590 LLT NarrowTy) {
6591 if (TypeIdx != 0)
6592 return UnableToLegalize;
6593
6594 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6595
6596 Register Src = MI.getOperand(1).getReg();
6597 LLT SrcTy = MRI.getType(Src);
6598
6599 // If all finite floats fit into the narrowed integer type, we can just swap
6600 // out the result type. This is practically only useful for conversions from
6601 // half to at least 16-bits, so just handle the one case.
6602 if (SrcTy.getScalarType() != LLT::scalar(16) ||
Simon Pilgrimbc980762021-04-20 17:19:15 +01006603 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
Matt Arsenault83a25a12021-03-26 17:29:36 -04006604 return UnableToLegalize;
6605
6606 Observer.changingInstr(MI);
6607 narrowScalarDst(MI, NarrowTy, 0,
6608 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6609 Observer.changedInstr(MI);
6610 return Legalized;
6611}
6612
6613LegalizerHelper::LegalizeResult
Matt Arsenault1cf713662019-02-12 14:54:52 +00006614LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
6615 LLT NarrowTy) {
6616 if (TypeIdx != 1)
6617 return UnableToLegalize;
6618
6619 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6620
6621 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6622 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6623 // NarrowSize.
6624 if (SizeOp1 % NarrowSize != 0)
6625 return UnableToLegalize;
6626 int NumParts = SizeOp1 / NarrowSize;
6627
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00006628 SmallVector<Register, 2> SrcRegs, DstRegs;
Matt Arsenault1cf713662019-02-12 14:54:52 +00006629 SmallVector<uint64_t, 2> Indexes;
chuongg3fcfe1b62024-01-15 16:40:39 +00006630 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6631 MIRBuilder, MRI);
Matt Arsenault1cf713662019-02-12 14:54:52 +00006632
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006633 Register OpReg = MI.getOperand(0).getReg();
Matt Arsenault1cf713662019-02-12 14:54:52 +00006634 uint64_t OpStart = MI.getOperand(2).getImm();
6635 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6636 for (int i = 0; i < NumParts; ++i) {
6637 unsigned SrcStart = i * NarrowSize;
6638
6639 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6640 // No part of the extract uses this subregister, ignore it.
6641 continue;
6642 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6643 // The entire subregister is extracted, forward the value.
6644 DstRegs.push_back(SrcRegs[i]);
6645 continue;
6646 }
6647
6648 // OpSegStart is where this destination segment would start in OpReg if it
6649 // extended infinitely in both directions.
6650 int64_t ExtractOffset;
6651 uint64_t SegSize;
6652 if (OpStart < SrcStart) {
6653 ExtractOffset = 0;
6654 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6655 } else {
6656 ExtractOffset = OpStart - SrcStart;
6657 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6658 }
6659
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006660 Register SegReg = SrcRegs[i];
Matt Arsenault1cf713662019-02-12 14:54:52 +00006661 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6662 // A genuine extract is needed.
6663 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6664 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
6665 }
6666
6667 DstRegs.push_back(SegReg);
6668 }
6669
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006670 Register DstReg = MI.getOperand(0).getReg();
Dominik Montada6b966232020-03-12 09:03:08 +01006671 if (MRI.getType(DstReg).isVector())
Matt Arsenault1cf713662019-02-12 14:54:52 +00006672 MIRBuilder.buildBuildVector(DstReg, DstRegs);
Dominik Montada6b966232020-03-12 09:03:08 +01006673 else if (DstRegs.size() > 1)
Diana Picusf95a5fb2023-01-09 11:59:00 +01006674 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
Dominik Montada6b966232020-03-12 09:03:08 +01006675 else
6676 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
Matt Arsenault1cf713662019-02-12 14:54:52 +00006677 MI.eraseFromParent();
6678 return Legalized;
6679}
6680
6681LegalizerHelper::LegalizeResult
6682LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
6683 LLT NarrowTy) {
6684 // FIXME: Don't know how to handle secondary types yet.
6685 if (TypeIdx != 0)
6686 return UnableToLegalize;
6687
Justin Bogner2a7e7592021-03-02 09:49:15 -08006688 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
Matt Arsenault1cf713662019-02-12 14:54:52 +00006689 SmallVector<uint64_t, 2> Indexes;
Justin Bogner2a7e7592021-03-02 09:49:15 -08006690 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6691 LLT LeftoverTy;
6692 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
chuongg3fcfe1b62024-01-15 16:40:39 +00006693 LeftoverRegs, MIRBuilder, MRI);
Matt Arsenault1cf713662019-02-12 14:54:52 +00006694
Craig Toppere3284d82024-12-10 07:18:20 -08006695 SrcRegs.append(LeftoverRegs);
Justin Bogner2a7e7592021-03-02 09:49:15 -08006696
6697 uint64_t NarrowSize = NarrowTy.getSizeInBits();
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006698 Register OpReg = MI.getOperand(2).getReg();
Matt Arsenault1cf713662019-02-12 14:54:52 +00006699 uint64_t OpStart = MI.getOperand(3).getImm();
6700 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
Justin Bogner2a7e7592021-03-02 09:49:15 -08006701 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6702 unsigned DstStart = I * NarrowSize;
Matt Arsenault1cf713662019-02-12 14:54:52 +00006703
Justin Bogner2a7e7592021-03-02 09:49:15 -08006704 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
Matt Arsenault1cf713662019-02-12 14:54:52 +00006705 // The entire subregister is defined by this insert, forward the new
6706 // value.
6707 DstRegs.push_back(OpReg);
6708 continue;
6709 }
6710
Justin Bogner2a7e7592021-03-02 09:49:15 -08006711 Register SrcReg = SrcRegs[I];
6712 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6713 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6714 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6715 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6716 }
6717
6718 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6719 // No part of the insert affects this subregister, forward the original.
6720 DstRegs.push_back(SrcReg);
6721 continue;
6722 }
6723
Matt Arsenault1cf713662019-02-12 14:54:52 +00006724 // OpSegStart is where this destination segment would start in OpReg if it
6725 // extended infinitely in both directions.
6726 int64_t ExtractOffset, InsertOffset;
6727 uint64_t SegSize;
6728 if (OpStart < DstStart) {
6729 InsertOffset = 0;
6730 ExtractOffset = DstStart - OpStart;
6731 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6732 } else {
6733 InsertOffset = OpStart - DstStart;
6734 ExtractOffset = 0;
6735 SegSize =
6736 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6737 }
6738
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006739 Register SegReg = OpReg;
Matt Arsenault1cf713662019-02-12 14:54:52 +00006740 if (ExtractOffset != 0 || SegSize != OpSize) {
6741 // A genuine extract is needed.
6742 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6743 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6744 }
6745
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006746 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
Justin Bogner2a7e7592021-03-02 09:49:15 -08006747 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
Matt Arsenault1cf713662019-02-12 14:54:52 +00006748 DstRegs.push_back(DstReg);
6749 }
6750
Justin Bogner2a7e7592021-03-02 09:49:15 -08006751 uint64_t WideSize = DstRegs.size() * NarrowSize;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006752 Register DstReg = MI.getOperand(0).getReg();
Justin Bogner2a7e7592021-03-02 09:49:15 -08006753 if (WideSize > RegTy.getSizeInBits()) {
6754 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
Diana Picusf95a5fb2023-01-09 11:59:00 +01006755 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
Justin Bogner2a7e7592021-03-02 09:49:15 -08006756 MIRBuilder.buildTrunc(DstReg, MergeReg);
6757 } else
Diana Picusf95a5fb2023-01-09 11:59:00 +01006758 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
Justin Bogner2a7e7592021-03-02 09:49:15 -08006759
Matt Arsenault1cf713662019-02-12 14:54:52 +00006760 MI.eraseFromParent();
6761 return Legalized;
6762}
6763
Matt Arsenault211e89d2019-01-27 00:52:51 +00006764LegalizerHelper::LegalizeResult
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006765LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6766 LLT NarrowTy) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006767 Register DstReg = MI.getOperand(0).getReg();
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006768 LLT DstTy = MRI.getType(DstReg);
6769
6770 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6771
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00006772 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6773 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6774 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006775 LLT LeftoverTy;
6776 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
chuongg3fcfe1b62024-01-15 16:40:39 +00006777 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006778 return UnableToLegalize;
6779
6780 LLT Unused;
6781 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
chuongg3fcfe1b62024-01-15 16:40:39 +00006782 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006783 llvm_unreachable("inconsistent extractParts result");
6784
6785 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6786 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6787 {Src0Regs[I], Src1Regs[I]});
Jay Foadb482e1b2020-01-23 11:51:35 +00006788 DstRegs.push_back(Inst.getReg(0));
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006789 }
6790
6791 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6792 auto Inst = MIRBuilder.buildInstr(
6793 MI.getOpcode(),
6794 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
Jay Foadb482e1b2020-01-23 11:51:35 +00006795 DstLeftoverRegs.push_back(Inst.getReg(0));
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006796 }
6797
6798 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6799 LeftoverTy, DstLeftoverRegs);
6800
6801 MI.eraseFromParent();
6802 return Legalized;
6803}
6804
6805LegalizerHelper::LegalizeResult
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05006806LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6807 LLT NarrowTy) {
6808 if (TypeIdx != 0)
6809 return UnableToLegalize;
6810
Amara Emerson719024a2023-02-23 16:35:39 -08006811 auto [DstReg, SrcReg] = MI.getFirst2Regs();
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05006812
Matt Arsenaulta66d2812020-01-10 10:41:29 -05006813 LLT DstTy = MRI.getType(DstReg);
6814 if (DstTy.isVector())
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05006815 return UnableToLegalize;
6816
Matt Arsenaulta66d2812020-01-10 10:41:29 -05006817 SmallVector<Register, 8> Parts;
6818 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
Matt Arsenaultcd7650c2020-01-11 19:05:06 -05006819 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6820 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6821
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05006822 MI.eraseFromParent();
6823 return Legalized;
6824}
6825
6826LegalizerHelper::LegalizeResult
Matt Arsenault81511e52019-02-05 00:13:44 +00006827LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6828 LLT NarrowTy) {
6829 if (TypeIdx != 0)
6830 return UnableToLegalize;
6831
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006832 Register CondReg = MI.getOperand(1).getReg();
Matt Arsenault81511e52019-02-05 00:13:44 +00006833 LLT CondTy = MRI.getType(CondReg);
6834 if (CondTy.isVector()) // TODO: Handle vselect
6835 return UnableToLegalize;
6836
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006837 Register DstReg = MI.getOperand(0).getReg();
Matt Arsenault81511e52019-02-05 00:13:44 +00006838 LLT DstTy = MRI.getType(DstReg);
6839
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00006840 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6841 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6842 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
Matt Arsenault81511e52019-02-05 00:13:44 +00006843 LLT LeftoverTy;
6844 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
chuongg3fcfe1b62024-01-15 16:40:39 +00006845 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
Matt Arsenault81511e52019-02-05 00:13:44 +00006846 return UnableToLegalize;
6847
6848 LLT Unused;
6849 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
chuongg3fcfe1b62024-01-15 16:40:39 +00006850 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
Matt Arsenault81511e52019-02-05 00:13:44 +00006851 llvm_unreachable("inconsistent extractParts result");
6852
6853 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6854 auto Select = MIRBuilder.buildSelect(NarrowTy,
6855 CondReg, Src1Regs[I], Src2Regs[I]);
Jay Foadb482e1b2020-01-23 11:51:35 +00006856 DstRegs.push_back(Select.getReg(0));
Matt Arsenault81511e52019-02-05 00:13:44 +00006857 }
6858
6859 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6860 auto Select = MIRBuilder.buildSelect(
6861 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
Jay Foadb482e1b2020-01-23 11:51:35 +00006862 DstLeftoverRegs.push_back(Select.getReg(0));
Matt Arsenault81511e52019-02-05 00:13:44 +00006863 }
6864
6865 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6866 LeftoverTy, DstLeftoverRegs);
6867
6868 MI.eraseFromParent();
6869 return Legalized;
6870}
6871
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  // Narrow the source of G_CTLZ / G_CTLZ_ZERO_UNDEF. Only the case where the
  // source is exactly twice the narrow size (a Hi:Lo pair) is handled.
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    // Split the source into its low (reg 0) and high (reg 1) halves.
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    // For Lo, keep the zero-is-undef behavior of the original opcode: on the
    // Hi == 0 path Lo may still be zero for plain G_CTLZ.
    auto LoCTLZ = IsUndef ?
      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    // The Hi path is only selected when Hi != 0, so zero-undef is safe here.
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
6904
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  // Narrow the source of G_CTTZ / G_CTTZ_ZERO_UNDEF. Only the case where the
  // source is exactly twice the narrow size (a Hi:Lo pair) is handled.
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    // Split the source into its low (reg 0) and high (reg 1) halves.
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    // For Hi, keep the zero-is-undef behavior of the original opcode: on the
    // Lo == 0 path Hi may still be zero for plain G_CTTZ.
    auto HiCTTZ = IsUndef ?
      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    // The Lo path is only selected when Lo != 0, so zero-undef is safe here.
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
6937
6938LegalizerHelper::LegalizeResult
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01006939LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6940 LLT NarrowTy) {
6941 if (TypeIdx != 1)
6942 return UnableToLegalize;
6943
Amara Emerson719024a2023-02-23 16:35:39 -08006944 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01006945 unsigned NarrowSize = NarrowTy.getSizeInBits();
6946
6947 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6948 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6949
Matt Arsenault3b198512020-02-06 22:29:23 -05006950 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6951 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
Jon Roelofsf2e8e462021-07-26 16:42:20 -07006952 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01006953
6954 MI.eraseFromParent();
6955 return Legalized;
6956 }
6957
6958 return UnableToLegalize;
6959}
6960
6961LegalizerHelper::LegalizeResult
Matt Arsenaulteece6ba2023-04-26 22:02:42 -04006962LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6963 LLT NarrowTy) {
6964 if (TypeIdx != 1)
6965 return UnableToLegalize;
6966
6967 MachineIRBuilder &B = MIRBuilder;
6968 Register ExpReg = MI.getOperand(2).getReg();
6969 LLT ExpTy = MRI.getType(ExpReg);
6970
6971 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6972
6973 // Clamp the exponent to the range of the target type.
6974 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6975 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6976 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6977 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6978
6979 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6980 Observer.changingInstr(MI);
6981 MI.getOperand(2).setReg(Trunc.getReg(0));
6982 Observer.changedInstr(MI);
6983 return Legalized;
6984}
6985
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  // Lower bit-counting opcodes (CTLZ/CTTZ and their zero-undef variants, and
  // CTPOP) into sequences of simpler generic instructions.
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  // An opcode is usable as a lowering target if the legalizer can handle it
  // one way or another (directly legal, via libcall, or custom-legalized).
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    // The or/shift cascade smears the highest set bit into every lower bit,
    // so the popcount of the result is the number of non-leading-zero bits.
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    // ~x & (x - 1) turns the trailing zeros of x into ones and clears every
    // other bit, so counting them yields cttz(x).
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // Otherwise reuse this instruction as a CTPOP of the transformed value.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);

    // Multiplication may itself be illegal; accept it if it is legal, can be
    // widened, or is custom-legalized.
    auto IsMulSupported = [this](const LLT Ty) {
      auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
      return Action == Legal || Action == WidenScalar || Action == Custom;
    };
    if (IsMulSupported(Ty)) {
      auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
    } else {
      // No usable multiply: accumulate the per-byte counts into the top byte
      // with a doubling shift-and-add cascade instead.
      auto ResTmp = B8Count;
      for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
        auto ShiftC = B.buildConstant(Ty, Shift);
        auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
        ResTmp = B.buildAdd(Ty, ResTmp, Shl);
      }
      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  }
}
7157}
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007158
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007159// Check that (every element of) Reg is undef or not an exact multiple of BW.
7160static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7161 Register Reg, unsigned BW) {
7162 return matchUnaryPredicate(
7163 MRI, Reg,
7164 [=](const Constant *C) {
7165 // Null constant here means an undef.
7166 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7167 return !CI || CI->getValue().urem(BW) != 0;
7168 },
7169 /*AllowUndefs*/ true);
7170}
7171
7172LegalizerHelper::LegalizeResult
7173LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007174 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007175 LLT Ty = MRI.getType(Dst);
7176 LLT ShTy = MRI.getType(Z);
7177
7178 unsigned BW = Ty.getScalarSizeInBits();
Matt Arsenault14b03b42021-03-29 17:26:49 -04007179
7180 if (!isPowerOf2_32(BW))
7181 return UnableToLegalize;
7182
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007183 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7184 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7185
7186 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7187 // fshl X, Y, Z -> fshr X, Y, -Z
7188 // fshr X, Y, Z -> fshl X, Y, -Z
7189 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7190 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7191 } else {
7192 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7193 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7194 auto One = MIRBuilder.buildConstant(ShTy, 1);
7195 if (IsFSHL) {
7196 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7197 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7198 } else {
7199 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7200 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7201 }
7202
7203 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7204 }
7205
7206 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7207 MI.eraseFromParent();
7208 return Legalized;
7209}
7210
/// Lower G_FSHL/G_FSHR into plain shift and OR arithmetic:
///   fshl X, Y, Z: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
///   fshr X, Y, Z: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
/// taking care never to emit a shift by the full bit width BW.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  // The two shifted halves that get OR'd into Dst.
  Register ShX, ShY;
  // Z % BW and the complementary amount for the other half.
  Register ShAmt, InvShAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // Z % BW is known non-zero, so BW - (Z % BW) < BW and a single shift per
    // side is safe.
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // Z % BW may be zero; split the "inverse" side into a shift by one plus a
    // shift by (BW - 1 - (Z % BW)) so no single shift ever reaches BW.
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      // Non-power-of-2 width: compute the remainder explicitly.
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  MIRBuilder.buildOr(Dst, ShX, ShY);
  MI.eraseFromParent();
  return Legalized;
}
7265
7266LegalizerHelper::LegalizeResult
7267LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7268 // These operations approximately do the following (while avoiding undefined
7269 // shifts by BW):
7270 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7271 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7272 Register Dst = MI.getOperand(0).getReg();
7273 LLT Ty = MRI.getType(Dst);
7274 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7275
7276 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7277 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
Matt Arsenault14b03b42021-03-29 17:26:49 -04007278
7279 // TODO: Use smarter heuristic that accounts for vector legalization.
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007280 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7281 return lowerFunnelShiftAsShifts(MI);
Matt Arsenault14b03b42021-03-29 17:26:49 -04007282
7283 // This only works for powers of 2, fallback to shifts if it fails.
7284 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7285 if (Result == UnableToLegalize)
7286 return lowerFunnelShiftAsShifts(MI);
7287 return Result;
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007288}
7289
Tuan Chuong Goha40c9842023-08-17 16:31:54 +01007290LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7291 auto [Dst, Src] = MI.getFirst2Regs();
7292 LLT DstTy = MRI.getType(Dst);
7293 LLT SrcTy = MRI.getType(Src);
7294
7295 uint32_t DstTySize = DstTy.getSizeInBits();
7296 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7297 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7298
7299 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7300 !isPowerOf2_32(SrcTyScalarSize))
7301 return UnableToLegalize;
7302
7303 // The step between extend is too large, split it by creating an intermediate
7304 // extend instruction
7305 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7306 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7307 // If the destination type is illegal, split it into multiple statements
7308 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7309 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7310 // Unmerge the vector
7311 LLT EltTy = MidTy.changeElementCount(
7312 MidTy.getElementCount().divideCoefficientBy(2));
7313 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7314
7315 // ZExt the vectors
7316 LLT ZExtResTy = DstTy.changeElementCount(
7317 DstTy.getElementCount().divideCoefficientBy(2));
7318 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7319 {UnmergeSrc.getReg(0)});
7320 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7321 {UnmergeSrc.getReg(1)});
7322
7323 // Merge the ending vectors
7324 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7325
7326 MI.eraseFromParent();
7327 return Legalized;
7328 }
7329 return UnableToLegalize;
7330}
7331
chuongg3d88d9832023-10-11 16:05:25 +01007332LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7333 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7334 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7335 // Similar to how operand splitting is done in SelectiondDAG, we can handle
7336 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7337 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7338 // %lo16(<4 x s16>) = G_TRUNC %inlo
7339 // %hi16(<4 x s16>) = G_TRUNC %inhi
7340 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7341 // %res(<8 x s8>) = G_TRUNC %in16
7342
7343 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7344
7345 Register DstReg = MI.getOperand(0).getReg();
7346 Register SrcReg = MI.getOperand(1).getReg();
7347 LLT DstTy = MRI.getType(DstReg);
7348 LLT SrcTy = MRI.getType(SrcReg);
7349
7350 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7351 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7352 isPowerOf2_32(SrcTy.getNumElements()) &&
7353 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7354 // Split input type.
7355 LLT SplitSrcTy = SrcTy.changeElementCount(
7356 SrcTy.getElementCount().divideCoefficientBy(2));
7357
7358 // First, split the source into two smaller vectors.
7359 SmallVector<Register, 2> SplitSrcs;
chuongg3fcfe1b62024-01-15 16:40:39 +00007360 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
chuongg3d88d9832023-10-11 16:05:25 +01007361
7362 // Truncate the splits into intermediate narrower elements.
7363 LLT InterTy;
7364 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7365 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7366 else
7367 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7368 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
7369 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
7370 }
7371
7372 // Combine the new truncates into one vector
7373 auto Merge = MIRBuilder.buildMergeLikeInstr(
7374 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7375
7376 // Truncate the new vector to the final result type
7377 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7378 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7379 else
7380 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7381
7382 MI.eraseFromParent();
7383
7384 return Legalized;
7385 }
7386 return UnableToLegalize;
7387}
7388
Amara Emersonf5e9be62021-03-26 15:27:15 -07007389LegalizerHelper::LegalizeResult
7390LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007391 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
Amara Emersonf5e9be62021-03-26 15:27:15 -07007392 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7393 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7394 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7395 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7396 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7397 MI.eraseFromParent();
7398 return Legalized;
7399}
7400
/// Lower G_ROTL/G_ROTR, preferring (in order): the reverse rotate, a funnel
/// shift, and finally a plain shift/OR expansion.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();

  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;

  MIRBuilder.setInstrAndDebugLoc(MI);

  // If a rotate in the other direction is supported, use it.
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
      isPowerOf2_32(EltSizeInBits))
    return lowerRotateWithReverseRotate(MI);

  // If a funnel shift is supported, use it: a rotate is a funnel shift with
  // both data operands equal, i.e. rot(x, z) == fsh(x, x, z).
  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  bool IsFShLegal = false;
  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    // Build fsh(Src, Src, Amt) and erase the rotate.
    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
                                Register R3) {
      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
      MI.eraseFromParent();
      return Legalized;
    };
    // If a funnel shift in the other direction is supported, use it.
    if (IsFShLegal) {
      return buildFunnelShift(FShOpc, Dst, Src, Amt);
    } else if (isPowerOf2_32(EltSizeInBits)) {
      // Only the reverse funnel shift is available: negate the amount (valid
      // only for power-of-2 widths).
      Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
      return buildFunnelShift(RevFsh, Dst, Src, Amt);
    }
  }

  // Fall back to combining two shifts with an OR.
  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
  Register ShVal;
  Register RevShiftVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    // The extra shift by one keeps every shift amount strictly below w.
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto One = MIRBuilder.buildConstant(AmtTy, 1);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
  }
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
  return Legalized;
}
7467
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007468// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7469// representation.
7470LegalizerHelper::LegalizeResult
7471LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007472 auto [Dst, Src] = MI.getFirst2Regs();
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007473 const LLT S64 = LLT::scalar(64);
7474 const LLT S32 = LLT::scalar(32);
7475 const LLT S1 = LLT::scalar(1);
7476
7477 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7478
7479 // unsigned cul2f(ulong u) {
7480 // uint lz = clz(u);
7481 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7482 // u = (u << lz) & 0x7fffffffffffffffUL;
7483 // ulong t = u & 0xffffffffffUL;
7484 // uint v = (e << 23) | (uint)(u >> 40);
7485 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
7486 // return as_float(v + r);
7487 // }
7488
7489 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
7490 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
7491
7492 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
7493
7494 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
7495 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
7496
7497 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
7498 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
7499
7500 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
7501 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
7502
7503 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
7504
7505 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
7506 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
7507
7508 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
7509 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
7510 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
7511
7512 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
7513 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
7514 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
7515 auto One = MIRBuilder.buildConstant(S32, 1);
7516
7517 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
7518 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
7519 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
7520 MIRBuilder.buildAdd(Dst, V, R);
7521
Matt Arsenault350ee7fb2020-06-12 10:20:07 -04007522 MI.eraseFromParent();
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007523 return Legalized;
7524}
7525
Evgenii Kudriashove9cb4402024-09-25 17:15:36 +03007526// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
7527// operations and G_SITOFP
7528LegalizerHelper::LegalizeResult
7529LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
7530 auto [Dst, Src] = MI.getFirst2Regs();
7531 const LLT S64 = LLT::scalar(64);
7532 const LLT S32 = LLT::scalar(32);
7533 const LLT S1 = LLT::scalar(1);
7534
7535 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7536
7537 // For i64 < INT_MAX we simply reuse SITOFP.
7538 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
7539 // saved before division, convert to float by SITOFP, multiply the result
7540 // by 2.
7541 auto One = MIRBuilder.buildConstant(S64, 1);
7542 auto Zero = MIRBuilder.buildConstant(S64, 0);
7543 // Result if Src < INT_MAX
7544 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
7545 // Result if Src >= INT_MAX
7546 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
7547 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
7548 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
7549 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
7550 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
7551 // Check if the original value is larger than INT_MAX by comparing with
7552 // zero to pick one of the two conversions.
7553 auto IsLarge =
7554 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
7555 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
7556
7557 MI.eraseFromParent();
7558 return Legalized;
7559}
7560
// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
// IEEE double representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

  // We create double value from 32 bit parts with 32 exponent difference.
  // Note that + and - are float operations that adjust the implicit leading
  // one, the bases 2^52 and 2^84 are for illustrative purposes.
  //
  // X = 2^52 * 1.0...LowBits
  // Y = 2^84 * 1.0...HighBits
  // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
  //         = - 2^52 * 1.0...HighBits
  // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
  auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
  auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
  auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
  auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
  auto HalfWidth = MIRBuilder.buildConstant(S64, 32);

  // Bit-pack the low and high 32-bit halves of Src into the mantissas of the
  // 2^52 and 2^84 double bit-patterns, then combine with FP arithmetic.
  auto LowBits = MIRBuilder.buildTrunc(S32, Src);
  LowBits = MIRBuilder.buildZExt(S64, LowBits);
  auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
  auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
  auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
  auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
  MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);

  MI.eraseFromParent();
  return Legalized;
}
7597
Matt Arsenaulta1282922020-07-15 11:10:54 -04007598LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007599 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007600
Matt Arsenaultbc276c62019-11-15 11:59:12 +05307601 if (SrcTy == LLT::scalar(1)) {
7602 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
7603 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7604 MIRBuilder.buildSelect(Dst, Src, True, False);
7605 MI.eraseFromParent();
7606 return Legalized;
7607 }
7608
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007609 if (SrcTy != LLT::scalar(64))
7610 return UnableToLegalize;
7611
Evgenii Kudriashove9cb4402024-09-25 17:15:36 +03007612 if (DstTy == LLT::scalar(32))
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007613 // TODO: SelectionDAG has several alternative expansions to port which may
Evgenii Kudriashove9cb4402024-09-25 17:15:36 +03007614 // be more reasonable depending on the available instructions. We also need
7615 // a more advanced mechanism to choose an optimal version depending on
7616 // target features such as sitofp or CTLZ availability.
7617 return lowerU64ToF32WithSITOFP(MI);
7618
7619 if (DstTy == LLT::scalar(64))
7620 return lowerU64ToF64BitFloatOps(MI);
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007621
7622 return UnableToLegalize;
7623}
7624
Matt Arsenaulta1282922020-07-15 11:10:54 -04007625LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007626 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007627
7628 const LLT S64 = LLT::scalar(64);
7629 const LLT S32 = LLT::scalar(32);
7630 const LLT S1 = LLT::scalar(1);
7631
Matt Arsenaultbc276c62019-11-15 11:59:12 +05307632 if (SrcTy == S1) {
7633 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
7634 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7635 MIRBuilder.buildSelect(Dst, Src, True, False);
7636 MI.eraseFromParent();
7637 return Legalized;
7638 }
7639
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007640 if (SrcTy != S64)
7641 return UnableToLegalize;
7642
7643 if (DstTy == S32) {
7644 // signed cl2f(long l) {
7645 // long s = l >> 63;
7646 // float r = cul2f((l + s) ^ s);
7647 // return s ? -r : r;
7648 // }
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00007649 Register L = Src;
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007650 auto SignBit = MIRBuilder.buildConstant(S64, 63);
7651 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
7652
7653 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
7654 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
7655 auto R = MIRBuilder.buildUITOFP(S32, Xor);
7656
7657 auto RNeg = MIRBuilder.buildFNeg(S32, R);
7658 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
7659 MIRBuilder.buildConstant(S64, 0));
7660 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
Matt Arsenault350ee7fb2020-06-12 10:20:07 -04007661 MI.eraseFromParent();
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007662 return Legalized;
7663 }
7664
7665 return UnableToLegalize;
7666}
Matt Arsenault6f74f552019-07-01 17:18:03 +00007667
Matt Arsenaulta1282922020-07-15 11:10:54 -04007668LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007669 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
Petar Avramovic6412b562019-08-30 05:44:02 +00007670 const LLT S64 = LLT::scalar(64);
7671 const LLT S32 = LLT::scalar(32);
7672
7673 if (SrcTy != S64 && SrcTy != S32)
7674 return UnableToLegalize;
7675 if (DstTy != S32 && DstTy != S64)
7676 return UnableToLegalize;
7677
7678 // FPTOSI gives same result as FPTOUI for positive signed integers.
7679 // FPTOUI needs to deal with fp values that convert to unsigned integers
7680 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
7681
7682 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
7683 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
7684 : APFloat::IEEEdouble(),
Chris Lattner735f4672021-09-08 22:13:13 -07007685 APInt::getZero(SrcTy.getSizeInBits()));
Petar Avramovic6412b562019-08-30 05:44:02 +00007686 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
7687
7688 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
7689
7690 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
7691 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
7692 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
7693 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
7694 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
7695 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
7696 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
7697
Matt Arsenault1060b9e2020-01-04 17:06:47 -05007698 const LLT S1 = LLT::scalar(1);
7699
Petar Avramovic6412b562019-08-30 05:44:02 +00007700 MachineInstrBuilder FCMP =
Matt Arsenault1060b9e2020-01-04 17:06:47 -05007701 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
Petar Avramovic6412b562019-08-30 05:44:02 +00007702 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
7703
7704 MI.eraseFromParent();
7705 return Legalized;
7706}
7707
Matt Arsenaultea956682020-01-04 17:09:48 -05007708LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007709 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
Matt Arsenaultea956682020-01-04 17:09:48 -05007710 const LLT S64 = LLT::scalar(64);
7711 const LLT S32 = LLT::scalar(32);
7712
7713 // FIXME: Only f32 to i64 conversions are supported.
7714 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
7715 return UnableToLegalize;
7716
7717 // Expand f32 -> i64 conversion
7718 // This algorithm comes from compiler-rt's implementation of fixsfdi:
xgupta94fac812021-02-01 12:54:21 +05307719 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
Matt Arsenaultea956682020-01-04 17:09:48 -05007720
7721 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
7722
7723 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
7724 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
7725
7726 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
7727 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
7728
7729 auto SignMask = MIRBuilder.buildConstant(SrcTy,
7730 APInt::getSignMask(SrcEltBits));
7731 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
7732 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
7733 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
7734 Sign = MIRBuilder.buildSExt(DstTy, Sign);
7735
7736 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
7737 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
7738 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
7739
7740 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
7741 R = MIRBuilder.buildZExt(DstTy, R);
7742
7743 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
7744 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
7745 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
7746 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
7747
7748 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
7749 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
7750
7751 const LLT S1 = LLT::scalar(1);
7752 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
7753 S1, Exponent, ExponentLoBit);
7754
7755 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
7756
7757 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7758 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7759
7760 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7761
7762 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7763 S1, Exponent, ZeroSrcTy);
7764
7765 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7766 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7767
7768 MI.eraseFromParent();
7769 return Legalized;
7770}
7771
// Lower G_FPTOSI_SAT / G_FPTOUI_SAT: a float-to-int conversion that
// saturates to the destination's min/max instead of being undefined for
// out-of-range inputs, and maps NaN to 0 (signed) / 0 (unsigned, via MinInt).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
  unsigned SatWidth = DstTy.getScalarSizeInBits();

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth);
    MaxInt = APInt::getMaxValue(SatWidth);
  }

  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
  APFloat MinFloat(Semantics);
  APFloat MaxFloat(Semantics);

  // Round towards zero so that an inexact conversion still yields a bound
  // that is safe to compare against.
  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  // If the integer bounds are exactly representable as floats, emit a
  // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
  // and selects.
  if (AreExactFloatBounds) {
    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    // (FCMP_ULT is true for NaN, so the select picks Src only when it is a
    // number >= MinFloat.)
    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
                                     SrcTy.changeElementSize(1), Src, MaxC);
    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
    // Clamp by MaxFloat from above. NaN cannot occur.
    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
    auto MinP =
        MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Max,
                             MinC, MachineInstr::FmNoNans);
    auto Min =
        MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
    // Convert clamped value to integer. In the unsigned case we're done,
    // because we mapped NaN to MinFloat, which will cast to zero.
    if (!IsSigned) {
      MIRBuilder.buildFPTOUI(Dst, Min);
      MI.eraseFromParent();
      return Legalized;
    }

    // Otherwise, select 0 if Src is NaN.
    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
    auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                       DstTy.changeElementSize(1), Src, Src);
    MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
                           FpToInt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
                          : MIRBuilder.buildFPTOUI(DstTy, Src);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  auto ULT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MinFloat));
  auto Max = MIRBuilder.buildSelect(
      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
  // If Src OGT MaxFloat, select MaxInt.
  auto OGT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MaxFloat));

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned) {
    MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
                           Max);
    MI.eraseFromParent();
    return Legalized;
  }

  // Otherwise, select 0 if Src is NaN.
  auto Min = MIRBuilder.buildSelect(
      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
  auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                     DstTy.changeElementSize(1), Src, Src);
  MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
  MI.eraseFromParent();
  return Legalized;
}
7871
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
//
// The expansion works on the two 32-bit halves of the f64 bit pattern and
// hand-builds the f16 result: extract exponent/mantissa, rebias the exponent,
// handle denormal and inf/NaN cases, and round to nearest-even based on the
// low bits that get shifted out.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);

  auto [Dst, Src] = MI.getFirst2Regs();
  assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
         MRI.getType(Src).getScalarType() == LLT::scalar(64));

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  // With unsafe math, just go through f32 with two fptruncs (double rounding
  // is accepted in this mode).
  if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
    unsigned Flags = MI.getFlags();
    auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
    MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
    MI.eraseFromParent();
    return Legalized;
  }

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;

  // U = low 32 bits, UH = high 32 bits of the f64 bit pattern.
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // E = f64 biased exponent (bits [62:52], i.e. bits [30:20] of UH).
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  // M = top mantissa bits, pre-shifted so two rounding bits are kept below
  // the f16 mantissa position.
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // MaskedSig = the mantissa bits that are dropped entirely; used as a
  // sticky bit for rounding.
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // I = the inf/NaN result pattern:
  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = normal-number candidate:
  // N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = extra denormal shift amount:
  // B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // D = denormal candidate: mantissa with implicit leading one made
  // explicit, shifted right by B, with a sticky bit if anything was lost.
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Pick denormal (E < 1) or normal candidate.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest-even using the two low bits kept below the result:
  // add one when the discarded part is > half, or == half with odd LSB.
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // E > 30: overflow to infinity (0x7c00).
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 (= 0x7ff - 1023 + 15): source was inf/NaN, use pattern I.
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
7987
7988LegalizerHelper::LegalizeResult
Matt Arsenaulta1282922020-07-15 11:10:54 -04007989LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007990 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
Matt Arsenaultbfbfa182020-01-18 10:08:11 -05007991 const LLT S64 = LLT::scalar(64);
7992 const LLT S16 = LLT::scalar(16);
7993
7994 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7995 return lowerFPTRUNC_F64_TO_F16(MI);
7996
7997 return UnableToLegalize;
7998}
7999
Matt Arsenault7cd8a022020-07-17 11:01:15 -04008000LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008001 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
Matt Arsenault7cd8a022020-07-17 11:01:15 -04008002 LLT Ty = MRI.getType(Dst);
8003
8004 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8005 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8006 MI.eraseFromParent();
8007 return Legalized;
8008}
8009
Matt Arsenault6f74f552019-07-01 17:18:03 +00008010static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8011 switch (Opc) {
8012 case TargetOpcode::G_SMIN:
8013 return CmpInst::ICMP_SLT;
8014 case TargetOpcode::G_SMAX:
8015 return CmpInst::ICMP_SGT;
8016 case TargetOpcode::G_UMIN:
8017 return CmpInst::ICMP_ULT;
8018 case TargetOpcode::G_UMAX:
8019 return CmpInst::ICMP_UGT;
8020 default:
8021 llvm_unreachable("not in integer min/max");
8022 }
8023}
8024
Matt Arsenaulta1282922020-07-15 11:10:54 -04008025LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008026 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
Matt Arsenault6f74f552019-07-01 17:18:03 +00008027
8028 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8029 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8030
8031 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8032 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8033
8034 MI.eraseFromParent();
8035 return Legalized;
8036}
Matt Arsenaultb1843e12019-07-09 23:34:29 +00008037
// Lower a three-way compare (G_SCMP/G_UCMP): result is -1 if LHS < RHS,
// 0 if equal, +1 if LHS > RHS. Depending on the target's boolean contents,
// this is emitted either as two nested selects or as a subtraction of the
// two extended comparison results.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
  GSUCmp *Cmp = cast<GSUCmp>(&MI);

  Register Dst = Cmp->getReg(0);
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Cmp->getReg(1));
  LLT CmpTy = DstTy.changeElementSize(1);

  // Signed vs. unsigned variant only changes the compare predicates.
  CmpInst::Predicate LTPredicate = Cmp->isSigned()
                                       ? CmpInst::Predicate::ICMP_SLT
                                       : CmpInst::Predicate::ICMP_ULT;
  CmpInst::Predicate GTPredicate = Cmp->isSigned()
                                       ? CmpInst::Predicate::ICMP_SGT
                                       : CmpInst::Predicate::ICMP_UGT;

  auto Zero = MIRBuilder.buildConstant(DstTy, 0);
  auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
                                   Cmp->getRHSReg());
  auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
                                   Cmp->getRHSReg());

  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
  auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
  if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
      BC == TargetLowering::UndefinedBooleanContent) {
    // Select-based form: (LT ? -1 : (GT ? 1 : 0)).
    auto One = MIRBuilder.buildConstant(DstTy, 1);
    auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);

    auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
    MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
  } else {
    // Subtraction-based form: ext(GT) - ext(LT). With zero-or-negative-one
    // booleans the extended values are negated, so swap the operands.
    if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
      std::swap(IsGT, IsLT);
    // Extend boolean results to DstTy, which is at least i2, before subtracting
    // them.
    unsigned BoolExtOp =
        MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
    IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
    IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
    MIRBuilder.buildSub(Dst, IsGT, IsLT);
  }

  MI.eraseFromParent();
  return Legalized;
}
8084
// Lower G_FCOPYSIGN as integer bit operations: clear the sign bit of Src0,
// isolate the sign bit of Src1 (shifting it into position if the operand
// widths differ), and OR the two together.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  // Mask with only the sign bit set, in Src0's type.
  auto SignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getSignMask(Src0Size));

  // Mask with everything but the sign bit set.
  auto NotSignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  // And0 = magnitude of Src0; And1 = sign bit of Src1, in Src0's position.
  Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
  Register And1;
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    // Narrower sign source: widen, then shift its sign bit up to Src0's
    // sign position.
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
  } else {
    // Wider sign source: shift its sign bit down, then truncate.
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  unsigned Flags = MI.getFlags();

  // We masked the sign bit and the not-sign bit, so these are disjoint.
  Flags |= MachineInstr::Disjoint;

  MIRBuilder.buildOr(Dst, And0, And1, Flags);

  MI.eraseFromParent();
  return Legalized;
}
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00008126
8127LegalizerHelper::LegalizeResult
8128LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8129 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8130 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8131
Amara Emerson719024a2023-02-23 16:35:39 -08008132 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00008133 LLT Ty = MRI.getType(Dst);
8134
8135 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8136 // Insert canonicalizes if it's possible we need to quiet to get correct
8137 // sNaN behavior.
8138
8139 // Note this must be done here, and not as an optimization combine in the
8140 // absence of a dedicate quiet-snan instruction as we're using an
8141 // omni-purpose G_FCANONICALIZE.
8142 if (!isKnownNeverSNaN(Src0, MRI))
8143 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8144
8145 if (!isKnownNeverSNaN(Src1, MRI))
8146 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8147 }
8148
8149 // If there are no nans, it's safe to simply replace this with the non-IEEE
8150 // version.
8151 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8152 MI.eraseFromParent();
8153 return Legalized;
8154}
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008155
Matt Arsenault4d339182019-09-13 00:44:35 +00008156LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8157 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8158 Register DstReg = MI.getOperand(0).getReg();
8159 LLT Ty = MRI.getType(DstReg);
8160 unsigned Flags = MI.getFlags();
8161
8162 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8163 Flags);
8164 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8165 MI.eraseFromParent();
8166 return Legalized;
8167}
8168
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008169LegalizerHelper::LegalizeResult
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008170LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008171 auto [DstReg, X] = MI.getFirst2Regs();
Matt Arsenault19a03502020-03-14 14:52:48 -04008172 const unsigned Flags = MI.getFlags();
8173 const LLT Ty = MRI.getType(DstReg);
8174 const LLT CondTy = Ty.changeElementSize(1);
8175
8176 // round(x) =>
8177 // t = trunc(x);
8178 // d = fabs(x - t);
Matt Arsenault1328a852023-09-19 09:14:17 +03008179 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8180 // return t + o;
Matt Arsenault19a03502020-03-14 14:52:48 -04008181
8182 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8183
8184 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8185 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
Matt Arsenault1328a852023-09-19 09:14:17 +03008186
Matt Arsenault19a03502020-03-14 14:52:48 -04008187 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
Matt Arsenault1328a852023-09-19 09:14:17 +03008188 auto Cmp =
8189 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
Matt Arsenault19a03502020-03-14 14:52:48 -04008190
Matt Arsenault1328a852023-09-19 09:14:17 +03008191 // Could emit G_UITOFP instead
8192 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8193 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8194 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8195 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
Matt Arsenault19a03502020-03-14 14:52:48 -04008196
Matt Arsenault1328a852023-09-19 09:14:17 +03008197 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
Matt Arsenault19a03502020-03-14 14:52:48 -04008198
8199 MI.eraseFromParent();
8200 return Legalized;
8201}
8202
Amara Emerson719024a2023-02-23 16:35:39 -08008203LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
8204 auto [DstReg, SrcReg] = MI.getFirst2Regs();
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008205 unsigned Flags = MI.getFlags();
8206 LLT Ty = MRI.getType(DstReg);
8207 const LLT CondTy = Ty.changeElementSize(1);
8208
8209 // result = trunc(src);
8210 // if (src < 0.0 && src != result)
8211 // result += -1.0.
8212
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008213 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
Matt Arsenault19a03502020-03-14 14:52:48 -04008214 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008215
8216 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8217 SrcReg, Zero, Flags);
8218 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8219 SrcReg, Trunc, Flags);
8220 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8221 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8222
Matt Arsenault19a03502020-03-14 14:52:48 -04008223 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008224 MI.eraseFromParent();
8225 return Legalized;
8226}
8227
8228LegalizerHelper::LegalizeResult
Matt Arsenault69999602020-03-29 15:51:54 -04008229LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
8230 const unsigned NumOps = MI.getNumOperands();
Amara Emerson719024a2023-02-23 16:35:39 -08008231 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8232 unsigned PartSize = Src0Ty.getSizeInBits();
Matt Arsenault69999602020-03-29 15:51:54 -04008233
8234 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8235 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8236
8237 for (unsigned I = 2; I != NumOps; ++I) {
8238 const unsigned Offset = (I - 1) * PartSize;
8239
8240 Register SrcReg = MI.getOperand(I).getReg();
8241 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8242
8243 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8244 MRI.createGenericVirtualRegister(WideTy);
8245
8246 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8247 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8248 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8249 ResultReg = NextResult;
8250 }
8251
8252 if (DstTy.isPointer()) {
8253 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8254 DstTy.getAddressSpace())) {
8255 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8256 return UnableToLegalize;
8257 }
8258
8259 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8260 }
8261
8262 MI.eraseFromParent();
8263 return Legalized;
8264}
8265
8266LegalizerHelper::LegalizeResult
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008267LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
8268 const unsigned NumDst = MI.getNumOperands() - 1;
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008269 Register SrcReg = MI.getOperand(NumDst).getReg();
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008270 Register Dst0Reg = MI.getOperand(0).getReg();
8271 LLT DstTy = MRI.getType(Dst0Reg);
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008272 if (DstTy.isPointer())
8273 return UnableToLegalize; // TODO
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008274
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008275 SrcReg = coerceToScalar(SrcReg);
8276 if (!SrcReg)
8277 return UnableToLegalize;
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008278
8279 // Expand scalarizing unmerge as bitcast to integer and shift.
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008280 LLT IntTy = MRI.getType(SrcReg);
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008281
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008282 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008283
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008284 const unsigned DstSize = DstTy.getSizeInBits();
8285 unsigned Offset = DstSize;
8286 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8287 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8288 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8289 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008290 }
8291
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008292 MI.eraseFromParent();
8293 return Legalized;
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008294}
Matt Arsenault690645b2019-08-13 16:09:07 +00008295
/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
/// =>
/// %stack_temp = G_FRAME_INDEX
/// G_STORE %vec, %stack_temp
/// %idx = clamp(%idx, %vec.getNumElements())
/// %element_ptr = G_PTR_ADD %stack_temp, %idx
/// %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  // InsertVal stays invalid for the extract case; it doubles as the
  // "is this an insert?" flag below.
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  unsigned NumElts = VecTy.getNumElements();

  // Fast path for a constant, in-range index: split the vector into its
  // elements and rebuild/copy without going through the stack.
  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
    SmallVector<Register, 8> SrcRegs;
    extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);

    if (InsertVal) {
      SrcRegs[IdxVal] = MI.getOperand(2).getReg();
      MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
    } else {
      MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  // Spill the whole vector to a stack slot.
  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(
      TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Constant (but out-of-range-checked) index: we can keep precise
    // pointer info and alignment.
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
8377
// Lower G_SHUFFLE_VECTOR by extracting each selected element and rebuilding
// the result with G_BUILD_VECTOR. Negative mask entries become undef.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
      MI.getFirst3RegLLTs();
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  // Lazily-created undef element, shared by all undef mask entries.
  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getScalarType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      // Scalar sources: index 0 is Src0, anything else is Src1.
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Mask indices >= NumElts refer into the second source vector.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  // A scalar destination means a single-element shuffle; just copy it.
  if (DstTy.isScalar())
    MIRBuilder.buildCopy(DstReg, BuildVec[0]);
  else
    MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
Amara Emersone20b91c2019-08-27 19:54:27 +00008416
// Lower G_VECTOR_COMPRESS via a stack temporary: optionally pre-fill the
// slot with the passthru vector, then walk the source lane by lane, storing
// each element at the current output position and advancing the position
// only when the corresponding mask lane is set. The packed result is then
// reloaded from the stack.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) {
  auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
      MI.getFirst4RegLLTs();

  if (VecTy.isScalableVector())
    report_fatal_error("Cannot expand masked_compress for scalable vectors.");

  Align VecAlign = getStackTemporaryAlignment(VecTy);
  MachinePointerInfo PtrInfo;
  Register StackPtr =
      createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
                           PtrInfo)
          .getReg(0);
  MachinePointerInfo ValPtrInfo =
      MachinePointerInfo::getUnknownStack(*MI.getMF());

  LLT IdxTy = LLT::scalar(32);
  LLT ValTy = VecTy.getElementType();
  Align ValAlign = getStackTemporaryAlignment(ValTy);

  // Current write position within the stack slot.
  auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);

  // An implicit-def passthru means the tail lanes may be anything, which
  // lets us skip the passthru handling entirely.
  bool HasPassthru =
      MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;

  if (HasPassthru)
    MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);

  Register LastWriteVal;
  std::optional<APInt> PassthruSplatVal =
      isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);

  if (PassthruSplatVal.has_value()) {
    // Splat passthru: the tail filler value is a known constant.
    LastWriteVal =
        MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
  } else if (HasPassthru) {
    // General passthru: the first passthru lane past the packed elements is
    // at index popcount(Mask); load it from the pre-filled slot.
    auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
    Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
                                     {LLT::scalar(32)}, {Popcount});

    Register LastElmtPtr =
        getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
    LastWriteVal =
        MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
            .getReg(0);
  }

  unsigned NumElmts = VecTy.getNumElements();
  for (unsigned I = 0; I < NumElmts; ++I) {
    auto Idx = MIRBuilder.buildConstant(IdxTy, I);
    auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
    // Unconditionally store the lane; the store only "sticks" if OutPos is
    // advanced afterwards (i.e. the mask lane was set).
    Register ElmtPtr =
        getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
    MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);

    // Normalize the mask lane to an i1 and add it to the position.
    LLT MaskITy = MaskTy.getElementType();
    auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
    if (MaskITy.getSizeInBits() > 1)
      MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);

    MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
    OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);

    if (HasPassthru && I == NumElmts - 1) {
      // If every lane was selected, OutPos now points one past the slot;
      // clamp it and rewrite the last element so the final store (of the
      // tail filler) does not clobber anything incorrectly.
      auto EndOfVector =
          MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
      auto AllLanesSelected = MIRBuilder.buildICmp(
          CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
      OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
                                     {OutPos, EndOfVector});
      ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));

      LastWriteVal =
          MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
              .getReg(0);
      MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
    }
  }

  // TODO: Use StackPtr's FrameIndex alignment.
  MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);

  MI.eraseFromParent();
  return Legalized;
}
8503
Momchil Velikovc1140d42023-12-04 09:44:02 +00008504Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
8505 Register AllocSize,
8506 Align Alignment,
8507 LLT PtrTy) {
Amara Emersone20b91c2019-08-27 19:54:27 +00008508 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
8509
Amara Emersone20b91c2019-08-27 19:54:27 +00008510 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
8511 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
8512
8513 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
8514 // have to generate an extra instruction to negate the alloc and then use
Daniel Sanderse74c5b92019-11-01 13:18:00 -07008515 // G_PTR_ADD to add the negative offset.
Amara Emersone20b91c2019-08-27 19:54:27 +00008516 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
Guillaume Chatelet9f5c7862020-04-03 08:10:59 +00008517 if (Alignment > Align(1)) {
8518 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
Amara Emersone20b91c2019-08-27 19:54:27 +00008519 AlignMask.negate();
8520 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
8521 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
8522 }
8523
Momchil Velikovc1140d42023-12-04 09:44:02 +00008524 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
8525}
8526
8527LegalizerHelper::LegalizeResult
8528LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
8529 const auto &MF = *MI.getMF();
8530 const auto &TFI = *MF.getSubtarget().getFrameLowering();
8531 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
8532 return UnableToLegalize;
8533
8534 Register Dst = MI.getOperand(0).getReg();
8535 Register AllocSize = MI.getOperand(1).getReg();
8536 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
8537
8538 LLT PtrTy = MRI.getType(Dst);
8539 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
8540 Register SPTmp =
8541 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
8542
Amara Emersone20b91c2019-08-27 19:54:27 +00008543 MIRBuilder.buildCopy(SPReg, SPTmp);
8544 MIRBuilder.buildCopy(Dst, SPTmp);
8545
8546 MI.eraseFromParent();
8547 return Legalized;
8548}
Matt Arsenaulta5b9c752019-10-06 01:37:35 +00008549
8550LegalizerHelper::LegalizeResult
Matt Arsenault1ca08082023-07-29 19:12:24 -04008551LegalizerHelper::lowerStackSave(MachineInstr &MI) {
8552 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
8553 if (!StackPtr)
8554 return UnableToLegalize;
8555
8556 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
8557 MI.eraseFromParent();
8558 return Legalized;
8559}
8560
8561LegalizerHelper::LegalizeResult
8562LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
8563 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
8564 if (!StackPtr)
8565 return UnableToLegalize;
8566
8567 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
8568 MI.eraseFromParent();
8569 return Legalized;
8570}
8571
/// Lower G_EXTRACT. Two strategies:
///  - For a vector source where the extracted region is element-aligned and
///    a whole number of elements, unmerge the vector and re-merge just the
///    elements in range (also exposes the pieces to the artifact combiner).
///  - Otherwise, for a scalar result, treat the source as one wide integer
///    and shift/truncate the requested bit range out of it.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();

  // Extract sub-vector or one element
  if (SrcTy.isVector()) {
    unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
    unsigned DstSize = DstTy.getSizeInBits();

    // Only usable when the extract is element-aligned, element-sized and in
    // bounds.
    if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
        (Offset + DstSize <= SrcTy.getSizeInBits())) {
      // Unmerge and allow access to each Src element for the artifact combiner.
      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);

      // Take element(s) we need to extract and copy it (merge them).
      SmallVector<Register, 8> SubVectorElts;
      for (unsigned Idx = Offset / SrcEltSize;
           Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
        SubVectorElts.push_back(Unmerge.getReg(Idx));
      }
      // A single element is forwarded with a copy; several are merged into
      // Dst.
      if (SubVectorElts.size() == 1)
        MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
      else
        MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);

      MI.eraseFromParent();
      return Legalized;
    }
  }

  // Scalar path: bitcast a vector source to one wide integer if necessary,
  // then shift the range down and truncate.
  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
    }

    // At offset 0 the truncate alone extracts the low bits.
    if (Offset == 0)
      MIRBuilder.buildTrunc(DstReg, SrcReg);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(DstReg, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
Matt Arsenault4bcdcad2019-10-07 19:13:27 +00008626
/// Lower G_INSERT. Strategies, tried in order:
///  - Vector destination with an element-aligned, element-sized insert:
///    unmerge the destination, splice in the inserted elements and re-merge.
///  - Otherwise, do the insert with integer bit-twiddling: zero-extend the
///    inserted value, shift it to Offset, mask the corresponding hole out of
///    the original value and or the two together.
///  - Pointers in non-integral address spaces cannot be cast to integers, so
///    those cases are rejected.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
  uint64_t Offset = MI.getOperand(3).getImm();

  // Dst and Src of a G_INSERT share a type, so query it via Src.
  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Insert sub-vector or one element
  if (DstTy.isVector() && !InsertTy.isPointer()) {
    LLT EltTy = DstTy.getElementType();
    unsigned EltSize = EltTy.getSizeInBits();
    unsigned InsertSize = InsertTy.getSizeInBits();

    // Only usable when the insert is element-aligned, element-sized and in
    // bounds.
    if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
        (Offset + InsertSize <= DstTy.getSizeInBits())) {
      auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
      SmallVector<Register, 8> DstElts;
      unsigned Idx = 0;
      // Elements from Src before insert start Offset
      for (; Idx < Offset / EltSize; ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      // Replace elements in Src with elements from InsertSrc
      if (InsertTy.getSizeInBits() > EltSize) {
        // Multi-element insert source: unmerge it as well.
        auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
        for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
             ++Idx, ++i) {
          DstElts.push_back(UnmergeInsertSrc.getReg(i));
        }
      } else {
        // Single-element insert: use the register directly.
        DstElts.push_back(InsertSrc);
        ++Idx;
      }

      // Remaining elements from Src after insert
      for (; Idx < DstTy.getNumElements(); ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
      MI.eraseFromParent();
      return Legalized;
    }
  }

  // The bit-twiddling path below only handles scalars and whole-pointer
  // inserts into a matching vector element type.
  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  // Pointers in non-integral address spaces have no stable integer
  // representation, so the cast-based strategy below is unsound for them.
  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  // Work in an integer type as wide as Dst; cast pointer operands over.
  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  // Widen the inserted value and move it into position at Offset.
  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  // Mask that clears exactly the bit range being replaced.
  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008715
/// Lower G_SADDO/G_SSUBO by computing the plain add/sub and deriving the
/// overflow flag arithmetically: the result compares signed-less-than LHS
/// exactly when RHS is negative (add) / positive (sub), unless overflow
/// happened — so xor of the two conditions is the overflow bit.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
      MI.getFirst4RegLLTs();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = Dst0Ty;
  LLT BoolTy = Dst1Ty;

  // The arithmetic result is built in a cloned vreg and only copied to Dst0
  // at the end. NOTE(review): presumably this keeps the intermediate result
  // distinct from Dst0 for the compares below — confirm against the commit
  // that introduced the clone.
  Register NewDst0 = MRI.cloneVirtualRegister(Dst0);

  if (IsAdd)
    MIRBuilder.buildAdd(NewDst0, LHS, RHS);
  else
    MIRBuilder.buildSub(NewDst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  // Overflow iff the expected and observed orderings disagree.
  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);

  MIRBuilder.buildCopy(Dst0, NewDst0);
  MI.eraseFromParent();

  return Legalized;
}
Petar Avramovic94a24e72019-12-30 11:13:22 +01008754
/// Lower G_[SU]{ADD,SUB}SAT in terms of min/max: clamp RHS into the range
/// that makes the subsequent plain add/sub overflow-free (signed case), or
/// use the a + umin(~a, b) / a - umin(a, b) identities (unsigned case).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  // Decode signedness, direction and the underlying arithmetic opcode.
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    // Clamp RHS into [Lo, Hi]; the base op then cannot overflow.
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}
8829
/// Lower G_[SU]{ADD,SUB}SAT using the overflow-reporting G_[SU]{ADD,SUB}O:
/// compute {tmp, ov}, then select the saturation value instead of tmp when
/// the overflow flag is set.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  // Decode signedness, direction and the matching overflow opcode.
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // The sign of the (wrapped) result picks between INT_MIN and INT_MAX.
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
8895
/// Lower G_SSHLSAT/G_USHLSAT. Do the shift, then shift the result back in the
/// opposite direction: if the round trip fails to reproduce the original LHS,
/// bits were lost and the saturation value is selected instead (sign-picked
/// min/max for the signed form, all-ones for the unsigned form).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);

  unsigned BW = Ty.getScalarSizeInBits();
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  // Undo the shift; signed uses an arithmetic shift so the sign is restored.
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    // Saturate to INT_MIN when LHS is negative, INT_MAX otherwise.
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  // Overflow happened iff the reconstructed LHS differs from the original.
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);

  MI.eraseFromParent();
  return Legalized;
}
8927
Amara Emerson719024a2023-02-23 16:35:39 -08008928LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
8929 auto [Dst, Src] = MI.getFirst2Regs();
Petar Avramovic94a24e72019-12-30 11:13:22 +01008930 const LLT Ty = MRI.getType(Src);
Matt Arsenault2e773622020-02-14 11:51:57 -05008931 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
Petar Avramovic94a24e72019-12-30 11:13:22 +01008932 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8933
8934 // Swap most and least significant byte, set remaining bytes in Res to zero.
8935 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
8936 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
8937 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8938 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
8939
8940 // Set i-th high/low byte in Res to i-th low/high byte from Src.
8941 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8942 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
8943 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8944 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
8945 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
8946 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
8947 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
8948 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
8949 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
8950 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
8951 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8952 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
8953 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
8954 }
8955 Res.getInstr()->getOperand(0).setReg(Dst);
8956
8957 MI.eraseFromParent();
8958 return Legalized;
8959}
Petar Avramovic98f72a52019-12-30 18:06:29 +01008960
8961//{ (Src & Mask) >> N } | { (Src << N) & Mask }
8962static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
Yingwei Zheng6c1932f2024-03-23 14:57:35 +08008963 MachineInstrBuilder Src, const APInt &Mask) {
Petar Avramovic98f72a52019-12-30 18:06:29 +01008964 const LLT Ty = Dst.getLLTTy(*B.getMRI());
8965 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
8966 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
8967 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8968 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8969 return B.buildOr(Dst, LHS, RHS);
8970}
8971
/// Lower G_BITREVERSE. For types of at least 8 bits, bswap the value to
/// reverse byte order, then reverse bits within each byte with three
/// mask-and-shift stages (4-, 2- and 1-bit swaps). Smaller types are
/// reversed bit by bit.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getScalarSizeInBits();

  if (Size >= 8) {
    MachineInstrBuilder BSWAP =
        MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

    // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
    // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
    // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
    MachineInstrBuilder Swap4 =
        SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

    // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
    // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
    // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
    MachineInstrBuilder Swap2 =
        SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

    // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
    // 6|7
    // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
    // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
    SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
  } else {
    // Expand bitreverse for types smaller than 8 bits.
    MachineInstrBuilder Tmp;
    // For each mirrored pair of positions (I, J), shift Src so one bit of the
    // pair lands at position J, isolate it, and or everything together.
    for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
      MachineInstrBuilder Tmp2;
      if (I < J) {
        auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
        Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
      } else {
        auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
        Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
      }

      // Keep only the single bit that was moved into position J.
      auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
      Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
      // Accumulate the relocated bits.
      if (I == 0)
        Tmp = Tmp2;
      else
        Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
    }
    MIRBuilder.buildCopy(Dst, Tmp);
  }

  MI.eraseFromParent();
  return Legalized;
}
Matt Arsenault0ea3c722019-12-27 19:26:51 -05009025
9026LegalizerHelper::LegalizeResult
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05009027LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
Matt Arsenault0ea3c722019-12-27 19:26:51 -05009028 MachineFunction &MF = MIRBuilder.getMF();
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05009029
9030 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9031 int NameOpIdx = IsRead ? 1 : 0;
9032 int ValRegIndex = IsRead ? 0 : 1;
9033
9034 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9035 const LLT Ty = MRI.getType(ValReg);
9036 const MDString *RegStr = cast<MDString>(
9037 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9038
Matt Arsenaultadbcc8e2020-07-31 11:41:05 -04009039 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05009040 if (!PhysReg.isValid())
Matt Arsenault0ea3c722019-12-27 19:26:51 -05009041 return UnableToLegalize;
9042
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05009043 if (IsRead)
9044 MIRBuilder.buildCopy(ValReg, PhysReg);
9045 else
9046 MIRBuilder.buildCopy(PhysReg, ValReg);
9047
Matt Arsenault0ea3c722019-12-27 19:26:51 -05009048 MI.eraseFromParent();
9049 return Legalized;
9050}
Pushpinder Singh41d66692020-08-10 05:47:50 -04009051
9052LegalizerHelper::LegalizeResult
9053LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9054 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9055 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9056 Register Result = MI.getOperand(0).getReg();
9057 LLT OrigTy = MRI.getType(Result);
9058 auto SizeInBits = OrigTy.getScalarSizeInBits();
9059 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9060
9061 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9062 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9063 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9064 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9065
9066 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9067 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9068 MIRBuilder.buildTrunc(Result, Shifted);
9069
9070 MI.eraseFromParent();
9071 return Legalized;
9072}
Amara Emerson08232192020-09-26 10:02:39 -07009073
/// Lower G_IS_FPCLASS by testing the bit pattern of the source with integer
/// arithmetic. Each class (or class group) requested in the immediate mask
/// contributes one comparison, and the partial results are or'd together
/// into the destination.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());

  // Trivial masks: no classes / all classes requested.
  if (Mask == fcNone) {
    MIRBuilder.buildConstant(DstReg, 0);
    MI.eraseFromParent();
    return Legalized;
  }
  if (Mask == fcAllFlags) {
    MIRBuilder.buildConstant(DstReg, 1);
    MI.eraseFromParent();
    return Legalized;
  }

  // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
  // version

  unsigned BitSize = SrcTy.getScalarSizeInBits();
  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());

  // Reinterpret the FP value as a same-sized integer (vector of integers for
  // vector sources).
  LLT IntTy = LLT::scalar(BitSize);
  if (SrcTy.isVector())
    IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
  auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  APInt ExpMask = Inf;
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());

  auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
  auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
  auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
  auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
  auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);

  // Abs = value with the sign bit cleared; Sign = whether the sign bit was
  // set (the value differs from its abs).
  auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
  auto Sign =
      MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);

  auto Res = MIRBuilder.buildConstant(DstTy, 0);
  // Clang doesn't support capture of structured bindings:
  LLT DstTyCopy = DstTy;
  // Or one more partial class-check result into the accumulated answer.
  const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
    Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
  };

  // Tests that involve more than one class should be processed first.
  if ((Mask & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                     ExpMaskC));
    Mask &= ~fcFinite;
  } else if ((Mask & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
                                     ExpMaskC));
    Mask &= ~fcPosFinite;
  } else if ((Mask & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
    auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                    ExpMaskC);
    auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
    appendToRes(And);
    Mask &= ~fcNegFinite;
  }

  if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    // TODO: Handle inverted case
    if (PartialCheck == (fcZero | fcSubnormal)) {
      auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       ExpBits, ZeroC));
      Mask &= ~PartialCheck;
    }
  }

  // Check for individual classes.
  if (FPClassTest PartialCheck = Mask & fcZero) {
    if (PartialCheck == fcPosZero)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, ZeroC));
    else if (PartialCheck == fcZero)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
    else // fcNegZero
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, SignBitC));
  }

  if (FPClassTest PartialCheck = Mask & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
    auto SubnormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
    if (PartialCheck == fcNegSubnormal)
      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
    appendToRes(SubnormalRes);
  }

  if (FPClassTest PartialCheck = Mask & fcInf) {
    if (PartialCheck == fcPosInf)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, InfC));
    else if (PartialCheck == fcInf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
    else { // fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, NegInfC));
    }
  }

  if (FPClassTest PartialCheck = Mask & fcNan) {
    auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) u> int(inf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
                                       InfWithQnanBitC));
    } else { // fcSNan
      // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
      // abs(V) u< (unsigned(Inf) | quiet_bit)
      auto IsNan =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
      auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
                                            Abs, InfWithQnanBitC);
      appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
    }
  }

  if (FPClassTest PartialCheck = Mask & fcNormal) {
    // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
    // (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    auto ExpMinusOne = MIRBuilder.buildSub(
        IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
    APInt MaxExpMinusOne = ExpMask - ExpLSB;
    auto NormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
                             MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
    if (PartialCheck == fcNegNormal)
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
    else if (PartialCheck == fcPosNormal) {
      // Positive-only: and with the inverted sign test.
      auto PosSign = MIRBuilder.buildXor(
          DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
    }
    appendToRes(NormalRes);
  }

  MIRBuilder.buildCopy(DstReg, Res);
  MI.eraseFromParent();
  return Legalized;
}
9247
Amara Emerson08232192020-09-26 10:02:39 -07009248LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
Kai Nackef2d0bba2024-01-26 09:11:29 -05009249 // Implement G_SELECT in terms of XOR, AND, OR.
Amara Emerson719024a2023-02-23 16:35:39 -08009250 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9251 MI.getFirst4RegLLTs();
Amara Emerson08232192020-09-26 10:02:39 -07009252
Jay Foadd57515bd2024-02-13 08:21:35 +00009253 bool IsEltPtr = DstTy.isPointerOrPointerVector();
Amara Emersonf24f4692022-09-11 16:28:44 +01009254 if (IsEltPtr) {
9255 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9256 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9257 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9258 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9259 DstTy = NewTy;
9260 }
9261
Amara Emerson87ff1562020-11-17 12:09:31 -08009262 if (MaskTy.isScalar()) {
Kai Nackef2d0bba2024-01-26 09:11:29 -05009263 // Turn the scalar condition into a vector condition mask if needed.
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009264
Amara Emerson87ff1562020-11-17 12:09:31 -08009265 Register MaskElt = MaskReg;
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009266
9267 // The condition was potentially zero extended before, but we want a sign
9268 // extended boolean.
Amara Emerson78833a42022-09-20 00:21:55 +01009269 if (MaskTy != LLT::scalar(1))
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009270 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009271
9272 // Continue the sign extension (or truncate) to match the data type.
Kai Nackef2d0bba2024-01-26 09:11:29 -05009273 MaskElt =
9274 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009275
Kai Nackef2d0bba2024-01-26 09:11:29 -05009276 if (DstTy.isVector()) {
9277 // Generate a vector splat idiom.
9278 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9279 MaskReg = ShufSplat.getReg(0);
9280 } else {
9281 MaskReg = MaskElt;
9282 }
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009283 MaskTy = DstTy;
Kai Nackef2d0bba2024-01-26 09:11:29 -05009284 } else if (!DstTy.isVector()) {
9285 // Cannot handle the case that mask is a vector and dst is a scalar.
9286 return UnableToLegalize;
Amara Emerson87ff1562020-11-17 12:09:31 -08009287 }
9288
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009289 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
Amara Emerson08232192020-09-26 10:02:39 -07009290 return UnableToLegalize;
Amara Emerson87ff1562020-11-17 12:09:31 -08009291 }
Amara Emerson08232192020-09-26 10:02:39 -07009292
9293 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9294 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9295 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
Amara Emersonf24f4692022-09-11 16:28:44 +01009296 if (IsEltPtr) {
9297 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9298 MIRBuilder.buildIntToPtr(DstReg, Or);
9299 } else {
9300 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9301 }
Amara Emerson08232192020-09-26 10:02:39 -07009302 MI.eraseFromParent();
9303 return Legalized;
Kazu Hiratae3d3dbd332021-01-10 09:24:56 -08009304}
Christudasan Devadasan4c6ab482021-03-10 18:03:10 +05309305
9306LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9307 // Split DIVREM into individual instructions.
9308 unsigned Opcode = MI.getOpcode();
9309
9310 MIRBuilder.buildInstr(
9311 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9312 : TargetOpcode::G_UDIV,
9313 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9314 MIRBuilder.buildInstr(
9315 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9316 : TargetOpcode::G_UREM,
9317 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9318 MI.eraseFromParent();
9319 return Legalized;
9320}
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02009321
9322LegalizerHelper::LegalizeResult
9323LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
9324 // Expand %res = G_ABS %a into:
9325 // %v1 = G_ASHR %a, scalar_size-1
9326 // %v2 = G_ADD %a, %v1
9327 // %res = G_XOR %v2, %v1
9328 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9329 Register OpReg = MI.getOperand(1).getReg();
9330 auto ShiftAmt =
9331 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9332 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9333 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9334 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9335 MI.eraseFromParent();
9336 return Legalized;
9337}
9338
9339LegalizerHelper::LegalizeResult
9340LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
9341 // Expand %res = G_ABS %a into:
9342 // %v1 = G_CONSTANT 0
9343 // %v2 = G_SUB %v1, %a
9344 // %res = G_SMAX %a, %v2
9345 Register SrcReg = MI.getOperand(1).getReg();
9346 LLT Ty = MRI.getType(SrcReg);
Madhur Amilkanthwar7bb87d52024-03-21 09:54:03 +05309347 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9348 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9349 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9350 MI.eraseFromParent();
9351 return Legalized;
9352}
9353
9354LegalizerHelper::LegalizeResult
9355LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
9356 Register SrcReg = MI.getOperand(1).getReg();
9357 Register DestReg = MI.getOperand(0).getReg();
9358 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02009359 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9360 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
Madhur Amilkanthwar7bb87d52024-03-21 09:54:03 +05309361 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9362 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02009363 MI.eraseFromParent();
9364 return Legalized;
9365}
Jessica Paquette791006f2021-08-17 10:39:18 -07009366
Him1880748f422024-09-03 12:47:26 +01009367LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
9368 Register SrcReg = MI.getOperand(1).getReg();
9369 Register DstReg = MI.getOperand(0).getReg();
9370
9371 LLT Ty = MRI.getType(DstReg);
9372
9373 // Reset sign bit
9374 MIRBuilder.buildAnd(
9375 DstReg, SrcReg,
9376 MIRBuilder.buildConstant(
9377 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
9378
9379 MI.eraseFromParent();
9380 return Legalized;
9381}
9382
Amara Emerson95ac3d12021-08-18 00:19:58 -07009383LegalizerHelper::LegalizeResult
9384LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
9385 Register SrcReg = MI.getOperand(1).getReg();
9386 LLT SrcTy = MRI.getType(SrcReg);
9387 LLT DstTy = MRI.getType(SrcReg);
9388
9389 // The source could be a scalar if the IR type was <1 x sN>.
9390 if (SrcTy.isScalar()) {
9391 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
9392 return UnableToLegalize; // FIXME: handle extension.
9393 // This can be just a plain copy.
9394 Observer.changingInstr(MI);
9395 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
9396 Observer.changedInstr(MI);
9397 return Legalized;
9398 }
David Green28027392023-06-11 10:25:24 +01009399 return UnableToLegalize;
Amara Emerson95ac3d12021-08-18 00:19:58 -07009400}
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009401
Michael Maitland6f9cb9a72023-12-08 13:24:27 -05009402LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
9403 MachineFunction &MF = *MI.getMF();
9404 const DataLayout &DL = MIRBuilder.getDataLayout();
9405 LLVMContext &Ctx = MF.getFunction().getContext();
9406 Register ListPtr = MI.getOperand(1).getReg();
9407 LLT PtrTy = MRI.getType(ListPtr);
9408
9409 // LstPtr is a pointer to the head of the list. Get the address
9410 // of the head of the list.
9411 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
9412 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
9413 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
9414 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
9415
9416 const Align A(MI.getOperand(2).getImm());
9417 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
9418 if (A > TLI.getMinStackArgumentAlignment()) {
9419 Register AlignAmt =
9420 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
9421 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
9422 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
9423 VAList = AndDst.getReg(0);
9424 }
9425
9426 // Increment the pointer, VAList, to the next vaarg
9427 // The list should be bumped by the size of element in the current head of
9428 // list.
9429 Register Dst = MI.getOperand(0).getReg();
9430 LLT LLTTy = MRI.getType(Dst);
9431 Type *Ty = getTypeForLLT(LLTTy, Ctx);
9432 auto IncAmt =
9433 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
9434 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
9435
9436 // Store the increment VAList to the legalized pointer
9437 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
9438 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
9439 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
9440 // Load the actual argument out of the pointer VAList
9441 Align EltAlignment = DL.getABITypeAlign(Ty);
9442 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
9443 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
9444 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
9445
9446 MI.eraseFromParent();
9447 return Legalized;
9448}
9449
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009450static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
9451 // On Darwin, -Os means optimize for size without hurting performance, so
9452 // only really optimize for size when -Oz (MinSize) is used.
9453 if (MF.getTarget().getTargetTriple().isOSDarwin())
9454 return MF.getFunction().hasMinSize();
9455 return MF.getFunction().hasOptSize();
9456}
9457
// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
//
// Fills MemOps with at most Limit LLTs whose sizes sum to Op.size(); each
// entry becomes one load/store (or store for memset) in the expansion.
// Returns false when no lowering within Limit operations was found.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  // A fixed destination alignment paired with a smaller source alignment
  // cannot be handled here.
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick its preferred type first.
  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        // NOTE(review): this rebuilds the scalar with a bit width equal to
        // the previous *byte* count (s64 -> s8), which looks like a very
        // aggressive step-down; presumably a smaller power-of-two type was
        // intended — confirm against TargetLowering::findOptimalMemOpLowering.
        Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  // Greedily cover the remaining Size with the current type, stepping Ty
  // down to smaller types for the tail.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      // Step down to the next smaller power-of-two width.
      NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    // Bail out once the expansion would exceed the per-target budget.
    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}
9524
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009525// Get a vectorized representation of the memset value operand, GISel edition.
9526static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
9527 MachineRegisterInfo &MRI = *MIB.getMRI();
9528 unsigned NumBits = Ty.getScalarSizeInBits();
Petar Avramovicd477a7c2021-09-17 11:21:55 +02009529 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009530 if (!Ty.isVector() && ValVRegAndVal) {
Jay Foad6bec3e92021-10-06 10:54:07 +01009531 APInt Scalar = ValVRegAndVal->Value.trunc(8);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009532 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
9533 return MIB.buildConstant(Ty, SplatVal).getReg(0);
9534 }
9535
9536 // Extend the byte value to the larger type, and then multiply by a magic
9537 // value 0x010101... in order to replicate it across every byte.
9538 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
9539 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
9540 return MIB.buildConstant(Ty, 0).getReg(0);
9541 }
9542
9543 LLT ExtType = Ty.getScalarType();
9544 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
9545 if (NumBits > 8) {
9546 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
9547 auto MagicMI = MIB.buildConstant(ExtType, Magic);
9548 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
9549 }
9550
9551 // For vector types create a G_BUILD_VECTOR.
9552 if (Ty.isVector())
Michael Maitland96049fc2024-03-07 09:50:29 -05009553 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009554
9555 return Val;
9556}
9557
// Expand a G_MEMSET of known length KnownLen into a sequence of stores of a
// replicated byte pattern. Returns UnableToLegalize when no store sequence
// within the target's memset store budget exists.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
                             uint64_t KnownLen, Align Alignment,
                             bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  // A non-fixed frame-index destination may have its alignment raised below.
  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  // A known-zero value enables the IsZeroMemset fast path in the target
  // query below.
  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return UnableToLegalize;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return UnableToLegalize;
    }

    // Derive this store's MMO from the original destination MMO at the
    // current offset.
    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return Legalized;
}
9670
9671LegalizerHelper::LegalizeResult
9672LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
9673 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9674
Amara Emerson719024a2023-02-23 16:35:39 -08009675 auto [Dst, Src, Len] = MI.getFirst3Regs();
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009676
9677 const auto *MMOIt = MI.memoperands_begin();
9678 const MachineMemOperand *MemOp = *MMOIt;
9679 bool IsVolatile = MemOp->isVolatile();
9680
9681 // See if this is a constant length copy
Petar Avramovicd477a7c2021-09-17 11:21:55 +02009682 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009683 // FIXME: support dynamically sized G_MEMCPY_INLINE
Kazu Hirata5413bf12022-06-20 11:33:56 -07009684 assert(LenVRegAndVal &&
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009685 "inline memcpy with dynamic size is not yet supported");
9686 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9687 if (KnownLen == 0) {
9688 MI.eraseFromParent();
9689 return Legalized;
9690 }
9691
9692 const auto &DstMMO = **MI.memoperands_begin();
9693 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9694 Align DstAlign = DstMMO.getBaseAlign();
9695 Align SrcAlign = SrcMMO.getBaseAlign();
9696
9697 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9698 IsVolatile);
9699}
9700
9701LegalizerHelper::LegalizeResult
9702LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
9703 uint64_t KnownLen, Align DstAlign,
9704 Align SrcAlign, bool IsVolatile) {
9705 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9706 return lowerMemcpy(MI, Dst, Src, KnownLen,
9707 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9708 IsVolatile);
9709}
9710
// Expand a G_MEMCPY of known length KnownLen into interleaved load/store
// pairs, emitting at most Limit operations of the types chosen by
// findGISelOptimalMemOpLowering. Returns UnableToLegalize when the budget
// cannot be met.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
                             uint64_t KnownLen, uint64_t Limit, Align DstAlign,
                             Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  // A non-fixed frame-index destination may have its alignment raised below.
  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
  // if the memcpy is in a tail call position.

  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      if (MaybeAlign StackAlign = DL.getStackAlignment())
        NewAlign = std::min(NewAlign, *StackAlign);

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a pair of load and stores for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store to the dest buffer
  // of that value loaded. This can result in a sequence of loads and stores
  // mixed types, depending on what the target specifies as good types to use.
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store. The offset constant built above is reused for the
    // destination pointer.
    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return Legalized;
}
9817
// Expand a G_MEMMOVE of known length KnownLen. Unlike memcpy, all loads are
// emitted before any store so overlapping source/destination regions are
// handled correctly.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
                              uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                              bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove length!");

  // A non-fixed frame-index destination may have its alignment raised below.
  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in it's findOptimalMemOpLowering implementation. For now do the
  // same thing here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      if (MaybeAlign StackAlign = DL.getStackAlignment())
        NewAlign = std::min(NewAlign, *StackAlign);

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the stores.
  // Apart from that, this loop is pretty much doing the same thing as the
  // memcpy codegen function.
  unsigned CurrOffset = 0;
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      auto Offset =
          MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  // Second pass: store each loaded value at the matching offset.
  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      auto Offset =
          MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return Legalized;
}
9923
// Dispatch lowering for G_MEMCPY / G_MEMMOVE / G_MEMSET with a constant
// length. MaxLen (when non-zero) bounds the lengths this helper will expand;
// longer or dynamic-length operations report UnableToLegalize so they can be
// turned into libcalls instead.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  const unsigned Opc = MI.getOpcode();
  // This combine is fairly complex so it's not written with a separate
  // matcher function.
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy like instruction");

  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  auto [Dst, Src, Len] = MI.getFirst3Regs();

  // memcpy/memmove carry a second MMO for the source side.
  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }

  // See if this is a constant length copy
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return UnableToLegalize;
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();

  // Zero-length operations are no-ops.
  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  bool IsVolatile = MemOp->isVolatile();
  // NOTE(review): this branch is unreachable given the assert above, which
  // does not admit G_MEMCPY_INLINE — confirm whether the assert or this
  // check is out of date.
  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                             IsVolatile);

  // Don't try to optimize volatile.
  if (IsVolatile)
    return UnableToLegalize;

  if (MaxLen && KnownLen > MaxLen)
    return UnableToLegalize;

  if (Opc == TargetOpcode::G_MEMCPY) {
    auto &MF = *MI.getParent()->getParent();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    bool OptSize = shouldLowerMemFuncForSize(MF);
    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
                       IsVolatile);
  }
  if (Opc == TargetOpcode::G_MEMMOVE)
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return UnableToLegalize;
}