blob: 208fa8475fd4ffa050b83cdc387f6227f6025029 [file] [log] [blame]
Tim Northover69fa84a2016-10-14 22:18:18 +00001//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
Tim Northover33b07d62016-07-22 20:03:43 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tim Northover33b07d62016-07-22 20:03:43 +00006//
7//===----------------------------------------------------------------------===//
8//
Tim Northover69fa84a2016-10-14 22:18:18 +00009/// \file This file implements the LegalizerHelper class to legalize
Tim Northover33b07d62016-07-22 20:03:43 +000010/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
Tim Northover69fa84a2016-10-14 22:18:18 +000015#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
Tim Northoveredb3c8c2016-08-29 19:07:16 +000016#include "llvm/CodeGen/GlobalISel/CallLowering.h"
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +000017#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
Jessica Delfc672b62023-02-21 09:40:07 +010018#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
serge-sans-pailleed98c1b2022-03-09 22:29:31 +010019#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
Tim Northover69fa84a2016-10-14 22:18:18 +000020#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
Jessica Paquette324af792021-05-25 16:54:20 -070021#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
Matt Arsenault0b7de792020-07-26 21:25:10 -040022#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
serge-sans-pailleed98c1b2022-03-09 22:29:31 +010023#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
Amara Emersona35c2c72021-02-21 14:17:03 -080024#include "llvm/CodeGen/GlobalISel/Utils.h"
Amara Emerson41ebbed2025-01-05 21:32:27 -080025#include "llvm/CodeGen/LowLevelTypeUtils.h"
Chen Zheng6ee2f772022-12-12 09:53:53 +000026#include "llvm/CodeGen/MachineConstantPool.h"
serge-sans-pailleed98c1b2022-03-09 22:29:31 +010027#include "llvm/CodeGen/MachineFrameInfo.h"
Tim Northover33b07d62016-07-22 20:03:43 +000028#include "llvm/CodeGen/MachineRegisterInfo.h"
Joseph Huber615b7ee2024-07-20 07:29:04 -050029#include "llvm/CodeGen/RuntimeLibcallUtil.h"
Amara Emersone20b91c2019-08-27 19:54:27 +000030#include "llvm/CodeGen/TargetFrameLowering.h"
Aditya Nandakumarc0333f72018-08-21 17:30:31 +000031#include "llvm/CodeGen/TargetInstrInfo.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000032#include "llvm/CodeGen/TargetLowering.h"
Amara Emerson9f39ba12021-05-19 21:35:05 -070033#include "llvm/CodeGen/TargetOpcodes.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000034#include "llvm/CodeGen/TargetSubtargetInfo.h"
Amara Emerson9f39ba12021-05-19 21:35:05 -070035#include "llvm/IR/Instructions.h"
Tim Northover33b07d62016-07-22 20:03:43 +000036#include "llvm/Support/Debug.h"
Aditya Nandakumarc0333f72018-08-21 17:30:31 +000037#include "llvm/Support/MathExtras.h"
Tim Northover33b07d62016-07-22 20:03:43 +000038#include "llvm/Support/raw_ostream.h"
Mirko Brkusanin36527cb2021-09-07 11:30:11 +020039#include "llvm/Target/TargetMachine.h"
Kazu Hirata267f21a2022-08-28 10:41:51 -070040#include <numeric>
Kazu Hirata3ccbfc32022-11-26 14:44:54 -080041#include <optional>
Tim Northover33b07d62016-07-22 20:03:43 +000042
Daniel Sanders5377fb32017-04-20 15:46:12 +000043#define DEBUG_TYPE "legalizer"
Tim Northover33b07d62016-07-22 20:03:43 +000044
45using namespace llvm;
Daniel Sanders9ade5592018-01-29 17:37:29 +000046using namespace LegalizeActions;
Matt Arsenault0b7de792020-07-26 21:25:10 -040047using namespace MIPatternMatch;
Tim Northover33b07d62016-07-22 20:03:43 +000048
Matt Arsenaultc83b8232019-02-07 17:38:00 +000049/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
Matt Arsenaultd3093c22019-02-28 00:16:32 +000054/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
Matt Arsenaultc83b8232019-02-07 17:38:00 +000058 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
Matt Arsenaultd3093c22019-02-28 00:16:32 +000067 return {NumParts, 0};
Matt Arsenaultc83b8232019-02-07 17:38:00 +000068
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
Matt Arsenaultd3093c22019-02-28 00:16:32 +000072 return {-1, -1};
David Green34de2152024-05-13 21:58:41 +010073 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
Matt Arsenaultc83b8232019-02-07 17:38:00 +000076 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
Matt Arsenaultd3093c22019-02-28 00:16:32 +000080 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
Matt Arsenaultc83b8232019-02-07 17:38:00 +000082}
83
Konstantin Schwarz76986bd2020-02-06 10:01:57 -080084static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
Matt Arsenault0da582d2020-07-19 09:56:15 -040096 case 80:
97 return Type::getX86_FP80Ty(Ctx);
Konstantin Schwarz76986bd2020-02-06 10:01:57 -080098 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
// Convenience constructor: pulls the LegalizerInfo from the function's
// subtarget and leaves known-bits analysis (KB) unset.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
Tim Northover33b07d62016-07-22 20:03:43 +0000111
// Full constructor: the caller supplies the LegalizerInfo explicitly and may
// optionally provide a GISelKnownBits analysis (KB may be null).
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B, GISelKnownBits *KB)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
Matt Arsenaultd55d5922020-08-19 10:46:59 -0400117
/// Attempt a single legalization step on \p MI.
///
/// Queries the LegalizerInfo for the action to take and dispatches to the
/// corresponding transformation. Returns AlreadyLegal if nothing needs doing,
/// Legalized if a step was applied, and UnableToLegalize otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  // New instructions are inserted at MI's position with MI's debug location.
  MIRBuilder.setInstrAndDebugLoc(MI);

  // Intrinsics go to the target hook directly; they are handled before (and
  // instead of) the getAction rule lookup below.
  if (isa<GIntrinsic>(MI))
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
                                                     : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
162
/// Assemble \p DstReg of type \p ResultTy from the \p PartTy-typed registers
/// in \p PartRegs, plus the \p LeftoverTy-typed \p LeftoverRegs covering any
/// remainder. An invalid \p LeftoverTy means the parts evenly cover the
/// result and \p LeftoverRegs must be empty.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    // Scalar/pointer result: a plain merge of the parts suffices.
    if (!ResultTy.isVector()) {
      MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
      return;
    }

    // Vector result: concatenate sub-vectors, or build from scalar elements.
    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  // Merge sub-vectors with different number of elements and insert into DstReg.
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs(PartRegs.begin(), PartRegs.end());
    AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  // Scalar result with a leftover: break everything down to a common GCD
  // type, pad up to the LCM of the types, then extract the result bits.
  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
198
Petar Avramovic29f88b92021-12-23 14:09:51 +0100199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
chuongg3fcfe1b62024-01-15 16:40:39 +0000203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
Petar Avramovic29f88b92021-12-23 14:09:51 +0100205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 8> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
David Green34de2152024-05-13 21:58:41 +0100216 if (!MRI.getType(Leftover).isVector())
Petar Avramovic29f88b92021-12-23 14:09:51 +0100217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
Diana Picusf95a5fb2023-01-09 11:59:00 +0100221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
Petar Avramovic29f88b92021-12-23 14:09:51 +0100222}
223
Matt Arsenault31adc282020-08-03 14:13:38 -0400224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
Matt Arsenault31adc282020-08-03 14:13:38 -0400229 const int StartIdx = Regs.size();
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500230 const int NumResults = MI.getNumOperands() - 1;
Matt Arsenault31adc282020-08-03 14:13:38 -0400231 Regs.resize(Regs.size() + NumResults);
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500232 for (int I = 0; I != NumResults; ++I)
Matt Arsenault31adc282020-08-03 14:13:38 -0400233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500234}
235
Matt Arsenault31adc282020-08-03 14:13:38 -0400236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500238 LLT SrcTy = MRI.getType(SrcReg);
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
Matt Arsenault31adc282020-08-03 14:13:38 -0400248}
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500249
Matt Arsenault31adc282020-08-03 14:13:38 -0400250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500255 return GCDTy;
256}
257
/// Widen the GCD-typed pieces in \p VRegs so they cover the LCM of \p DstTy
/// and \p NarrowTy, merging them into NarrowTy-sized registers. On return,
/// \p VRegs holds the merged NarrowTy pieces and the LCM type is returned.
///
/// \p PadStrategy (G_ZEXT, G_ANYEXT, or G_SEXT) selects how missing high
/// pieces are filled when the sources don't cover the LCM type.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  // Number of NarrowTy pieces in the LCM type, and number of GCDTy pieces in
  // each NarrowTy piece.
  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        // Past the end of the sources; use the padding value.
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
Matt Arsenaultcd7650c2020-01-11 19:05:06 -0500355
356 if (DstTy == LCMTy) {
Diana Picusf95a5fb2023-01-09 11:59:00 +0100357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
Matt Arsenaultcd7650c2020-01-11 19:05:06 -0500358 return;
359 }
360
Diana Picusf95a5fb2023-01-09 11:59:00 +0100361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
Matt Arsenaultcd7650c2020-01-11 19:05:06 -0500362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
Matt Arsenaulte75afc92020-07-28 10:15:42 -0400368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
Diana Picusf95a5fb2023-01-09 11:59:00 +0100375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
Matt Arsenaultcd7650c2020-01-11 19:05:06 -0500376 return;
377 }
378
379 llvm_unreachable("unhandled case");
Matt Arsenaulta66d2812020-01-10 10:41:29 -0500380}
381
/// Map a generic opcode plus a scalar bit width to the corresponding runtime
/// library call. \p Size selects among the 32/64/80/128-bit variants for
/// floating-point calls and the 32/64/128-bit variants for integer calls; an
/// unsupported width hits llvm_unreachable inside the macros.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// Select the integer libcall variant matching Size (no 80-bit form exists).
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

// Select the floating-point libcall variant matching Size.
#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_LROUND:
    RTLIBCASE(LROUND_F);
  case TargetOpcode::G_LLROUND:
    RTLIBCASE(LLROUND_F);
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FEXP10:
    RTLIBCASE(EXP10_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FPOWI:
    RTLIBCASE(POWI_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FTAN:
    RTLIBCASE(TAN_F);
  case TargetOpcode::G_FASIN:
    RTLIBCASE(ASIN_F);
  case TargetOpcode::G_FACOS:
    RTLIBCASE(ACOS_F);
  case TargetOpcode::G_FATAN:
    RTLIBCASE(ATAN_F);
  case TargetOpcode::G_FATAN2:
    RTLIBCASE(ATAN2_F);
  case TargetOpcode::G_FSINH:
    RTLIBCASE(SINH_F);
  case TargetOpcode::G_FCOSH:
    RTLIBCASE(COSH_F);
  case TargetOpcode::G_FTANH:
    RTLIBCASE(TANH_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FLDEXP:
    RTLIBCASE(LDEXP_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    RTLIBCASE(TRUNC_F);
  case TargetOpcode::G_INTRINSIC_ROUND:
    RTLIBCASE(ROUND_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  case TargetOpcode::G_INTRINSIC_LRINT:
    RTLIBCASE(LRINT_F);
  case TargetOpcode::G_INTRINSIC_LLRINT:
    RTLIBCASE(LLRINT_F);
  }
  llvm_unreachable("Unknown libcall function");
#undef RTLIBCASE_INT
#undef RTLIBCASE
}
509
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
///
/// NOTE(review): \p Result and \p MRI are not referenced in this body;
/// presumably kept for interface stability — confirm before removing.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
                                    MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  //   G_MEMCPY %0, %1, %2
  //   $x0 = COPY %0
  //   RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    // G_BZERO returns void, so a copy of its "result" can't be folded.
    if (MI.getOpcode() == TargetOpcode::G_BZERO)
      return false;

    // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
    // memcpy/etc routines return the same parameter. For other it will be the
    // returned value.
    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    // The COPY must move the value into a physical return register.
    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    // The COPY must be immediately followed (modulo debug insts) by a return
    // whose sole implicit operand is that same physical register.
    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
575
/// Emit a call to the library function \p Name via the target's CallLowering.
///
/// When \p MI is non-null and the call sits in tail position (with a matching
/// return type), the call may be lowered as a tail call, in which case the
/// now-redundant trailing COPY/return instructions are deleted and
/// \p LocObserver is checkpointed around the deletion.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
                    MachineInstr *MI) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  // Tail calls are only attempted when the original instruction is known
  // (MI != null) and the libcall's return type matches the caller's.
  if (MI)
    Info.IsTailCall =
        (Result.Ty->isVoidTy() ||
         Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
        isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
                                *MIRBuilder.getMRI());

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI->getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI->getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }
  return LegalizerHelper::Legalized;
}
622
Dominik Montada9fedb692020-03-26 13:59:08 +0100623LegalizerHelper::LegalizeResult
624llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
625 const CallLowering::ArgInfo &Result,
David Greend659bd12024-01-03 07:59:36 +0000626 ArrayRef<CallLowering::ArgInfo> Args,
627 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
Dominik Montada9fedb692020-03-26 13:59:08 +0100628 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
629 const char *Name = TLI.getLibcallName(Libcall);
David Green47c65cf2024-02-17 08:57:14 +0000630 if (!Name)
631 return LegalizerHelper::UnableToLegalize;
Dominik Montada9fedb692020-03-26 13:59:08 +0100632 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
David Greend659bd12024-01-03 07:59:36 +0000633 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
Dominik Montada9fedb692020-03-26 13:59:08 +0100634}
635
Diana Picus65ed3642018-01-17 13:34:10 +0000636// Useful for libcalls where all operands have the same type.
Diana Picus02e11012017-06-15 10:53:31 +0000637static LegalizerHelper::LegalizeResult
638simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
David Greend659bd12024-01-03 07:59:36 +0000639 Type *OpType, LostDebugLocObserver &LocObserver) {
Diana Picus02e11012017-06-15 10:53:31 +0000640 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
Diana Picuse74243d2018-01-12 11:30:45 +0000641
Matt Arsenault9b057f62021-07-08 11:26:30 -0400642 // FIXME: What does the original arg index mean here?
Diana Picuse74243d2018-01-12 11:30:45 +0000643 SmallVector<CallLowering::ArgInfo, 3> Args;
Kazu Hirata259cd6f2021-11-25 22:17:10 -0800644 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
645 Args.push_back({MO.getReg(), OpType, 0});
Matt Arsenault9b057f62021-07-08 11:26:30 -0400646 return createLibcall(MIRBuilder, Libcall,
David Greend659bd12024-01-03 07:59:36 +0000647 {MI.getOperand(0).getReg(), OpType, 0}, Args,
648 LocObserver, &MI);
Diana Picus02e11012017-06-15 10:53:31 +0000649}
650
/// Lower a G_BZERO/G_MEMCPY/G_MEMMOVE/G_MEMSET instruction \p MI into a call
/// to the corresponding runtime library function. The final immediate operand
/// of \p MI indicates whether a tail call was requested.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    // memcpy returns its destination pointer; mark the first argument so.
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  // Only tail-call if the instruction's 'tail' immediate asked for it AND the
  // call is actually in tail position.
  Info.IsTailCall =
      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
      isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}
739
/// Return the outline-atomic runtime routine implementing atomic operation
/// \p MI for its merged memory ordering and access size, or
/// RTLIB::UNKNOWN_LIBCALL when there is none (e.g. vector memory types or
/// unhandled opcodes).
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto &AtomicMI = cast<GMemOperation>(MI);
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();
  uint64_t MemSize = MemType.getSizeInBytes();
  if (MemType.isVector())
    return RTLIB::UNKNOWN_LIBCALL;

// LCALLS expands to the four memory-ordering variants (relaxed, acquire,
// release, acq_rel) of one routine at one size; LCALL5 stamps that out for the
// five supported sizes (1, 2, 4, 8, 16 bytes), forming the [5][4] tables that
// getOutlineAtomicHelper indexes by size and ordering.
#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A)                                                              \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    // SUB is emitted as LDADD of the negated operand (see createAtomicLibcall).
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    // AND is emitted as LDCLR of the complemented operand (see
    // createAtomicLibcall).
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
#undef LCALLS
#undef LCALL5
}
786
/// Lower the atomic instruction \p MI to a call to the matching
/// outline-atomic runtime routine, deriving the IR return type and argument
/// list from the instruction's register operands.
static LegalizerHelper::LegalizeResult
createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  Type *RetTy;
  SmallVector<Register> RetRegs;
  SmallVector<CallLowering::ArgInfo, 3> Args;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register Success;
    LLT SuccessLLT;
    // First bind assuming the plain cmpxchg operand layout
    // (ret, ptr, cmp, new)...
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      // ...then re-bind for the WITH_SUCCESS layout, which carries an extra
      // success flag; the return becomes a {value, success} struct.
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();
      RetRegs.push_back(Success);
      RetTy = StructType::get(
          Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
    }
    Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
    Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    // AND maps to the LDCLR (bit-clear) routine, so pass the complement;
    // SUB maps to LDADD, so pass the negation (see getOutlineAtomicLibcall).
    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
      Val =
          MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
              .getReg(0);
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
      Val =
          MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
              .getReg(0);
    Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  default:
    llvm_unreachable("unsupported opcode");
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}
864
Diana Picus65ed3642018-01-17 13:34:10 +0000865static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
866 Type *FromType) {
867 auto ToMVT = MVT::getVT(ToType);
868 auto FromMVT = MVT::getVT(FromType);
869
870 switch (Opcode) {
871 case TargetOpcode::G_FPEXT:
872 return RTLIB::getFPEXT(FromMVT, ToMVT);
873 case TargetOpcode::G_FPTRUNC:
874 return RTLIB::getFPROUND(FromMVT, ToMVT);
Diana Picus4ed0ee72018-01-30 07:54:52 +0000875 case TargetOpcode::G_FPTOSI:
876 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
877 case TargetOpcode::G_FPTOUI:
878 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
Diana Picus517531e2018-01-30 09:15:17 +0000879 case TargetOpcode::G_SITOFP:
880 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
881 case TargetOpcode::G_UITOFP:
882 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
Diana Picus65ed3642018-01-17 13:34:10 +0000883 }
884 llvm_unreachable("Unsupported libcall function");
885}
886
887static LegalizerHelper::LegalizeResult
888conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
Craig Topper4cf2cf12024-12-04 10:42:49 -0800889 Type *FromType, LostDebugLocObserver &LocObserver,
890 const TargetLowering &TLI, bool IsSigned = false) {
891 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
892 if (FromType->isIntegerTy()) {
893 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
894 Arg.Flags[0].setSExt();
895 else
896 Arg.Flags[0].setZExt();
897 }
898
Diana Picus65ed3642018-01-17 13:34:10 +0000899 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
Craig Topper4cf2cf12024-12-04 10:42:49 -0800900 return createLibcall(MIRBuilder, Libcall,
901 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
902 &MI);
Diana Picus65ed3642018-01-17 13:34:10 +0000903}
904
Serge Pavlov462d5832023-10-09 21:13:39 +0700905static RTLIB::Libcall
906getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
907 RTLIB::Libcall RTLibcall;
908 switch (MI.getOpcode()) {
Serge Pavlov7fc7ef12024-01-10 14:18:00 +0700909 case TargetOpcode::G_GET_FPENV:
910 RTLibcall = RTLIB::FEGETENV;
911 break;
912 case TargetOpcode::G_SET_FPENV:
913 case TargetOpcode::G_RESET_FPENV:
914 RTLibcall = RTLIB::FESETENV;
915 break;
Serge Pavlov462d5832023-10-09 21:13:39 +0700916 case TargetOpcode::G_GET_FPMODE:
917 RTLibcall = RTLIB::FEGETMODE;
918 break;
919 case TargetOpcode::G_SET_FPMODE:
920 case TargetOpcode::G_RESET_FPMODE:
921 RTLibcall = RTLIB::FESETMODE;
922 break;
923 default:
924 llvm_unreachable("Unexpected opcode");
925 }
926 return RTLibcall;
927}
928
// Some library functions that read FP state (fegetmode, fegetenv) write the
// state into a region in memory. IR intrinsics that do the same operations
// (get_fpmode, get_fpenv) return the state as integer value. To implement these
// intrinsics via the library functions, we need to use temporary variable,
// for example:
//
// %0:_(s32) = G_GET_FPMODE
//
// is transformed to:
//
// %1:_(p0) = G_FRAME_INDEX %stack.0
// BL &fegetmode
// %0:_(s32) = G_LOAD %1
//
LegalizerHelper::LegalizeResult
LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
                                       MachineInstr &MI,
                                       LostDebugLocObserver &LocObserver) {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  auto &MF = MIRBuilder.getMF();
  auto &MRI = *MIRBuilder.getMRI();
  auto &Ctx = MF.getFunction().getContext();

  // Create temporary, where library function will put the read state.
  Register Dst = MI.getOperand(0).getReg();
  LLT StateTy = MRI.getType(Dst);
  TypeSize StateSize = StateTy.getSizeInBytes();
  Align TempAlign = getStackTemporaryAlignment(StateTy);
  MachinePointerInfo TempPtrInfo;
  auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);

  // Create a call to library function, with the temporary as an argument.
  // The MI argument is nullptr: the call is never emitted as a tail call,
  // since the result still has to be loaded back afterwards.
  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
  Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
  RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
  auto Res =
      createLibcall(MIRBuilder, RTLibcall,
                    CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
                    CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
                    LocObserver, nullptr);
  if (Res != LegalizerHelper::Legalized)
    return Res;

  // Create a load from the temporary.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
  MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);

  return LegalizerHelper::Legalized;
}
979
980// Similar to `createGetStateLibcall` the function calls a library function
981// using transient space in stack. In this case the library function reads
982// content of memory region.
983LegalizerHelper::LegalizeResult
984LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
David Greend659bd12024-01-03 07:59:36 +0000985 MachineInstr &MI,
986 LostDebugLocObserver &LocObserver) {
Serge Pavlov462d5832023-10-09 21:13:39 +0700987 const DataLayout &DL = MIRBuilder.getDataLayout();
988 auto &MF = MIRBuilder.getMF();
989 auto &MRI = *MIRBuilder.getMRI();
990 auto &Ctx = MF.getFunction().getContext();
991
992 // Create temporary, where library function will get the new state.
993 Register Src = MI.getOperand(0).getReg();
994 LLT StateTy = MRI.getType(Src);
995 TypeSize StateSize = StateTy.getSizeInBytes();
996 Align TempAlign = getStackTemporaryAlignment(StateTy);
997 MachinePointerInfo TempPtrInfo;
998 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
999
1000 // Put the new state into the temporary.
1001 MachineMemOperand *MMO = MF.getMachineMemOperand(
1002 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1003 MIRBuilder.buildStore(Src, Temp, *MMO);
1004
1005 // Create a call to library function, with the temporary as an argument.
1006 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1007 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1008 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1009 return createLibcall(MIRBuilder, RTLibcall,
1010 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
David Greend659bd12024-01-03 07:59:36 +00001011 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1012 LocObserver, nullptr);
Serge Pavlov462d5832023-10-09 21:13:39 +07001013}
1014
/// Returns the corresponding libcall for the given Pred and
/// the ICMP predicate that should be generated to compare with #0
/// after the libcall. Predicates with no single-libcall mapping yield
/// {UNKNOWN_LIBCALL, BAD_ICMP_PREDICATE} and are composed by the caller.
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
// Expand to the 32/64/128-bit flavor of the given libcall prefix, selected by
// Size, paired with the integer predicate applied to the libcall's result.
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)                                 \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return {RTLIB::LibcallPrefix##32, ICmpPred};                             \
    case 64:                                                                   \
      return {RTLIB::LibcallPrefix##64, ICmpPred};                             \
    case 128:                                                                  \
      return {RTLIB::LibcallPrefix##128, ICmpPred};                            \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Pred) {
  case CmpInst::FCMP_OEQ:
    RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
  case CmpInst::FCMP_UNE:
    RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
  case CmpInst::FCMP_OGE:
    RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
  case CmpInst::FCMP_OLT:
    RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
  case CmpInst::FCMP_OLE:
    RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
  case CmpInst::FCMP_OGT:
    RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
  case CmpInst::FCMP_UNO:
    RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
  default:
    return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
  }
}
1053
/// Lower a G_FCMP \p MI to soft-float comparison libcalls followed by an
/// integer compare against zero. Predicates without a direct libcall mapping
/// are synthesized from combinations of the OEQ/UNO libcalls, or by inverting
/// the predicate.
LegalizerHelper::LegalizeResult
LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
                                   MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  auto &MF = MIRBuilder.getMF();
  auto &Ctx = MF.getFunction().getContext();
  const GFCmp *Cmp = cast<GFCmp>(&MI);

  // Only f32/f64/f128 with matching operand types are supported.
  LLT OpLLT = MRI.getType(Cmp->getLHSReg());
  unsigned Size = OpLLT.getSizeInBits();
  if ((Size != 32 && Size != 64 && Size != 128) ||
      OpLLT != MRI.getType(Cmp->getRHSReg()))
    return UnableToLegalize;

  Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);

  // DstReg type is s32
  const Register DstReg = Cmp->getReg(0);
  LLT DstTy = MRI.getType(DstReg);
  const auto Cond = Cmp->getCond();

  // Reference:
  // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
  // Generates a libcall followed by ICMP.
  // Returns the register holding the ICMP result, or an invalid register if
  // the libcall could not be created.
  const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
                                const CmpInst::Predicate ICmpPred,
                                const DstOp &Res) -> Register {
    // FCMP libcall always returns an i32, and needs an ICMP with #0.
    constexpr LLT TempLLT = LLT::scalar(32);
    Register Temp = MRI.createGenericVirtualRegister(TempLLT);
    // Generate libcall, holding result in Temp
    const auto Status = createLibcall(
        MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
        {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
        LocObserver, &MI);
    if (!Status)
      return {};

    // Compare temp with #0 to get the final result.
    return MIRBuilder
        .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
        .getReg(0);
  };

  // Simple case if we have a direct mapping from predicate to libcall
  if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
      Libcall != RTLIB::UNKNOWN_LIBCALL &&
      ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
    if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
      return Legalized;
    }
    return UnableToLegalize;
  }

  // No direct mapping found, should be generated as combination of libcalls.

  switch (Cond) {
  case CmpInst::FCMP_UEQ: {
    // FCMP_UEQ: unordered or equal
    // Convert into (FCMP_OEQ || FCMP_UNO).

    const auto [OeqLibcall, OeqPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);

    const auto [UnoLibcall, UnoPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
    const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
    if (Oeq && Uno)
      MIRBuilder.buildOr(DstReg, Oeq, Uno);
    else
      return UnableToLegalize;

    break;
  }
  case CmpInst::FCMP_ONE: {
    // FCMP_ONE: ordered and operands are unequal
    // Convert into (!FCMP_OEQ && !FCMP_UNO).

    // We inverse the predicate instead of generating a NOT
    // to save one instruction.
    // On AArch64 isel can even select two cmp into a single ccmp.
    const auto [OeqLibcall, OeqPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
    const auto NotOeq =
        BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);

    const auto [UnoLibcall, UnoPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
    const auto NotUno =
        BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);

    if (NotOeq && NotUno)
      MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
    else
      return UnableToLegalize;

    break;
  }
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_ORD: {
    // Convert into: !(inverse(Pred))
    // E.g. FCMP_ULT becomes !FCMP_OGE
    // This is equivalent to the following, but saves some instructions.
    // MIRBuilder.buildNot(
    //     PredTy,
    //     MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
    //                          Op1, Op2));
    const auto [InversedLibcall, InversedPred] =
        getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
    if (!BuildLibcall(InversedLibcall,
                      CmpInst::getInversePredicate(InversedPred), DstReg))
      return UnableToLegalize;
    break;
  }
  default:
    return UnableToLegalize;
  }

  return Legalized;
}
1178
Serge Pavlov462d5832023-10-09 21:13:39 +07001179// The function is used to legalize operations that set default environment
1180// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1181// On most targets supported in glibc FE_DFL_MODE is defined as
1182// `((const femode_t *) -1)`. Such assumption is used here. If for some target
1183// it is not true, the target must provide custom lowering.
1184LegalizerHelper::LegalizeResult
1185LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
David Greend659bd12024-01-03 07:59:36 +00001186 MachineInstr &MI,
1187 LostDebugLocObserver &LocObserver) {
Serge Pavlov462d5832023-10-09 21:13:39 +07001188 const DataLayout &DL = MIRBuilder.getDataLayout();
1189 auto &MF = MIRBuilder.getMF();
1190 auto &Ctx = MF.getFunction().getContext();
1191
1192 // Create an argument for the library function.
1193 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1194 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1195 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1196 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1197 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1198 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1199 MIRBuilder.buildIntToPtr(Dest, DefValue);
1200
1201 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1202 return createLibcall(MIRBuilder, RTLibcall,
1203 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
David Greend659bd12024-01-03 07:59:36 +00001204 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1205 LocObserver, &MI);
Serge Pavlov462d5832023-10-09 21:13:39 +07001206}
1207
Tim Northover69fa84a2016-10-14 22:18:18 +00001208LegalizerHelper::LegalizeResult
Jessica Paquette324af792021-05-25 16:54:20 -07001209LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
Matthias Braunf1caa282017-12-15 22:22:58 +00001210 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001211
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001212 switch (MI.getOpcode()) {
1213 default:
1214 return UnableToLegalize;
Kai Nackeb3837532022-08-02 13:12:38 -04001215 case TargetOpcode::G_MUL:
Diana Picuse97822e2017-04-24 07:22:31 +00001216 case TargetOpcode::G_SDIV:
Diana Picus02e11012017-06-15 10:53:31 +00001217 case TargetOpcode::G_UDIV:
1218 case TargetOpcode::G_SREM:
Diana Picus0528e2c2018-11-26 11:07:02 +00001219 case TargetOpcode::G_UREM:
1220 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
Serge Pavlov462d5832023-10-09 21:13:39 +07001221 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1222 unsigned Size = LLTy.getSizeInBits();
Petar Avramovic0a5e4eb2018-12-18 15:59:51 +00001223 Type *HLTy = IntegerType::get(Ctx, Size);
David Greend659bd12024-01-03 07:59:36 +00001224 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
Diana Picusfc1675e2017-07-05 12:57:24 +00001225 if (Status != Legalized)
1226 return Status;
1227 break;
Diana Picuse97822e2017-04-24 07:22:31 +00001228 }
Diana Picus1314a282017-04-11 10:52:34 +00001229 case TargetOpcode::G_FADD:
Javed Absar5cde1cc2017-10-30 13:51:56 +00001230 case TargetOpcode::G_FSUB:
Diana Picus9faa09b2017-11-23 12:44:20 +00001231 case TargetOpcode::G_FMUL:
Diana Picusc01f7f12017-11-23 13:26:07 +00001232 case TargetOpcode::G_FDIV:
Diana Picuse74243d2018-01-12 11:30:45 +00001233 case TargetOpcode::G_FMA:
Tim Northovere0418412017-02-08 23:23:39 +00001234 case TargetOpcode::G_FPOW:
Jessica Paquette7db82d72019-01-28 18:34:18 +00001235 case TargetOpcode::G_FREM:
1236 case TargetOpcode::G_FCOS:
Jessica Paquettec49428a2019-01-28 19:53:14 +00001237 case TargetOpcode::G_FSIN:
Farzon Lotfi1d874332024-06-05 15:01:33 -04001238 case TargetOpcode::G_FTAN:
Farzon Lotfi0b58f342024-07-11 15:58:43 -04001239 case TargetOpcode::G_FACOS:
1240 case TargetOpcode::G_FASIN:
1241 case TargetOpcode::G_FATAN:
Tex Riddellc03d09c2024-10-24 17:53:12 -07001242 case TargetOpcode::G_FATAN2:
Farzon Lotfi0b58f342024-07-11 15:58:43 -04001243 case TargetOpcode::G_FCOSH:
1244 case TargetOpcode::G_FSINH:
1245 case TargetOpcode::G_FTANH:
Jessica Paquette2d73ecd2019-01-28 21:27:23 +00001246 case TargetOpcode::G_FLOG10:
Jessica Paquette0154bd12019-01-30 21:16:04 +00001247 case TargetOpcode::G_FLOG:
Jessica Paquette84bedac2019-01-30 23:46:15 +00001248 case TargetOpcode::G_FLOG2:
Jessica Paquettee7941212019-04-03 16:58:32 +00001249 case TargetOpcode::G_FEXP:
Petar Avramovicfaaa2b5d2019-06-06 09:02:24 +00001250 case TargetOpcode::G_FEXP2:
Matt Arsenaultb14e83d2023-08-12 07:20:00 -04001251 case TargetOpcode::G_FEXP10:
Petar Avramovicfaaa2b5d2019-06-06 09:02:24 +00001252 case TargetOpcode::G_FCEIL:
Dominik Montadafeb20a12020-03-02 16:28:17 +01001253 case TargetOpcode::G_FFLOOR:
1254 case TargetOpcode::G_FMINNUM:
1255 case TargetOpcode::G_FMAXNUM:
1256 case TargetOpcode::G_FSQRT:
1257 case TargetOpcode::G_FRINT:
Matt Arsenault0da582d2020-07-19 09:56:15 -04001258 case TargetOpcode::G_FNEARBYINT:
Craig Topperd5d14172024-09-18 12:07:44 -07001259 case TargetOpcode::G_INTRINSIC_TRUNC:
1260 case TargetOpcode::G_INTRINSIC_ROUND:
Matt Arsenault0da582d2020-07-19 09:56:15 -04001261 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
Serge Pavlov462d5832023-10-09 21:13:39 +07001262 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1263 unsigned Size = LLTy.getSizeInBits();
Konstantin Schwarz76986bd2020-02-06 10:01:57 -08001264 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
Matt Arsenault0da582d2020-07-19 09:56:15 -04001265 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1266 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
Jessica Paquette7db82d72019-01-28 18:34:18 +00001267 return UnableToLegalize;
1268 }
David Greend659bd12024-01-03 07:59:36 +00001269 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
Diana Picusfc1675e2017-07-05 12:57:24 +00001270 if (Status != Legalized)
1271 return Status;
1272 break;
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001273 }
JaydeepChauhan14b693e1c2025-02-03 12:42:43 +05301274 case TargetOpcode::G_LROUND:
1275 case TargetOpcode::G_LLROUND:
David Green8d49ce12024-04-17 18:38:24 +01001276 case TargetOpcode::G_INTRINSIC_LRINT:
1277 case TargetOpcode::G_INTRINSIC_LLRINT: {
David Green28d28d52024-04-15 09:41:08 +01001278 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1279 unsigned Size = LLTy.getSizeInBits();
1280 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1281 Type *ITy = IntegerType::get(
1282 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1283 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1284 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1285 return UnableToLegalize;
1286 }
1287 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1288 LegalizeResult Status =
1289 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1290 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1291 if (Status != Legalized)
1292 return Status;
1293 MI.eraseFromParent();
1294 return Legalized;
1295 }
Craig Toppera15400d2024-12-02 13:30:46 -08001296 case TargetOpcode::G_FPOWI:
1297 case TargetOpcode::G_FLDEXP: {
David Green5550e9c2024-01-04 07:26:23 +00001298 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1299 unsigned Size = LLTy.getSizeInBits();
1300 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1301 Type *ITy = IntegerType::get(
1302 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1303 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1304 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1305 return UnableToLegalize;
1306 }
1307 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
Craig Topperbee33b52024-12-02 09:06:38 -08001308 SmallVector<CallLowering::ArgInfo, 2> Args = {
David Green5550e9c2024-01-04 07:26:23 +00001309 {MI.getOperand(1).getReg(), HLTy, 0},
1310 {MI.getOperand(2).getReg(), ITy, 1}};
Craig Topperbee33b52024-12-02 09:06:38 -08001311 Args[1].Flags[0].setSExt();
David Green5550e9c2024-01-04 07:26:23 +00001312 LegalizeResult Status =
1313 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1314 Args, LocObserver, &MI);
1315 if (Status != Legalized)
1316 return Status;
1317 break;
1318 }
Konstantin Schwarz76986bd2020-02-06 10:01:57 -08001319 case TargetOpcode::G_FPEXT:
Diana Picus65ed3642018-01-17 13:34:10 +00001320 case TargetOpcode::G_FPTRUNC: {
Konstantin Schwarz76986bd2020-02-06 10:01:57 -08001321 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1322 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1323 if (!FromTy || !ToTy)
Diana Picus65ed3642018-01-17 13:34:10 +00001324 return UnableToLegalize;
David Greend659bd12024-01-03 07:59:36 +00001325 LegalizeResult Status =
Craig Topper4cf2cf12024-12-04 10:42:49 -08001326 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
Diana Picus65ed3642018-01-17 13:34:10 +00001327 if (Status != Legalized)
1328 return Status;
1329 break;
1330 }
Him188ba461f82024-07-25 11:07:31 +01001331 case TargetOpcode::G_FCMP: {
1332 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1333 if (Status != Legalized)
1334 return Status;
1335 MI.eraseFromParent();
1336 return Status;
1337 }
Diana Picus4ed0ee72018-01-30 07:54:52 +00001338 case TargetOpcode::G_FPTOSI:
1339 case TargetOpcode::G_FPTOUI: {
1340 // FIXME: Support other types
David Greene8876242024-06-21 10:24:57 +01001341 Type *FromTy =
1342 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
Diana Picus4ed0ee72018-01-30 07:54:52 +00001343 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
David Greene8876242024-06-21 10:24:57 +01001344 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
Diana Picus4ed0ee72018-01-30 07:54:52 +00001345 return UnableToLegalize;
1346 LegalizeResult Status = conversionLibcall(
Craig Topper4cf2cf12024-12-04 10:42:49 -08001347 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
Diana Picus4ed0ee72018-01-30 07:54:52 +00001348 if (Status != Legalized)
1349 return Status;
1350 break;
1351 }
Diana Picus517531e2018-01-30 09:15:17 +00001352 case TargetOpcode::G_SITOFP:
1353 case TargetOpcode::G_UITOFP: {
Diana Picus517531e2018-01-30 09:15:17 +00001354 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
Him188365f5b42024-07-15 16:24:24 +01001355 Type *ToTy =
1356 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1357 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
Diana Picus517531e2018-01-30 09:15:17 +00001358 return UnableToLegalize;
Craig Topper4cf2cf12024-12-04 10:42:49 -08001359 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1360 LegalizeResult Status =
1361 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1362 LocObserver, TLI, IsSigned);
Diana Picus517531e2018-01-30 09:15:17 +00001363 if (Status != Legalized)
1364 return Status;
1365 break;
1366 }
Thomas Preud'hommece61b0e2024-01-04 10:15:16 +00001367 case TargetOpcode::G_ATOMICRMW_XCHG:
1368 case TargetOpcode::G_ATOMICRMW_ADD:
1369 case TargetOpcode::G_ATOMICRMW_SUB:
1370 case TargetOpcode::G_ATOMICRMW_AND:
1371 case TargetOpcode::G_ATOMICRMW_OR:
1372 case TargetOpcode::G_ATOMICRMW_XOR:
1373 case TargetOpcode::G_ATOMIC_CMPXCHG:
1374 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1375 auto Status = createAtomicLibcall(MIRBuilder, MI);
1376 if (Status != Legalized)
1377 return Status;
1378 break;
1379 }
Jessica Paquette23f657c2021-03-24 23:45:36 -07001380 case TargetOpcode::G_BZERO:
Matt Arsenault0b7f6cc2020-08-03 09:00:24 -04001381 case TargetOpcode::G_MEMCPY:
1382 case TargetOpcode::G_MEMMOVE:
1383 case TargetOpcode::G_MEMSET: {
Jessica Paquette23f657c2021-03-24 23:45:36 -07001384 LegalizeResult Result =
Jessica Paquette324af792021-05-25 16:54:20 -07001385 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
Jessica Paquette23f657c2021-03-24 23:45:36 -07001386 if (Result != Legalized)
1387 return Result;
Matt Arsenault0b7f6cc2020-08-03 09:00:24 -04001388 MI.eraseFromParent();
1389 return Result;
1390 }
Serge Pavlov7fc7ef12024-01-10 14:18:00 +07001391 case TargetOpcode::G_GET_FPENV:
Serge Pavlov462d5832023-10-09 21:13:39 +07001392 case TargetOpcode::G_GET_FPMODE: {
David Greend659bd12024-01-03 07:59:36 +00001393 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
Serge Pavlov462d5832023-10-09 21:13:39 +07001394 if (Result != Legalized)
1395 return Result;
1396 break;
1397 }
Serge Pavlov7fc7ef12024-01-10 14:18:00 +07001398 case TargetOpcode::G_SET_FPENV:
Serge Pavlov462d5832023-10-09 21:13:39 +07001399 case TargetOpcode::G_SET_FPMODE: {
David Greend659bd12024-01-03 07:59:36 +00001400 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
Serge Pavlov462d5832023-10-09 21:13:39 +07001401 if (Result != Legalized)
1402 return Result;
1403 break;
1404 }
Serge Pavlov7fc7ef12024-01-10 14:18:00 +07001405 case TargetOpcode::G_RESET_FPENV:
Serge Pavlov462d5832023-10-09 21:13:39 +07001406 case TargetOpcode::G_RESET_FPMODE: {
David Greend659bd12024-01-03 07:59:36 +00001407 LegalizeResult Result =
1408 createResetStateLibcall(MIRBuilder, MI, LocObserver);
Serge Pavlov462d5832023-10-09 21:13:39 +07001409 if (Result != Legalized)
1410 return Result;
1411 break;
1412 }
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001413 }
Diana Picusfc1675e2017-07-05 12:57:24 +00001414
1415 MI.eraseFromParent();
1416 return Legalized;
Tim Northoveredb3c8c2016-08-29 19:07:16 +00001417}
1418
Tim Northover69fa84a2016-10-14 22:18:18 +00001419LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1420 unsigned TypeIdx,
1421 LLT NarrowTy) {
Daniel Sanders27fe8a52018-04-27 19:48:53 +00001422 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1423 uint64_t NarrowSize = NarrowTy.getSizeInBits();
Kristof Beylsaf9814a2017-11-07 10:34:34 +00001424
Tim Northover9656f142016-08-04 20:54:13 +00001425 switch (MI.getOpcode()) {
1426 default:
1427 return UnableToLegalize;
Tim Northoverff5e7e12017-06-30 20:27:36 +00001428 case TargetOpcode::G_IMPLICIT_DEF: {
Dominik Montada35950fe2020-03-23 12:30:55 +01001429 Register DstReg = MI.getOperand(0).getReg();
1430 LLT DstTy = MRI.getType(DstReg);
1431
1432 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1433 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1434 // FIXME: Although this would also be legal for the general case, it causes
1435 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1436 // combines not being hit). This seems to be a problem related to the
1437 // artifact combiner.
1438 if (SizeOp0 % NarrowSize != 0) {
1439 LLT ImplicitTy = NarrowTy;
1440 if (DstTy.isVector())
Sander de Smalend5e14ba2021-06-24 09:58:21 +01001441 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
Dominik Montada35950fe2020-03-23 12:30:55 +01001442
1443 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1444 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1445
1446 MI.eraseFromParent();
1447 return Legalized;
1448 }
1449
Kristof Beylsaf9814a2017-11-07 10:34:34 +00001450 int NumParts = SizeOp0 / NarrowSize;
Tim Northoverff5e7e12017-06-30 20:27:36 +00001451
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00001452 SmallVector<Register, 2> DstRegs;
Volkan Keles02bb1742018-02-14 19:58:36 +00001453 for (int i = 0; i < NumParts; ++i)
Dominik Montada35950fe2020-03-23 12:30:55 +01001454 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
Amara Emerson5ec146042018-12-10 18:44:58 +00001455
Dominik Montada35950fe2020-03-23 12:30:55 +01001456 if (DstTy.isVector())
Amara Emerson5ec146042018-12-10 18:44:58 +00001457 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1458 else
Diana Picusf95a5fb2023-01-09 11:59:00 +01001459 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
Tim Northoverff5e7e12017-06-30 20:27:36 +00001460 MI.eraseFromParent();
1461 return Legalized;
1462 }
Matt Arsenault71872722019-04-10 17:27:53 +00001463 case TargetOpcode::G_CONSTANT: {
1464 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1465 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1466 unsigned TotalSize = Ty.getSizeInBits();
1467 unsigned NarrowSize = NarrowTy.getSizeInBits();
1468 int NumParts = TotalSize / NarrowSize;
1469
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00001470 SmallVector<Register, 4> PartRegs;
Matt Arsenault71872722019-04-10 17:27:53 +00001471 for (int I = 0; I != NumParts; ++I) {
1472 unsigned Offset = I * NarrowSize;
1473 auto K = MIRBuilder.buildConstant(NarrowTy,
1474 Val.lshr(Offset).trunc(NarrowSize));
1475 PartRegs.push_back(K.getReg(0));
1476 }
1477
1478 LLT LeftoverTy;
1479 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00001480 SmallVector<Register, 1> LeftoverRegs;
Matt Arsenault71872722019-04-10 17:27:53 +00001481 if (LeftoverBits != 0) {
1482 LeftoverTy = LLT::scalar(LeftoverBits);
1483 auto K = MIRBuilder.buildConstant(
1484 LeftoverTy,
1485 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1486 LeftoverRegs.push_back(K.getReg(0));
1487 }
1488
1489 insertParts(MI.getOperand(0).getReg(),
1490 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1491
1492 MI.eraseFromParent();
1493 return Legalized;
1494 }
Matt Arsenault25e99382020-01-10 10:07:24 -05001495 case TargetOpcode::G_SEXT:
Matt Arsenault917156172020-01-10 09:47:17 -05001496 case TargetOpcode::G_ZEXT:
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05001497 case TargetOpcode::G_ANYEXT:
1498 return narrowScalarExt(MI, TypeIdx, NarrowTy);
Petar Avramovic5b4c5c22019-08-21 09:26:39 +00001499 case TargetOpcode::G_TRUNC: {
1500 if (TypeIdx != 1)
1501 return UnableToLegalize;
1502
1503 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1504 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1505 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1506 return UnableToLegalize;
1507 }
1508
Jay Foad63f73542020-01-16 12:37:00 +00001509 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1510 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
Petar Avramovic5b4c5c22019-08-21 09:26:39 +00001511 MI.eraseFromParent();
1512 return Legalized;
1513 }
Yingwei Zheng821bcba2024-05-22 23:35:37 +08001514 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
Petar Avramovic29f88b92021-12-23 14:09:51 +01001515 case TargetOpcode::G_FREEZE: {
1516 if (TypeIdx != 0)
1517 return UnableToLegalize;
1518
1519 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1520 // Should widen scalar first
1521 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1522 return UnableToLegalize;
1523
1524 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1525 SmallVector<Register, 8> Parts;
1526 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1527 Parts.push_back(
Yingwei Zheng821bcba2024-05-22 23:35:37 +08001528 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1529 .getReg(0));
Petar Avramovic29f88b92021-12-23 14:09:51 +01001530 }
1531
Diana Picusf95a5fb2023-01-09 11:59:00 +01001532 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
Petar Avramovic29f88b92021-12-23 14:09:51 +01001533 MI.eraseFromParent();
1534 return Legalized;
1535 }
Justin Bogner62ce4b02021-02-02 17:02:52 -08001536 case TargetOpcode::G_ADD:
Cassie Jones362463882021-02-14 14:37:55 -05001537 case TargetOpcode::G_SUB:
Cassie Jonese1532642021-02-22 17:11:23 -05001538 case TargetOpcode::G_SADDO:
1539 case TargetOpcode::G_SSUBO:
Cassie Jones8f956a52021-02-22 17:11:35 -05001540 case TargetOpcode::G_SADDE:
1541 case TargetOpcode::G_SSUBE:
Cassie Jonesc63b33b2021-02-22 17:10:58 -05001542 case TargetOpcode::G_UADDO:
1543 case TargetOpcode::G_USUBO:
Cassie Jones8f956a52021-02-22 17:11:35 -05001544 case TargetOpcode::G_UADDE:
1545 case TargetOpcode::G_USUBE:
Cassie Jones362463882021-02-14 14:37:55 -05001546 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
Matt Arsenault211e89d2019-01-27 00:52:51 +00001547 case TargetOpcode::G_MUL:
Petar Avramovic5229f472019-03-11 10:08:44 +00001548 case TargetOpcode::G_UMULH:
Petar Avramovic0b17e592019-03-11 10:00:17 +00001549 return narrowScalarMul(MI, NarrowTy);
Matt Arsenault1cf713662019-02-12 14:54:52 +00001550 case TargetOpcode::G_EXTRACT:
1551 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1552 case TargetOpcode::G_INSERT:
1553 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
Justin Bognerd09c3ce2017-01-19 01:05:48 +00001554 case TargetOpcode::G_LOAD: {
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001555 auto &LoadMI = cast<GLoad>(MI);
1556 Register DstReg = LoadMI.getDstReg();
Matt Arsenault18619af2019-01-29 18:13:02 +00001557 LLT DstTy = MRI.getType(DstReg);
Matt Arsenault7f09fd62019-02-05 00:26:12 +00001558 if (DstTy.isVector())
Matt Arsenault045bc9a2019-01-30 02:35:38 +00001559 return UnableToLegalize;
Matt Arsenault18619af2019-01-29 18:13:02 +00001560
David Green601e1022024-03-17 18:15:56 +00001561 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001562 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001563 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
Matt Arsenault18619af2019-01-29 18:13:02 +00001564 MIRBuilder.buildAnyExt(DstReg, TmpReg);
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001565 LoadMI.eraseFromParent();
Matt Arsenault18619af2019-01-29 18:13:02 +00001566 return Legalized;
1567 }
1568
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001569 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
Justin Bognerd09c3ce2017-01-19 01:05:48 +00001570 }
Matt Arsenault6614f852019-01-22 19:02:10 +00001571 case TargetOpcode::G_ZEXTLOAD:
1572 case TargetOpcode::G_SEXTLOAD: {
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001573 auto &LoadMI = cast<GExtLoad>(MI);
1574 Register DstReg = LoadMI.getDstReg();
1575 Register PtrReg = LoadMI.getPointerReg();
Matt Arsenault6614f852019-01-22 19:02:10 +00001576
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001577 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001578 auto &MMO = LoadMI.getMMO();
David Green601e1022024-03-17 18:15:56 +00001579 unsigned MemSize = MMO.getSizeInBits().getValue();
Matt Arsenault2cbbc6e2021-01-05 23:25:18 -05001580
1581 if (MemSize == NarrowSize) {
Matt Arsenault6614f852019-01-22 19:02:10 +00001582 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
Matt Arsenault2cbbc6e2021-01-05 23:25:18 -05001583 } else if (MemSize < NarrowSize) {
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001584 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
Matt Arsenault2cbbc6e2021-01-05 23:25:18 -05001585 } else if (MemSize > NarrowSize) {
1586 // FIXME: Need to split the load.
1587 return UnableToLegalize;
Matt Arsenault6614f852019-01-22 19:02:10 +00001588 }
1589
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001590 if (isa<GZExtLoad>(LoadMI))
Matt Arsenault6614f852019-01-22 19:02:10 +00001591 MIRBuilder.buildZExt(DstReg, TmpReg);
1592 else
1593 MIRBuilder.buildSExt(DstReg, TmpReg);
1594
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001595 LoadMI.eraseFromParent();
Matt Arsenault6614f852019-01-22 19:02:10 +00001596 return Legalized;
1597 }
Justin Bognerfde01042017-01-18 17:29:54 +00001598 case TargetOpcode::G_STORE: {
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001599 auto &StoreMI = cast<GStore>(MI);
Matt Arsenault18619af2019-01-29 18:13:02 +00001600
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001601 Register SrcReg = StoreMI.getValueReg();
Matt Arsenault18619af2019-01-29 18:13:02 +00001602 LLT SrcTy = MRI.getType(SrcReg);
Matt Arsenault7f09fd62019-02-05 00:26:12 +00001603 if (SrcTy.isVector())
1604 return UnableToLegalize;
1605
1606 int NumParts = SizeOp0 / NarrowSize;
1607 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1608 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1609 if (SrcTy.isVector() && LeftoverBits != 0)
1610 return UnableToLegalize;
Matt Arsenault18619af2019-01-29 18:13:02 +00001611
David Green601e1022024-03-17 18:15:56 +00001612 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001613 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
Matt Arsenault18619af2019-01-29 18:13:02 +00001614 MIRBuilder.buildTrunc(TmpReg, SrcReg);
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001615 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1616 StoreMI.eraseFromParent();
Matt Arsenault18619af2019-01-29 18:13:02 +00001617 return Legalized;
1618 }
1619
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07001620 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
Justin Bognerfde01042017-01-18 17:29:54 +00001621 }
Matt Arsenault81511e52019-02-05 00:13:44 +00001622 case TargetOpcode::G_SELECT:
1623 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
Petar Avramovic150fd432018-12-18 11:36:14 +00001624 case TargetOpcode::G_AND:
1625 case TargetOpcode::G_OR:
1626 case TargetOpcode::G_XOR: {
Quentin Colombetc2f3cea2017-10-03 04:53:56 +00001627 // Legalize bitwise operation:
1628 // A = BinOp<Ty> B, C
1629 // into:
1630 // B1, ..., BN = G_UNMERGE_VALUES B
1631 // C1, ..., CN = G_UNMERGE_VALUES C
1632 // A1 = BinOp<Ty/N> B1, C2
1633 // ...
1634 // AN = BinOp<Ty/N> BN, CN
1635 // A = G_MERGE_VALUES A1, ..., AN
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00001636 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
Quentin Colombetc2f3cea2017-10-03 04:53:56 +00001637 }
Matt Arsenault30989e42019-01-22 21:42:11 +00001638 case TargetOpcode::G_SHL:
1639 case TargetOpcode::G_LSHR:
Matt Arsenaultfbec8fe2019-02-07 19:37:44 +00001640 case TargetOpcode::G_ASHR:
1641 return narrowScalarShift(MI, TypeIdx, NarrowTy);
Matt Arsenaultd5684f72019-01-31 02:09:57 +00001642 case TargetOpcode::G_CTLZ:
1643 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1644 case TargetOpcode::G_CTTZ:
1645 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1646 case TargetOpcode::G_CTPOP:
Petar Avramovic2b66d322020-01-27 09:43:38 +01001647 if (TypeIdx == 1)
1648 switch (MI.getOpcode()) {
1649 case TargetOpcode::G_CTLZ:
Matt Arsenault312a9d12020-02-07 12:24:15 -05001650 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
Petar Avramovic2b66d322020-01-27 09:43:38 +01001651 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
Petar Avramovic8bc7ba52020-01-27 09:51:06 +01001652 case TargetOpcode::G_CTTZ:
Matt Arsenault312a9d12020-02-07 12:24:15 -05001653 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
Petar Avramovic8bc7ba52020-01-27 09:51:06 +01001654 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01001655 case TargetOpcode::G_CTPOP:
1656 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
Petar Avramovic2b66d322020-01-27 09:43:38 +01001657 default:
1658 return UnableToLegalize;
1659 }
Matt Arsenaultd5684f72019-01-31 02:09:57 +00001660
1661 Observer.changingInstr(MI);
1662 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1663 Observer.changedInstr(MI);
1664 return Legalized;
Matt Arsenaultcbaada62019-02-02 23:29:55 +00001665 case TargetOpcode::G_INTTOPTR:
1666 if (TypeIdx != 1)
1667 return UnableToLegalize;
1668
1669 Observer.changingInstr(MI);
1670 narrowScalarSrc(MI, NarrowTy, 1);
1671 Observer.changedInstr(MI);
1672 return Legalized;
1673 case TargetOpcode::G_PTRTOINT:
1674 if (TypeIdx != 0)
1675 return UnableToLegalize;
1676
1677 Observer.changingInstr(MI);
1678 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1679 Observer.changedInstr(MI);
1680 return Legalized;
Petar Avramovicbe20e362019-07-09 14:36:17 +00001681 case TargetOpcode::G_PHI: {
Nikita Popovc35761d2021-03-01 21:37:26 +01001682 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1683 // NarrowSize.
1684 if (SizeOp0 % NarrowSize != 0)
1685 return UnableToLegalize;
1686
Petar Avramovicbe20e362019-07-09 14:36:17 +00001687 unsigned NumParts = SizeOp0 / NarrowSize;
Matt Arsenaultde8451f2020-02-04 10:34:22 -05001688 SmallVector<Register, 2> DstRegs(NumParts);
1689 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
Petar Avramovicbe20e362019-07-09 14:36:17 +00001690 Observer.changingInstr(MI);
1691 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1692 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
Amara Emerson53445f52022-11-13 01:43:04 -08001693 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
Petar Avramovicbe20e362019-07-09 14:36:17 +00001694 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
chuongg3fcfe1b62024-01-15 16:40:39 +00001695 SrcRegs[i / 2], MIRBuilder, MRI);
Petar Avramovicbe20e362019-07-09 14:36:17 +00001696 }
1697 MachineBasicBlock &MBB = *MI.getParent();
1698 MIRBuilder.setInsertPt(MBB, MI);
1699 for (unsigned i = 0; i < NumParts; ++i) {
1700 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1701 MachineInstrBuilder MIB =
1702 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1703 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1704 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1705 }
Amara Emerson02bcc862019-09-13 21:49:24 +00001706 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
Diana Picusf95a5fb2023-01-09 11:59:00 +01001707 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
Petar Avramovicbe20e362019-07-09 14:36:17 +00001708 Observer.changedInstr(MI);
1709 MI.eraseFromParent();
1710 return Legalized;
1711 }
Matt Arsenault434d6642019-07-15 19:37:34 +00001712 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1713 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1714 if (TypeIdx != 2)
1715 return UnableToLegalize;
1716
1717 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1718 Observer.changingInstr(MI);
1719 narrowScalarSrc(MI, NarrowTy, OpIdx);
1720 Observer.changedInstr(MI);
1721 return Legalized;
1722 }
Petar Avramovic1e626352019-07-17 12:08:01 +00001723 case TargetOpcode::G_ICMP: {
Jessica Paquette47d07802021-06-29 17:01:28 -07001724 Register LHS = MI.getOperand(2).getReg();
1725 LLT SrcTy = MRI.getType(LHS);
Petar Avramovic1e626352019-07-17 12:08:01 +00001726 CmpInst::Predicate Pred =
1727 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1728
Jessica Paquette47d07802021-06-29 17:01:28 -07001729 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1730 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1731 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
chuongg3fcfe1b62024-01-15 16:40:39 +00001732 LHSLeftoverRegs, MIRBuilder, MRI))
Jessica Paquette47d07802021-06-29 17:01:28 -07001733 return UnableToLegalize;
1734
1735 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1736 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1737 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
chuongg3fcfe1b62024-01-15 16:40:39 +00001738 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
Jessica Paquette47d07802021-06-29 17:01:28 -07001739 return UnableToLegalize;
1740
1741 // We now have the LHS and RHS of the compare split into narrow-type
1742 // registers, plus potentially some leftover type.
1743 Register Dst = MI.getOperand(0).getReg();
1744 LLT ResTy = MRI.getType(Dst);
1745 if (ICmpInst::isEquality(Pred)) {
1746 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1747 // them together. For each equal part, the result should be all 0s. For
1748 // each non-equal part, we'll get at least one 1.
1749 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1750 SmallVector<Register, 4> Xors;
1751 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1752 auto LHS = std::get<0>(LHSAndRHS);
1753 auto RHS = std::get<1>(LHSAndRHS);
1754 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1755 Xors.push_back(Xor);
1756 }
1757
1758 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1759 // to the desired narrow type so that we can OR them together later.
1760 SmallVector<Register, 4> WidenedXors;
1761 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1762 auto LHS = std::get<0>(LHSAndRHS);
1763 auto RHS = std::get<1>(LHSAndRHS);
1764 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1765 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1766 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1767 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1768 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1769 }
1770
1771 // Now, for each part we broke up, we know if they are equal/not equal
1772 // based off the G_XOR. We can OR these all together and compare against
1773 // 0 to get the result.
1774 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1775 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1776 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1777 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1778 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
Petar Avramovic1e626352019-07-17 12:08:01 +00001779 } else {
Craig Topper7ece5602024-12-12 09:50:26 -08001780 Register CmpIn;
1781 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1782 Register CmpOut;
1783 CmpInst::Predicate PartPred;
1784
1785 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1786 PartPred = Pred;
1787 CmpOut = Dst;
1788 } else {
1789 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1790 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1791 }
1792
1793 if (!CmpIn) {
1794 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1795 RHSPartRegs[I]);
1796 } else {
1797 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1798 RHSPartRegs[I]);
1799 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1800 LHSPartRegs[I], RHSPartRegs[I]);
1801 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1802 }
1803
1804 CmpIn = CmpOut;
1805 }
1806
1807 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1808 Register CmpOut;
1809 CmpInst::Predicate PartPred;
1810
1811 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1812 PartPred = Pred;
1813 CmpOut = Dst;
1814 } else {
1815 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1816 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1817 }
1818
1819 if (!CmpIn) {
1820 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1821 RHSLeftoverRegs[I]);
1822 } else {
1823 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1824 RHSLeftoverRegs[I]);
1825 auto CmpEq =
1826 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1827 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1828 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1829 }
1830
1831 CmpIn = CmpOut;
1832 }
Petar Avramovic1e626352019-07-17 12:08:01 +00001833 }
Petar Avramovic1e626352019-07-17 12:08:01 +00001834 MI.eraseFromParent();
1835 return Legalized;
1836 }
David Greenf297d0b2024-01-28 15:42:36 +00001837 case TargetOpcode::G_FCMP:
1838 if (TypeIdx != 0)
1839 return UnableToLegalize;
1840
1841 Observer.changingInstr(MI);
1842 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1843 Observer.changedInstr(MI);
1844 return Legalized;
1845
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001846 case TargetOpcode::G_SEXT_INREG: {
1847 if (TypeIdx != 0)
1848 return UnableToLegalize;
1849
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001850 int64_t SizeInBits = MI.getOperand(2).getImm();
1851
1852 // So long as the new type has more bits than the bits we're extending we
1853 // don't need to break it apart.
Craig Topper5d501b12023-11-24 08:39:38 -08001854 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001855 Observer.changingInstr(MI);
1856 // We don't lose any non-extension bits by truncating the src and
1857 // sign-extending the dst.
1858 MachineOperand &MO1 = MI.getOperand(1);
Jay Foad63f73542020-01-16 12:37:00 +00001859 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
Jay Foadb482e1b2020-01-23 11:51:35 +00001860 MO1.setReg(TruncMIB.getReg(0));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001861
1862 MachineOperand &MO2 = MI.getOperand(0);
1863 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1864 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
Jay Foad63f73542020-01-16 12:37:00 +00001865 MIRBuilder.buildSExt(MO2, DstExt);
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001866 MO2.setReg(DstExt);
1867 Observer.changedInstr(MI);
1868 return Legalized;
1869 }
1870
1871 // Break it apart. Components below the extension point are unmodified. The
1872 // component containing the extension point becomes a narrower SEXT_INREG.
1873 // Components above it are ashr'd from the component containing the
1874 // extension point.
1875 if (SizeOp0 % NarrowSize != 0)
1876 return UnableToLegalize;
1877 int NumParts = SizeOp0 / NarrowSize;
1878
1879 // List the registers where the destination will be scattered.
1880 SmallVector<Register, 2> DstRegs;
1881 // List the registers where the source will be split.
1882 SmallVector<Register, 2> SrcRegs;
1883
1884 // Create all the temporary registers.
1885 for (int i = 0; i < NumParts; ++i) {
1886 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1887
1888 SrcRegs.push_back(SrcReg);
1889 }
1890
1891 // Explode the big arguments into smaller chunks.
Jay Foad63f73542020-01-16 12:37:00 +00001892 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001893
1894 Register AshrCstReg =
1895 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
Jay Foadb482e1b2020-01-23 11:51:35 +00001896 .getReg(0);
Craig Topper5d501b12023-11-24 08:39:38 -08001897 Register FullExtensionReg;
1898 Register PartialExtensionReg;
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001899
1900 // Do the operation on each small part.
1901 for (int i = 0; i < NumParts; ++i) {
Craig Topper5d501b12023-11-24 08:39:38 -08001902 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001903 DstRegs.push_back(SrcRegs[i]);
Craig Topper5d501b12023-11-24 08:39:38 -08001904 PartialExtensionReg = DstRegs.back();
1905 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001906 assert(PartialExtensionReg &&
1907 "Expected to visit partial extension before full");
1908 if (FullExtensionReg) {
1909 DstRegs.push_back(FullExtensionReg);
1910 continue;
1911 }
Jay Foad28bb43b2020-01-16 12:09:48 +00001912 DstRegs.push_back(
1913 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
Jay Foadb482e1b2020-01-23 11:51:35 +00001914 .getReg(0));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001915 FullExtensionReg = DstRegs.back();
1916 } else {
1917 DstRegs.push_back(
1918 MIRBuilder
1919 .buildInstr(
1920 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1921 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
Jay Foadb482e1b2020-01-23 11:51:35 +00001922 .getReg(0));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001923 PartialExtensionReg = DstRegs.back();
1924 }
1925 }
1926
1927 // Gather the destination registers into the final destination.
1928 Register DstReg = MI.getOperand(0).getReg();
Diana Picusf95a5fb2023-01-09 11:59:00 +01001929 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
Daniel Sanderse9a57c22019-08-09 21:11:20 +00001930 MI.eraseFromParent();
1931 return Legalized;
1932 }
Petar Avramovic98f72a52019-12-30 18:06:29 +01001933 case TargetOpcode::G_BSWAP:
1934 case TargetOpcode::G_BITREVERSE: {
Petar Avramovic94a24e72019-12-30 11:13:22 +01001935 if (SizeOp0 % NarrowSize != 0)
1936 return UnableToLegalize;
1937
1938 Observer.changingInstr(MI);
1939 SmallVector<Register, 2> SrcRegs, DstRegs;
1940 unsigned NumParts = SizeOp0 / NarrowSize;
chuongg3fcfe1b62024-01-15 16:40:39 +00001941 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1942 MIRBuilder, MRI);
Petar Avramovic94a24e72019-12-30 11:13:22 +01001943
1944 for (unsigned i = 0; i < NumParts; ++i) {
1945 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1946 {SrcRegs[NumParts - 1 - i]});
1947 DstRegs.push_back(DstPart.getReg(0));
1948 }
1949
Diana Picusf95a5fb2023-01-09 11:59:00 +01001950 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
Petar Avramovic94a24e72019-12-30 11:13:22 +01001951
1952 Observer.changedInstr(MI);
1953 MI.eraseFromParent();
1954 return Legalized;
1955 }
Matt Arsenaultf6176f82020-07-25 11:00:35 -04001956 case TargetOpcode::G_PTR_ADD:
Matt Arsenaultef3e83122020-05-23 18:10:34 -04001957 case TargetOpcode::G_PTRMASK: {
1958 if (TypeIdx != 1)
1959 return UnableToLegalize;
1960 Observer.changingInstr(MI);
1961 narrowScalarSrc(MI, NarrowTy, 2);
1962 Observer.changedInstr(MI);
1963 return Legalized;
1964 }
Matt Arsenault83a25a12021-03-26 17:29:36 -04001965 case TargetOpcode::G_FPTOUI:
1966 case TargetOpcode::G_FPTOSI:
David Greenfeac7612024-09-16 10:33:59 +01001967 case TargetOpcode::G_FPTOUI_SAT:
1968 case TargetOpcode::G_FPTOSI_SAT:
Matt Arsenault83a25a12021-03-26 17:29:36 -04001969 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
Petar Avramovic6a1030a2020-07-20 16:12:19 +02001970 case TargetOpcode::G_FPEXT:
1971 if (TypeIdx != 0)
1972 return UnableToLegalize;
1973 Observer.changingInstr(MI);
1974 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1975 Observer.changedInstr(MI);
1976 return Legalized;
Matt Arsenaulteece6ba2023-04-26 22:02:42 -04001977 case TargetOpcode::G_FLDEXP:
1978 case TargetOpcode::G_STRICT_FLDEXP:
1979 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
Michael Maitland54a9f0e2024-03-26 20:17:22 -04001980 case TargetOpcode::G_VSCALE: {
1981 Register Dst = MI.getOperand(0).getReg();
1982 LLT Ty = MRI.getType(Dst);
1983
1984 // Assume VSCALE(1) fits into a legal integer
1985 const APInt One(NarrowTy.getSizeInBits(), 1);
1986 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1987 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1988 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1989 MIRBuilder.buildMul(Dst, ZExt, C);
1990
1991 MI.eraseFromParent();
1992 return Legalized;
1993 }
Tim Northover9656f142016-08-04 20:54:13 +00001994 }
Tim Northover33b07d62016-07-22 20:03:43 +00001995}
1996
Matt Arsenault3af85fa2020-03-29 18:04:53 -04001997Register LegalizerHelper::coerceToScalar(Register Val) {
1998 LLT Ty = MRI.getType(Val);
1999 if (Ty.isScalar())
2000 return Val;
2001
2002 const DataLayout &DL = MIRBuilder.getDataLayout();
2003 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2004 if (Ty.isPointer()) {
2005 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2006 return Register();
2007 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2008 }
2009
2010 Register NewVal = Val;
2011
2012 assert(Ty.isVector());
Jay Foadd57515bd2024-02-13 08:21:35 +00002013 if (Ty.isPointerVector())
Matt Arsenault3af85fa2020-03-29 18:04:53 -04002014 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2015 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2016}
2017
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002018void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2019 unsigned OpIdx, unsigned ExtOpcode) {
2020 MachineOperand &MO = MI.getOperand(OpIdx);
Jay Foad63f73542020-01-16 12:37:00 +00002021 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
Jay Foadb482e1b2020-01-23 11:51:35 +00002022 MO.setReg(ExtB.getReg(0));
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002023}
2024
Matt Arsenault30989e42019-01-22 21:42:11 +00002025void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2026 unsigned OpIdx) {
2027 MachineOperand &MO = MI.getOperand(OpIdx);
Jay Foad63f73542020-01-16 12:37:00 +00002028 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
Jay Foadb482e1b2020-01-23 11:51:35 +00002029 MO.setReg(ExtB.getReg(0));
Matt Arsenault30989e42019-01-22 21:42:11 +00002030}
2031
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002032void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2033 unsigned OpIdx, unsigned TruncOpcode) {
2034 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002035 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002036 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
Jay Foad63f73542020-01-16 12:37:00 +00002037 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002038 MO.setReg(DstExt);
2039}
2040
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002041void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2042 unsigned OpIdx, unsigned ExtOpcode) {
2043 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002044 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002045 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
Jay Foad63f73542020-01-16 12:37:00 +00002046 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002047 MO.setReg(DstTrunc);
2048}
2049
Matt Arsenault18ec3822019-02-11 22:00:39 +00002050void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2051 unsigned OpIdx) {
2052 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault18ec3822019-02-11 22:00:39 +00002053 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
Petar Avramovic29f88b92021-12-23 14:09:51 +01002054 Register Dst = MO.getReg();
2055 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2056 MO.setReg(DstExt);
2057 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
Matt Arsenault18ec3822019-02-11 22:00:39 +00002058}
2059
Matt Arsenault26b7e852019-02-19 16:30:19 +00002060void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2061 unsigned OpIdx) {
2062 MachineOperand &MO = MI.getOperand(OpIdx);
Petar Avramovic29f88b92021-12-23 14:09:51 +01002063 SmallVector<Register, 8> Regs;
2064 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
Matt Arsenault26b7e852019-02-19 16:30:19 +00002065}
2066
Matt Arsenault39c55ce2020-02-13 15:52:32 -05002067void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2068 MachineOperand &Op = MI.getOperand(OpIdx);
2069 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2070}
2071
2072void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2073 MachineOperand &MO = MI.getOperand(OpIdx);
2074 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2075 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2076 MIRBuilder.buildBitcast(MO, CastDst);
2077 MO.setReg(CastDst);
2078}
2079
// Widen the source type (TypeIdx 1) of a scalar G_MERGE_VALUES to WideTy.
// Two strategies are used:
//  * If WideTy covers the whole destination, zero-extend each piece and
//    shift/or them together directly.
//  * Otherwise, re-slice all sources through the GCD of the sizes and
//    re-merge groups of GCD chunks into WideTy pieces.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
  // Only scalar destinations are handled here.
  if (DstTy.isVector())
    return UnableToLegalize;

  LLT SrcTy = MRI.getType(Src1Reg);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy pieces needed to cover the destination, rounded up.
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  // G_MERGE_VALUES sources are all the same size: DstSize / NumSrc.
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // The last OR can write straight into DstReg when no final truncate is
      // needed; otherwise accumulate into a temporary.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = std::gcd(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    Register SrcReg = MO.getReg();
    if (GCD == SrcSize) {
      // Source is already GCD-sized; no unmerge needed.
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge =
        MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
2200
// Widen the destination type (TypeIdx 0) of a scalar G_UNMERGE_VALUES to
// WideTy. When WideTy is at least as big as the source, results are extracted
// with shifts and truncates; otherwise the source is any-extended to the LCM
// type, unmerged into WideTy pieces, and those pieces are re-sliced back to
// the original destinations (padding with dead defs as needed).
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  // The (single) source is the last operand of G_UNMERGE_VALUES.
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    // Result I lives at bit offset DstSize * I of the source.
    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead defs
  // to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    // Unpack everything to GCD chunks, then remerge groups of chunks into
    // each original destination.
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
2329
// Widen G_EXTRACT. For TypeIdx 0 (the result), the extract is lowered to a
// shift + truncate in an integer type; for TypeIdx 1 (the source), the source
// operand is any-extended in place (adjusting the bit offset for vector
// sources).
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // Bit offset of the extracted value within the source.
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    // Shift the desired bits down to bit 0 and truncate to the result type.
    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    // Scalar source: an any-extend preserves the bits being extracted.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Only element-sized extracts at element boundaries are supported below.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Scale the bit offset to the widened element size.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
2403
2404LegalizerHelper::LegalizeResult
2405LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2406 LLT WideTy) {
Matt Arsenault5cbd4e42020-07-18 12:27:16 -04002407 if (TypeIdx != 0 || WideTy.isVector())
Matt Arsenault1cf713662019-02-12 14:54:52 +00002408 return UnableToLegalize;
2409 Observer.changingInstr(MI);
2410 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2411 widenScalarDst(MI, WideTy);
2412 Observer.changedInstr(MI);
2413 return Legalized;
2414}
2415
// Widen the overflowing add/sub family (G_[SU]ADDO, G_[SU]SUBO, G_[SU]ADDE,
// G_[SU]SUBE). TypeIdx 1 just widens the carry/overflow output; TypeIdx 0
// performs the arithmetic in WideTy after sign/zero-extending the inputs, and
// detects overflow by checking whether the widened result survives a
// truncate/re-extend round trip.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  unsigned Opcode;
  unsigned ExtOpcode;
  std::optional<Register> CarryIn;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    // Signed ops extend with G_SEXT so the wide compare sees the signed value.
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    // Carry-consuming signed variants are rewritten to their unsigned
    // carry-propagating counterparts on the wide type.
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  if (TypeIdx == 1) {
    // Only the boolean carry-out (operand 1) and, if present, the carry-in
    // (operand 4) change type here.
    unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);

    Observer.changingInstr(MI);
    if (CarryIn)
      widenScalarSrc(MI, WideTy, 4, BoolExtOp);
    widenScalarDst(MI, WideTy, 1);

    Observer.changedInstr(MI);
    return Legalized;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
  // Do the arithmetic in the larger type.
  Register NewOp;
  if (CarryIn) {
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }
  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
  // There is no overflow if the ExtOp is the same as NewOp.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
  // Now trunc the NewOp to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
  MI.eraseFromParent();
  return Legalized;
}
2498
// Widen the saturating add/sub/shift family (G_[SU]ADDSAT, G_[SU]SUBSAT,
// G_[SU]SHLSAT) by pre-shifting the operands into the high bits of WideTy so
// the wide saturating operation clips at the same boundary, then shifting the
// result back down.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;
  // We can convert this to:
  //   1. Any extend iN to iM
  //   2. SHL by M-N
  //   3. [US][ADD|SUB|SHL]SAT
  //   4. L/ASHR by M-N
  //
  // It may be more efficient to lower this to a min and a max operation in
  // the higher precision arithmetic if the promoted operation isn't legal,
  // but this decision is up to the target's lowering request.
  Register DstReg = MI.getOperand(0).getReg();

  unsigned NewBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
                     : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);

  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
                         : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}
2542
2543LegalizerHelper::LegalizeResult
Pushpinder Singhd0e54222021-03-09 06:10:00 +00002544LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2545 LLT WideTy) {
Matt Arsenault95c2bcb2022-04-12 12:03:04 -04002546 if (TypeIdx == 1) {
2547 Observer.changingInstr(MI);
2548 widenScalarDst(MI, WideTy, 1);
2549 Observer.changedInstr(MI);
2550 return Legalized;
2551 }
Pushpinder Singhd0e54222021-03-09 06:10:00 +00002552
2553 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
Amara Emerson719024a2023-02-23 16:35:39 -08002554 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
Pushpinder Singhd0e54222021-03-09 06:10:00 +00002555 LLT SrcTy = MRI.getType(LHS);
2556 LLT OverflowTy = MRI.getType(OriginalOverflow);
2557 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2558
2559 // To determine if the result overflowed in the larger type, we extend the
2560 // input to the larger type, do the multiply (checking if it overflows),
2561 // then also check the high bits of the result to see if overflow happened
2562 // there.
2563 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2564 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2565 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2566
Craig Topper37505582023-10-13 20:34:45 -07002567 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2568 // so we don't need to check the overflow result of larger type Mulo.
2569 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2570
2571 unsigned MulOpc =
2572 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2573
2574 MachineInstrBuilder Mulo;
2575 if (WideMulCanOverflow)
2576 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2577 {LeftOperand, RightOperand});
2578 else
2579 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2580
Pushpinder Singhd0e54222021-03-09 06:10:00 +00002581 auto Mul = Mulo->getOperand(0);
2582 MIRBuilder.buildTrunc(Result, Mul);
2583
2584 MachineInstrBuilder ExtResult;
2585 // Overflow occurred if it occurred in the larger type, or if the high part
2586 // of the result does not zero/sign-extend the low part. Check this second
2587 // possibility first.
2588 if (IsSigned) {
2589 // For signed, overflow occurred when the high part does not sign-extend
2590 // the low part.
2591 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2592 } else {
2593 // Unsigned overflow occurred when the high part does not zero-extend the
2594 // low part.
2595 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2596 }
2597
Craig Topper37505582023-10-13 20:34:45 -07002598 if (WideMulCanOverflow) {
Pushpinder Singhd0e54222021-03-09 06:10:00 +00002599 auto Overflow =
2600 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2601 // Finally check if the multiplication in the larger type itself overflowed.
2602 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2603 } else {
2604 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2605 }
2606 MI.eraseFromParent();
2607 return Legalized;
2608}
2609
2610LegalizerHelper::LegalizeResult
Tim Northover69fa84a2016-10-14 22:18:18 +00002611LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
David Green10fe5312024-08-18 11:08:49 +01002612 unsigned Opcode = MI.getOpcode();
2613 switch (Opcode) {
Tim Northover32335812016-08-04 18:35:11 +00002614 default:
2615 return UnableToLegalize;
Tim Northover291e0da2021-07-21 09:05:56 +01002616 case TargetOpcode::G_ATOMICRMW_XCHG:
2617 case TargetOpcode::G_ATOMICRMW_ADD:
2618 case TargetOpcode::G_ATOMICRMW_SUB:
2619 case TargetOpcode::G_ATOMICRMW_AND:
2620 case TargetOpcode::G_ATOMICRMW_OR:
2621 case TargetOpcode::G_ATOMICRMW_XOR:
2622 case TargetOpcode::G_ATOMICRMW_MIN:
2623 case TargetOpcode::G_ATOMICRMW_MAX:
2624 case TargetOpcode::G_ATOMICRMW_UMIN:
2625 case TargetOpcode::G_ATOMICRMW_UMAX:
2626 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2627 Observer.changingInstr(MI);
2628 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2629 widenScalarDst(MI, WideTy, 0);
2630 Observer.changedInstr(MI);
2631 return Legalized;
2632 case TargetOpcode::G_ATOMIC_CMPXCHG:
2633 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2634 Observer.changingInstr(MI);
2635 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2636 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2637 widenScalarDst(MI, WideTy, 0);
2638 Observer.changedInstr(MI);
2639 return Legalized;
2640 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2641 if (TypeIdx == 0) {
2642 Observer.changingInstr(MI);
2643 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2644 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2645 widenScalarDst(MI, WideTy, 0);
2646 Observer.changedInstr(MI);
2647 return Legalized;
2648 }
2649 assert(TypeIdx == 1 &&
2650 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2651 Observer.changingInstr(MI);
2652 widenScalarDst(MI, WideTy, 1);
2653 Observer.changedInstr(MI);
2654 return Legalized;
Matt Arsenault1cf713662019-02-12 14:54:52 +00002655 case TargetOpcode::G_EXTRACT:
2656 return widenScalarExtract(MI, TypeIdx, WideTy);
2657 case TargetOpcode::G_INSERT:
2658 return widenScalarInsert(MI, TypeIdx, WideTy);
Matt Arsenault888aa5d2019-02-03 00:07:33 +00002659 case TargetOpcode::G_MERGE_VALUES:
2660 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2661 case TargetOpcode::G_UNMERGE_VALUES:
2662 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
Cassie Jonesaa8f3672021-01-25 16:57:20 -05002663 case TargetOpcode::G_SADDO:
Mitch Phillipsc9466ed2021-01-22 14:25:31 -08002664 case TargetOpcode::G_SSUBO:
Aditya Nandakumar6d47a412018-08-29 03:17:08 +00002665 case TargetOpcode::G_UADDO:
Mitch Phillipsc9466ed2021-01-22 14:25:31 -08002666 case TargetOpcode::G_USUBO:
Cassie Jonesf22f4552021-01-28 13:20:35 -05002667 case TargetOpcode::G_SADDE:
2668 case TargetOpcode::G_SSUBE:
2669 case TargetOpcode::G_UADDE:
2670 case TargetOpcode::G_USUBE:
2671 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
Pushpinder Singhd0e54222021-03-09 06:10:00 +00002672 case TargetOpcode::G_UMULO:
2673 case TargetOpcode::G_SMULO:
2674 return widenScalarMulo(MI, TypeIdx, WideTy);
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002675 case TargetOpcode::G_SADDSAT:
2676 case TargetOpcode::G_SSUBSAT:
Bevin Hansson5de6c562020-07-16 17:02:04 +02002677 case TargetOpcode::G_SSHLSAT:
Matt Arsenault6a8c11a2020-07-12 13:58:53 -04002678 case TargetOpcode::G_UADDSAT:
2679 case TargetOpcode::G_USUBSAT:
Bevin Hansson5de6c562020-07-16 17:02:04 +02002680 case TargetOpcode::G_USHLSAT:
2681 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002682 case TargetOpcode::G_CTTZ:
2683 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2684 case TargetOpcode::G_CTLZ:
2685 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2686 case TargetOpcode::G_CTPOP: {
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002687 if (TypeIdx == 0) {
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002688 Observer.changingInstr(MI);
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002689 widenScalarDst(MI, WideTy, 0);
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002690 Observer.changedInstr(MI);
Matt Arsenaultd5684f72019-01-31 02:09:57 +00002691 return Legalized;
2692 }
2693
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002694 Register SrcReg = MI.getOperand(1).getReg();
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002695
Jay Foad57b91072021-08-06 11:05:42 +01002696 // First extend the input.
David Green10fe5312024-08-18 11:08:49 +01002697 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2698 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
Jay Foad57b91072021-08-06 11:05:42 +01002699 ? TargetOpcode::G_ANYEXT
2700 : TargetOpcode::G_ZEXT;
2701 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002702 LLT CurTy = MRI.getType(SrcReg);
David Green10fe5312024-08-18 11:08:49 +01002703 unsigned NewOpc = Opcode;
Jay Foadcd2594e2021-08-04 14:37:45 +01002704 if (NewOpc == TargetOpcode::G_CTTZ) {
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002705 // The count is the same in the larger type except if the original
2706 // value was zero. This can be handled by setting the bit just off
2707 // the top of the original type.
2708 auto TopBit =
2709 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002710 MIBSrc = MIRBuilder.buildOr(
2711 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
Jay Foadcd2594e2021-08-04 14:37:45 +01002712 // Now we know the operand is non-zero, use the more relaxed opcode.
2713 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002714 }
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002715
Manish Kausik H69192e02024-07-08 18:31:32 +05302716 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2717
David Green10fe5312024-08-18 11:08:49 +01002718 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
Manish Kausik H69192e02024-07-08 18:31:32 +05302719 // An optimization where the result is the CTLZ after the left shift by
2720 // (Difference in widety and current ty), that is,
2721 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2722 // Result = ctlz MIBSrc
2723 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2724 MIRBuilder.buildConstant(WideTy, SizeDiff));
2725 }
2726
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002727 // Perform the operation at the larger size.
Jay Foadcd2594e2021-08-04 14:37:45 +01002728 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002729 // This is already the correct result for CTPOP and CTTZs
David Green10fe5312024-08-18 11:08:49 +01002730 if (Opcode == TargetOpcode::G_CTLZ) {
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002731 // The correct result is NewOp - (Difference in widety and current ty).
Jay Foad28bb43b2020-01-16 12:09:48 +00002732 MIBNewOp = MIRBuilder.buildSub(
2733 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002734 }
Matt Arsenault3d6a49b2019-02-04 22:26:33 +00002735
2736 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2737 MI.eraseFromParent();
Aditya Nandakumarc1061832018-08-22 17:59:18 +00002738 return Legalized;
2739 }
Matt Arsenaultd1bfc8d2019-01-31 02:34:03 +00002740 case TargetOpcode::G_BSWAP: {
2741 Observer.changingInstr(MI);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002742 Register DstReg = MI.getOperand(0).getReg();
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002743
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002744 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2745 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2746 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
Matt Arsenaultd1bfc8d2019-01-31 02:34:03 +00002747 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2748
2749 MI.getOperand(0).setReg(DstExt);
2750
2751 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2752
2753 LLT Ty = MRI.getType(DstReg);
2754 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2755 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
Jay Foad28bb43b2020-01-16 12:09:48 +00002756 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
Matt Arsenaultd1bfc8d2019-01-31 02:34:03 +00002757
2758 MIRBuilder.buildTrunc(DstReg, ShrReg);
2759 Observer.changedInstr(MI);
2760 return Legalized;
2761 }
Matt Arsenault5ff310e2019-09-04 20:46:15 +00002762 case TargetOpcode::G_BITREVERSE: {
2763 Observer.changingInstr(MI);
2764
2765 Register DstReg = MI.getOperand(0).getReg();
2766 LLT Ty = MRI.getType(DstReg);
2767 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2768
2769 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2770 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2771 MI.getOperand(0).setReg(DstExt);
2772 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2773
2774 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2775 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2776 MIRBuilder.buildTrunc(DstReg, Shift);
2777 Observer.changedInstr(MI);
2778 return Legalized;
2779 }
Dominik Montada55e3a7c2020-04-14 11:25:05 +02002780 case TargetOpcode::G_FREEZE:
Yingwei Zheng821bcba2024-05-22 23:35:37 +08002781 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
Dominik Montada55e3a7c2020-04-14 11:25:05 +02002782 Observer.changingInstr(MI);
2783 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2784 widenScalarDst(MI, WideTy);
2785 Observer.changedInstr(MI);
2786 return Legalized;
2787
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02002788 case TargetOpcode::G_ABS:
2789 Observer.changingInstr(MI);
2790 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2791 widenScalarDst(MI, WideTy);
2792 Observer.changedInstr(MI);
2793 return Legalized;
2794
Tim Northover61c16142016-08-04 21:39:49 +00002795 case TargetOpcode::G_ADD:
2796 case TargetOpcode::G_AND:
2797 case TargetOpcode::G_MUL:
2798 case TargetOpcode::G_OR:
2799 case TargetOpcode::G_XOR:
Justin Bognerddb80ae2017-01-19 07:51:17 +00002800 case TargetOpcode::G_SUB:
Tuan Chuong Goh13a78fd2024-03-04 14:27:21 +00002801 case TargetOpcode::G_SHUFFLE_VECTOR:
Matt Arsenault1cf713662019-02-12 14:54:52 +00002802 // Perform operation at larger width (any extension is fine here, high bits
Tim Northover32335812016-08-04 18:35:11 +00002803 // don't affect the result) and then truncate the result back to the
2804 // original type.
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002805 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002806 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2807 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2808 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002809 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002810 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002811
Brendon Cahoonf9f5d412021-04-30 09:57:44 -04002812 case TargetOpcode::G_SBFX:
2813 case TargetOpcode::G_UBFX:
2814 Observer.changingInstr(MI);
2815
2816 if (TypeIdx == 0) {
2817 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2818 widenScalarDst(MI, WideTy);
2819 } else {
2820 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2821 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2822 }
2823
2824 Observer.changedInstr(MI);
2825 return Legalized;
2826
Roman Tereshin6d266382018-05-09 21:43:30 +00002827 case TargetOpcode::G_SHL:
Matt Arsenault012ecbb2019-05-16 04:08:46 +00002828 Observer.changingInstr(MI);
Matt Arsenault30989e42019-01-22 21:42:11 +00002829
2830 if (TypeIdx == 0) {
2831 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2832 widenScalarDst(MI, WideTy);
2833 } else {
2834 assert(TypeIdx == 1);
2835 // The "number of bits to shift" operand must preserve its value as an
2836 // unsigned integer:
2837 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2838 }
2839
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002840 Observer.changedInstr(MI);
Roman Tereshin6d266382018-05-09 21:43:30 +00002841 return Legalized;
2842
Craig Topperd605d9d2023-12-04 13:00:34 -08002843 case TargetOpcode::G_ROTR:
2844 case TargetOpcode::G_ROTL:
2845 if (TypeIdx != 1)
2846 return UnableToLegalize;
2847
2848 Observer.changingInstr(MI);
2849 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2850 Observer.changedInstr(MI);
2851 return Legalized;
2852
Tim Northover7a753d92016-08-26 17:46:06 +00002853 case TargetOpcode::G_SDIV:
Roman Tereshin27bba442018-05-09 01:43:12 +00002854 case TargetOpcode::G_SREM:
Matt Arsenault0f3ba442019-05-23 17:58:48 +00002855 case TargetOpcode::G_SMIN:
2856 case TargetOpcode::G_SMAX:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002857 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002858 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2859 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2860 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002861 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002862 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002863
Christudasan Devadasan90d78402021-04-12 15:49:47 +05302864 case TargetOpcode::G_SDIVREM:
2865 Observer.changingInstr(MI);
2866 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2867 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2868 widenScalarDst(MI, WideTy);
2869 widenScalarDst(MI, WideTy, 1);
2870 Observer.changedInstr(MI);
2871 return Legalized;
2872
Roman Tereshin6d266382018-05-09 21:43:30 +00002873 case TargetOpcode::G_ASHR:
Matt Arsenault30989e42019-01-22 21:42:11 +00002874 case TargetOpcode::G_LSHR:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002875 Observer.changingInstr(MI);
Matt Arsenault30989e42019-01-22 21:42:11 +00002876
2877 if (TypeIdx == 0) {
David Green10fe5312024-08-18 11:08:49 +01002878 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2879 : TargetOpcode::G_ZEXT;
Matt Arsenault30989e42019-01-22 21:42:11 +00002880
2881 widenScalarSrc(MI, WideTy, 1, CvtOp);
2882 widenScalarDst(MI, WideTy);
2883 } else {
2884 assert(TypeIdx == 1);
2885 // The "number of bits to shift" operand must preserve its value as an
2886 // unsigned integer:
2887 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2888 }
2889
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002890 Observer.changedInstr(MI);
Roman Tereshin6d266382018-05-09 21:43:30 +00002891 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002892 case TargetOpcode::G_UDIV:
2893 case TargetOpcode::G_UREM:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002894 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002895 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2896 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2897 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002898 Observer.changedInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002899 return Legalized;
Christudasan Devadasan90d78402021-04-12 15:49:47 +05302900 case TargetOpcode::G_UDIVREM:
2901 Observer.changingInstr(MI);
2902 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2903 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2904 widenScalarDst(MI, WideTy);
2905 widenScalarDst(MI, WideTy, 1);
2906 Observer.changedInstr(MI);
2907 return Legalized;
Craig Topper54dac272024-12-15 23:16:58 -08002908 case TargetOpcode::G_UMIN:
2909 case TargetOpcode::G_UMAX: {
2910 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2911
2912 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2913 unsigned ExtOpc =
2914 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2915 getApproximateEVTForLLT(WideTy, Ctx))
2916 ? TargetOpcode::G_SEXT
2917 : TargetOpcode::G_ZEXT;
2918
2919 Observer.changingInstr(MI);
2920 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2921 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2922 widenScalarDst(MI, WideTy);
2923 Observer.changedInstr(MI);
2924 return Legalized;
2925 }
Christudasan Devadasan90d78402021-04-12 15:49:47 +05302926
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002927 case TargetOpcode::G_SELECT:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002928 Observer.changingInstr(MI);
Petar Avramovic09dff332018-12-25 14:42:30 +00002929 if (TypeIdx == 0) {
2930 // Perform operation at larger width (any extension is fine here, high
2931 // bits don't affect the result) and then truncate the result back to the
2932 // original type.
2933 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2934 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2935 widenScalarDst(MI, WideTy);
2936 } else {
Matt Arsenault6d8e1b42019-01-30 02:57:43 +00002937 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
Petar Avramovic09dff332018-12-25 14:42:30 +00002938 // Explicit extension is required here since high bits affect the result.
Matt Arsenault6d8e1b42019-01-30 02:57:43 +00002939 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
Petar Avramovic09dff332018-12-25 14:42:30 +00002940 }
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002941 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002942 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002943
Ahmed Bougachab6137062017-01-23 21:10:14 +00002944 case TargetOpcode::G_FPTOSI:
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002945 case TargetOpcode::G_FPTOUI:
David Green28d28d52024-04-15 09:41:08 +01002946 case TargetOpcode::G_INTRINSIC_LRINT:
David Green8d49ce12024-04-17 18:38:24 +01002947 case TargetOpcode::G_INTRINSIC_LLRINT:
Min-Yih Hsu7c3c8a12023-11-22 16:43:20 -08002948 case TargetOpcode::G_IS_FPCLASS:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002949 Observer.changingInstr(MI);
Matt Arsenaulted85b0c2019-10-01 01:06:48 +00002950
2951 if (TypeIdx == 0)
2952 widenScalarDst(MI, WideTy);
2953 else
2954 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2955
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002956 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002957 return Legalized;
Ahmed Bougachad2948232017-01-20 01:37:24 +00002958 case TargetOpcode::G_SITOFP:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002959 Observer.changingInstr(MI);
Petar Avramovic68500332020-07-16 16:31:57 +02002960
2961 if (TypeIdx == 0)
2962 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2963 else
2964 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2965
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002966 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00002967 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002968 case TargetOpcode::G_UITOFP:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00002969 Observer.changingInstr(MI);
Petar Avramovic68500332020-07-16 16:31:57 +02002970
2971 if (TypeIdx == 0)
2972 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2973 else
2974 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2975
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00002976 Observer.changedInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00002977 return Legalized;
David Greenfeac7612024-09-16 10:33:59 +01002978 case TargetOpcode::G_FPTOSI_SAT:
2979 case TargetOpcode::G_FPTOUI_SAT:
2980 Observer.changingInstr(MI);
2981
2982 if (TypeIdx == 0) {
2983 Register OldDst = MI.getOperand(0).getReg();
2984 LLT Ty = MRI.getType(OldDst);
2985 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
2986 Register NewDst;
2987 MI.getOperand(0).setReg(ExtReg);
2988 uint64_t ShortBits = Ty.getScalarSizeInBits();
2989 uint64_t WideBits = WideTy.getScalarSizeInBits();
2990 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2991 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2992 // z = i16 fptosi_sat(a)
2993 // ->
2994 // x = i32 fptosi_sat(a)
2995 // y = smin(x, 32767)
2996 // z = smax(y, -32768)
2997 auto MaxVal = MIRBuilder.buildConstant(
2998 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
2999 auto MinVal = MIRBuilder.buildConstant(
3000 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3001 Register MidReg =
3002 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3003 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3004 } else {
3005 // z = i16 fptoui_sat(a)
3006 // ->
3007 // x = i32 fptoui_sat(a)
3008 // y = smin(x, 65535)
3009 auto MaxVal = MIRBuilder.buildConstant(
3010 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3011 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3012 }
3013 MIRBuilder.buildTrunc(OldDst, NewDst);
3014 } else
3015 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3016
3017 Observer.changedInstr(MI);
3018 return Legalized;
Daniel Sanders5eb9f582018-04-28 18:14:50 +00003019 case TargetOpcode::G_LOAD:
Daniel Sanders5eb9f582018-04-28 18:14:50 +00003020 case TargetOpcode::G_SEXTLOAD:
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003021 case TargetOpcode::G_ZEXTLOAD:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003022 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003023 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003024 Observer.changedInstr(MI);
Tim Northover3c73e362016-08-23 18:20:09 +00003025 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003026
Tim Northover3c73e362016-08-23 18:20:09 +00003027 case TargetOpcode::G_STORE: {
Matt Arsenault92c50012019-01-30 02:04:31 +00003028 if (TypeIdx != 0)
3029 return UnableToLegalize;
3030
3031 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
Amara Emerson7e3180a2025-01-05 21:31:34 -08003032 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3033 if (!Ty.isScalar()) {
3034 // We need to widen the vector element type.
3035 Observer.changingInstr(MI);
3036 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3037 // We also need to adjust the MMO to turn this into a truncating store.
3038 MachineMemOperand &MMO = **MI.memoperands_begin();
3039 MachineFunction &MF = MIRBuilder.getMF();
3040 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3041 MI.setMemRefs(MF, {NewMMO});
3042 Observer.changedInstr(MI);
3043 return Legalized;
3044 }
Tim Northover548feee2017-03-21 22:22:05 +00003045
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003046 Observer.changingInstr(MI);
Matt Arsenault92c50012019-01-30 02:04:31 +00003047
3048 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3049 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3050 widenScalarSrc(MI, WideTy, 0, ExtType);
3051
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003052 Observer.changedInstr(MI);
Tim Northover3c73e362016-08-23 18:20:09 +00003053 return Legalized;
3054 }
Tim Northoverea904f92016-08-19 22:40:00 +00003055 case TargetOpcode::G_CONSTANT: {
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003056 MachineOperand &SrcMO = MI.getOperand(1);
3057 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
Aditya Nandakumar6da7dbb2019-12-03 10:40:03 -08003058 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3059 MRI.getType(MI.getOperand(0).getReg()));
3060 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3061 ExtOpc == TargetOpcode::G_ANYEXT) &&
3062 "Illegal Extend");
3063 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3064 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3065 ? SrcVal.sext(WideTy.getSizeInBits())
3066 : SrcVal.zext(WideTy.getSizeInBits());
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003067 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003068 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3069
3070 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003071 Observer.changedInstr(MI);
Tim Northoverea904f92016-08-19 22:40:00 +00003072 return Legalized;
3073 }
Tim Northovera11be042016-08-19 22:40:08 +00003074 case TargetOpcode::G_FCONSTANT: {
Amara Emersond4f84df2022-07-14 00:53:59 -07003075 // To avoid changing the bits of the constant due to extension to a larger
3076 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003077 MachineOperand &SrcMO = MI.getOperand(1);
Amara Emersond4f84df2022-07-14 00:53:59 -07003078 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3079 MIRBuilder.setInstrAndDebugLoc(MI);
3080 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3081 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3082 MI.eraseFromParent();
Roman Tereshin25cbfe62018-05-08 22:53:09 +00003083 return Legalized;
Roman Tereshin27bba442018-05-09 01:43:12 +00003084 }
Matt Arsenaultbefee402019-01-09 07:34:14 +00003085 case TargetOpcode::G_IMPLICIT_DEF: {
3086 Observer.changingInstr(MI);
3087 widenScalarDst(MI, WideTy);
3088 Observer.changedInstr(MI);
3089 return Legalized;
3090 }
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003091 case TargetOpcode::G_BRCOND:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003092 Observer.changingInstr(MI);
Petar Avramovic5d9b8ee2019-02-14 11:39:53 +00003093 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003094 Observer.changedInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003095 return Legalized;
3096
3097 case TargetOpcode::G_FCMP:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003098 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003099 if (TypeIdx == 0)
3100 widenScalarDst(MI, WideTy);
3101 else {
3102 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3103 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
Roman Tereshin27bba442018-05-09 01:43:12 +00003104 }
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003105 Observer.changedInstr(MI);
Roman Tereshin27bba442018-05-09 01:43:12 +00003106 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003107
3108 case TargetOpcode::G_ICMP:
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003109 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003110 if (TypeIdx == 0)
3111 widenScalarDst(MI, WideTy);
3112 else {
Craig Topper11587292024-12-15 22:55:58 -08003113 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3114 CmpInst::Predicate Pred =
3115 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3116
3117 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3118 unsigned ExtOpcode =
3119 (CmpInst::isSigned(Pred) ||
3120 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3121 getApproximateEVTForLLT(WideTy, Ctx)))
3122 ? TargetOpcode::G_SEXT
3123 : TargetOpcode::G_ZEXT;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003124 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3125 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3126 }
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003127 Observer.changedInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003128 return Legalized;
3129
Daniel Sanderse74c5b92019-11-01 13:18:00 -07003130 case TargetOpcode::G_PTR_ADD:
3131 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003132 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003133 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003134 Observer.changedInstr(MI);
Tim Northover22d82cf2016-09-15 11:02:19 +00003135 return Legalized;
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003136
Aditya Nandakumar892979e2017-08-25 04:57:27 +00003137 case TargetOpcode::G_PHI: {
3138 assert(TypeIdx == 0 && "Expecting only Idx 0");
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003139
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003140 Observer.changingInstr(MI);
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003141 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3142 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
Amara Emerson53445f52022-11-13 01:43:04 -08003143 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003144 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
Aditya Nandakumar892979e2017-08-25 04:57:27 +00003145 }
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003146
3147 MachineBasicBlock &MBB = *MI.getParent();
Amara Emerson9d647212019-09-16 23:46:03 +00003148 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
Roman Tereshind5fa9fd2018-05-09 17:28:18 +00003149 widenScalarDst(MI, WideTy);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003150 Observer.changedInstr(MI);
Aditya Nandakumar892979e2017-08-25 04:57:27 +00003151 return Legalized;
3152 }
Matt Arsenault63786292019-01-22 20:38:15 +00003153 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3154 if (TypeIdx == 0) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003155 Register VecReg = MI.getOperand(1).getReg();
Matt Arsenault63786292019-01-22 20:38:15 +00003156 LLT VecTy = MRI.getType(VecReg);
3157 Observer.changingInstr(MI);
3158
Sander de Smalend5e14ba2021-06-24 09:58:21 +01003159 widenScalarSrc(
3160 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
Amara Emersondafcbfd2021-09-24 22:52:30 -07003161 TargetOpcode::G_ANYEXT);
Matt Arsenault63786292019-01-22 20:38:15 +00003162
3163 widenScalarDst(MI, WideTy, 0);
3164 Observer.changedInstr(MI);
3165 return Legalized;
3166 }
3167
Amara Emersoncbd86d82018-10-25 14:04:54 +00003168 if (TypeIdx != 2)
3169 return UnableToLegalize;
Daniel Sandersd001e0e2018-12-12 23:48:13 +00003170 Observer.changingInstr(MI);
Matt Arsenault1a276d12019-10-01 15:51:37 -04003171 // TODO: Probably should be zext
Amara Emersoncbd86d82018-10-25 14:04:54 +00003172 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00003173 Observer.changedInstr(MI);
Amara Emersoncbd86d82018-10-25 14:04:54 +00003174 return Legalized;
Matt Arsenault63786292019-01-22 20:38:15 +00003175 }
Matt Arsenault1a276d12019-10-01 15:51:37 -04003176 case TargetOpcode::G_INSERT_VECTOR_ELT: {
Alleneaf23b22023-09-12 21:15:01 +08003177 if (TypeIdx == 0) {
3178 Observer.changingInstr(MI);
3179 const LLT WideEltTy = WideTy.getElementType();
3180
3181 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3182 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3183 widenScalarDst(MI, WideTy, 0);
3184 Observer.changedInstr(MI);
3185 return Legalized;
3186 }
3187
Matt Arsenault1a276d12019-10-01 15:51:37 -04003188 if (TypeIdx == 1) {
3189 Observer.changingInstr(MI);
3190
3191 Register VecReg = MI.getOperand(1).getReg();
3192 LLT VecTy = MRI.getType(VecReg);
Sander de Smalend5e14ba2021-06-24 09:58:21 +01003193 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
Matt Arsenault1a276d12019-10-01 15:51:37 -04003194
3195 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3196 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3197 widenScalarDst(MI, WideVecTy, 0);
3198 Observer.changedInstr(MI);
3199 return Legalized;
3200 }
3201
3202 if (TypeIdx == 2) {
3203 Observer.changingInstr(MI);
3204 // TODO: Probably should be zext
3205 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3206 Observer.changedInstr(MI);
Matt Arsenaulte4f19d12020-06-16 11:39:44 -04003207 return Legalized;
Matt Arsenault1a276d12019-10-01 15:51:37 -04003208 }
3209
Matt Arsenaulte4f19d12020-06-16 11:39:44 -04003210 return UnableToLegalize;
Matt Arsenault1a276d12019-10-01 15:51:37 -04003211 }
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003212 case TargetOpcode::G_FADD:
3213 case TargetOpcode::G_FMUL:
3214 case TargetOpcode::G_FSUB:
3215 case TargetOpcode::G_FMA:
Matt Arsenaultcf103722019-09-06 20:49:10 +00003216 case TargetOpcode::G_FMAD:
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003217 case TargetOpcode::G_FNEG:
3218 case TargetOpcode::G_FABS:
Matt Arsenault9dba67f2019-02-11 17:05:20 +00003219 case TargetOpcode::G_FCANONICALIZE:
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00003220 case TargetOpcode::G_FMINNUM:
3221 case TargetOpcode::G_FMAXNUM:
3222 case TargetOpcode::G_FMINNUM_IEEE:
3223 case TargetOpcode::G_FMAXNUM_IEEE:
3224 case TargetOpcode::G_FMINIMUM:
3225 case TargetOpcode::G_FMAXIMUM:
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003226 case TargetOpcode::G_FDIV:
3227 case TargetOpcode::G_FREM:
Jessica Paquette453ab1d2018-12-21 17:05:26 +00003228 case TargetOpcode::G_FCEIL:
Jessica Paquetteebdb0212019-02-11 17:22:58 +00003229 case TargetOpcode::G_FFLOOR:
Jessica Paquette7db82d72019-01-28 18:34:18 +00003230 case TargetOpcode::G_FCOS:
3231 case TargetOpcode::G_FSIN:
Farzon Lotfi1d874332024-06-05 15:01:33 -04003232 case TargetOpcode::G_FTAN:
Farzon Lotfi0b58f342024-07-11 15:58:43 -04003233 case TargetOpcode::G_FACOS:
3234 case TargetOpcode::G_FASIN:
3235 case TargetOpcode::G_FATAN:
Tex Riddellc03d09c2024-10-24 17:53:12 -07003236 case TargetOpcode::G_FATAN2:
Farzon Lotfi0b58f342024-07-11 15:58:43 -04003237 case TargetOpcode::G_FCOSH:
3238 case TargetOpcode::G_FSINH:
3239 case TargetOpcode::G_FTANH:
Jessica Paquettec49428a2019-01-28 19:53:14 +00003240 case TargetOpcode::G_FLOG10:
Jessica Paquette2d73ecd2019-01-28 21:27:23 +00003241 case TargetOpcode::G_FLOG:
Jessica Paquette0154bd12019-01-30 21:16:04 +00003242 case TargetOpcode::G_FLOG2:
Jessica Paquetted5c69e02019-04-19 23:41:52 +00003243 case TargetOpcode::G_FRINT:
Jessica Paquetteba557672019-04-25 16:44:40 +00003244 case TargetOpcode::G_FNEARBYINT:
Jessica Paquette22457f82019-01-30 21:03:52 +00003245 case TargetOpcode::G_FSQRT:
Jessica Paquette84bedac2019-01-30 23:46:15 +00003246 case TargetOpcode::G_FEXP:
Jessica Paquettee7941212019-04-03 16:58:32 +00003247 case TargetOpcode::G_FEXP2:
Matt Arsenaultb14e83d2023-08-12 07:20:00 -04003248 case TargetOpcode::G_FEXP10:
Jessica Paquettedfd87f62019-04-19 16:28:08 +00003249 case TargetOpcode::G_FPOW:
Jessica Paquette56342642019-04-23 18:20:44 +00003250 case TargetOpcode::G_INTRINSIC_TRUNC:
Jessica Paquette3cc6d1f2019-04-23 21:11:57 +00003251 case TargetOpcode::G_INTRINSIC_ROUND:
Matt Arsenault0da582d2020-07-19 09:56:15 -04003252 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003253 assert(TypeIdx == 0);
Jessica Paquette453ab1d2018-12-21 17:05:26 +00003254 Observer.changingInstr(MI);
Matt Arsenault745fd9f2019-01-20 19:10:31 +00003255
3256 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3257 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3258
Jessica Paquette453ab1d2018-12-21 17:05:26 +00003259 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3260 Observer.changedInstr(MI);
3261 return Legalized;
Matt Arsenaulteece6ba2023-04-26 22:02:42 -04003262 case TargetOpcode::G_FPOWI:
3263 case TargetOpcode::G_FLDEXP:
3264 case TargetOpcode::G_STRICT_FLDEXP: {
3265 if (TypeIdx == 0) {
David Green10fe5312024-08-18 11:08:49 +01003266 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
Matt Arsenaulteece6ba2023-04-26 22:02:42 -04003267 return UnableToLegalize;
3268
3269 Observer.changingInstr(MI);
3270 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3271 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3272 Observer.changedInstr(MI);
3273 return Legalized;
3274 }
3275
3276 if (TypeIdx == 1) {
3277 // For some reason SelectionDAG tries to promote to a libcall without
3278 // actually changing the integer type for promotion.
3279 Observer.changingInstr(MI);
3280 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3281 Observer.changedInstr(MI);
3282 return Legalized;
3283 }
3284
3285 return UnableToLegalize;
Matt Arsenault7cd8a022020-07-17 11:01:15 -04003286 }
Matt Arsenault003b58f2023-04-26 21:57:10 -04003287 case TargetOpcode::G_FFREXP: {
3288 Observer.changingInstr(MI);
3289
3290 if (TypeIdx == 0) {
3291 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3292 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3293 } else {
3294 widenScalarDst(MI, WideTy, 1);
3295 }
3296
3297 Observer.changedInstr(MI);
3298 return Legalized;
3299 }
Matt Arsenaultcbaada62019-02-02 23:29:55 +00003300 case TargetOpcode::G_INTTOPTR:
3301 if (TypeIdx != 1)
3302 return UnableToLegalize;
3303
3304 Observer.changingInstr(MI);
3305 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3306 Observer.changedInstr(MI);
3307 return Legalized;
3308 case TargetOpcode::G_PTRTOINT:
3309 if (TypeIdx != 0)
3310 return UnableToLegalize;
3311
3312 Observer.changingInstr(MI);
3313 widenScalarDst(MI, WideTy, 0);
3314 Observer.changedInstr(MI);
3315 return Legalized;
Matt Arsenaultbd791b52019-07-08 13:48:06 +00003316 case TargetOpcode::G_BUILD_VECTOR: {
3317 Observer.changingInstr(MI);
3318
3319 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3320 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3321 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3322
3323 // Avoid changing the result vector type if the source element type was
3324 // requested.
3325 if (TypeIdx == 1) {
Matt Arsenaulta679f272020-07-19 12:29:48 -04003326 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
Matt Arsenaultbd791b52019-07-08 13:48:06 +00003327 } else {
3328 widenScalarDst(MI, WideTy, 0);
3329 }
3330
3331 Observer.changedInstr(MI);
3332 return Legalized;
3333 }
Daniel Sanderse9a57c22019-08-09 21:11:20 +00003334 case TargetOpcode::G_SEXT_INREG:
3335 if (TypeIdx != 0)
3336 return UnableToLegalize;
3337
3338 Observer.changingInstr(MI);
3339 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3340 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3341 Observer.changedInstr(MI);
3342 return Legalized;
Matt Arsenaultef3e83122020-05-23 18:10:34 -04003343 case TargetOpcode::G_PTRMASK: {
3344 if (TypeIdx != 1)
3345 return UnableToLegalize;
3346 Observer.changingInstr(MI);
3347 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3348 Observer.changedInstr(MI);
3349 return Legalized;
3350 }
David Greenac7c1992025-01-30 22:17:34 +00003351 case TargetOpcode::G_VECREDUCE_ADD: {
3352 if (TypeIdx != 1)
3353 return UnableToLegalize;
3354 Observer.changingInstr(MI);
3355 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3356 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3357 Observer.changedInstr(MI);
3358 return Legalized;
3359 }
David Green295edaa2023-11-27 08:20:54 +00003360 case TargetOpcode::G_VECREDUCE_FADD:
David Green5b5614c2024-01-03 07:49:20 +00003361 case TargetOpcode::G_VECREDUCE_FMUL:
David Greend199478a2023-08-14 09:19:47 +01003362 case TargetOpcode::G_VECREDUCE_FMIN:
3363 case TargetOpcode::G_VECREDUCE_FMAX:
David Greena3f27512023-08-14 10:03:25 +01003364 case TargetOpcode::G_VECREDUCE_FMINIMUM:
Nikita Popovf2f18452024-06-21 08:33:40 +02003365 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
David Greend199478a2023-08-14 09:19:47 +01003366 if (TypeIdx != 0)
3367 return UnableToLegalize;
3368 Observer.changingInstr(MI);
3369 Register VecReg = MI.getOperand(1).getReg();
3370 LLT VecTy = MRI.getType(VecReg);
3371 LLT WideVecTy = VecTy.isVector()
3372 ? LLT::vector(VecTy.getElementCount(), WideTy)
3373 : WideTy;
3374 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3375 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3376 Observer.changedInstr(MI);
3377 return Legalized;
Tim Northover32335812016-08-04 18:35:11 +00003378 }
Michael Maitland54a9f0e2024-03-26 20:17:22 -04003379 case TargetOpcode::G_VSCALE: {
3380 MachineOperand &SrcMO = MI.getOperand(1);
3381 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3382 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3383 // The CImm is always a signed value
3384 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3385 Observer.changingInstr(MI);
3386 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3387 widenScalarDst(MI, WideTy);
3388 Observer.changedInstr(MI);
3389 return Legalized;
3390 }
Michael Maitland8aa3a772024-03-07 13:40:30 -08003391 case TargetOpcode::G_SPLAT_VECTOR: {
3392 if (TypeIdx != 1)
3393 return UnableToLegalize;
3394
3395 Observer.changingInstr(MI);
3396 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3397 Observer.changedInstr(MI);
3398 return Legalized;
3399 }
Michael Maitland6bac4142024-10-21 08:49:13 -04003400 case TargetOpcode::G_INSERT_SUBVECTOR: {
3401 if (TypeIdx != 0)
3402 return UnableToLegalize;
3403
3404 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3405 Register BigVec = IS.getBigVec();
3406 Register SubVec = IS.getSubVec();
3407
3408 LLT SubVecTy = MRI.getType(SubVec);
3409 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3410
3411 // Widen the G_INSERT_SUBVECTOR
3412 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3413 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3414 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3415 IS.getIndexImm());
3416
3417 // Truncate back down
3418 auto SplatZero = MIRBuilder.buildSplatVector(
3419 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3420 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3421 SplatZero);
3422
3423 MI.eraseFromParent();
3424
3425 return Legalized;
3426 }
Michael Maitland54a9f0e2024-03-26 20:17:22 -04003427 }
Tim Northover33b07d62016-07-22 20:03:43 +00003428}
3429
Matt Arsenault936483f2020-01-09 21:53:28 -05003430static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3431 MachineIRBuilder &B, Register Src, LLT Ty) {
3432 auto Unmerge = B.buildUnmerge(Ty, Src);
3433 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3434 Pieces.push_back(Unmerge.getReg(I));
3435}
3436
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003437static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3438 MachineIRBuilder &MIRBuilder) {
3439 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
Chen Zheng6ee2f772022-12-12 09:53:53 +00003440 MachineFunction &MF = MIRBuilder.getMF();
3441 const DataLayout &DL = MIRBuilder.getDataLayout();
Chen Zheng6ee2f772022-12-12 09:53:53 +00003442 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3443 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003444 LLT DstLLT = MRI.getType(DstReg);
3445
3446 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
Chen Zheng6ee2f772022-12-12 09:53:53 +00003447
3448 auto Addr = MIRBuilder.buildConstantPool(
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003449 AddrPtrTy,
3450 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
Chen Zheng6ee2f772022-12-12 09:53:53 +00003451
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003452 MachineMemOperand *MMO =
3453 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3454 MachineMemOperand::MOLoad, DstLLT, Alignment);
Chen Zheng6ee2f772022-12-12 09:53:53 +00003455
Mikhail Gudim35cfaec2024-02-16 18:51:44 -05003456 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3457}
3458
3459LegalizerHelper::LegalizeResult
3460LegalizerHelper::lowerConstant(MachineInstr &MI) {
3461 const MachineOperand &ConstOperand = MI.getOperand(1);
3462 const Constant *ConstantVal = ConstOperand.getCImm();
3463
3464 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3465 MI.eraseFromParent();
3466
3467 return Legalized;
3468}
3469
3470LegalizerHelper::LegalizeResult
3471LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3472 const MachineOperand &ConstOperand = MI.getOperand(1);
3473 const Constant *ConstantVal = ConstOperand.getFPImm();
3474
3475 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
Chen Zheng6ee2f772022-12-12 09:53:53 +00003476 MI.eraseFromParent();
3477
3478 return Legalized;
3479}
3480
/// Lower a G_BITCAST involving at least one vector type by unmerging the
/// source into pieces with G_UNMERGE_VALUES and reassembling them with a
/// merge-like instruction (G_MERGE_VALUES / G_BUILD_VECTOR /
/// G_CONCAT_VECTORS, chosen by buildMergeLikeInstr from the piece types).
/// Scalar-to-scalar bitcasts are not handled here.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to match
      // the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %3:_(<2 x s8>) = G_BITCAST %2
        // %4:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
        //
        // Each unmerged scalar becomes a small sub-vector of the result
        // element type.
        DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %3:_(s16) = G_BITCAST %2
        // %4:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
        //
        // Unmerge in groups of source elements so that each group bitcasts
        // to exactly one result element.
        SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      // Scalar destination: unmerge the source into its elements; the merge
      // below reassembles them into the scalar.
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    // Scalar source: split it into the destination's element type and build
    // the result vector from the pieces.
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
3544
Matt Arsenaulte2f1b482020-06-15 21:35:15 -04003545/// Figure out the bit offset into a register when coercing a vector index for
3546/// the wide element type. This is only for the case when promoting vector to
3547/// one with larger elements.
3548//
3549///
3550/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3551/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3552static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3553 Register Idx,
3554 unsigned NewEltSize,
3555 unsigned OldEltSize) {
3556 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3557 LLT IdxTy = B.getMRI()->getType(Idx);
3558
3559 // Now figure out the amount we need to shift to get the target bits.
3560 auto OffsetMask = B.buildConstant(
Chris Lattner735f4672021-09-08 22:13:13 -07003561 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
Matt Arsenaulte2f1b482020-06-15 21:35:15 -04003562 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3563 return B.buildShl(IdxTy, OffsetIdx,
3564 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3565}
3566
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  // Only the source vector type (operand 1) may be bitcast here.
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  LLT SrcEltTy = SrcVecTy.getElementType();
  // A scalar CastTy is treated as a single-element vector.
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    // Each old element must map onto a whole number of new elements.
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    // Index of the first narrow element covered by the requested element.
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    // Extract each narrow piece of the requested element.
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    // Reassemble the pieces and reinterpret them as the original element type.
    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    // Each new element must cover a whole number of old elements.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // If the cast type is a scalar, the whole cast value is the "wide
    // element"; otherwise pull out the wide element holding the target bits.
    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  // Equal element counts: the cast wouldn't change how the vector is indexed.
  return UnableToLegalize;
}
3675
Matt Arsenaulte2f1b482020-06-15 21:35:15 -04003676/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3677/// TargetReg, while preserving other bits in \p TargetReg.
3678///
3679/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3680static Register buildBitFieldInsert(MachineIRBuilder &B,
3681 Register TargetReg, Register InsertReg,
3682 Register OffsetBits) {
3683 LLT TargetTy = B.getMRI()->getType(TargetReg);
3684 LLT InsertTy = B.getMRI()->getType(InsertReg);
3685 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3686 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3687
3688 // Produce a bitmask of the value to insert
3689 auto EltMask = B.buildConstant(
3690 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3691 InsertTy.getSizeInBits()));
3692 // Shift it into position
3693 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3694 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3695
3696 // Clear out the bits in the wide element
3697 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3698
3699 // The value to insert has all zeros already, so stick it into the masked
3700 // wide element.
3701 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3702}
3703
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  // Only the destination vector type (operand 0) may be bitcast here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();
  LLT VecTy = DstTy;

  LLT VecEltTy = VecTy.getElementType();
  // A scalar CastTy is treated as a single-element vector.
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  // Only casting to *wider* elements (fewer of them) is implemented.
  if (NewNumElts < OldNumElts) {
    // Each new element must cover a whole number of old elements.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // If the cast type is a scalar, the whole cast value is the "wide
    // element"; otherwise pull out the wide element holding the target bits.
    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Splice the value into the wide element, preserving the surrounding bits.
    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    // Put the updated wide element back into the cast vector.
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
          CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
3768
chuongg30d5db4e2024-07-15 12:00:47 +01003769// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3770// those that have smaller than legal operands.
3771//
3772// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3773//
3774// ===>
3775//
3776// s32 = G_BITCAST <4 x s8>
3777// s32 = G_BITCAST <4 x s8>
3778// s32 = G_BITCAST <4 x s8>
3779// s32 = G_BITCAST <4 x s8>
3780// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3781// <16 x s8> = G_BITCAST <4 x s32>
3782LegalizerHelper::LegalizeResult
3783LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3784 LLT CastTy) {
3785 // Convert it to CONCAT instruction
3786 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3787 if (!ConcatMI) {
3788 return UnableToLegalize;
3789 }
3790
3791 // Check if bitcast is Legal
3792 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3793 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3794
3795 // Check if the build vector is Legal
3796 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3797 return UnableToLegalize;
3798 }
3799
3800 // Bitcast the sources
3801 SmallVector<Register> BitcastRegs;
3802 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3803 BitcastRegs.push_back(
3804 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3805 .getReg(0));
3806 }
3807
3808 // Build the scalar values into a vector
3809 Register BuildReg =
3810 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3811 MIRBuilder.buildBitcast(DstReg, BuildReg);
3812
3813 MI.eraseFromParent();
3814 return Legalized;
3815}
3816
// This bitcasts a shuffle vector to a different type currently of the same
// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
// will be used instead.
//
// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
// ===>
// <4 x s64> = G_PTRTOINT <4 x p0>
// <4 x s64> = G_PTRTOINT <4 x p0>
// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
// <16 x p0> = G_INTTOPTR <16 x s64>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT CastTy) {
  auto ShuffleMI = cast<GShuffleVector>(&MI);
  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));

  // We currently only handle vectors of the same size.
  if (TypeIdx != 0 ||
      CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
      CastTy.getElementCount() != DstTy.getElementCount())
    return UnableToLegalize;

  // Sources keep their own element count but take the cast scalar type.
  LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());

  // buildCast selects the appropriate cast opcode for the type pair (e.g.
  // ptrtoint/inttoptr for pointer element types).
  auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
  auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
  auto Shuf =
      MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
  MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);

  MI.eraseFromParent();
  return Legalized;
}
3851
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
///
///  <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
///
/// ===>
///
///  <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
///  <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
///  <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  auto ES = cast<GExtractSubvector>(&MI);

  // Can only cast to another vector type.
  if (!CastTy.isVector())
    return UnableToLegalize;

  // Only the result type (operand 0) may be bitcast here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register Src = ES->getSrcVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount SrcTyEC = SrcTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto SrcTyMinElts = SrcTyEC.getKnownMinValue();

  // Nothing to do if the requested cast type is already the result type.
  if (DstTy == CastTy)
    return Legalized;

  // The cast must reinterpret, not resize, the value.
  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  // Only widening the element type (grouping several narrow elements into
  // one wide element) is handled.
  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  // The index and both element counts must fall on a whole-element boundary
  // of the wider type.
  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      SrcTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  // NOTE(review): AdjustAmt is reused below as the new element bit-width of
  // the bitcast source type; that equals CastEltSize only when the original
  // element type is 1 bit wide (the i1 case shown above) — confirm callers
  // only reach this path for i1 element vectors.
  Idx /= AdjustAmt;
  SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
  auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedES);

  ES->eraseFromParent();
  return Legalized;
}
3910
Michael Maitland6bac4142024-10-21 08:49:13 -04003911/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3912///
3913/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3914/// <vscale x 8 x i1>,
3915/// N
3916///
3917/// ===>
3918///
3919/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3920/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3921/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3922/// <vscale x 1 x i8>, N / 8
3923/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
3924LegalizerHelper::LegalizeResult
3925LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
3926 LLT CastTy) {
3927 auto ES = cast<GInsertSubvector>(&MI);
3928
3929 if (!CastTy.isVector())
3930 return UnableToLegalize;
3931
3932 if (TypeIdx != 0)
3933 return UnableToLegalize;
3934
3935 Register Dst = ES->getReg(0);
3936 Register BigVec = ES->getBigVec();
3937 Register SubVec = ES->getSubVec();
3938 uint64_t Idx = ES->getIndexImm();
3939
3940 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3941
3942 LLT DstTy = MRI.getType(Dst);
3943 LLT BigVecTy = MRI.getType(BigVec);
3944 LLT SubVecTy = MRI.getType(SubVec);
3945
3946 if (DstTy == CastTy)
3947 return Legalized;
3948
3949 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3950 return UnableToLegalize;
3951
3952 ElementCount DstTyEC = DstTy.getElementCount();
3953 ElementCount BigVecTyEC = BigVecTy.getElementCount();
3954 ElementCount SubVecTyEC = SubVecTy.getElementCount();
3955 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3956 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
3957 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
3958
3959 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3960 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3961 if (CastEltSize < DstEltSize)
3962 return UnableToLegalize;
3963
3964 auto AdjustAmt = CastEltSize / DstEltSize;
3965 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3966 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
3967 return UnableToLegalize;
3968
3969 Idx /= AdjustAmt;
3970 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3971 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3972 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
3973 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
3974 auto PromotedIS =
3975 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
3976 MIRBuilder.buildBitcast(Dst, PromotedIS);
3977
3978 ES->eraseFromParent();
3979 return Legalized;
3980}
3981
/// Lower a G_LOAD / G_SEXTLOAD / G_ZEXTLOAD that the target cannot handle
/// directly, either by widening a non-byte-sized memory type or by splitting
/// the access into two smaller loads that are recombined with shift/or.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = LoadMI.getDstReg();
  Register PtrReg = LoadMI.getPointerReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineMemOperand &MMO = LoadMI.getMMO();
  LLT MemTy = MMO.getMemoryType();
  MachineFunction &MF = MIRBuilder.getMF();

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();

  // Case 1: the memory type is not an integral number of bytes (e.g. s20).
  if (MemSizeInBits != MemStoreSizeInBits) {
    if (MemTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized load if not loading an integral number of
    // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);

    Register LoadReg = DstReg;
    LLT LoadTy = DstTy;

    // If this wasn't already an extending load, we need to widen the result
    // register to avoid creating a load with a narrower result than the source.
    if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
      LoadTy = WideMemTy;
      LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
    }

    if (isa<GSExtLoad>(LoadMI)) {
      // Sign-extending load: load the widened bytes, then sign-extend from
      // the original (narrower) memory width in the register.
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      // The extra bits are guaranteed to be zero, since we stored them that
      // way.  A zext load from Wide thus automatically gives zext from MemVT.
      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
    } else {
      // Any-extending load: the padding bits are don't-care.
      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
    }

    // If we loaded into a temporary wide register, narrow back to DstTy.
    if (DstTy != LoadTy)
      MIRBuilder.buildTrunc(DstReg, LoadReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }

  // Big endian lowering not implemented.
  if (MIRBuilder.getDataLayout().isBigEndian())
    return UnableToLegalize;

  // This load needs splitting into power of 2 sized loads.
  //
  // Our strategy here is to generate anyextending loads for the smaller
  // types up to next power-2 result type, and then combine the two larger
  // result values together, before truncating back down to the non-pow-2
  // type.
  // E.g. v1 = i24 load =>
  // v2 = i32 zextload (2 byte)
  // v3 = i32 load (1 byte)
  // v4 = i32 shl v3, 16
  // v5 = i32 or v4, v2
  // v1 = i24 trunc v5
  // By doing this we generate the correct truncate which should get
  // combined away as an artifact with a matching extend.

  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // This load needs splitting into power of 2 sized loads.
    LargeSplitSize = llvm::bit_floor(MemSizeInBits);
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
  } else {
    // This is already a power of 2, but we still need to split this in half.
    //
    // Assume we're being asked to decompose an unaligned load.
    // TODO: If this requires multiple splits, handle them all at once.
    auto &Ctx = MF.getFunction().getContext();
    // If the target actually allows this access, we don't know why we were
    // asked to lower it, so bail out.
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize;

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  if (MemTy.isVector()) {
    // TODO: Handle vector extloads
    if (MemTy != DstTy)
      return UnableToLegalize;

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
  }

  // Split the memory operand into the low (large) and high (small) parts.
  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);

  LLT PtrTy = MRI.getType(PtrReg);
  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
  LLT AnyExtTy = LLT::scalar(AnyExtSize);
  // Low part: zero-extending load so the upper bits are known-zero for the
  // final OR.
  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
                                             PtrReg, *LargeMMO);

  // High part: load from Ptr + LargeSplitSize/8, preserving the original
  // extension kind.
  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
                                            LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);

  // Shift the high part into position and combine with the low part.
  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);

  if (AnyExtTy == DstTy)
    MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
    MIRBuilder.buildTrunc(DstReg, {Or});
  } else {
    assert(DstTy.isPointer() && "expected pointer");
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);

    // FIXME: We currently consider this to be illegal for non-integral address
    // spaces, but we need still need a way to reinterpret the bits.
    MIRBuilder.buildIntToPtr(DstReg, Or);
  }

  LoadMI.eraseFromParent();
  return Legalized;
}
4118
/// Lower a G_STORE the target cannot handle directly: widen a
/// non-byte-sized memory type, scalarize unsupported vector stores, or split
/// the access into two smaller truncating stores.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();

  unsigned StoreWidth = MemTy.getSizeInBits();
  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();

  // Case 1: scalar memory type that is not an integral number of bytes.
  if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
    // Promote to a byte-sized store with upper bits zero if not
    // storing an integral number of bytes.  For example, promote
    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
    LLT WideTy = LLT::scalar(StoreSizeInBits);

    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
      // Avoid creating a store with a narrower source than result.
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
      SrcTy = WideTy;
    }

    // Zero the padding bits so the widened store writes deterministic bytes.
    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);

    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  if (MemTy.isVector()) {
    // A truncating vector store (e.g. <N x i1> in memory) is handled by
    // packing the bits into a scalar; see scalarizeVectorBooleanStore.
    if (MemTy != SrcTy)
      return scalarizeVectorBooleanStore(StoreMI);

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
  }

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // Non-power-of-2 width: peel off the largest power-of-2 chunk.
    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  } else {
    // Power-of-2 width: assume we're decomposing an unaligned store.
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize; // Don't know what we're being asked to do.

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  // Extend to the next pow-2. If this store was itself the result of lowering,
  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
  // that's wider than the stored size.
  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
  const LLT NewSrcTy = LLT::scalar(AnyExtSize);

  // Pointers can't be shifted; reinterpret as an integer first.
  if (SrcTy.isPointer()) {
    const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
    SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
  }

  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
    LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  auto SmallPtr =
    MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
  StoreMI.eraseFromParent();
  return Legalized;
}
4213
4214LegalizerHelper::LegalizeResult
Amara Emerson6b0807f2025-01-06 10:21:42 -08004215LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4216 Register SrcReg = StoreMI.getValueReg();
4217 Register PtrReg = StoreMI.getPointerReg();
4218 LLT SrcTy = MRI.getType(SrcReg);
4219 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4220 LLT MemTy = MMO.getMemoryType();
4221 LLT MemScalarTy = MemTy.getElementType();
4222 MachineFunction &MF = MIRBuilder.getMF();
4223
4224 assert(SrcTy.isVector() && "Expect a vector store type");
4225
4226 if (!MemScalarTy.isByteSized()) {
4227 // We need to build an integer scalar of the vector bit pattern.
4228 // It's not legal for us to add padding when storing a vector.
4229 unsigned NumBits = MemTy.getSizeInBits();
4230 LLT IntTy = LLT::scalar(NumBits);
4231 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
David Greenbd1be8a2025-03-18 08:31:11 +00004232 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
Amara Emerson6b0807f2025-01-06 10:21:42 -08004233
4234 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4235 auto Elt = MIRBuilder.buildExtractVectorElement(
4236 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4237 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4238 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4239 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4240 ? (MemTy.getNumElements() - 1) - I
4241 : I;
4242 auto ShiftAmt = MIRBuilder.buildConstant(
4243 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4244 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4245 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4246 }
4247 auto PtrInfo = MMO.getPointerInfo();
4248 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4249 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4250 StoreMI.eraseFromParent();
4251 return Legalized;
4252 }
4253
4254 // TODO: implement simple scalarization.
4255 return UnableToLegalize;
4256}
4257
4258LegalizerHelper::LegalizeResult
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004259LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004260 switch (MI.getOpcode()) {
4261 case TargetOpcode::G_LOAD: {
4262 if (TypeIdx != 0)
4263 return UnableToLegalize;
Matt Arsenault92361252021-06-10 19:32:41 -04004264 MachineMemOperand &MMO = **MI.memoperands_begin();
4265
4266 // Not sure how to interpret a bitcast of an extending load.
4267 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4268 return UnableToLegalize;
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004269
4270 Observer.changingInstr(MI);
4271 bitcastDst(MI, CastTy, 0);
Matt Arsenault92361252021-06-10 19:32:41 -04004272 MMO.setType(CastTy);
Matt Arsenault70320762024-07-01 15:26:09 +02004273 // The range metadata is no longer valid when reinterpreted as a different
4274 // type.
4275 MMO.clearRanges();
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004276 Observer.changedInstr(MI);
4277 return Legalized;
4278 }
4279 case TargetOpcode::G_STORE: {
4280 if (TypeIdx != 0)
4281 return UnableToLegalize;
4282
Matt Arsenault92361252021-06-10 19:32:41 -04004283 MachineMemOperand &MMO = **MI.memoperands_begin();
4284
4285 // Not sure how to interpret a bitcast of a truncating store.
4286 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4287 return UnableToLegalize;
4288
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004289 Observer.changingInstr(MI);
4290 bitcastSrc(MI, CastTy, 0);
Matt Arsenault92361252021-06-10 19:32:41 -04004291 MMO.setType(CastTy);
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004292 Observer.changedInstr(MI);
4293 return Legalized;
4294 }
4295 case TargetOpcode::G_SELECT: {
4296 if (TypeIdx != 0)
4297 return UnableToLegalize;
4298
4299 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4300 LLVM_DEBUG(
4301 dbgs() << "bitcast action not implemented for vector select\n");
4302 return UnableToLegalize;
4303 }
4304
4305 Observer.changingInstr(MI);
4306 bitcastSrc(MI, CastTy, 2);
4307 bitcastSrc(MI, CastTy, 3);
4308 bitcastDst(MI, CastTy, 0);
4309 Observer.changedInstr(MI);
4310 return Legalized;
4311 }
4312 case TargetOpcode::G_AND:
4313 case TargetOpcode::G_OR:
4314 case TargetOpcode::G_XOR: {
4315 Observer.changingInstr(MI);
4316 bitcastSrc(MI, CastTy, 1);
4317 bitcastSrc(MI, CastTy, 2);
4318 bitcastDst(MI, CastTy, 0);
4319 Observer.changedInstr(MI);
4320 return Legalized;
4321 }
Matt Arsenault212570a2020-06-15 11:54:49 -04004322 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4323 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
Matt Arsenaulte2f1b482020-06-15 21:35:15 -04004324 case TargetOpcode::G_INSERT_VECTOR_ELT:
4325 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
chuongg30d5db4e2024-07-15 12:00:47 +01004326 case TargetOpcode::G_CONCAT_VECTORS:
4327 return bitcastConcatVector(MI, TypeIdx, CastTy);
David Greend3ce0692024-11-23 17:00:51 +00004328 case TargetOpcode::G_SHUFFLE_VECTOR:
4329 return bitcastShuffleVector(MI, TypeIdx, CastTy);
Michael Maitlandf957d082024-10-01 14:08:49 -04004330 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4331 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
Michael Maitland6bac4142024-10-21 08:49:13 -04004332 case TargetOpcode::G_INSERT_SUBVECTOR:
4333 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004334 default:
4335 return UnableToLegalize;
4336 }
4337}
4338
Matt Arsenault0da582d2020-07-19 09:56:15 -04004339// Legalize an instruction by changing the opcode in place.
4340void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4341 Observer.changingInstr(MI);
4342 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4343 Observer.changedInstr(MI);
4344}
4345
Matt Arsenault39c55ce2020-02-13 15:52:32 -05004346LegalizerHelper::LegalizeResult
Matt Arsenaulta1282922020-07-15 11:10:54 -04004347LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
Tim Northovercecee562016-08-26 17:46:13 +00004348 using namespace TargetOpcode;
Tim Northovercecee562016-08-26 17:46:13 +00004349
4350 switch(MI.getOpcode()) {
4351 default:
4352 return UnableToLegalize;
Chen Zheng6ee2f772022-12-12 09:53:53 +00004353 case TargetOpcode::G_FCONSTANT:
4354 return lowerFConstant(MI);
Matt Arsenault936483f2020-01-09 21:53:28 -05004355 case TargetOpcode::G_BITCAST:
4356 return lowerBitcast(MI);
Tim Northovercecee562016-08-26 17:46:13 +00004357 case TargetOpcode::G_SREM:
4358 case TargetOpcode::G_UREM: {
Matt Arsenaulta1282922020-07-15 11:10:54 -04004359 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004360 auto Quot =
4361 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4362 {MI.getOperand(1), MI.getOperand(2)});
Tim Northovercecee562016-08-26 17:46:13 +00004363
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004364 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4365 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
Tim Northovercecee562016-08-26 17:46:13 +00004366 MI.eraseFromParent();
4367 return Legalized;
4368 }
Matt Arsenault34ed76e2019-10-16 20:46:32 +00004369 case TargetOpcode::G_SADDO:
4370 case TargetOpcode::G_SSUBO:
4371 return lowerSADDO_SSUBO(MI);
Pushpinder Singh41d66692020-08-10 05:47:50 -04004372 case TargetOpcode::G_UMULH:
4373 case TargetOpcode::G_SMULH:
4374 return lowerSMULH_UMULH(MI);
Tim Northover0a9b2792017-02-08 21:22:15 +00004375 case TargetOpcode::G_SMULO:
4376 case TargetOpcode::G_UMULO: {
4377 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4378 // result.
Amara Emerson719024a2023-02-23 16:35:39 -08004379 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
Matt Arsenaulta1282922020-07-15 11:10:54 -04004380 LLT Ty = MRI.getType(Res);
Tim Northover0a9b2792017-02-08 21:22:15 +00004381
Tim Northover0a9b2792017-02-08 21:22:15 +00004382 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4383 ? TargetOpcode::G_SMULH
4384 : TargetOpcode::G_UMULH;
4385
Jay Foadf465b1a2020-01-16 14:46:36 +00004386 Observer.changingInstr(MI);
4387 const auto &TII = MIRBuilder.getTII();
4388 MI.setDesc(TII.get(TargetOpcode::G_MUL));
Shengchen Kan37b37832022-03-16 20:21:25 +08004389 MI.removeOperand(1);
Jay Foadf465b1a2020-01-16 14:46:36 +00004390 Observer.changedInstr(MI);
4391
Jay Foadf465b1a2020-01-16 14:46:36 +00004392 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004393 auto Zero = MIRBuilder.buildConstant(Ty, 0);
Amara Emerson9de62132018-01-03 04:56:56 +00004394
Amara Emerson1d54e752020-09-29 14:39:54 -07004395 // Move insert point forward so we can use the Res register if needed.
4396 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4397
Amara Emerson9de62132018-01-03 04:56:56 +00004398 // For *signed* multiply, overflow is detected by checking:
4399 // (hi != (lo >> bitwidth-1))
4400 if (Opcode == TargetOpcode::G_SMULH) {
Jay Foadf465b1a2020-01-16 14:46:36 +00004401 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4402 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
Amara Emerson9de62132018-01-03 04:56:56 +00004403 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4404 } else {
4405 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4406 }
Tim Northover0a9b2792017-02-08 21:22:15 +00004407 return Legalized;
4408 }
Volkan Keles5698b2a2017-03-08 18:09:14 +00004409 case TargetOpcode::G_FNEG: {
Amara Emerson719024a2023-02-23 16:35:39 -08004410 auto [Res, SubByReg] = MI.getFirst2Regs();
Matt Arsenaulta1282922020-07-15 11:10:54 -04004411 LLT Ty = MRI.getType(Res);
4412
David Green9f255d82024-09-27 07:43:58 +01004413 auto SignMask = MIRBuilder.buildConstant(
4414 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
Eli Friedman3f739f72020-09-23 14:10:33 -07004415 MIRBuilder.buildXor(Res, SubByReg, SignMask);
Volkan Keles5698b2a2017-03-08 18:09:14 +00004416 MI.eraseFromParent();
4417 return Legalized;
4418 }
Matt Arsenault1fe12992022-11-17 23:03:23 -08004419 case TargetOpcode::G_FSUB:
4420 case TargetOpcode::G_STRICT_FSUB: {
Amara Emerson719024a2023-02-23 16:35:39 -08004421 auto [Res, LHS, RHS] = MI.getFirst3Regs();
Matt Arsenaulta1282922020-07-15 11:10:54 -04004422 LLT Ty = MRI.getType(Res);
4423
Volkan Keles225921a2017-03-10 21:25:09 +00004424 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
Matt Arsenault1fe12992022-11-17 23:03:23 -08004425 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4426
4427 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4428 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4429 else
4430 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4431
Volkan Keles225921a2017-03-10 21:25:09 +00004432 MI.eraseFromParent();
4433 return Legalized;
4434 }
Matt Arsenault4d339182019-09-13 00:44:35 +00004435 case TargetOpcode::G_FMAD:
4436 return lowerFMad(MI);
Matt Arsenault19a03502020-03-14 14:52:48 -04004437 case TargetOpcode::G_FFLOOR:
4438 return lowerFFloor(MI);
Sumanth Gundapanenifc832d52024-07-23 11:34:34 -05004439 case TargetOpcode::G_LROUND:
4440 case TargetOpcode::G_LLROUND: {
4441 Register DstReg = MI.getOperand(0).getReg();
4442 Register SrcReg = MI.getOperand(1).getReg();
4443 LLT SrcTy = MRI.getType(SrcReg);
4444 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4445 {SrcReg});
4446 MIRBuilder.buildFPTOSI(DstReg, Round);
4447 MI.eraseFromParent();
4448 return Legalized;
4449 }
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05004450 case TargetOpcode::G_INTRINSIC_ROUND:
4451 return lowerIntrinsicRound(MI);
Acim-Maravicf3138522023-11-14 18:49:21 +01004452 case TargetOpcode::G_FRINT: {
Matt Arsenault0da582d2020-07-19 09:56:15 -04004453 // Since round even is the assumed rounding mode for unconstrained FP
4454 // operations, rint and roundeven are the same operation.
Acim-Maravicf3138522023-11-14 18:49:21 +01004455 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
Matt Arsenault0da582d2020-07-19 09:56:15 -04004456 return Legalized;
4457 }
Sumanth Gundapaneni0ee32c42024-07-24 14:34:31 -05004458 case TargetOpcode::G_INTRINSIC_LRINT:
4459 case TargetOpcode::G_INTRINSIC_LLRINT: {
4460 Register DstReg = MI.getOperand(0).getReg();
4461 Register SrcReg = MI.getOperand(1).getReg();
4462 LLT SrcTy = MRI.getType(SrcReg);
4463 auto Round =
4464 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4465 MIRBuilder.buildFPTOSI(DstReg, Round);
4466 MI.eraseFromParent();
4467 return Legalized;
4468 }
Daniel Sandersaef1dfc2017-11-30 20:11:42 +00004469 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Amara Emerson719024a2023-02-23 16:35:39 -08004470 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
Shilei Tian3a106e52024-03-29 15:59:50 -04004471 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4472 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
Daniel Sandersaef1dfc2017-11-30 20:11:42 +00004473 **MI.memoperands_begin());
Shilei Tian3a106e52024-03-29 15:59:50 -04004474 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4475 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
Daniel Sandersaef1dfc2017-11-30 20:11:42 +00004476 MI.eraseFromParent();
4477 return Legalized;
4478 }
Daniel Sanders5eb9f582018-04-28 18:14:50 +00004479 case TargetOpcode::G_LOAD:
4480 case TargetOpcode::G_SEXTLOAD:
Matt Arsenault54615ec2020-07-31 10:09:00 -04004481 case TargetOpcode::G_ZEXTLOAD:
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07004482 return lowerLoad(cast<GAnyLoad>(MI));
Matt Arsenault54615ec2020-07-31 10:09:00 -04004483 case TargetOpcode::G_STORE:
Amara Emerson4e3dc6b2021-07-09 15:48:47 -07004484 return lowerStore(cast<GStore>(MI));
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00004485 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4486 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4487 case TargetOpcode::G_CTLZ:
4488 case TargetOpcode::G_CTTZ:
4489 case TargetOpcode::G_CTPOP:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004490 return lowerBitCount(MI);
Petar Avramovicbd395692019-02-26 17:22:42 +00004491 case G_UADDO: {
Amara Emerson719024a2023-02-23 16:35:39 -08004492 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
Petar Avramovicbd395692019-02-26 17:22:42 +00004493
Shilei Tian3a106e52024-03-29 15:59:50 -04004494 Register NewRes = MRI.cloneVirtualRegister(Res);
4495
4496 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4497 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4498
4499 MIRBuilder.buildCopy(Res, NewRes);
Petar Avramovicbd395692019-02-26 17:22:42 +00004500
4501 MI.eraseFromParent();
4502 return Legalized;
4503 }
Petar Avramovicb8276f22018-12-17 12:31:07 +00004504 case G_UADDE: {
Amara Emerson719024a2023-02-23 16:35:39 -08004505 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
Craig Topperebb2e5e2023-08-17 14:27:45 -07004506 const LLT CondTy = MRI.getType(CarryOut);
4507 const LLT Ty = MRI.getType(Res);
Petar Avramovicb8276f22018-12-17 12:31:07 +00004508
Shilei Tian3a106e52024-03-29 15:59:50 -04004509 Register NewRes = MRI.cloneVirtualRegister(Res);
4510
Craig Topperc6dee692023-08-17 20:32:37 -07004511 // Initial add of the two operands.
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004512 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
Craig Topperc6dee692023-08-17 20:32:37 -07004513
4514 // Initial check for carry.
4515 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4516
4517 // Add the sum and the carry.
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004518 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
Shilei Tian3a106e52024-03-29 15:59:50 -04004519 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
Craig Topperebb2e5e2023-08-17 14:27:45 -07004520
Craig Topperc6dee692023-08-17 20:32:37 -07004521 // Second check for carry. We can only carry if the initial sum is all 1s
4522 // and the carry is set, resulting in a new sum of 0.
4523 auto Zero = MIRBuilder.buildConstant(Ty, 0);
Shilei Tian3a106e52024-03-29 15:59:50 -04004524 auto ResEqZero =
4525 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
Craig Topperc6dee692023-08-17 20:32:37 -07004526 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4527 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
Petar Avramovicb8276f22018-12-17 12:31:07 +00004528
Shilei Tian3a106e52024-03-29 15:59:50 -04004529 MIRBuilder.buildCopy(Res, NewRes);
4530
Petar Avramovicb8276f22018-12-17 12:31:07 +00004531 MI.eraseFromParent();
4532 return Legalized;
4533 }
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004534 case G_USUBO: {
Amara Emerson719024a2023-02-23 16:35:39 -08004535 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004536
4537 MIRBuilder.buildSub(Res, LHS, RHS);
4538 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4539
4540 MI.eraseFromParent();
4541 return Legalized;
4542 }
4543 case G_USUBE: {
Amara Emerson719024a2023-02-23 16:35:39 -08004544 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
Matt Arsenault6fc0d002020-02-26 17:21:10 -05004545 const LLT CondTy = MRI.getType(BorrowOut);
4546 const LLT Ty = MRI.getType(Res);
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004547
Craig Topperc6dee692023-08-17 20:32:37 -07004548 // Initial subtract of the two operands.
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004549 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
Craig Topperc6dee692023-08-17 20:32:37 -07004550
4551 // Initial check for borrow.
4552 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4553
4554 // Subtract the borrow from the first subtract.
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004555 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004556 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
Matt Arsenaultc7e8d8b2020-02-26 17:18:43 -05004557
Craig Topperc6dee692023-08-17 20:32:37 -07004558 // Second check for borrow. We can only borrow if the initial difference is
4559 // 0 and the borrow is set, resulting in a new difference of all 1s.
4560 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4561 auto TmpResEqZero =
4562 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4563 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4564 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
Petar Avramovic7cecadb2019-01-28 12:10:17 +00004565
4566 MI.eraseFromParent();
4567 return Legalized;
4568 }
Matt Arsenault02b5ca82019-05-17 23:05:13 +00004569 case G_UITOFP:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004570 return lowerUITOFP(MI);
Matt Arsenault02b5ca82019-05-17 23:05:13 +00004571 case G_SITOFP:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004572 return lowerSITOFP(MI);
Petar Avramovic6412b562019-08-30 05:44:02 +00004573 case G_FPTOUI:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004574 return lowerFPTOUI(MI);
Matt Arsenaultea956682020-01-04 17:09:48 -05004575 case G_FPTOSI:
4576 return lowerFPTOSI(MI);
David Greenfeac7612024-09-16 10:33:59 +01004577 case G_FPTOUI_SAT:
4578 case G_FPTOSI_SAT:
4579 return lowerFPTOINT_SAT(MI);
Matt Arsenaultbfbfa182020-01-18 10:08:11 -05004580 case G_FPTRUNC:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004581 return lowerFPTRUNC(MI);
Matt Arsenault7cd8a022020-07-17 11:01:15 -04004582 case G_FPOWI:
4583 return lowerFPOWI(MI);
Matt Arsenault6f74f552019-07-01 17:18:03 +00004584 case G_SMIN:
4585 case G_SMAX:
4586 case G_UMIN:
4587 case G_UMAX:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004588 return lowerMinMax(MI);
Thorsten Schütt2d2d6852024-07-23 10:12:28 +02004589 case G_SCMP:
4590 case G_UCMP:
4591 return lowerThreewayCompare(MI);
Matt Arsenaultb1843e12019-07-09 23:34:29 +00004592 case G_FCOPYSIGN:
Matt Arsenaulta1282922020-07-15 11:10:54 -04004593 return lowerFCopySign(MI);
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00004594 case G_FMINNUM:
4595 case G_FMAXNUM:
4596 return lowerFMinNumMaxNum(MI);
Matt Arsenault69999602020-03-29 15:51:54 -04004597 case G_MERGE_VALUES:
4598 return lowerMergeValues(MI);
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00004599 case G_UNMERGE_VALUES:
4600 return lowerUnmergeValues(MI);
Daniel Sanderse9a57c22019-08-09 21:11:20 +00004601 case TargetOpcode::G_SEXT_INREG: {
4602 assert(MI.getOperand(2).isImm() && "Expected immediate");
4603 int64_t SizeInBits = MI.getOperand(2).getImm();
4604
Amara Emerson719024a2023-02-23 16:35:39 -08004605 auto [DstReg, SrcReg] = MI.getFirst2Regs();
Daniel Sanderse9a57c22019-08-09 21:11:20 +00004606 LLT DstTy = MRI.getType(DstReg);
4607 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4608
4609 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
Jay Foad63f73542020-01-16 12:37:00 +00004610 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4611 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
Daniel Sanderse9a57c22019-08-09 21:11:20 +00004612 MI.eraseFromParent();
4613 return Legalized;
4614 }
Matt Arsenault0b7de792020-07-26 21:25:10 -04004615 case G_EXTRACT_VECTOR_ELT:
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04004616 case G_INSERT_VECTOR_ELT:
4617 return lowerExtractInsertVectorElt(MI);
Matt Arsenault690645b2019-08-13 16:09:07 +00004618 case G_SHUFFLE_VECTOR:
4619 return lowerShuffleVector(MI);
Lawrence Benson177ce192024-07-17 14:24:24 +02004620 case G_VECTOR_COMPRESS:
4621 return lowerVECTOR_COMPRESS(MI);
Amara Emersone20b91c2019-08-27 19:54:27 +00004622 case G_DYN_STACKALLOC:
4623 return lowerDynStackAlloc(MI);
Matt Arsenault1ca08082023-07-29 19:12:24 -04004624 case G_STACKSAVE:
4625 return lowerStackSave(MI);
4626 case G_STACKRESTORE:
4627 return lowerStackRestore(MI);
Matt Arsenaulta5b9c752019-10-06 01:37:35 +00004628 case G_EXTRACT:
4629 return lowerExtract(MI);
Matt Arsenault4bcdcad2019-10-07 19:13:27 +00004630 case G_INSERT:
4631 return lowerInsert(MI);
Petar Avramovic94a24e72019-12-30 11:13:22 +01004632 case G_BSWAP:
4633 return lowerBswap(MI);
Petar Avramovic98f72a52019-12-30 18:06:29 +01004634 case G_BITREVERSE:
4635 return lowerBitreverse(MI);
Matt Arsenault0ea3c722019-12-27 19:26:51 -05004636 case G_READ_REGISTER:
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05004637 case G_WRITE_REGISTER:
4638 return lowerReadWriteRegister(MI);
Jay Foadb35833b2020-07-12 14:18:45 -04004639 case G_UADDSAT:
4640 case G_USUBSAT: {
4641 // Try to make a reasonable guess about which lowering strategy to use. The
4642 // target can override this with custom lowering and calling the
4643 // implementation functions.
4644 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
AtariDreamsd5829582024-05-28 12:25:43 -04004645 if (LI.isLegalOrCustom({G_UMIN, Ty}))
Jay Foadb35833b2020-07-12 14:18:45 -04004646 return lowerAddSubSatToMinMax(MI);
4647 return lowerAddSubSatToAddoSubo(MI);
4648 }
4649 case G_SADDSAT:
4650 case G_SSUBSAT: {
4651 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4652
4653 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4654 // since it's a shorter expansion. However, we would need to figure out the
4655 // preferred boolean type for the carry out for the query.
4656 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4657 return lowerAddSubSatToMinMax(MI);
4658 return lowerAddSubSatToAddoSubo(MI);
4659 }
Bevin Hansson5de6c562020-07-16 17:02:04 +02004660 case G_SSHLSAT:
4661 case G_USHLSAT:
4662 return lowerShlSat(MI);
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02004663 case G_ABS:
4664 return lowerAbsToAddXor(MI);
Him1880748f422024-09-03 12:47:26 +01004665 case G_FABS:
4666 return lowerFAbs(MI);
Amara Emerson08232192020-09-26 10:02:39 -07004667 case G_SELECT:
4668 return lowerSelect(MI);
Janek van Oirschot587747d2022-12-06 20:36:07 +00004669 case G_IS_FPCLASS:
4670 return lowerISFPCLASS(MI);
Christudasan Devadasan4c6ab482021-03-10 18:03:10 +05304671 case G_SDIVREM:
4672 case G_UDIVREM:
4673 return lowerDIVREM(MI);
Matt Arsenaultb24436a2020-03-19 22:48:13 -04004674 case G_FSHL:
4675 case G_FSHR:
4676 return lowerFunnelShift(MI);
Amara Emersonf5e9be62021-03-26 15:27:15 -07004677 case G_ROTL:
4678 case G_ROTR:
4679 return lowerRotate(MI);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02004680 case G_MEMSET:
4681 case G_MEMCPY:
4682 case G_MEMMOVE:
4683 return lowerMemCpyFamily(MI);
4684 case G_MEMCPY_INLINE:
4685 return lowerMemcpyInline(MI);
Tuan Chuong Goha40c9842023-08-17 16:31:54 +01004686 case G_ZEXT:
4687 case G_SEXT:
4688 case G_ANYEXT:
4689 return lowerEXT(MI);
chuongg3d88d9832023-10-11 16:05:25 +01004690 case G_TRUNC:
4691 return lowerTRUNC(MI);
Amara Emerson95ac3d12021-08-18 00:19:58 -07004692 GISEL_VECREDUCE_CASES_NONSEQ
4693 return lowerVectorReduction(MI);
Michael Maitland6f9cb9a72023-12-08 13:24:27 -05004694 case G_VAARG:
4695 return lowerVAArg(MI);
Tim Northovercecee562016-08-26 17:46:13 +00004696 }
4697}
4698
Matt Arsenault0b7de792020-07-26 21:25:10 -04004699Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4700 Align MinAlign) const {
4701 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4702 // datalayout for the preferred alignment. Also there should be a target hook
4703 // for this to allow targets to reduce the alignment and ignore the
4704 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4705 // the type.
4706 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4707}
4708
4709MachineInstrBuilder
4710LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4711 MachinePointerInfo &PtrInfo) {
4712 MachineFunction &MF = MIRBuilder.getMF();
4713 const DataLayout &DL = MIRBuilder.getDataLayout();
4714 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4715
4716 unsigned AddrSpace = DL.getAllocaAddrSpace();
4717 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4718
4719 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4720 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4721}
4722
Amara Emerson41ebbed2025-01-05 21:32:27 -08004723MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4724 const SrcOp &Val) {
4725 LLT SrcTy = Val.getLLTTy(MRI);
4726 Align StackTypeAlign =
4727 std::max(getStackTemporaryAlignment(SrcTy),
4728 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4729 MachinePointerInfo PtrInfo;
4730 auto StackTemp =
4731 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4732
4733 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4734 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4735}
4736
Owen Anderson44b717d2024-02-21 00:42:22 -05004737static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4738 LLT VecTy) {
Matt Arsenault0b7de792020-07-26 21:25:10 -04004739 LLT IdxTy = B.getMRI()->getType(IdxReg);
4740 unsigned NElts = VecTy.getNumElements();
Owen Anderson44b717d2024-02-21 00:42:22 -05004741
4742 int64_t IdxVal;
4743 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4744 if (IdxVal < VecTy.getNumElements())
4745 return IdxReg;
4746 // If a constant index would be out of bounds, clamp it as well.
4747 }
4748
Matt Arsenault0b7de792020-07-26 21:25:10 -04004749 if (isPowerOf2_32(NElts)) {
4750 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4751 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4752 }
4753
4754 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4755 .getReg(0);
4756}
4757
4758Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4759 Register Index) {
4760 LLT EltTy = VecTy.getElementType();
4761
4762 // Calculate the element offset and add it to the pointer.
4763 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4764 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4765 "Converting bits to bytes lost precision");
4766
Owen Anderson44b717d2024-02-21 00:42:22 -05004767 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
Matt Arsenault0b7de792020-07-26 21:25:10 -04004768
Jay Foadfd3eaf72024-03-09 09:07:22 +00004769 // Convert index to the correct size for the address space.
4770 const DataLayout &DL = MIRBuilder.getDataLayout();
4771 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4772 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4773 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4774 if (IdxTy != MRI.getType(Index))
4775 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4776
Matt Arsenault0b7de792020-07-26 21:25:10 -04004777 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4778 MIRBuilder.buildConstant(IdxTy, EltSize));
4779
4780 LLT PtrTy = MRI.getType(VecPtr);
4781 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4782}
4783
Fangrui Songea2d4c52021-12-24 00:55:54 -08004784#ifndef NDEBUG
Petar Avramovic29f88b92021-12-23 14:09:51 +01004785/// Check that all vector operands have same number of elements. Other operands
4786/// should be listed in NonVecOp.
4787static bool hasSameNumEltsOnAllVectorOperands(
4788 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4789 std::initializer_list<unsigned> NonVecOpIndices) {
4790 if (MI.getNumMemOperands() != 0)
4791 return false;
Matt Arsenault816c9b3e2019-01-27 21:53:09 +00004792
Petar Avramovic29f88b92021-12-23 14:09:51 +01004793 LLT VecTy = MRI.getType(MI.getReg(0));
4794 if (!VecTy.isVector())
4795 return false;
4796 unsigned NumElts = VecTy.getNumElements();
Matt Arsenault816c9b3e2019-01-27 21:53:09 +00004797
Petar Avramovic29f88b92021-12-23 14:09:51 +01004798 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4799 MachineOperand &Op = MI.getOperand(OpIdx);
4800 if (!Op.isReg()) {
4801 if (!is_contained(NonVecOpIndices, OpIdx))
4802 return false;
4803 continue;
4804 }
Matt Arsenault816c9b3e2019-01-27 21:53:09 +00004805
Petar Avramovic29f88b92021-12-23 14:09:51 +01004806 LLT Ty = MRI.getType(Op.getReg());
4807 if (!Ty.isVector()) {
4808 if (!is_contained(NonVecOpIndices, OpIdx))
4809 return false;
Petar Avramovic29f88b92021-12-23 14:09:51 +01004810 continue;
4811 }
4812
4813 if (Ty.getNumElements() != NumElts)
4814 return false;
4815 }
4816
4817 return true;
4818}
Fangrui Songea2d4c52021-12-24 00:55:54 -08004819#endif
Petar Avramovic29f88b92021-12-23 14:09:51 +01004820
4821/// Fill \p DstOps with DstOps that have same number of elements combined as
4822/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4823/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
4824/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
4825static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4826 unsigned NumElts) {
4827 LLT LeftoverTy;
4828 assert(Ty.isVector() && "Expected vector type");
4829 LLT EltTy = Ty.getElementType();
4830 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4831 int NumParts, NumLeftover;
4832 std::tie(NumParts, NumLeftover) =
4833 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4834
4835 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4836 for (int i = 0; i < NumParts; ++i) {
4837 DstOps.push_back(NarrowTy);
4838 }
4839
4840 if (LeftoverTy.isValid()) {
4841 assert(NumLeftover == 1 && "expected exactly one leftover");
4842 DstOps.push_back(LeftoverTy);
4843 }
4844}
4845
4846/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4847/// made from \p Op depending on operand type.
4848static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4849 MachineOperand &Op) {
4850 for (unsigned i = 0; i < N; ++i) {
4851 if (Op.isReg())
4852 Ops.push_back(Op.getReg());
4853 else if (Op.isImm())
4854 Ops.push_back(Op.getImm());
4855 else if (Op.isPredicate())
4856 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4857 else
4858 llvm_unreachable("Unsupported type");
4859 }
Matt Arsenault816c9b3e2019-01-27 21:53:09 +00004860}
4861
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004862// Handle splitting vector operations which need to have the same number of
4863// elements in each type index, but each type index may have a different element
4864// type.
4865//
4866// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4867// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4868// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4869//
4870// Also handles some irregular breakdown cases, e.g.
4871// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4872// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4873// s64 = G_SHL s64, s32
4874LegalizerHelper::LegalizeResult
4875LegalizerHelper::fewerElementsVectorMultiEltType(
Petar Avramovic29f88b92021-12-23 14:09:51 +01004876 GenericMachineInstr &MI, unsigned NumElts,
4877 std::initializer_list<unsigned> NonVecOpIndices) {
4878 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4879 "Non-compatible opcode or not specified non-vector operands");
4880 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004881
Petar Avramovic29f88b92021-12-23 14:09:51 +01004882 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4883 unsigned NumDefs = MI.getNumDefs();
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004884
Petar Avramovic29f88b92021-12-23 14:09:51 +01004885 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4886 // Build instructions with DstOps to use instruction found by CSE directly.
4887 // CSE copies found instruction into given vreg when building with vreg dest.
4888 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4889 // Output registers will be taken from created instructions.
4890 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4891 for (unsigned i = 0; i < NumDefs; ++i) {
4892 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4893 }
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004894
Petar Avramovic29f88b92021-12-23 14:09:51 +01004895 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4896 // Operands listed in NonVecOpIndices will be used as is without splitting;
4897 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4898 // scalar condition (op 1), immediate in sext_inreg (op 2).
4899 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4900 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4901 ++UseIdx, ++UseNo) {
4902 if (is_contained(NonVecOpIndices, UseIdx)) {
4903 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4904 MI.getOperand(UseIdx));
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004905 } else {
Petar Avramovic29f88b92021-12-23 14:09:51 +01004906 SmallVector<Register, 8> SplitPieces;
chuongg3fcfe1b62024-01-15 16:40:39 +00004907 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4908 MRI);
Petar Avramovic29f88b92021-12-23 14:09:51 +01004909 for (auto Reg : SplitPieces)
4910 InputOpsPieces[UseNo].push_back(Reg);
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004911 }
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004912 }
4913
Petar Avramovic29f88b92021-12-23 14:09:51 +01004914 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004915
Petar Avramovic29f88b92021-12-23 14:09:51 +01004916 // Take i-th piece of each input operand split and build sub-vector/scalar
4917 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4918 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4919 SmallVector<DstOp, 2> Defs;
4920 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4921 Defs.push_back(OutputOpsPieces[DstNo][i]);
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004922
Petar Avramovic29f88b92021-12-23 14:09:51 +01004923 SmallVector<SrcOp, 3> Uses;
4924 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4925 Uses.push_back(InputOpsPieces[InputNo][i]);
Matt Arsenaultc83b8232019-02-07 17:38:00 +00004926
Petar Avramovic29f88b92021-12-23 14:09:51 +01004927 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4928 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4929 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4930 }
Matt Arsenaultca676342019-01-25 02:36:32 +00004931
Petar Avramovic29f88b92021-12-23 14:09:51 +01004932 // Merge small outputs into MI's output for each def operand.
4933 if (NumLeftovers) {
4934 for (unsigned i = 0; i < NumDefs; ++i)
4935 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
Matt Arsenaultcbaada62019-02-02 23:29:55 +00004936 } else {
Petar Avramovic29f88b92021-12-23 14:09:51 +01004937 for (unsigned i = 0; i < NumDefs; ++i)
Diana Picusf95a5fb2023-01-09 11:59:00 +01004938 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
Matt Arsenaultca676342019-01-25 02:36:32 +00004939 }
4940
Matt Arsenault1b1e6852019-01-25 02:59:34 +00004941 MI.eraseFromParent();
4942 return Legalized;
4943}
4944
// Break a G_PHI on a wide vector into several G_PHIs on narrower pieces
// (NumElts elements each, plus a possible smaller leftover), splitting each
// incoming value in its predecessor block.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
                                        unsigned NumElts) {
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  // PHI uses come in (value, basic block) pairs after the single def.
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  SmallVector<DstOp, 8> OutputOpsPieces;
  SmallVector<Register, 8> OutputRegs;
  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);

  // Instructions that perform register split will be inserted in basic block
  // where register is defined (basic block is in the next operand).
  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
    // Split each incoming value at the end of its predecessor block, before
    // any terminators.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
                       MIRBuilder, MRI);
  }

  // Build PHIs with fewer elements.
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  MIRBuilder.setInsertPt(*MI.getParent(), MI);
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
    Phi.addDef(
        MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
    OutputRegs.push_back(Phi.getReg(0));

    // Each narrow PHI takes the i-th piece of every incoming value, paired
    // with the original incoming basic block operand.
    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }

  // Set the insert point after the existing PHIs
  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());

  // Merge small outputs into MI's def.
  if (NumLeftovers) {
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  } else {
    MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
4997
4998LegalizerHelper::LegalizeResult
Matt Arsenault28215ca2019-08-13 16:26:28 +00004999LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5000 unsigned TypeIdx,
5001 LLT NarrowTy) {
Matt Arsenault28215ca2019-08-13 16:26:28 +00005002 const int NumDst = MI.getNumOperands() - 1;
5003 const Register SrcReg = MI.getOperand(NumDst).getReg();
Petar Avramovic29f88b92021-12-23 14:09:51 +01005004 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
Matt Arsenault28215ca2019-08-13 16:26:28 +00005005 LLT SrcTy = MRI.getType(SrcReg);
5006
Petar Avramovic29f88b92021-12-23 14:09:51 +01005007 if (TypeIdx != 1 || NarrowTy == DstTy)
Matt Arsenault28215ca2019-08-13 16:26:28 +00005008 return UnableToLegalize;
5009
Petar Avramovic29f88b92021-12-23 14:09:51 +01005010 // Requires compatible types. Otherwise SrcReg should have been defined by
5011 // merge-like instruction that would get artifact combined. Most likely
5012 // instruction that defines SrcReg has to perform more/fewer elements
5013 // legalization compatible with NarrowTy.
5014 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5015 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
Matt Arsenault28215ca2019-08-13 16:26:28 +00005016
Petar Avramovic29f88b92021-12-23 14:09:51 +01005017 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5018 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5019 return UnableToLegalize;
5020
5021 // This is most likely DstTy (smaller then register size) packed in SrcTy
5022 // (larger then register size) and since unmerge was not combined it will be
5023 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5024 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5025
5026 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5027 //
5028 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5029 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5030 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5031 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
Matt Arsenault28215ca2019-08-13 16:26:28 +00005032 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5033 const int PartsPerUnmerge = NumDst / NumUnmerge;
5034
5035 for (int I = 0; I != NumUnmerge; ++I) {
5036 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5037
5038 for (int J = 0; J != PartsPerUnmerge; ++J)
5039 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5040 MIB.addUse(Unmerge.getReg(I));
5041 }
5042
5043 MI.eraseFromParent();
5044 return Legalized;
5045}
5046
Pushpinder Singhd0e54222021-03-09 06:10:00 +00005047LegalizerHelper::LegalizeResult
Matt Arsenault901e3312020-08-03 18:37:29 -04005048LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5049 LLT NarrowTy) {
Amara Emerson719024a2023-02-23 16:35:39 -08005050 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
Petar Avramovic29f88b92021-12-23 14:09:51 +01005051 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
5052 // that should have been artifact combined. Most likely instruction that uses
5053 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5054 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5055 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5056 if (NarrowTy == SrcTy)
5057 return UnableToLegalize;
Matt Arsenault31adc282020-08-03 14:13:38 -04005058
Petar Avramovic29f88b92021-12-23 14:09:51 +01005059 // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
5060 // is for old mir tests. Since the changes to more/fewer elements it should no
5061 // longer be possible to generate MIR like this when starting from llvm-ir
5062 // because LCMTy approach was replaced with merge/unmerge to vector elements.
5063 if (TypeIdx == 1) {
5064 assert(SrcTy.isVector() && "Expected vector types");
5065 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5066 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5067 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5068 return UnableToLegalize;
5069 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5070 //
5071 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5072 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5073 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5074 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5075 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5076 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
Matt Arsenault31adc282020-08-03 14:13:38 -04005077
Petar Avramovic29f88b92021-12-23 14:09:51 +01005078 SmallVector<Register, 8> Elts;
5079 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5080 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5081 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5082 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5083 Elts.push_back(Unmerge.getReg(j));
5084 }
Matt Arsenault31adc282020-08-03 14:13:38 -04005085
Petar Avramovic29f88b92021-12-23 14:09:51 +01005086 SmallVector<Register, 8> NarrowTyElts;
5087 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5088 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5089 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5090 ++i, Offset += NumNarrowTyElts) {
5091 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
Diana Picusf95a5fb2023-01-09 11:59:00 +01005092 NarrowTyElts.push_back(
5093 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
Petar Avramovic29f88b92021-12-23 14:09:51 +01005094 }
Matt Arsenault31adc282020-08-03 14:13:38 -04005095
Diana Picusf95a5fb2023-01-09 11:59:00 +01005096 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
Petar Avramovic29f88b92021-12-23 14:09:51 +01005097 MI.eraseFromParent();
5098 return Legalized;
5099 }
5100
5101 assert(TypeIdx == 0 && "Bad type index");
5102 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5103 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5104 return UnableToLegalize;
5105
5106 // This is most likely SrcTy (smaller then register size) packed in DstTy
5107 // (larger then register size) and since merge was not combined it will be
5108 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5109 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5110
5111 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5112 //
5113 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5114 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5115 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5116 SmallVector<Register, 8> NarrowTyElts;
5117 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5118 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5119 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5120 for (unsigned i = 0; i < NumParts; ++i) {
5121 SmallVector<Register, 8> Sources;
5122 for (unsigned j = 0; j < NumElts; ++j)
5123 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
Diana Picusf95a5fb2023-01-09 11:59:00 +01005124 NarrowTyElts.push_back(
5125 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
Petar Avramovic29f88b92021-12-23 14:09:51 +01005126 }
5127
Diana Picusf95a5fb2023-01-09 11:59:00 +01005128 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
Matt Arsenault31adc282020-08-03 14:13:38 -04005129 MI.eraseFromParent();
5130 return Legalized;
5131}
5132
/// Legalize G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT by splitting the
/// vector into NarrowVecTy-sized pieces and performing the element access on
/// the single piece selected by a constant index. If the index is not a
/// known constant, falls back to lowerExtractInsertVectorElt.
///
/// \p TypeIdx must name the vector operand: type index 0 for inserts (the
/// result/source vector) and type index 1 for extracts (the source vector).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  auto [DstReg, SrcVec] = MI.getFirst2Regs();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // For both opcodes the element index is the trailing operand.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out of bounds indexing the pieces. An out-of-range constant index
    // yields an undefined result, so just emit G_IMPLICIT_DEF.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    // Pick the piece containing the element and rebase the index relative to
    // the start of that piece.
    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      // Extract only touches one piece; no remerge is needed.
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}
5206
/// Break a wide G_LOAD/G_STORE into a sequence of NarrowTy-sized memory
/// accesses at successive offsets from the base address, plus an extra
/// leftover-typed access when NarrowTy does not evenly divide the value type.
/// Atomics and extending/truncating accesses are rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (LdStMI.isAtomic())
    return UnableToLegalize;

  bool IsLoad = isa<GLoad>(LdStMI);
  Register ValReg = LdStMI.getReg(0);
  Register AddrReg = LdStMI.getPointerReg();
  LLT ValTy = MRI.getType(ValReg);

  // Reject extloads/truncstores: the register width must match the memory
  // width for the piecewise split below to be correct.
  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  // Work out the NarrowTy/leftover breakdown. For a store we also split the
  // value register now; for a load only the counts are needed here.
  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs, MIRBuilder, MRI)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  // NumParts stays -1 if no valid breakdown was found.
  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned NumParts, unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         ++Idx) {
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      // Materialize the piece's address as base + byte offset.
      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a narrowed memory operand at the adjusted offset.
      MachineMemOperand *NewMMO =
          MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
      // Big-endian walks the offsets downward (starting from the top piece),
      // little-endian upward from 0.
      Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
    }

    return Offset;
  };

  unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

  // For loads, reassemble the destination register from the loaded pieces.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  LdStMI.eraseFromParent();
  return Legalized;
}
5301
/// Top-level dispatch for the FewerElements legalization action: reduce the
/// number of vector elements of type index \p TypeIdx in \p MI to \p NarrowTy
/// by delegating to the opcode-specific fewerElements* implementation.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;
  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
  // A scalar NarrowTy means full scalarization: one element per piece.
  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  switch (MI.getOpcode()) {
  // Elementwise operations: every operand/result is split the same way, so
  // the generic multi-element-type splitting handles them all.
  case G_IMPLICIT_DEF:
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FEXP10:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FLDEXP:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_LRINT:
  case G_INTRINSIC_LLRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_LROUND:
  case G_LLROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FTAN:
  case G_FACOS:
  case G_FASIN:
  case G_FATAN:
  case G_FATAN2:
  case G_FCOSH:
  case G_FSINH:
  case G_FTANH:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SDIVREM:
  case G_UDIVREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_ABS:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_ROTL:
  case G_ROTR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
  case G_UMULO:
  case G_SMULO:
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_FPTOSI_SAT:
  case G_FPTOUI_SAT:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
  case G_UADDO:
  case G_USUBO:
  case G_UADDE:
  case G_USUBE:
  case G_SADDO:
  case G_SSUBO:
  case G_SADDE:
  case G_SSUBE:
  case G_STRICT_FADD:
  case G_STRICT_FSUB:
  case G_STRICT_FMUL:
  case G_STRICT_FMA:
  case G_STRICT_FLDEXP:
  case G_FFREXP:
    return fewerElementsVectorMultiEltType(GMI, NumElts);
  // Opcodes below pass the operand indices that must NOT be split (immediates
  // and scalars shared by all pieces).
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
  case G_IS_FPCLASS:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
  case G_SELECT:
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return fewerElementsVectorMultiEltType(GMI, NumElts);
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
  case G_PHI:
    return fewerElementsVectorPhi(GMI, NumElts);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
  GISEL_VECREDUCE_CASES_NONSEQ
    return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  case G_FPOWI:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
  case G_BITCAST:
    return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
  case G_INTRINSIC_FPTRUNC_ROUND:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
  default:
    return UnableToLegalize;
  }
}
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00005476
chuongg30fb3d422024-02-21 13:24:45 +00005477LegalizerHelper::LegalizeResult
5478LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
5479 LLT NarrowTy) {
5480 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5481 "Not a bitcast operation");
5482
5483 if (TypeIdx != 0)
5484 return UnableToLegalize;
5485
5486 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5487
Tim Gymnich2db2dc8a2024-12-12 18:47:46 +01005488 unsigned NewElemCount =
5489 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5490 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
chuongg30fb3d422024-02-21 13:24:45 +00005491
5492 // Split the Src and Dst Reg into smaller registers
5493 SmallVector<Register> SrcVRegs, BitcastVRegs;
5494 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5495 return UnableToLegalize;
5496
5497 // Build new smaller bitcast instructions
5498 // Not supporting Leftover types for now but will have to
5499 for (unsigned i = 0; i < SrcVRegs.size(); i++)
5500 BitcastVRegs.push_back(
5501 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
5502
5503 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5504 MI.eraseFromParent();
5505 return Legalized;
5506}
5507
/// Split a G_SHUFFLE_VECTOR into two half-width shuffles (Lo and Hi). Each
/// half is built as a narrow shuffle of at most two of the four input halves
/// when possible, and otherwise assembled element-by-element with a
/// G_BUILD_VECTOR. Only a 2-way split of power-of-2 element counts with
/// matching source/destination types is supported; further splitting happens
/// on later legalization iterations.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
      MI.getFirst3RegLLTs();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  // The shuffle should be canonicalized by now.
  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;

  if (!isPowerOf2_32(DstTy.getNumElements()))
    return UnableToLegalize;

  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
  // Further legalization attempts will be needed to do split further.
  NarrowTy =
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  // Split each source in two, giving four candidate input halves.
  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};

  Register Hi, Lo;

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs. Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
  SmallVector<int, 16> Ops;
  for (unsigned High = 0; High < 2; ++High) {
    Register &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands (recorded in InputUsed).
    // If building a suitable shuffle vector proves too hard, then bail
    // out with UseBuildVector set.
    unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into.
      unsigned Input = (unsigned)Idx / NewElts;

      if (Input >= std::size(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= std::size(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(Idx + OpNo * NewElts);
    }

    if (UseBuildVector) {
      LLT EltTy = NarrowTy.getElementType();
      SmallVector<Register, 16> SVOps;

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        // The mask element. This indexes into the input.
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        // The input vector this mask element indexes into.
        unsigned Input = (unsigned)Idx / NewElts;

        if (Input >= std::size(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                                MIRBuilder.buildConstant(LLT::scalar(32), Idx))
                            .getReg(0));
      }

      // Construct the Lo/Hi output using a G_BUILD_VECTOR.
      Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used! The result is undefined.
      Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // At least one input vector was used. Create a new shuffle vector.
      Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
    }

    // Reset the accumulated mask for the second (Hi) iteration.
    Ops.clear();
  }

  // Concatenate the two halves into the full-width destination.
  MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}
5645
/// Narrow a non-sequential vector reduction by splitting the source into
/// NarrowTy-sized pieces. A scalar NarrowTy triggers full scalarization
/// (tree-shaped when the piece count is a power of 2, otherwise sequential);
/// a vector NarrowTy reduces each piece separately and combines the partial
/// results with the reduction's scalar opcode.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  auto &RdxMI = cast<GVecReduce>(MI);

  if (TypeIdx != 1)
    return UnableToLegalize;

  // The semantics of the normal non-sequential reductions allow us to freely
  // re-associate the operation.
  auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();

  if (NarrowTy.isVector() &&
      (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
    return UnableToLegalize;

  unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
  SmallVector<Register> SplitSrcs;
  // If NarrowTy is a scalar then we're being asked to scalarize.
  const unsigned NumParts =
      NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
                          : SrcTy.getNumElements();

  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
  if (NarrowTy.isScalar()) {
    if (DstTy != NarrowTy)
      return UnableToLegalize; // FIXME: handle implicit extensions.

    if (isPowerOf2_32(NumParts)) {
      // Generate a tree of scalar operations to reduce the critical path.
      SmallVector<Register> PartialResults;
      unsigned NumPartsLeft = NumParts;
      while (NumPartsLeft > 1) {
        // Combine adjacent pairs; each pass halves the number of values.
        for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
          PartialResults.emplace_back(
              MIRBuilder
                  .buildInstr(ScalarOpc, {NarrowTy},
                              {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
                  .getReg(0));
        }
        SplitSrcs = PartialResults;
        PartialResults.clear();
        NumPartsLeft = SplitSrcs.size();
      }
      assert(SplitSrcs.size() == 1);
      MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
      MI.eraseFromParent();
      return Legalized;
    }
    // If we can't generate a tree, then just do sequential operations.
    Register Acc = SplitSrcs[0];
    for (unsigned Idx = 1; Idx < NumParts; ++Idx)
      Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
                .getReg(0);
    MIRBuilder.buildCopy(DstReg, Acc);
    MI.eraseFromParent();
    return Legalized;
  }
  // Vector NarrowTy: reduce each piece to a DstTy scalar first.
  SmallVector<Register> PartialReductions;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
        MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
            .getReg(0));
  }

  // If the types involved are powers of 2, we can generate intermediate vector
  // ops, before generating a final reduction operation. (This path does not
  // use the partial reductions built above; they are left as dead
  // instructions.)
  if (isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(NarrowTy.getNumElements())) {
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  }

  // Otherwise fold the partial reductions together sequentially, writing the
  // final combine directly into DstReg.
  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }
  MI.eraseFromParent();
  return Legalized;
}
5731
5732LegalizerHelper::LegalizeResult
David Green77b124c2024-01-05 08:11:44 +00005733LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5734 unsigned int TypeIdx,
5735 LLT NarrowTy) {
5736 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5737 MI.getFirst3RegLLTs();
5738 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5739 DstTy != NarrowTy)
5740 return UnableToLegalize;
5741
5742 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5743 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5744 "Unexpected vecreduce opcode");
5745 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5746 ? TargetOpcode::G_FADD
5747 : TargetOpcode::G_FMUL;
5748
5749 SmallVector<Register> SplitSrcs;
5750 unsigned NumParts = SrcTy.getNumElements();
chuongg3fcfe1b62024-01-15 16:40:39 +00005751 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
David Green77b124c2024-01-05 08:11:44 +00005752 Register Acc = ScalarReg;
5753 for (unsigned i = 0; i < NumParts; i++)
5754 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5755 .getReg(0);
5756
5757 MIRBuilder.buildCopy(DstReg, Acc);
5758 MI.eraseFromParent();
5759 return Legalized;
5760}
5761
5762LegalizerHelper::LegalizeResult
Amara Emersona35c2c72021-02-21 14:17:03 -08005763LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5764 LLT SrcTy, LLT NarrowTy,
5765 unsigned ScalarOpc) {
5766 SmallVector<Register> SplitSrcs;
5767 // Split the sources into NarrowTy size pieces.
5768 extractParts(SrcReg, NarrowTy,
chuongg3fcfe1b62024-01-15 16:40:39 +00005769 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5770 MIRBuilder, MRI);
Amara Emersona35c2c72021-02-21 14:17:03 -08005771 // We're going to do a tree reduction using vector operations until we have
5772 // one NarrowTy size value left.
5773 while (SplitSrcs.size() > 1) {
5774 SmallVector<Register> PartialRdxs;
5775 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5776 Register LHS = SplitSrcs[Idx];
5777 Register RHS = SplitSrcs[Idx + 1];
5778 // Create the intermediate vector op.
5779 Register Res =
5780 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5781 PartialRdxs.push_back(Res);
5782 }
5783 SplitSrcs = std::move(PartialRdxs);
5784 }
5785 // Finally generate the requested NarrowTy based reduction.
5786 Observer.changingInstr(MI);
5787 MI.getOperand(1).setReg(SplitSrcs[0]);
5788 Observer.changedInstr(MI);
5789 return Legalized;
5790}
5791
/// Expand a wide scalar shift whose shift amount is a known constant \p Amt.
/// The wide source is unmerged into two \p HalfTy halves (InL, InH) and the
/// result halves are computed directly, choosing the cheapest form based on
/// how \p Amt compares to the half width. \p AmtTy is the type used for the
/// constant shift amounts that are materialized.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  // Split the wide input into low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // Shift by zero: just reassemble the input halves.
  if (Amt.isZero()) {
    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits; // Width of the original, unsplit value.

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out; both halves are zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Amount exceeds the half width: only bits of InL survive, in Hi.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Shift by exactly the half width: Hi is the old Lo, Lo is zero.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // 0 < Amt < NVTBits: Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out; both halves are zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Amount exceeds the half width: only bits of InH survive, in Lo.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Shift by exactly the half width: Lo is the old Hi, Hi is zero.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // 0 < Amt < NVTBits: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR, but vacated high bits are filled with the sign,
    // obtained as InH >>a (NVTBits - 1).
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      // Amount exceeds the half width: Lo comes from InH, Hi is all sign.
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      // Shift by exactly the half width: Lo is the old Hi, Hi is all sign.
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      // 0 < Amt < NVTBits: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  // Reassemble the two result halves into the destination register.
  MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}
5879
// TODO: Optimize if constant shift amount.
/// Narrow a scalar G_SHL/G_LSHR/G_ASHR to half the destination width.
/// TypeIdx == 1 narrows only the shift-amount operand. Otherwise the value is
/// split into two halves and, if the amount is not a known constant, a fully
/// general select-based expansion is emitted.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // TypeIdx 1 is the shift amount: narrow just that operand in place.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  // The split below produces two equal halves, so the width must be even.
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A constant shift amount admits a much cheaper direct expansion.
  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
    return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
                                       ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  // Split the wide input into low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // AmtExcess = Amt - NewBitSize (used when the shift crosses the halves),
  // AmtLack = NewBitSize - Amt (bits carried between halves otherwise).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  // IsShort: the shift stays within one half; IsZero guards the Amt == 0
  // case, where the "short" combine would use an out-of-range sub-shift.
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    // Select between the short and long forms; keep InH unchanged when the
    // amount is zero.
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // G_LSHR and G_ASHR share this expansion; buildInstr(MI.getOpcode(), ...)
    // keeps the distinction where it matters (Hi half and the "long" Lo).
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess}); // Lo from Hi part.

    // Select between the short and long forms; keep InL unchanged when the
    // amount is zero.
    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  // Reassemble the two result halves into the destination register.
  MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
5989
5990LegalizerHelper::LegalizeResult
Matt Arsenault72bcf152019-02-28 00:01:05 +00005991LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5992 LLT MoreTy) {
5993 assert(TypeIdx == 0 && "Expecting only Idx 0");
5994
5995 Observer.changingInstr(MI);
5996 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5997 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5998 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
5999 moreElementsVectorSrc(MI, MoreTy, I);
6000 }
6001
6002 MachineBasicBlock &MBB = *MI.getParent();
Amara Emerson9d647212019-09-16 23:46:03 +00006003 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
Matt Arsenault72bcf152019-02-28 00:01:05 +00006004 moreElementsVectorDst(MI, MoreTy, 0);
6005 Observer.changedInstr(MI);
6006 return Legalized;
6007}
6008
Dhruv Chawla (work)2c9b6c12024-02-27 15:57:46 +05306009MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6010 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6011 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6012
6013 switch (Opcode) {
6014 default:
6015 llvm_unreachable(
6016 "getNeutralElementForVecReduce called with invalid opcode!");
6017 case TargetOpcode::G_VECREDUCE_ADD:
6018 case TargetOpcode::G_VECREDUCE_OR:
6019 case TargetOpcode::G_VECREDUCE_XOR:
6020 case TargetOpcode::G_VECREDUCE_UMAX:
6021 return MIRBuilder.buildConstant(Ty, 0);
6022 case TargetOpcode::G_VECREDUCE_MUL:
6023 return MIRBuilder.buildConstant(Ty, 1);
6024 case TargetOpcode::G_VECREDUCE_AND:
6025 case TargetOpcode::G_VECREDUCE_UMIN:
6026 return MIRBuilder.buildConstant(
6027 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6028 case TargetOpcode::G_VECREDUCE_SMAX:
6029 return MIRBuilder.buildConstant(
6030 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6031 case TargetOpcode::G_VECREDUCE_SMIN:
6032 return MIRBuilder.buildConstant(
6033 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6034 case TargetOpcode::G_VECREDUCE_FADD:
6035 return MIRBuilder.buildFConstant(Ty, -0.0);
6036 case TargetOpcode::G_VECREDUCE_FMUL:
6037 return MIRBuilder.buildFConstant(Ty, 1.0);
6038 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6039 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6040 assert(false && "getNeutralElementForVecReduce unimplemented for "
Nikita Popovf2f18452024-06-21 08:33:40 +02006041 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
Dhruv Chawla (work)2c9b6c12024-02-27 15:57:46 +05306042 }
6043 llvm_unreachable("switch expected to return!");
6044}
6045
/// Legalize \p MI by widening the vector type indicated by \p TypeIdx to
/// \p MoreTy (padding with extra elements). Dispatches per opcode to widen
/// the relevant sources and/or destination, or to a dedicated helper.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  // Ops whose only vector operand is the definition: widen the dest.
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  // G_STORE: widen the stored value (operand 0).
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  // Binary ops: widen both sources and the destination uniformly.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCOPYSIGN:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_STRICT_FSUB:
  case TargetOpcode::G_STRICT_FMUL:
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  // Ternary ops: widen all three sources and the destination.
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FSHR:
  case TargetOpcode::G_FSHL: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  // Extract-style ops: only the source vector (TypeIdx 1) may be widened.
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  // Unary-style ops (first source + dest share the widened type).
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_ABS:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT: {
    auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
    if (TypeIdx == 1) {
      // Widening the condition is only handled for a scalar condition with
      // matching result element count.
      if (!CondTy.isScalar() ||
          DstTy.getElementCount() != MoreTy.getElementCount())
        return UnableToLegalize;

      // This is turning a scalar select of vectors into a vector
      // select. Broadcast the select condition.
      auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ShufSplat.getReg(0));
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Widening the result of a vector-condition select is not handled here.
    if (CondTy.isVector())
      return UnableToLegalize;

    // Scalar condition: widen both value operands and the destination.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_UNMERGE_VALUES:
    return UnableToLegalize;
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_BUILD_VECTOR: {
    // Pad the element list with undefs, build the wider vector, then trim
    // the trailing elements back down to the original destination.
    SmallVector<SrcOp, 8> Elts;
    for (auto Op : MI.uses()) {
      Elts.push_back(Op.getReg());
    }

    for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
      Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
    }

    MIRBuilder.buildDeleteTrailingVectorElements(
        MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
    MI.eraseFromParent();
    return Legalized;
  }
  // Conversions: source and dest have different element types, so widen
  // each side with its own element type but the shared new element count.
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    Observer.changingInstr(MI);
    LLT SrcExtTy;
    LLT DstExtTy;
    if (TypeIdx == 0) {
      DstExtTy = MoreTy;
      SrcExtTy = LLT::fixed_vector(
          MoreTy.getNumElements(),
          MRI.getType(MI.getOperand(1).getReg()).getElementType());
    } else {
      DstExtTy = LLT::fixed_vector(
          MoreTy.getNumElements(),
          MRI.getType(MI.getOperand(0).getReg()).getElementType());
      SrcExtTy = MoreTy;
    }
    moreElementsVectorSrc(MI, SrcExtTy, 1);
    moreElementsVectorDst(MI, DstExtTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  // Compares: widen both compared operands (TypeIdx 1); the boolean result
  // keeps its own element type at the new element count.
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    LLT CondTy = LLT::fixed_vector(
        MoreTy.getNumElements(),
        MRI.getType(MI.getOperand(0).getReg()).getElementType());
    moreElementsVectorDst(MI, CondTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITCAST: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

    // Scale the source element count so that total bit width still matches
    // the widened destination; bail if it doesn't divide evenly.
    unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
    if (coefficient % DstTy.getNumElements() != 0)
      return UnableToLegalize;

    coefficient = coefficient / DstTy.getNumElements();

    LLT NewTy = SrcTy.changeElementCount(
        ElementCount::get(coefficient, MoreTy.isScalable()));
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, NewTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  // Reductions: pad the input with the opcode's neutral element so the
  // extra lanes don't change the reduced value.
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN: {
    LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
    MachineOperand &MO = MI.getOperand(1);
    // Pad with undef first, then overwrite the padding lanes with the
    // neutral element for this reduction.
    auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
    auto NeutralElement = getNeutralElementForVecReduce(
        MI.getOpcode(), MIRBuilder, MoreTy.getElementType());

    LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
    for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
         i != e; i++) {
      auto Idx = MIRBuilder.buildConstant(IdxTy, i);
      NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
                                                   NeutralElement, Idx);
    }

    Observer.changingInstr(MI);
    MO.setReg(NewVec.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }

  default:
    return UnableToLegalize;
  }
}
6300
/// Rewrite a G_SHUFFLE_VECTOR whose destination and source vectors have
/// different element counts into shuffles whose operand lengths match.
/// A shorter mask is padded with undef lanes; a longer mask is handled by
/// concatenating the sources with undef vectors (and extracting a subvector
/// of the result if the padded length overshoots).
LegalizerHelper::LegalizeResult
LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  unsigned MaskNumElts = Mask.size();
  unsigned SrcNumElts = SrcTy.getNumElements();
  LLT DestEltTy = DstTy.getElementType();

  // Already the same length; nothing to do.
  if (MaskNumElts == SrcNumElts)
    return Legalized;

  if (MaskNumElts < SrcNumElts) {
    // Extend mask to match new destination vector size with
    // undef values.
    SmallVector<int, 16> NewMask(SrcNumElts, -1);
    llvm::copy(Mask, NewMask.begin());

    // Widen the destination to the source length and re-emit the shuffle
    // with the padded mask.
    moreElementsVectorDst(MI, SrcTy, 0);
    MIRBuilder.setInstrAndDebugLoc(MI);
    MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                  MI.getOperand(1).getReg(),
                                  MI.getOperand(2).getReg(), NewMask);
    MI.eraseFromParent();

    return Legalized;
  }

  // MaskNumElts > SrcNumElts: round the mask length up to a multiple of the
  // source length so the sources can be concatenated to match.
  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
  LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);

  // Create new source vectors by concatenating the initial
  // source vectors with undefined vectors of the same size.
  auto Undef = MIRBuilder.buildUndef(SrcTy);
  SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
  SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
  MOps1[0] = MI.getOperand(1).getReg();
  MOps2[0] = MI.getOperand(2).getReg();

  auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
  auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);

  // Readjust mask for new input vector length. Indices that referred to the
  // second source shift up by the amount of padding added to the first.
  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
  for (unsigned I = 0; I != MaskNumElts; ++I) {
    int Idx = Mask[I];
    if (Idx >= static_cast<int>(SrcNumElts))
      Idx += PaddedMaskNumElts - SrcNumElts;
    MappedOps[I] = Idx;
  }

  // If we got more elements than required, extract subvector.
  if (MaskNumElts != PaddedMaskNumElts) {
    auto Shuffle =
        MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);

    // Pull out the first MaskNumElts lanes element by element and rebuild
    // the requested destination vector.
    SmallVector<Register, 16> Elts(MaskNumElts);
    for (unsigned I = 0; I < MaskNumElts; ++I) {
      Elts[I] =
          MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
              .getReg(0);
    }
    MIRBuilder.buildBuildVector(DstReg, Elts);
  } else {
    MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
  }

  MI.eraseFromParent();
  return LegalizerHelper::LegalizeResult::Legalized;
}
6371
Amara Emerson97c42632021-07-09 23:11:22 -07006372LegalizerHelper::LegalizeResult
6373LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6374 unsigned int TypeIdx, LLT MoreTy) {
Amara Emerson719024a2023-02-23 16:35:39 -08006375 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
Amara Emerson97c42632021-07-09 23:11:22 -07006376 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
Amara Emerson97c42632021-07-09 23:11:22 -07006377 unsigned NumElts = DstTy.getNumElements();
6378 unsigned WidenNumElts = MoreTy.getNumElements();
6379
Kevin Atheyec7cffc2022-12-15 11:19:24 -08006380 if (DstTy.isVector() && Src1Ty.isVector() &&
Vladislav Dzhidzhoev3a51eed2023-02-07 21:32:50 +01006381 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6382 return equalizeVectorShuffleLengths(MI);
Kevin Atheyec7cffc2022-12-15 11:19:24 -08006383 }
6384
6385 if (TypeIdx != 0)
6386 return UnableToLegalize;
6387
Amara Emerson97c42632021-07-09 23:11:22 -07006388 // Expect a canonicalized shuffle.
6389 if (DstTy != Src1Ty || DstTy != Src2Ty)
6390 return UnableToLegalize;
6391
6392 moreElementsVectorSrc(MI, MoreTy, 1);
6393 moreElementsVectorSrc(MI, MoreTy, 2);
6394
6395 // Adjust mask based on new input vector length.
Craig Topper5797ed62024-12-10 22:18:46 -08006396 SmallVector<int, 16> NewMask(WidenNumElts, -1);
Amara Emerson97c42632021-07-09 23:11:22 -07006397 for (unsigned I = 0; I != NumElts; ++I) {
6398 int Idx = Mask[I];
6399 if (Idx < static_cast<int>(NumElts))
Craig Topper5797ed62024-12-10 22:18:46 -08006400 NewMask[I] = Idx;
Amara Emerson97c42632021-07-09 23:11:22 -07006401 else
Craig Topper5797ed62024-12-10 22:18:46 -08006402 NewMask[I] = Idx - NumElts + WidenNumElts;
Amara Emerson97c42632021-07-09 23:11:22 -07006403 }
Amara Emerson97c42632021-07-09 23:11:22 -07006404 moreElementsVectorDst(MI, MoreTy, 0);
6405 MIRBuilder.setInstrAndDebugLoc(MI);
6406 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6407 MI.getOperand(1).getReg(),
6408 MI.getOperand(2).getReg(), NewMask);
6409 MI.eraseFromParent();
6410 return Legalized;
6411}
6412
/// Emit a multi-part (schoolbook) multiply of two values split into
/// NarrowTy-sized pieces. \p Src1Regs and \p Src2Regs hold the pieces from
/// least to most significant; the low \p DstRegs.size() pieces of the
/// product are written into \p DstRegs. Partial products contributing to
/// each destination piece are summed with G_UADDO so carries propagate into
/// the next piece.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  // Piece 0 is just the low half of Src1[0] * Src2[0]; no carries yet.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  Register CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      // Use overflowing adds and count the carry-outs; they feed the next
      // destination piece.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
6475
/// Narrow a scalar add/sub (optionally with carry-in and/or carry-out) by
/// splitting both sources into NarrowTy-sized pieces plus a leftover piece,
/// performing the operation piecewise, and chaining the carry between pieces.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // Only the result type (index 0) can be narrowed here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstType = MRI.getType(DstReg);
  // FIXME: add support for vector types
  if (DstType.isVector())
    return UnableToLegalize;

  // Pick the opcodes used in the expansion:
  //   OpO - lowest piece: produces a carry-out, takes no carry-in.
  //   OpE - middle pieces: carry-in and carry-out, always unsigned.
  //   OpF - final piece: uses the signed opcode for signed-overflow variants
  //         so the overflow flag has the correct (signed) semantics.
  unsigned Opcode = MI.getOpcode();
  unsigned OpO, OpE, OpF;
  switch (Opcode) {
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_ADD:
    OpO = TargetOpcode::G_UADDO;
    OpE = TargetOpcode::G_UADDE;
    OpF = TargetOpcode::G_UADDE;
    if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
      OpF = TargetOpcode::G_SADDE;
    break;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SUB:
    OpO = TargetOpcode::G_USUBO;
    OpE = TargetOpcode::G_USUBE;
    OpF = TargetOpcode::G_USUBE;
    if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
      OpF = TargetOpcode::G_SSUBE;
    break;
  default:
    llvm_unreachable("Unexpected add/sub opcode!");
  }

  // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
  unsigned NumDefs = MI.getNumExplicitDefs();
  Register Src1 = MI.getOperand(NumDefs).getReg();
  Register Src2 = MI.getOperand(NumDefs + 1).getReg();
  // CarryDst is set only for *O/*E variants; CarryIn only for *E variants.
  Register CarryDst, CarryIn;
  if (NumDefs == 2)
    CarryDst = MI.getOperand(1).getReg();
  if (MI.getNumOperands() == NumDefs + 3)
    CarryIn = MI.getOperand(NumDefs + 2).getReg();

  // Split both sources identically; leftover pieces (if the register size is
  // not a multiple of NarrowTy) are appended after the full-size pieces.
  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy, DummyTy;
  SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
               MIRBuilder, MRI);
  extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
               MRI);

  int NarrowParts = Src1Regs.size();
  Src1Regs.append(Src1Left);
  Src2Regs.append(Src2Left);
  DstRegs.reserve(Src1Regs.size());

  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
    Register DstReg =
        MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
    Register CarryOut;
    // Forward the final carry-out to the destination register
    if (i == e - 1 && CarryDst)
      CarryOut = CarryDst;
    else
      CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

    // First piece has no carry-in (unless the original op had one); the last
    // piece uses OpF so the final overflow flag has the right signedness.
    if (!CarryIn) {
      MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i]});
    } else if (i == e - 1) {
      MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    } else {
      MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    }

    DstRegs.push_back(DstReg);
    // Chain this piece's carry-out into the next piece.
    CarryIn = CarryOut;
  }
  // Recombine the full-size pieces and the leftover pieces into the result.
  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
              ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
              ArrayRef(DstRegs).drop_front(NarrowParts));

  MI.eraseFromParent();
  return Legalized;
}
6571
6572LegalizerHelper::LegalizeResult
Petar Avramovic0b17e592019-03-11 10:00:17 +00006573LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
Amara Emerson719024a2023-02-23 16:35:39 -08006574 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
Petar Avramovic0b17e592019-03-11 10:00:17 +00006575
Matt Arsenault211e89d2019-01-27 00:52:51 +00006576 LLT Ty = MRI.getType(DstReg);
Jay Foad24688f82021-10-04 20:25:42 +01006577 if (Ty.isVector())
Matt Arsenault211e89d2019-01-27 00:52:51 +00006578 return UnableToLegalize;
6579
Jay Foad0a031f52021-10-05 10:47:54 +01006580 unsigned Size = Ty.getSizeInBits();
Jay Foad24688f82021-10-04 20:25:42 +01006581 unsigned NarrowSize = NarrowTy.getSizeInBits();
Jay Foad0a031f52021-10-05 10:47:54 +01006582 if (Size % NarrowSize != 0)
Jay Foad24688f82021-10-04 20:25:42 +01006583 return UnableToLegalize;
6584
Jay Foad0a031f52021-10-05 10:47:54 +01006585 unsigned NumParts = Size / NarrowSize;
Petar Avramovic5229f472019-03-11 10:08:44 +00006586 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
Jay Foad0a031f52021-10-05 10:47:54 +01006587 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
Matt Arsenault211e89d2019-01-27 00:52:51 +00006588
Matt Arsenaultde8451f2020-02-04 10:34:22 -05006589 SmallVector<Register, 2> Src1Parts, Src2Parts;
6590 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
chuongg3fcfe1b62024-01-15 16:40:39 +00006591 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6592 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
Petar Avramovic5229f472019-03-11 10:08:44 +00006593 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
Matt Arsenault211e89d2019-01-27 00:52:51 +00006594
Petar Avramovic5229f472019-03-11 10:08:44 +00006595 // Take only high half of registers if this is high mul.
Jay Foad0a031f52021-10-05 10:47:54 +01006596 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
Diana Picusf95a5fb2023-01-09 11:59:00 +01006597 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
Matt Arsenault211e89d2019-01-27 00:52:51 +00006598 MI.eraseFromParent();
6599 return Legalized;
6600}
6601
Matt Arsenault1cf713662019-02-12 14:54:52 +00006602LegalizerHelper::LegalizeResult
Matt Arsenault83a25a12021-03-26 17:29:36 -04006603LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
6604 LLT NarrowTy) {
6605 if (TypeIdx != 0)
6606 return UnableToLegalize;
6607
6608 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6609
6610 Register Src = MI.getOperand(1).getReg();
6611 LLT SrcTy = MRI.getType(Src);
6612
6613 // If all finite floats fit into the narrowed integer type, we can just swap
6614 // out the result type. This is practically only useful for conversions from
6615 // half to at least 16-bits, so just handle the one case.
6616 if (SrcTy.getScalarType() != LLT::scalar(16) ||
Simon Pilgrimbc980762021-04-20 17:19:15 +01006617 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
Matt Arsenault83a25a12021-03-26 17:29:36 -04006618 return UnableToLegalize;
6619
6620 Observer.changingInstr(MI);
6621 narrowScalarDst(MI, NarrowTy, 0,
6622 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6623 Observer.changedInstr(MI);
6624 return Legalized;
6625}
6626
/// Narrow G_EXTRACT by splitting the source register (type index 1) into
/// NarrowTy pieces and extracting only from the pieces that overlap the
/// requested bit range.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
               MIRBuilder, MRI);

  // OpReg/OpStart/OpSize describe the bit range the original G_EXTRACT reads.
  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    // Bit offset of this source piece within the original wide register.
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Recombine the collected segments into the destination register.
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}
6694
/// Narrow G_INSERT by splitting the destination-sized source (operand 1) into
/// NarrowTy pieces and rebuilding each piece, inserting the relevant segment
/// of the inserted value (operand 2) where the bit ranges overlap.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy;
  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
               LeftoverRegs, MIRBuilder, MRI);

  // Treat leftover pieces uniformly with the full-size pieces below.
  SrcRegs.append(LeftoverRegs);

  uint64_t NarrowSize = NarrowTy.getSizeInBits();
  // OpReg/OpStart/OpSize describe the bit range the original insert writes.
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
    // Bit offset of this destination piece within the wide register.
    unsigned DstStart = I * NarrowSize;

    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    Register SrcReg = SrcRegs[I];
    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
      // The leftover reg is smaller than NarrowTy, so we need to extend it.
      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
    }

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    // Build the updated piece by inserting the segment into the source piece.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  // The leftover piece was any-extended to NarrowTy above, so the merged
  // result may be wider than the destination; truncate in that case.
  uint64_t WideSize = DstRegs.size() * NarrowSize;
  Register DstReg = MI.getOperand(0).getReg();
  if (WideSize > RegTy.getSizeInBits()) {
    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
    MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
    MIRBuilder.buildTrunc(DstReg, MergeReg);
  } else
    MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
6777
Matt Arsenault211e89d2019-01-27 00:52:51 +00006778LegalizerHelper::LegalizeResult
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006779LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6780 LLT NarrowTy) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006781 Register DstReg = MI.getOperand(0).getReg();
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006782 LLT DstTy = MRI.getType(DstReg);
6783
6784 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6785
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00006786 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6787 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6788 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006789 LLT LeftoverTy;
6790 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
chuongg3fcfe1b62024-01-15 16:40:39 +00006791 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006792 return UnableToLegalize;
6793
6794 LLT Unused;
6795 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
chuongg3fcfe1b62024-01-15 16:40:39 +00006796 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006797 llvm_unreachable("inconsistent extractParts result");
6798
6799 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6800 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6801 {Src0Regs[I], Src1Regs[I]});
Jay Foadb482e1b2020-01-23 11:51:35 +00006802 DstRegs.push_back(Inst.getReg(0));
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006803 }
6804
6805 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6806 auto Inst = MIRBuilder.buildInstr(
6807 MI.getOpcode(),
6808 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
Jay Foadb482e1b2020-01-23 11:51:35 +00006809 DstLeftoverRegs.push_back(Inst.getReg(0));
Matt Arsenault9e0eeba2019-04-10 17:07:56 +00006810 }
6811
6812 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6813 LeftoverTy, DstLeftoverRegs);
6814
6815 MI.eraseFromParent();
6816 return Legalized;
6817}
6818
6819LegalizerHelper::LegalizeResult
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05006820LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6821 LLT NarrowTy) {
6822 if (TypeIdx != 0)
6823 return UnableToLegalize;
6824
Amara Emerson719024a2023-02-23 16:35:39 -08006825 auto [DstReg, SrcReg] = MI.getFirst2Regs();
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05006826
Matt Arsenaulta66d2812020-01-10 10:41:29 -05006827 LLT DstTy = MRI.getType(DstReg);
6828 if (DstTy.isVector())
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05006829 return UnableToLegalize;
6830
Matt Arsenaulta66d2812020-01-10 10:41:29 -05006831 SmallVector<Register, 8> Parts;
6832 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
Matt Arsenaultcd7650c2020-01-11 19:05:06 -05006833 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6834 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6835
Matt Arsenaultbe31a7b2020-01-10 11:02:18 -05006836 MI.eraseFromParent();
6837 return Legalized;
6838}
6839
6840LegalizerHelper::LegalizeResult
Matt Arsenault81511e52019-02-05 00:13:44 +00006841LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6842 LLT NarrowTy) {
6843 if (TypeIdx != 0)
6844 return UnableToLegalize;
6845
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006846 Register CondReg = MI.getOperand(1).getReg();
Matt Arsenault81511e52019-02-05 00:13:44 +00006847 LLT CondTy = MRI.getType(CondReg);
6848 if (CondTy.isVector()) // TODO: Handle vselect
6849 return UnableToLegalize;
6850
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00006851 Register DstReg = MI.getOperand(0).getReg();
Matt Arsenault81511e52019-02-05 00:13:44 +00006852 LLT DstTy = MRI.getType(DstReg);
6853
Matt Arsenaulte3a676e2019-06-24 15:50:29 +00006854 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6855 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6856 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
Matt Arsenault81511e52019-02-05 00:13:44 +00006857 LLT LeftoverTy;
6858 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
chuongg3fcfe1b62024-01-15 16:40:39 +00006859 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
Matt Arsenault81511e52019-02-05 00:13:44 +00006860 return UnableToLegalize;
6861
6862 LLT Unused;
6863 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
chuongg3fcfe1b62024-01-15 16:40:39 +00006864 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
Matt Arsenault81511e52019-02-05 00:13:44 +00006865 llvm_unreachable("inconsistent extractParts result");
6866
6867 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6868 auto Select = MIRBuilder.buildSelect(NarrowTy,
6869 CondReg, Src1Regs[I], Src2Regs[I]);
Jay Foadb482e1b2020-01-23 11:51:35 +00006870 DstRegs.push_back(Select.getReg(0));
Matt Arsenault81511e52019-02-05 00:13:44 +00006871 }
6872
6873 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6874 auto Select = MIRBuilder.buildSelect(
6875 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
Jay Foadb482e1b2020-01-23 11:51:35 +00006876 DstLeftoverRegs.push_back(Select.getReg(0));
Matt Arsenault81511e52019-02-05 00:13:44 +00006877 }
6878
6879 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6880 LeftoverTy, DstLeftoverRegs);
6881
6882 MI.eraseFromParent();
6883 return Legalized;
6884}
6885
/// Narrow G_CTLZ/G_CTLZ_ZERO_UNDEF when the source (type index 1) is a scalar
/// exactly twice the narrow size, by counting leading zeros of the two halves
/// separately and selecting the correct combination.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    // The Lo count only matters when Hi == 0, but Lo itself may still be
    // zero, so only use the zero-undef form when the original op allows it.
    auto LoCTLZ = IsUndef ?
      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    // The Hi count is only used when Hi != 0, so zero-undef is always safe.
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
6918
/// Narrow G_CTTZ/G_CTTZ_ZERO_UNDEF when the source (type index 1) is a scalar
/// exactly twice the narrow size, by counting trailing zeros of the two
/// halves separately and selecting the correct combination.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    // The Hi count only matters when Lo == 0, but Hi itself may still be
    // zero, so only use the zero-undef form when the original op allows it.
    auto HiCTTZ = IsUndef ?
      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    // The Lo count is only used when Lo != 0, so zero-undef is always safe.
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
6951
6952LegalizerHelper::LegalizeResult
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01006953LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6954 LLT NarrowTy) {
6955 if (TypeIdx != 1)
6956 return UnableToLegalize;
6957
Amara Emerson719024a2023-02-23 16:35:39 -08006958 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01006959 unsigned NarrowSize = NarrowTy.getSizeInBits();
6960
6961 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6962 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6963
Matt Arsenault3b198512020-02-06 22:29:23 -05006964 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6965 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
Jon Roelofsf2e8e462021-07-26 16:42:20 -07006966 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01006967
6968 MI.eraseFromParent();
6969 return Legalized;
6970 }
6971
6972 return UnableToLegalize;
6973}
6974
6975LegalizerHelper::LegalizeResult
Matt Arsenaulteece6ba2023-04-26 22:02:42 -04006976LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6977 LLT NarrowTy) {
6978 if (TypeIdx != 1)
6979 return UnableToLegalize;
6980
6981 MachineIRBuilder &B = MIRBuilder;
6982 Register ExpReg = MI.getOperand(2).getReg();
6983 LLT ExpTy = MRI.getType(ExpReg);
6984
6985 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6986
6987 // Clamp the exponent to the range of the target type.
6988 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6989 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6990 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6991 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6992
6993 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6994 Observer.changingInstr(MI);
6995 MI.getOperand(2).setReg(Trunc.getReg(0));
6996 Observer.changedInstr(MI);
6997 return Legalized;
6998}
6999
7000LegalizerHelper::LegalizeResult
Matt Arsenaulta1282922020-07-15 11:10:54 -04007001LegalizerHelper::lowerBitCount(MachineInstr &MI) {
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007002 unsigned Opc = MI.getOpcode();
Matt Arsenaulta679f272020-07-19 12:29:48 -04007003 const auto &TII = MIRBuilder.getTII();
Diana Picus0528e2c2018-11-26 11:07:02 +00007004 auto isSupported = [this](const LegalityQuery &Q) {
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007005 auto QAction = LI.getAction(Q).Action;
Diana Picus0528e2c2018-11-26 11:07:02 +00007006 return QAction == Legal || QAction == Libcall || QAction == Custom;
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007007 };
7008 switch (Opc) {
7009 default:
7010 return UnableToLegalize;
7011 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7012 // This trivially expands to CTLZ.
Daniel Sandersd001e0e2018-12-12 23:48:13 +00007013 Observer.changingInstr(MI);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007014 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00007015 Observer.changedInstr(MI);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007016 return Legalized;
7017 }
7018 case TargetOpcode::G_CTLZ: {
Amara Emerson719024a2023-02-23 16:35:39 -08007019 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
Matt Arsenault8de2dad2020-02-06 21:11:52 -05007020 unsigned Len = SrcTy.getSizeInBits();
7021
7022 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
Diana Picus0528e2c2018-11-26 11:07:02 +00007023 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
Matt Arsenault8de2dad2020-02-06 21:11:52 -05007024 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7025 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7026 auto ICmp = MIRBuilder.buildICmp(
7027 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7028 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7029 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007030 MI.eraseFromParent();
7031 return Legalized;
7032 }
7033 // for now, we do this:
7034 // NewLen = NextPowerOf2(Len);
7035 // x = x | (x >> 1);
7036 // x = x | (x >> 2);
7037 // ...
7038 // x = x | (x >>16);
7039 // x = x | (x >>32); // for 64-bit input
7040 // Upto NewLen/2
7041 // return Len - popcount(x);
7042 //
7043 // Ref: "Hacker's Delight" by Henry Warren
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00007044 Register Op = SrcReg;
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007045 unsigned NewLen = PowerOf2Ceil(Len);
7046 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
Matt Arsenault8de2dad2020-02-06 21:11:52 -05007047 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7048 auto MIBOp = MIRBuilder.buildOr(
7049 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
Jay Foadb482e1b2020-01-23 11:51:35 +00007050 Op = MIBOp.getReg(0);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007051 }
Matt Arsenault8de2dad2020-02-06 21:11:52 -05007052 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7053 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
Jay Foad63f73542020-01-16 12:37:00 +00007054 MIBPop);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007055 MI.eraseFromParent();
7056 return Legalized;
7057 }
7058 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7059 // This trivially expands to CTTZ.
Daniel Sandersd001e0e2018-12-12 23:48:13 +00007060 Observer.changingInstr(MI);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007061 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
Aditya Nandakumarf75d4f32018-12-05 20:14:52 +00007062 Observer.changedInstr(MI);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007063 return Legalized;
7064 }
7065 case TargetOpcode::G_CTTZ: {
Amara Emerson719024a2023-02-23 16:35:39 -08007066 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
Matt Arsenault8de2dad2020-02-06 21:11:52 -05007067
7068 unsigned Len = SrcTy.getSizeInBits();
7069 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007070 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7071 // zero.
Matt Arsenault8de2dad2020-02-06 21:11:52 -05007072 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7073 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7074 auto ICmp = MIRBuilder.buildICmp(
7075 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7076 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7077 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007078 MI.eraseFromParent();
7079 return Legalized;
7080 }
7081 // for now, we use: { return popcount(~x & (x - 1)); }
7082 // unless the target has ctlz but not ctpop, in which case we use:
7083 // { return 32 - nlz(~x & (x-1)); }
7084 // Ref: "Hacker's Delight" by Henry Warren
Matt Arsenaulta1282922020-07-15 11:10:54 -04007085 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7086 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
Jay Foad28bb43b2020-01-16 12:09:48 +00007087 auto MIBTmp = MIRBuilder.buildAnd(
Matt Arsenaulta1282922020-07-15 11:10:54 -04007088 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7089 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7090 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7091 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
Jay Foad63f73542020-01-16 12:37:00 +00007092 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
Matt Arsenaulta1282922020-07-15 11:10:54 -04007093 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007094 MI.eraseFromParent();
7095 return Legalized;
7096 }
Craig Topper44e8bea2023-11-12 19:36:24 -08007097 Observer.changingInstr(MI);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007098 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
Jay Foadb482e1b2020-01-23 11:51:35 +00007099 MI.getOperand(1).setReg(MIBTmp.getReg(0));
Craig Topper44e8bea2023-11-12 19:36:24 -08007100 Observer.changedInstr(MI);
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007101 return Legalized;
7102 }
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01007103 case TargetOpcode::G_CTPOP: {
Matt Arsenaulta1282922020-07-15 11:10:54 -04007104 Register SrcReg = MI.getOperand(1).getReg();
7105 LLT Ty = MRI.getType(SrcReg);
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01007106 unsigned Size = Ty.getSizeInBits();
7107 MachineIRBuilder &B = MIRBuilder;
7108
7109 // Count set bits in blocks of 2 bits. Default approach would be
7110 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7111 // We use following formula instead:
7112 // B2Count = val - { (val >> 1) & 0x55555555 }
7113 // since it gives same result in blocks of 2 with one instruction less.
7114 auto C_1 = B.buildConstant(Ty, 1);
Matt Arsenaulta1282922020-07-15 11:10:54 -04007115 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01007116 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7117 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7118 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
Matt Arsenaulta1282922020-07-15 11:10:54 -04007119 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01007120
7121 // In order to get count in blocks of 4 add values from adjacent block of 2.
7122 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7123 auto C_2 = B.buildConstant(Ty, 2);
7124 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7125 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7126 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7127 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7128 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7129 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7130
7131 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7132 // addition since count value sits in range {0,...,8} and 4 bits are enough
7133 // to hold such binary values. After addition high 4 bits still hold count
7134 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7135 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7136 auto C_4 = B.buildConstant(Ty, 4);
7137 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7138 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7139 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7140 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7141 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7142
7143 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7144 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7145 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7146 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01007147
7148 // Shift count result from 8 high bits to low bits.
7149 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01007150
Wang Pengcheng610b9e22024-03-29 15:38:39 +08007151 auto IsMulSupported = [this](const LLT Ty) {
7152 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7153 return Action == Legal || Action == WidenScalar || Action == Custom;
7154 };
7155 if (IsMulSupported(Ty)) {
7156 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7157 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7158 } else {
7159 auto ResTmp = B8Count;
7160 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7161 auto ShiftC = B.buildConstant(Ty, Shift);
7162 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7163 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7164 }
7165 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7166 }
Petar Avramoviccbf03aee2020-01-27 09:59:50 +01007167 MI.eraseFromParent();
7168 return Legalized;
7169 }
Aditya Nandakumarc0333f72018-08-21 17:30:31 +00007170 }
7171}
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007172
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007173// Check that (every element of) Reg is undef or not an exact multiple of BW.
7174static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7175 Register Reg, unsigned BW) {
7176 return matchUnaryPredicate(
7177 MRI, Reg,
7178 [=](const Constant *C) {
7179 // Null constant here means an undef.
7180 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7181 return !CI || CI->getValue().urem(BW) != 0;
7182 },
7183 /*AllowUndefs*/ true);
7184}
7185
7186LegalizerHelper::LegalizeResult
7187LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007188 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007189 LLT Ty = MRI.getType(Dst);
7190 LLT ShTy = MRI.getType(Z);
7191
7192 unsigned BW = Ty.getScalarSizeInBits();
Matt Arsenault14b03b42021-03-29 17:26:49 -04007193
7194 if (!isPowerOf2_32(BW))
7195 return UnableToLegalize;
7196
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007197 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7198 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7199
7200 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7201 // fshl X, Y, Z -> fshr X, Y, -Z
7202 // fshr X, Y, Z -> fshl X, Y, -Z
7203 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7204 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7205 } else {
7206 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7207 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7208 auto One = MIRBuilder.buildConstant(ShTy, 1);
7209 if (IsFSHL) {
7210 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7211 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7212 } else {
7213 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7214 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7215 }
7216
7217 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7218 }
7219
7220 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7221 MI.eraseFromParent();
7222 return Legalized;
7223}
7224
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  // Lower G_FSHL/G_FSHR into plain shl/lshr/or sequences. Unlike
  // lowerFunnelShiftWithInverse, this works for any bit width.
  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  Register ShX, ShY;        // Shifted copies of X and Y, OR'd at the end.
  Register ShAmt, InvShAmt; // Effective amount and its complement wrt BW.

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // Z % BW is known non-zero, so BW - (Z % BW) < BW and both variable
    // shifts are in range:
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // Z % BW may be zero, so use BW - 1 - (Z % BW) plus a fixed pre-shift by
    // one to keep every variable shift strictly below BW:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      // Non-power-of-2 width: fall back to a real urem.
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  // The two halves occupy disjoint bit ranges, so the OR can carry the
  // disjoint flag.
  MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
  MI.eraseFromParent();
  return Legalized;
}
7279
7280LegalizerHelper::LegalizeResult
7281LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7282 // These operations approximately do the following (while avoiding undefined
7283 // shifts by BW):
7284 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7285 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7286 Register Dst = MI.getOperand(0).getReg();
7287 LLT Ty = MRI.getType(Dst);
7288 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7289
7290 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7291 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
Matt Arsenault14b03b42021-03-29 17:26:49 -04007292
7293 // TODO: Use smarter heuristic that accounts for vector legalization.
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007294 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7295 return lowerFunnelShiftAsShifts(MI);
Matt Arsenault14b03b42021-03-29 17:26:49 -04007296
7297 // This only works for powers of 2, fallback to shifts if it fails.
7298 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7299 if (Result == UnableToLegalize)
7300 return lowerFunnelShiftAsShifts(MI);
7301 return Result;
Matt Arsenaultb24436a2020-03-19 22:48:13 -04007302}
7303
Tuan Chuong Goha40c9842023-08-17 16:31:54 +01007304LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7305 auto [Dst, Src] = MI.getFirst2Regs();
7306 LLT DstTy = MRI.getType(Dst);
7307 LLT SrcTy = MRI.getType(Src);
7308
7309 uint32_t DstTySize = DstTy.getSizeInBits();
7310 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7311 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7312
7313 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7314 !isPowerOf2_32(SrcTyScalarSize))
7315 return UnableToLegalize;
7316
7317 // The step between extend is too large, split it by creating an intermediate
7318 // extend instruction
7319 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7320 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7321 // If the destination type is illegal, split it into multiple statements
7322 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7323 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7324 // Unmerge the vector
7325 LLT EltTy = MidTy.changeElementCount(
7326 MidTy.getElementCount().divideCoefficientBy(2));
7327 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7328
7329 // ZExt the vectors
7330 LLT ZExtResTy = DstTy.changeElementCount(
7331 DstTy.getElementCount().divideCoefficientBy(2));
7332 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7333 {UnmergeSrc.getReg(0)});
7334 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7335 {UnmergeSrc.getReg(1)});
7336
7337 // Merge the ending vectors
7338 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7339
7340 MI.eraseFromParent();
7341 return Legalized;
7342 }
7343 return UnableToLegalize;
7344}
7345
chuongg3d88d9832023-10-11 16:05:25 +01007346LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7347 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7348 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7349 // Similar to how operand splitting is done in SelectiondDAG, we can handle
7350 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7351 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7352 // %lo16(<4 x s16>) = G_TRUNC %inlo
7353 // %hi16(<4 x s16>) = G_TRUNC %inhi
7354 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7355 // %res(<8 x s8>) = G_TRUNC %in16
7356
7357 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7358
7359 Register DstReg = MI.getOperand(0).getReg();
7360 Register SrcReg = MI.getOperand(1).getReg();
7361 LLT DstTy = MRI.getType(DstReg);
7362 LLT SrcTy = MRI.getType(SrcReg);
7363
7364 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7365 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7366 isPowerOf2_32(SrcTy.getNumElements()) &&
7367 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7368 // Split input type.
7369 LLT SplitSrcTy = SrcTy.changeElementCount(
7370 SrcTy.getElementCount().divideCoefficientBy(2));
7371
7372 // First, split the source into two smaller vectors.
7373 SmallVector<Register, 2> SplitSrcs;
chuongg3fcfe1b62024-01-15 16:40:39 +00007374 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
chuongg3d88d9832023-10-11 16:05:25 +01007375
7376 // Truncate the splits into intermediate narrower elements.
7377 LLT InterTy;
7378 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7379 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7380 else
7381 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7382 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
7383 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
7384 }
7385
7386 // Combine the new truncates into one vector
7387 auto Merge = MIRBuilder.buildMergeLikeInstr(
7388 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7389
7390 // Truncate the new vector to the final result type
7391 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7392 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7393 else
7394 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7395
7396 MI.eraseFromParent();
7397
7398 return Legalized;
7399 }
7400 return UnableToLegalize;
7401}
7402
Amara Emersonf5e9be62021-03-26 15:27:15 -07007403LegalizerHelper::LegalizeResult
7404LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007405 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
Amara Emersonf5e9be62021-03-26 15:27:15 -07007406 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7407 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7408 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7409 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7410 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7411 MI.eraseFromParent();
7412 return Legalized;
7413}
7414
LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
  // Lower G_ROTL/G_ROTR, trying progressively cheaper strategies: the
  // reverse rotate, a funnel shift, then a generic shift/or expansion.
  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();

  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;

  MIRBuilder.setInstrAndDebugLoc(MI);

  // If a rotate in the other direction is supported, use it.
  // (rot x, c) == (revrot x, -c); the negation only matches when the amount
  // wraps at a power-of-2 width.
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
      isPowerOf2_32(EltSizeInBits))
    return lowerRotateWithReverseRotate(MI);

  // If a funnel shift is supported, use it: a rotate is a funnel shift with
  // both value operands equal to Src.
  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  bool IsFShLegal = false;
  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
                                Register R3) {
      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
      MI.eraseFromParent();
      return Legalized;
    };
    // If a funnel shift in the other direction is supported, use it.
    if (IsFShLegal) {
      return buildFunnelShift(FShOpc, Dst, Src, Amt);
    } else if (isPowerOf2_32(EltSizeInBits)) {
      // NOTE(review): the negated amount is built with DstTy even though Amt
      // has AmtTy; this assumes the two types match here -- confirm.
      Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
      return buildFunnelShift(RevFsh, Dst, Src, Amt);
    }
  }

  // Fallback: expand to two shifts OR'd together.
  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
  Register ShVal;
  Register RevShiftVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // Masking with w - 1 both reduces the amount modulo w and keeps the
    // reverse-shift amount strictly below w:
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
  } else {
    // Non-power-of-2 width: reduce with urem, and pre-shift the reverse side
    // by one so the variable amount w - 1 - (c % w) stays below w:
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto One = MIRBuilder.buildConstant(AmtTy, 1);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
  }
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
  return Legalized;
}
7481
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // The emitted code implements the following pseudocode:
  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  // lz = clz(u). ZERO_UNDEF is fine: for u == 0 the select below ignores it.
  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  // e = biased exponent 127 + 63 - lz, or 0 when the input is zero.
  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  // Normalize: shift the leading 1 up to bit 63, then clear it (it becomes
  // the implicit leading one of the IEEE encoding).
  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  // t = the 40 bits below the 23-bit mantissa, used for rounding.
  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  // v = (e << 23) | top 23 mantissa bits.
  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  // r = round-to-nearest-even increment: 1 if t is above the halfway point,
  // v & 1 when exactly halfway (ties to even), else 0.
  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}
7539
Evgenii Kudriashove9cb4402024-09-25 17:15:36 +03007540// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
7541// operations and G_SITOFP
7542LegalizerHelper::LegalizeResult
7543LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
7544 auto [Dst, Src] = MI.getFirst2Regs();
7545 const LLT S64 = LLT::scalar(64);
7546 const LLT S32 = LLT::scalar(32);
7547 const LLT S1 = LLT::scalar(1);
7548
7549 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7550
7551 // For i64 < INT_MAX we simply reuse SITOFP.
7552 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
7553 // saved before division, convert to float by SITOFP, multiply the result
7554 // by 2.
7555 auto One = MIRBuilder.buildConstant(S64, 1);
7556 auto Zero = MIRBuilder.buildConstant(S64, 0);
7557 // Result if Src < INT_MAX
7558 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
7559 // Result if Src >= INT_MAX
7560 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
7561 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
7562 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
7563 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
7564 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
7565 // Check if the original value is larger than INT_MAX by comparing with
7566 // zero to pick one of the two conversions.
7567 auto IsLarge =
7568 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
7569 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
7570
7571 MI.eraseFromParent();
7572 return Legalized;
7573}
7574
// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
// IEEE double representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

  // We create double value from 32 bit parts with 32 exponent difference.
  // Note that + and - are float operations that adjust the implicit leading
  // one, the bases 2^52 and 2^84 are for illustrative purposes.
  //
  // X = 2^52 * 1.0...LowBits
  // Y = 2^84 * 1.0...HighBits
  // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
  //         = - 2^52 * 1.0...HighBits
  // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
  auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
  auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
  // Bit pattern of the double 2^84 + 2^52, subtracted as a float below.
  auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
  auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
  auto HalfWidth = MIRBuilder.buildConstant(S64, 32);

  // OR the 32 low bits into the mantissa of 2^52, and the 32 high bits into
  // the mantissa of 2^84 (integer OR on the raw bit patterns).
  auto LowBits = MIRBuilder.buildTrunc(S32, Src);
  LowBits = MIRBuilder.buildZExt(S64, LowBits);
  auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
  auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
  auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
  auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
  MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);

  MI.eraseFromParent();
  return Legalized;
}
7611
Matt Arsenaulta1282922020-07-15 11:10:54 -04007612LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007613 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007614
Matt Arsenaultbc276c62019-11-15 11:59:12 +05307615 if (SrcTy == LLT::scalar(1)) {
7616 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
7617 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7618 MIRBuilder.buildSelect(Dst, Src, True, False);
7619 MI.eraseFromParent();
7620 return Legalized;
7621 }
7622
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007623 if (SrcTy != LLT::scalar(64))
7624 return UnableToLegalize;
7625
Evgenii Kudriashove9cb4402024-09-25 17:15:36 +03007626 if (DstTy == LLT::scalar(32))
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007627 // TODO: SelectionDAG has several alternative expansions to port which may
Evgenii Kudriashove9cb4402024-09-25 17:15:36 +03007628 // be more reasonable depending on the available instructions. We also need
7629 // a more advanced mechanism to choose an optimal version depending on
7630 // target features such as sitofp or CTLZ availability.
7631 return lowerU64ToF32WithSITOFP(MI);
7632
7633 if (DstTy == LLT::scalar(64))
7634 return lowerU64ToF64BitFloatOps(MI);
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007635
7636 return UnableToLegalize;
7637}
7638
Matt Arsenaulta1282922020-07-15 11:10:54 -04007639LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08007640 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007641
7642 const LLT S64 = LLT::scalar(64);
7643 const LLT S32 = LLT::scalar(32);
7644 const LLT S1 = LLT::scalar(1);
7645
Matt Arsenaultbc276c62019-11-15 11:59:12 +05307646 if (SrcTy == S1) {
7647 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
7648 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7649 MIRBuilder.buildSelect(Dst, Src, True, False);
7650 MI.eraseFromParent();
7651 return Legalized;
7652 }
7653
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007654 if (SrcTy != S64)
7655 return UnableToLegalize;
7656
7657 if (DstTy == S32) {
7658 // signed cl2f(long l) {
7659 // long s = l >> 63;
7660 // float r = cul2f((l + s) ^ s);
7661 // return s ? -r : r;
7662 // }
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00007663 Register L = Src;
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007664 auto SignBit = MIRBuilder.buildConstant(S64, 63);
7665 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
7666
7667 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
7668 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
7669 auto R = MIRBuilder.buildUITOFP(S32, Xor);
7670
7671 auto RNeg = MIRBuilder.buildFNeg(S32, R);
7672 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
7673 MIRBuilder.buildConstant(S64, 0));
7674 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
Matt Arsenault350ee7fb2020-06-12 10:20:07 -04007675 MI.eraseFromParent();
Matt Arsenault02b5ca82019-05-17 23:05:13 +00007676 return Legalized;
7677 }
7678
7679 return UnableToLegalize;
7680}
Matt Arsenault6f74f552019-07-01 17:18:03 +00007681
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  // Lower G_FPTOUI in terms of G_FPTOSI plus compensation for values that do
  // not fit in a signed integer of the destination width.
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // Only s32/s64 float sources and s32/s64 integer destinations are handled.
  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  // 2^Exp as an integer: just the sign bit of the destination width...
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  // ...and the same value as a source-typed floating-point constant.
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getZero(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  // Result used when Src < 2^Exp.
  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  // ULT (unordered-or-less-than) routes NaN to the plain FPTOSI result too.
  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}
7721
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  // Lower G_FPTOSI by decoding the IEEE-754 fields with integer operations.
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Biased exponent: bits [30:23] of the f32 encoding.
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Smear the sign bit across the whole value (0 or -1) and widen it to the
  // destination type; used below to negate the magnitude branchlessly.
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  // Mantissa: bits [22:0], with the implicit leading one (bit 23) restored.
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Unbias the exponent (bias 127) and compute both possible mantissa
  // shifts: left when Exponent > 23, right when Exponent <= 23.
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all ones and is
  // a no-op when Sign is zero.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // A negative unbiased exponent means the magnitude is below 1 -> result 0.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}
7785
// Lower G_FPTOSI_SAT / G_FPTOUI_SAT to an unsaturated conversion plus
// clamping. The result saturates at the destination type's min/max integer
// values; a NaN input yields 0 in the signed case and MinInt (== 0) in the
// unsigned case.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
  unsigned SatWidth = DstTy.getScalarSizeInBits();

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth);
    MaxInt = APInt::getMaxValue(SatWidth);
  }

  // Convert the integer bounds into the source FP type, tracking whether the
  // conversion was exact. rmTowardZero keeps an inexact bound on the
  // representable side of the true bound.
  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
  APFloat MinFloat(Semantics);
  APFloat MaxFloat(Semantics);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  // If the integer bounds are exactly representable as floats, emit a
  // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
  // and selects.
  if (AreExactFloatBounds) {
    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    // (The OGT compare is false for NaN, so the select picks MinFloat; the
    // "Max"/"Min" names refer to taking the max/min of Src and the bound.)
    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
                                     SrcTy.changeElementSize(1), Src, MaxC);
    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
    // Clamp by MaxFloat from above. NaN cannot occur.
    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
    auto MinP =
        MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
                             MinC, MachineInstr::FmNoNans);
    auto Min =
        MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
    // Convert clamped value to integer. In the unsigned case we're done,
    // because we mapped NaN to MinFloat, which will cast to zero.
    if (!IsSigned) {
      MIRBuilder.buildFPTOUI(Dst, Min);
      MI.eraseFromParent();
      return Legalized;
    }

    // Otherwise, select 0 if Src is NaN.
    // (FCMP_UNO Src, Src is true exactly when Src is NaN.)
    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
    auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                       DstTy.changeElementSize(1), Src, Src);
    MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
                           FpToInt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
                          : MIRBuilder.buildFPTOUI(DstTy, Src);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  auto ULT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MinFloat));
  auto Max = MIRBuilder.buildSelect(
      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
  // If Src OGT MaxFloat, select MaxInt.
  auto OGT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MaxFloat));

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned) {
    MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
                           Max);
    MI.eraseFromParent();
    return Legalized;
  }

  // Otherwise, select 0 if Src is NaN.
  auto Min = MIRBuilder.buildSelect(
      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
  auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                     DstTy.changeElementSize(1), Src, Src);
  MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
  MI.eraseFromParent();
  return Legalized;
}
7885
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
// Operates on the raw f64 bit pattern: splits it into two 32-bit halves,
// re-biases the exponent, builds the f16 mantissa with a sticky bit, and
// handles denormal, overflow and Inf/NaN cases explicitly.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);

  auto [Dst, Src] = MI.getFirst2Regs();
  assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
         MRI.getType(Src).getScalarType() == LLT::scalar(64));

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  // With unsafe FP math a double rounding through f32 is acceptable.
  if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
    unsigned Flags = MI.getFlags();
    auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
    MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
    MI.eraseFromParent();
    return Legalized;
  }

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;

  // U = low 32 bits of the f64, UH = high 32 bits (sign, exponent, and the
  // top 20 mantissa bits).
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // E = biased f64 exponent (bits 20..30 of the high word).
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  // M = high mantissa bits moved toward the f16 mantissa position; the 0xffe
  // mask keeps bit 0 clear so it can hold the sticky bit below.
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // MaskedSig = all mantissa bits that are discarded by the narrowing
  // (the low 9 bits of UH plus the entire low word).
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  // Sticky bit: set bit 0 of M if any discarded mantissa bit is nonzero.
  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  // I is the Inf/NaN result: exponent all-ones, with a quiet-NaN mantissa bit
  // (0x0200) when the source mantissa was nonzero.
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);
  // Normal-number encoding before rounding.
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13);
  // Denormal shift amount for results too small to be normal f16 values.
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // Significand with the implicit leading bit (0x1000) made explicit.
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  // D = denormalized significand; OR in a sticky bit if the shift dropped
  // any set bits (detected by shifting back and comparing).
  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Choose the denormal encoding when the re-biased exponent is below 1.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest even using the low 3 bits (guard/round/sticky):
  // increment when the low bits are 3 (tie, round to even) or greater than 5.
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // Overflow: a biased f16 exponent above 30 becomes infinity (0x7c00).
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 corresponds to an all-ones f64 exponent (2047 - 1023 + 15),
  // i.e. the source was Inf or NaN: use the precomputed I. (Despite the
  // "Gt" in the name, this is an equality test.)
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  // Bit 31 of the high word moves to bit 15, the f16 sign position.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
8001
8002LegalizerHelper::LegalizeResult
Matt Arsenaulta1282922020-07-15 11:10:54 -04008003LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008004 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
Matt Arsenaultbfbfa182020-01-18 10:08:11 -05008005 const LLT S64 = LLT::scalar(64);
8006 const LLT S16 = LLT::scalar(16);
8007
8008 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8009 return lowerFPTRUNC_F64_TO_F16(MI);
8010
8011 return UnableToLegalize;
8012}
8013
Matt Arsenault7cd8a022020-07-17 11:01:15 -04008014LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008015 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
Matt Arsenault7cd8a022020-07-17 11:01:15 -04008016 LLT Ty = MRI.getType(Dst);
8017
8018 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8019 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8020 MI.eraseFromParent();
8021 return Legalized;
8022}
8023
Matt Arsenault6f74f552019-07-01 17:18:03 +00008024static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8025 switch (Opc) {
8026 case TargetOpcode::G_SMIN:
8027 return CmpInst::ICMP_SLT;
8028 case TargetOpcode::G_SMAX:
8029 return CmpInst::ICMP_SGT;
8030 case TargetOpcode::G_UMIN:
8031 return CmpInst::ICMP_ULT;
8032 case TargetOpcode::G_UMAX:
8033 return CmpInst::ICMP_UGT;
8034 default:
8035 llvm_unreachable("not in integer min/max");
8036 }
8037}
8038
Matt Arsenaulta1282922020-07-15 11:10:54 -04008039LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008040 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
Matt Arsenault6f74f552019-07-01 17:18:03 +00008041
8042 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8043 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8044
8045 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8046 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8047
8048 MI.eraseFromParent();
8049 return Legalized;
8050}
Matt Arsenaultb1843e12019-07-09 23:34:29 +00008051
// Lower a three-way comparison (G_SCMP/G_UCMP), producing -1, 0 or +1 for
// less-than, equal and greater-than respectively.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
  GSUCmp *Cmp = cast<GSUCmp>(&MI);

  Register Dst = Cmp->getReg(0);
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Cmp->getReg(1));
  // Boolean type with the same vector shape as the result.
  LLT CmpTy = DstTy.changeElementSize(1);

  // Signed vs. unsigned flavors of the two comparisons we need.
  CmpInst::Predicate LTPredicate = Cmp->isSigned()
                                       ? CmpInst::Predicate::ICMP_SLT
                                       : CmpInst::Predicate::ICMP_ULT;
  CmpInst::Predicate GTPredicate = Cmp->isSigned()
                                       ? CmpInst::Predicate::ICMP_SGT
                                       : CmpInst::Predicate::ICMP_UGT;

  auto Zero = MIRBuilder.buildConstant(DstTy, 0);
  auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
                                   Cmp->getRHSReg());
  auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
                                   Cmp->getRHSReg());

  // Either expand with selects (when the target prefers that, or when the
  // boolean contents give us nothing to compute with), or form the result
  // arithmetically as ext(IsGT) - ext(IsLT).
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
  auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
  if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
      BC == TargetLowering::UndefinedBooleanContent) {
    // select(IsLT, -1, select(IsGT, 1, 0))
    auto One = MIRBuilder.buildConstant(DstTy, 1);
    auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);

    auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
    MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
  } else {
    // With zero-or-negative-one booleans the extended values are 0/-1, so
    // compute LT - GT instead: GT gives 0 - (-1) = 1, LT gives -1 - 0 = -1.
    if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
      std::swap(IsGT, IsLT);
    // Extend boolean results to DstTy, which is at least i2, before subtracting
    // them.
    unsigned BoolExtOp =
        MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
    IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
    IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
    MIRBuilder.buildSub(Dst, IsGT, IsLT);
  }

  MI.eraseFromParent();
  return Legalized;
}
8098
8099LegalizerHelper::LegalizeResult
Matt Arsenaulta1282922020-07-15 11:10:54 -04008100LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008101 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
Matt Arsenaultb1843e12019-07-09 23:34:29 +00008102 const int Src0Size = Src0Ty.getScalarSizeInBits();
8103 const int Src1Size = Src1Ty.getScalarSizeInBits();
8104
8105 auto SignBitMask = MIRBuilder.buildConstant(
8106 Src0Ty, APInt::getSignMask(Src0Size));
8107
8108 auto NotSignBitMask = MIRBuilder.buildConstant(
8109 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8110
Jay Foad5cf64122021-01-29 14:41:58 +00008111 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8112 Register And1;
Matt Arsenaultb1843e12019-07-09 23:34:29 +00008113 if (Src0Ty == Src1Ty) {
Jay Foad5cf64122021-01-29 14:41:58 +00008114 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
Matt Arsenaultb1843e12019-07-09 23:34:29 +00008115 } else if (Src0Size > Src1Size) {
8116 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8117 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8118 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
Jay Foad5cf64122021-01-29 14:41:58 +00008119 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
Matt Arsenaultb1843e12019-07-09 23:34:29 +00008120 } else {
8121 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8122 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8123 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
Jay Foad5cf64122021-01-29 14:41:58 +00008124 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
Matt Arsenaultb1843e12019-07-09 23:34:29 +00008125 }
8126
8127 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8128 // constants are a nan and -0.0, but the final result should preserve
8129 // everything.
Jay Foad5cf64122021-01-29 14:41:58 +00008130 unsigned Flags = MI.getFlags();
Matt Arsenault2df23732024-06-28 23:03:39 +02008131
8132 // We masked the sign bit and the not-sign bit, so these are disjoint.
8133 Flags |= MachineInstr::Disjoint;
8134
Jay Foad5cf64122021-01-29 14:41:58 +00008135 MIRBuilder.buildOr(Dst, And0, And1, Flags);
Matt Arsenaultb1843e12019-07-09 23:34:29 +00008136
8137 MI.eraseFromParent();
8138 return Legalized;
8139}
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00008140
8141LegalizerHelper::LegalizeResult
Matt Arsenault37c341d2025-02-20 10:18:03 +07008142LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00008143 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8144 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8145
Amara Emerson719024a2023-02-23 16:35:39 -08008146 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00008147 LLT Ty = MRI.getType(Dst);
8148
Matt Arsenault37c341d2025-02-20 10:18:03 +07008149 if (!MI.getFlag(MachineInstr::FmNoNans)) {
Matt Arsenault6ce1b4f2019-07-10 16:31:19 +00008150 // Insert canonicalizes if it's possible we need to quiet to get correct
8151 // sNaN behavior.
8152
8153 // Note this must be done here, and not as an optimization combine in the
8154 // absence of a dedicate quiet-snan instruction as we're using an
8155 // omni-purpose G_FCANONICALIZE.
8156 if (!isKnownNeverSNaN(Src0, MRI))
8157 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8158
8159 if (!isKnownNeverSNaN(Src1, MRI))
8160 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8161 }
8162
8163 // If there are no nans, it's safe to simply replace this with the non-IEEE
8164 // version.
8165 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8166 MI.eraseFromParent();
8167 return Legalized;
8168}
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008169
Matt Arsenault4d339182019-09-13 00:44:35 +00008170LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8171 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8172 Register DstReg = MI.getOperand(0).getReg();
8173 LLT Ty = MRI.getType(DstReg);
8174 unsigned Flags = MI.getFlags();
8175
8176 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8177 Flags);
8178 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8179 MI.eraseFromParent();
8180 return Legalized;
8181}
8182
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008183LegalizerHelper::LegalizeResult
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008184LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008185 auto [DstReg, X] = MI.getFirst2Regs();
Matt Arsenault19a03502020-03-14 14:52:48 -04008186 const unsigned Flags = MI.getFlags();
8187 const LLT Ty = MRI.getType(DstReg);
8188 const LLT CondTy = Ty.changeElementSize(1);
8189
8190 // round(x) =>
8191 // t = trunc(x);
8192 // d = fabs(x - t);
Matt Arsenault1328a852023-09-19 09:14:17 +03008193 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8194 // return t + o;
Matt Arsenault19a03502020-03-14 14:52:48 -04008195
8196 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8197
8198 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8199 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
Matt Arsenault1328a852023-09-19 09:14:17 +03008200
Matt Arsenault19a03502020-03-14 14:52:48 -04008201 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
Matt Arsenault1328a852023-09-19 09:14:17 +03008202 auto Cmp =
8203 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
Matt Arsenault19a03502020-03-14 14:52:48 -04008204
Matt Arsenault1328a852023-09-19 09:14:17 +03008205 // Could emit G_UITOFP instead
8206 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8207 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8208 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8209 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
Matt Arsenault19a03502020-03-14 14:52:48 -04008210
Matt Arsenault1328a852023-09-19 09:14:17 +03008211 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
Matt Arsenault19a03502020-03-14 14:52:48 -04008212
8213 MI.eraseFromParent();
8214 return Legalized;
8215}
8216
Amara Emerson719024a2023-02-23 16:35:39 -08008217LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
8218 auto [DstReg, SrcReg] = MI.getFirst2Regs();
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008219 unsigned Flags = MI.getFlags();
8220 LLT Ty = MRI.getType(DstReg);
8221 const LLT CondTy = Ty.changeElementSize(1);
8222
8223 // result = trunc(src);
8224 // if (src < 0.0 && src != result)
8225 // result += -1.0.
8226
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008227 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
Matt Arsenault19a03502020-03-14 14:52:48 -04008228 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008229
8230 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8231 SrcReg, Zero, Flags);
8232 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8233 SrcReg, Trunc, Flags);
8234 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8235 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8236
Matt Arsenault19a03502020-03-14 14:52:48 -04008237 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
Matt Arsenaultf3de8ab2019-12-24 14:49:31 -05008238 MI.eraseFromParent();
8239 return Legalized;
8240}
8241
// Lower G_MERGE_VALUES by zero-extending each source part into a wide
// scalar, shifting it to its bit offset and OR-ing the parts together.
// Part 0 occupies the lowest bits.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
  unsigned PartSize = Src0Ty.getSizeInBits();

  // Accumulate into an integer scalar of the full destination width; the
  // first part needs no shift.
  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    // The last OR can write directly into DstReg, but only when no final
    // int-to-ptr cast is needed (i.e. the wide type is the destination type).
    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  // A pointer result needs a final G_INTTOPTR from the accumulated integer —
  // unless the address space is non-integral, where that cast is invalid.
  if (DstTy.isPointer()) {
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
8279
8280LegalizerHelper::LegalizeResult
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008281LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
8282 const unsigned NumDst = MI.getNumOperands() - 1;
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008283 Register SrcReg = MI.getOperand(NumDst).getReg();
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008284 Register Dst0Reg = MI.getOperand(0).getReg();
8285 LLT DstTy = MRI.getType(Dst0Reg);
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008286 if (DstTy.isPointer())
8287 return UnableToLegalize; // TODO
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008288
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008289 SrcReg = coerceToScalar(SrcReg);
8290 if (!SrcReg)
8291 return UnableToLegalize;
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008292
8293 // Expand scalarizing unmerge as bitcast to integer and shift.
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008294 LLT IntTy = MRI.getType(SrcReg);
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008295
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008296 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008297
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008298 const unsigned DstSize = DstTy.getSizeInBits();
8299 unsigned Offset = DstSize;
8300 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8301 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8302 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8303 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008304 }
8305
Matt Arsenault3af85fa2020-03-29 18:04:53 -04008306 MI.eraseFromParent();
8307 return Legalized;
Matt Arsenaultd9d30a42019-08-01 19:10:05 +00008308}
Matt Arsenault690645b2019-08-13 16:09:07 +00008309
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008310/// Lower a vector extract or insert by writing the vector to a stack temporary
8311/// and reloading the element or vector.
Matt Arsenault0b7de792020-07-26 21:25:10 -04008312///
8313/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8314/// =>
8315/// %stack_temp = G_FRAME_INDEX
8316/// G_STORE %vec, %stack_temp
8317/// %idx = clamp(%idx, %vec.getNumElements())
8318/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8319/// %dst = G_LOAD %element_ptr
8320LegalizerHelper::LegalizeResult
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008321LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
Matt Arsenault0b7de792020-07-26 21:25:10 -04008322 Register DstReg = MI.getOperand(0).getReg();
8323 Register SrcVec = MI.getOperand(1).getReg();
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008324 Register InsertVal;
8325 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8326 InsertVal = MI.getOperand(2).getReg();
8327
8328 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8329
Matt Arsenault0b7de792020-07-26 21:25:10 -04008330 LLT VecTy = MRI.getType(SrcVec);
8331 LLT EltTy = VecTy.getElementType();
Petar Avramovic29f88b92021-12-23 14:09:51 +01008332 unsigned NumElts = VecTy.getNumElements();
8333
8334 int64_t IdxVal;
8335 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8336 SmallVector<Register, 8> SrcRegs;
chuongg3fcfe1b62024-01-15 16:40:39 +00008337 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
Petar Avramovic29f88b92021-12-23 14:09:51 +01008338
8339 if (InsertVal) {
8340 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
Diana Picusf95a5fb2023-01-09 11:59:00 +01008341 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
Petar Avramovic29f88b92021-12-23 14:09:51 +01008342 } else {
8343 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8344 }
8345
8346 MI.eraseFromParent();
8347 return Legalized;
8348 }
8349
Matt Arsenault0b7de792020-07-26 21:25:10 -04008350 if (!EltTy.isByteSized()) { // Not implemented.
8351 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8352 return UnableToLegalize;
8353 }
8354
8355 unsigned EltBytes = EltTy.getSizeInBytes();
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008356 Align VecAlign = getStackTemporaryAlignment(VecTy);
8357 Align EltAlign;
Matt Arsenault0b7de792020-07-26 21:25:10 -04008358
8359 MachinePointerInfo PtrInfo;
Sander de Smalen81b7f112023-11-22 08:52:53 +00008360 auto StackTemp = createStackTemporary(
8361 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008362 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
Matt Arsenault0b7de792020-07-26 21:25:10 -04008363
8364 // Get the pointer to the element, and be sure not to hit undefined behavior
8365 // if the index is out of bounds.
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008366 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
Matt Arsenault0b7de792020-07-26 21:25:10 -04008367
Matt Arsenault0b7de792020-07-26 21:25:10 -04008368 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8369 int64_t Offset = IdxVal * EltBytes;
8370 PtrInfo = PtrInfo.getWithOffset(Offset);
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008371 EltAlign = commonAlignment(VecAlign, Offset);
Matt Arsenault0b7de792020-07-26 21:25:10 -04008372 } else {
8373 // We lose information with a variable offset.
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008374 EltAlign = getStackTemporaryAlignment(EltTy);
8375 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
Matt Arsenault0b7de792020-07-26 21:25:10 -04008376 }
8377
Matt Arsenault1ad051dd2020-07-27 21:13:40 -04008378 if (InsertVal) {
8379 // Write the inserted element
8380 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8381
8382 // Reload the whole vector.
8383 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8384 } else {
8385 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8386 }
8387
Matt Arsenault0b7de792020-07-26 21:25:10 -04008388 MI.eraseFromParent();
8389 return Legalized;
8390}
8391
Matt Arsenault690645b2019-08-13 16:09:07 +00008392LegalizerHelper::LegalizeResult
8393LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008394 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8395 MI.getFirst3RegLLTs();
Matt Arsenault690645b2019-08-13 16:09:07 +00008396 LLT IdxTy = LLT::scalar(32);
8397
Eli Friedmane68e4cb2020-01-13 15:32:45 -08008398 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
Matt Arsenault690645b2019-08-13 16:09:07 +00008399 Register Undef;
8400 SmallVector<Register, 32> BuildVec;
Jay Foad71ca53b2023-09-04 18:32:43 +01008401 LLT EltTy = DstTy.getScalarType();
Matt Arsenault690645b2019-08-13 16:09:07 +00008402
8403 for (int Idx : Mask) {
8404 if (Idx < 0) {
8405 if (!Undef.isValid())
8406 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8407 BuildVec.push_back(Undef);
8408 continue;
8409 }
8410
Aditya Nandakumar615eee62019-08-13 21:49:11 +00008411 if (Src0Ty.isScalar()) {
8412 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8413 } else {
Aditya Nandakumarc65ac862019-08-14 01:23:33 +00008414 int NumElts = Src0Ty.getNumElements();
Aditya Nandakumar615eee62019-08-13 21:49:11 +00008415 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8416 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8417 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8418 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8419 BuildVec.push_back(Extract.getReg(0));
8420 }
Matt Arsenault690645b2019-08-13 16:09:07 +00008421 }
8422
Jay Foad71ca53b2023-09-04 18:32:43 +01008423 if (DstTy.isScalar())
8424 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8425 else
8426 MIRBuilder.buildBuildVector(DstReg, BuildVec);
Matt Arsenault690645b2019-08-13 16:09:07 +00008427 MI.eraseFromParent();
8428 return Legalized;
8429}
Amara Emersone20b91c2019-08-27 19:54:27 +00008430
// Lower G_VECTOR_COMPRESS through a stack temporary: walk the lanes in
// order, storing each selected element to the next free slot, then reload
// the whole vector. Unselected trailing slots keep the passthru contents
// (pre-stored when a passthru is present).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) {
  auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
      MI.getFirst4RegLLTs();

  if (VecTy.isScalableVector())
    report_fatal_error("Cannot expand masked_compress for scalable vectors.");

  // Stack slot big enough for the whole vector.
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  MachinePointerInfo PtrInfo;
  Register StackPtr =
      createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
                           PtrInfo)
          .getReg(0);
  MachinePointerInfo ValPtrInfo =
      MachinePointerInfo::getUnknownStack(*MI.getMF());

  LLT IdxTy = LLT::scalar(32);
  LLT ValTy = VecTy.getElementType();
  Align ValAlign = getStackTemporaryAlignment(ValTy);

  // Running write position: number of selected elements seen so far.
  auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);

  // An implicit-def passthru means the tail contents are don't-care.
  bool HasPassthru =
      MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;

  // Pre-fill the slot with the passthru so unwritten tail slots hold it.
  if (HasPassthru)
    MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);

  // LastWriteVal is the value the final tail slot should hold; for a splat
  // passthru it is the splat constant, otherwise it is reloaded from the
  // first tail position (index == popcount of the mask).
  Register LastWriteVal;
  std::optional<APInt> PassthruSplatVal =
      isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);

  if (PassthruSplatVal.has_value()) {
    LastWriteVal =
        MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
  } else if (HasPassthru) {
    // popcount(Mask) = number of selected elements.
    auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
    Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
                                     {LLT::scalar(32)}, {Popcount});

    Register LastElmtPtr =
        getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
    LastWriteVal =
        MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
            .getReg(0);
  }

  unsigned NumElmts = VecTy.getNumElements();
  for (unsigned I = 0; I < NumElmts; ++I) {
    auto Idx = MIRBuilder.buildConstant(IdxTy, I);
    auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
    // Unconditionally store the lane at the current output position; the
    // position only advances when the mask bit is set, so an unselected
    // lane's store is overwritten by the next selected one.
    Register ElmtPtr =
        getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
    MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);

    // Normalize the mask lane to an i1, then add it (0 or 1) to OutPos.
    LLT MaskITy = MaskTy.getElementType();
    auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
    if (MaskITy.getSizeInBits() > 1)
      MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);

    MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
    OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);

    // After the last lane, repair the slot just past the last selected
    // element: the unconditional stores above may have clobbered it with a
    // stale lane value, so rewrite it with the passthru-derived value (or
    // with the lane itself when every lane was selected).
    if (HasPassthru && I == NumElmts - 1) {
      auto EndOfVector =
          MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
      auto AllLanesSelected = MIRBuilder.buildICmp(
          CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
      // Clamp OutPos so the final store stays inside the stack slot.
      OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
                                     {OutPos, EndOfVector});
      ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));

      LastWriteVal =
          MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
              .getReg(0);
      MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
    }
  }

  // TODO: Use StackPtr's FrameIndex alignment.
  MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);

  MI.eraseFromParent();
  return Legalized;
}
8517
Momchil Velikovc1140d42023-12-04 09:44:02 +00008518Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
8519 Register AllocSize,
8520 Align Alignment,
8521 LLT PtrTy) {
Amara Emersone20b91c2019-08-27 19:54:27 +00008522 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
8523
Amara Emersone20b91c2019-08-27 19:54:27 +00008524 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
8525 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
8526
8527 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
8528 // have to generate an extra instruction to negate the alloc and then use
Daniel Sanderse74c5b92019-11-01 13:18:00 -07008529 // G_PTR_ADD to add the negative offset.
Amara Emersone20b91c2019-08-27 19:54:27 +00008530 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
Guillaume Chatelet9f5c7862020-04-03 08:10:59 +00008531 if (Alignment > Align(1)) {
8532 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
Amara Emersone20b91c2019-08-27 19:54:27 +00008533 AlignMask.negate();
8534 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
8535 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
8536 }
8537
Momchil Velikovc1140d42023-12-04 09:44:02 +00008538 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
8539}
8540
8541LegalizerHelper::LegalizeResult
8542LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
8543 const auto &MF = *MI.getMF();
8544 const auto &TFI = *MF.getSubtarget().getFrameLowering();
8545 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
8546 return UnableToLegalize;
8547
8548 Register Dst = MI.getOperand(0).getReg();
8549 Register AllocSize = MI.getOperand(1).getReg();
8550 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
8551
8552 LLT PtrTy = MRI.getType(Dst);
8553 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
8554 Register SPTmp =
8555 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
8556
Amara Emersone20b91c2019-08-27 19:54:27 +00008557 MIRBuilder.buildCopy(SPReg, SPTmp);
8558 MIRBuilder.buildCopy(Dst, SPTmp);
8559
8560 MI.eraseFromParent();
8561 return Legalized;
8562}
Matt Arsenaulta5b9c752019-10-06 01:37:35 +00008563
8564LegalizerHelper::LegalizeResult
Matt Arsenault1ca08082023-07-29 19:12:24 -04008565LegalizerHelper::lowerStackSave(MachineInstr &MI) {
8566 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
8567 if (!StackPtr)
8568 return UnableToLegalize;
8569
8570 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
8571 MI.eraseFromParent();
8572 return Legalized;
8573}
8574
8575LegalizerHelper::LegalizeResult
8576LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
8577 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
8578 if (!StackPtr)
8579 return UnableToLegalize;
8580
8581 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
8582 MI.eraseFromParent();
8583 return Legalized;
8584}
8585
/// Lower G_EXTRACT. Element-aligned extracts from a vector are rewritten as an
/// unmerge plus copy/merge of the selected elements; scalar (or
/// single-element) extracts are lowered to a shift-and-truncate of the source
/// viewed as one wide integer.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();

  // Extract sub-vector or one element
  if (SrcTy.isVector()) {
    unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
    unsigned DstSize = DstTy.getSizeInBits();

    // Only handled when the offset and width line up exactly on element
    // boundaries and stay within the source vector.
    if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
        (Offset + DstSize <= SrcTy.getSizeInBits())) {
      // Unmerge and allow access to each Src element for the artifact combiner.
      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);

      // Take element(s) we need to extract and copy it (merge them).
      SmallVector<Register, 8> SubVectorElts;
      for (unsigned Idx = Offset / SrcEltSize;
           Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
        SubVectorElts.push_back(Unmerge.getReg(Idx));
      }
      if (SubVectorElts.size() == 1)
        MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
      else
        MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);

      MI.eraseFromParent();
      return Legalized;
    }
  }

  // Scalar destination: treat the source as a wide integer, shift the wanted
  // bits down to bit 0, and truncate.
  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(DstReg, SrcReg);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(DstReg, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
Matt Arsenault4bcdcad2019-10-07 19:13:27 +00008640
/// Lower G_INSERT. Element-aligned inserts into a vector are rebuilt from
/// unmerged elements; all other cases are lowered via integer masking: zero
/// the destination bits being replaced, then OR in the (shifted) new value.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Insert sub-vector or one element
  if (DstTy.isVector() && !InsertTy.isPointer()) {
    LLT EltTy = DstTy.getElementType();
    unsigned EltSize = EltTy.getSizeInBits();
    unsigned InsertSize = InsertTy.getSizeInBits();

    // Only when offset and width line up exactly on element boundaries and
    // the inserted range stays within the destination.
    if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
        (Offset + InsertSize <= DstTy.getSizeInBits())) {
      auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
      SmallVector<Register, 8> DstElts;
      unsigned Idx = 0;
      // Elements from Src before insert start Offset
      for (; Idx < Offset / EltSize; ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      // Replace elements in Src with elements from InsertSrc
      if (InsertTy.getSizeInBits() > EltSize) {
        // Multi-element insert: unmerge it too and splice its elements in.
        auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
        for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
             ++Idx, ++i) {
          DstElts.push_back(UnmergeInsertSrc.getReg(i));
        }
      } else {
        // Single-element insert: use the value register directly.
        DstElts.push_back(InsertSrc);
        ++Idx;
      }

      // Remaining elements from Src after insert
      for (; Idx < DstTy.getNumElements(); ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
      MI.eraseFromParent();
      return Legalized;
    }
  }

  // The mask-and-or fallback below only handles scalar-like inserts.
  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  // Pointers in non-integral address spaces cannot be round-tripped through
  // integers, so the bitwise lowering is not valid for them.
  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  // View both the destination and the inserted value as integers.
  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  // Widen the inserted value and move it to its bit position.
  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  // Mask keeping every destination bit outside the inserted range.
  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008729
8730LegalizerHelper::LegalizeResult
8731LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08008732 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
8733 MI.getFirst4RegLLTs();
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008734 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
8735
Amara Emerson719024a2023-02-23 16:35:39 -08008736 LLT Ty = Dst0Ty;
8737 LLT BoolTy = Dst1Ty;
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008738
Shilei Tian3a106e52024-03-29 15:59:50 -04008739 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
8740
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008741 if (IsAdd)
Shilei Tian3a106e52024-03-29 15:59:50 -04008742 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008743 else
Shilei Tian3a106e52024-03-29 15:59:50 -04008744 MIRBuilder.buildSub(NewDst0, LHS, RHS);
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008745
8746 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
8747
8748 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8749
8750 // For an addition, the result should be less than one of the operands (LHS)
8751 // if and only if the other operand (RHS) is negative, otherwise there will
8752 // be overflow.
8753 // For a subtraction, the result should be less than one of the operands
8754 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
8755 // otherwise there will be overflow.
8756 auto ResultLowerThanLHS =
Shilei Tian3a106e52024-03-29 15:59:50 -04008757 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008758 auto ConditionRHS = MIRBuilder.buildICmp(
8759 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
8760
8761 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
Shilei Tian3a106e52024-03-29 15:59:50 -04008762
8763 MIRBuilder.buildCopy(Dst0, NewDst0);
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008764 MI.eraseFromParent();
Shilei Tian3a106e52024-03-29 15:59:50 -04008765
Matt Arsenault34ed76e2019-10-16 20:46:32 +00008766 return Legalized;
8767}
Petar Avramovic94a24e72019-12-30 11:13:22 +01008768
/// Lower G_[US](ADD|SUB)SAT using min/max operations: the signed forms clamp
/// RHS into the range that cannot overflow for the given LHS, the unsigned
/// forms clamp with a single umin, and then the plain add/sub is emitted.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    // Clamp RHS into [Lo, Hi] so the base operation cannot overflow.
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}
8843
/// Lower G_[US](ADD|SUB)SAT using the corresponding overflow-reporting
/// operation: perform the raw op, then select the saturation value when the
/// overflow flag is set.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // I.e. on overflow, saturate towards the sign of the true result: the
    // sign bit of tmp is the inverse of the overflowed result's sign.
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
8909
8910LegalizerHelper::LegalizeResult
Bevin Hansson5de6c562020-07-16 17:02:04 +02008911LegalizerHelper::lowerShlSat(MachineInstr &MI) {
8912 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8913 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8914 "Expected shlsat opcode!");
8915 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
Amara Emerson719024a2023-02-23 16:35:39 -08008916 auto [Res, LHS, RHS] = MI.getFirst3Regs();
Bevin Hansson5de6c562020-07-16 17:02:04 +02008917 LLT Ty = MRI.getType(Res);
8918 LLT BoolTy = Ty.changeElementSize(1);
8919
8920 unsigned BW = Ty.getScalarSizeInBits();
8921 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
8922 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
8923 : MIRBuilder.buildLShr(Ty, Result, RHS);
8924
8925 MachineInstrBuilder SatVal;
8926 if (IsSigned) {
8927 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
8928 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
8929 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
8930 MIRBuilder.buildConstant(Ty, 0));
8931 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
8932 } else {
8933 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
8934 }
Mirko Brkusanin4cf6dd52020-11-16 17:43:15 +01008935 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
Bevin Hansson5de6c562020-07-16 17:02:04 +02008936 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
8937
8938 MI.eraseFromParent();
8939 return Legalized;
8940}
8941
Amara Emerson719024a2023-02-23 16:35:39 -08008942LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
8943 auto [Dst, Src] = MI.getFirst2Regs();
Petar Avramovic94a24e72019-12-30 11:13:22 +01008944 const LLT Ty = MRI.getType(Src);
Matt Arsenault2e773622020-02-14 11:51:57 -05008945 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
Petar Avramovic94a24e72019-12-30 11:13:22 +01008946 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8947
8948 // Swap most and least significant byte, set remaining bytes in Res to zero.
8949 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
8950 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
8951 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8952 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
8953
8954 // Set i-th high/low byte in Res to i-th low/high byte from Src.
8955 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8956 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
8957 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8958 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
8959 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
8960 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
8961 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
8962 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
8963 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
8964 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
8965 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8966 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
8967 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
8968 }
8969 Res.getInstr()->getOperand(0).setReg(Dst);
8970
8971 MI.eraseFromParent();
8972 return Legalized;
8973}
Petar Avramovic98f72a52019-12-30 18:06:29 +01008974
8975//{ (Src & Mask) >> N } | { (Src << N) & Mask }
8976static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
Yingwei Zheng6c1932f2024-03-23 14:57:35 +08008977 MachineInstrBuilder Src, const APInt &Mask) {
Petar Avramovic98f72a52019-12-30 18:06:29 +01008978 const LLT Ty = Dst.getLLTTy(*B.getMRI());
8979 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
8980 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
8981 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8982 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8983 return B.buildOr(Dst, LHS, RHS);
8984}
8985
/// Lower G_BITREVERSE. For scalar sizes of at least 8 bits, reverse the byte
/// order with G_BSWAP and then exchange nibbles, bit pairs, and adjacent bits
/// within each byte. For sub-byte types, move every bit to its mirrored
/// position individually.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getScalarSizeInBits();

  if (Size >= 8) {
    // Bytes are already reversed by the bswap; the SwapN steps below reverse
    // the bits inside each byte.
    MachineInstrBuilder BSWAP =
        MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

    // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
    //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
    // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
    MachineInstrBuilder Swap4 =
        SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

    // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
    //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
    // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
    MachineInstrBuilder Swap2 =
        SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

    // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
    // 6|7
    //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
    // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
    SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
  } else {
    // Expand bitreverse for types smaller than 8 bits.
    MachineInstrBuilder Tmp;
    for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
      MachineInstrBuilder Tmp2;
      // Move bit J of Src to bit position... shift so source bit I lands on
      // destination bit J, then isolate bit J.
      if (I < J) {
        auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
        Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
      } else {
        auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
        Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
      }

      auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
      Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
      // OR the isolated bit into the accumulated result.
      if (I == 0)
        Tmp = Tmp2;
      else
        Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
    }
    MIRBuilder.buildCopy(Dst, Tmp);
  }

  MI.eraseFromParent();
  return Legalized;
}
Matt Arsenault0ea3c722019-12-27 19:26:51 -05009039
9040LegalizerHelper::LegalizeResult
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05009041LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
Matt Arsenault0ea3c722019-12-27 19:26:51 -05009042 MachineFunction &MF = MIRBuilder.getMF();
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05009043
9044 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9045 int NameOpIdx = IsRead ? 1 : 0;
9046 int ValRegIndex = IsRead ? 0 : 1;
9047
9048 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9049 const LLT Ty = MRI.getType(ValReg);
9050 const MDString *RegStr = cast<MDString>(
9051 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9052
Matt Arsenaultadbcc8e2020-07-31 11:41:05 -04009053 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05009054 if (!PhysReg.isValid())
Matt Arsenault0ea3c722019-12-27 19:26:51 -05009055 return UnableToLegalize;
9056
Matt Arsenaultc5c1bb32020-01-12 13:29:44 -05009057 if (IsRead)
9058 MIRBuilder.buildCopy(ValReg, PhysReg);
9059 else
9060 MIRBuilder.buildCopy(PhysReg, ValReg);
9061
Matt Arsenault0ea3c722019-12-27 19:26:51 -05009062 MI.eraseFromParent();
9063 return Legalized;
9064}
Pushpinder Singh41d66692020-08-10 05:47:50 -04009065
9066LegalizerHelper::LegalizeResult
9067LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9068 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9069 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9070 Register Result = MI.getOperand(0).getReg();
9071 LLT OrigTy = MRI.getType(Result);
9072 auto SizeInBits = OrigTy.getScalarSizeInBits();
9073 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9074
9075 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9076 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9077 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9078 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9079
9080 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9081 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9082 MIRBuilder.buildTrunc(Result, Shifted);
9083
9084 MI.eraseFromParent();
9085 return Legalized;
9086}
Amara Emerson08232192020-09-26 10:02:39 -07009087
Janek van Oirschot587747d2022-12-06 20:36:07 +00009088LegalizerHelper::LegalizeResult
9089LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
Amara Emerson719024a2023-02-23 16:35:39 -08009090 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
Matt Arsenault61f2f2c2023-03-17 09:21:57 -04009091 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
Janek van Oirschot587747d2022-12-06 20:36:07 +00009092
Matt Arsenault61f2f2c2023-03-17 09:21:57 -04009093 if (Mask == fcNone) {
Janek van Oirschot587747d2022-12-06 20:36:07 +00009094 MIRBuilder.buildConstant(DstReg, 0);
9095 MI.eraseFromParent();
9096 return Legalized;
9097 }
Matt Arsenault61f2f2c2023-03-17 09:21:57 -04009098 if (Mask == fcAllFlags) {
Janek van Oirschot587747d2022-12-06 20:36:07 +00009099 MIRBuilder.buildConstant(DstReg, 1);
9100 MI.eraseFromParent();
9101 return Legalized;
9102 }
9103
Matt Arsenault61820f82023-02-02 10:28:05 -04009104 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9105 // version
9106
Janek van Oirschot587747d2022-12-06 20:36:07 +00009107 unsigned BitSize = SrcTy.getScalarSizeInBits();
9108 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9109
9110 LLT IntTy = LLT::scalar(BitSize);
9111 if (SrcTy.isVector())
9112 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9113 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9114
9115 // Various masks.
9116 APInt SignBit = APInt::getSignMask(BitSize);
9117 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9118 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9119 APInt ExpMask = Inf;
9120 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9121 APInt QNaNBitMask =
9122 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
Kazu Hiratab7ffd962023-02-19 22:54:23 -08009123 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
Janek van Oirschot587747d2022-12-06 20:36:07 +00009124
9125 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9126 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9127 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9128 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9129 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9130
9131 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9132 auto Sign =
9133 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9134
9135 auto Res = MIRBuilder.buildConstant(DstTy, 0);
Amara Emerson719024a2023-02-23 16:35:39 -08009136 // Clang doesn't support capture of structured bindings:
9137 LLT DstTyCopy = DstTy;
Janek van Oirschot587747d2022-12-06 20:36:07 +00009138 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
Amara Emerson719024a2023-02-23 16:35:39 -08009139 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
Janek van Oirschot587747d2022-12-06 20:36:07 +00009140 };
9141
9142 // Tests that involve more than one class should be processed first.
9143 if ((Mask & fcFinite) == fcFinite) {
9144 // finite(V) ==> abs(V) u< exp_mask
9145 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9146 ExpMaskC));
9147 Mask &= ~fcFinite;
9148 } else if ((Mask & fcFinite) == fcPosFinite) {
9149 // finite(V) && V > 0 ==> V u< exp_mask
9150 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9151 ExpMaskC));
9152 Mask &= ~fcPosFinite;
9153 } else if ((Mask & fcFinite) == fcNegFinite) {
9154 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9155 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9156 ExpMaskC);
9157 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9158 appendToRes(And);
9159 Mask &= ~fcNegFinite;
9160 }
9161
Matt Arsenault61820f82023-02-02 10:28:05 -04009162 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9163 // fcZero | fcSubnormal => test all exponent bits are 0
9164 // TODO: Handle sign bit specific cases
9165 // TODO: Handle inverted case
9166 if (PartialCheck == (fcZero | fcSubnormal)) {
9167 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9168 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9169 ExpBits, ZeroC));
9170 Mask &= ~PartialCheck;
9171 }
9172 }
9173
Janek van Oirschot587747d2022-12-06 20:36:07 +00009174 // Check for individual classes.
Matt Arsenault61f2f2c2023-03-17 09:21:57 -04009175 if (FPClassTest PartialCheck = Mask & fcZero) {
Janek van Oirschot587747d2022-12-06 20:36:07 +00009176 if (PartialCheck == fcPosZero)
9177 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9178 AsInt, ZeroC));
9179 else if (PartialCheck == fcZero)
9180 appendToRes(
9181 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9182 else // fcNegZero
9183 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9184 AsInt, SignBitC));
9185 }
9186
Matt Arsenault9356ec12023-02-02 10:14:36 -04009187 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9188 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9189 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9190 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9191 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9192 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9193 auto SubnormalRes =
9194 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9195 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9196 if (PartialCheck == fcNegSubnormal)
9197 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9198 appendToRes(SubnormalRes);
9199 }
9200
Matt Arsenault61f2f2c2023-03-17 09:21:57 -04009201 if (FPClassTest PartialCheck = Mask & fcInf) {
Janek van Oirschot587747d2022-12-06 20:36:07 +00009202 if (PartialCheck == fcPosInf)
9203 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9204 AsInt, InfC));
9205 else if (PartialCheck == fcInf)
9206 appendToRes(
9207 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9208 else { // fcNegInf
9209 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9210 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9211 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9212 AsInt, NegInfC));
9213 }
9214 }
9215
Matt Arsenault61f2f2c2023-03-17 09:21:57 -04009216 if (FPClassTest PartialCheck = Mask & fcNan) {
Janek van Oirschot587747d2022-12-06 20:36:07 +00009217 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9218 if (PartialCheck == fcNan) {
9219 // isnan(V) ==> abs(V) u> int(inf)
9220 appendToRes(
9221 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9222 } else if (PartialCheck == fcQNan) {
9223 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9224 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9225 InfWithQnanBitC));
9226 } else { // fcSNan
9227 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9228 // abs(V) u< (unsigned(Inf) | quiet_bit)
9229 auto IsNan =
9230 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9231 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9232 Abs, InfWithQnanBitC);
9233 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9234 }
9235 }
9236
Matt Arsenault61f2f2c2023-03-17 09:21:57 -04009237 if (FPClassTest PartialCheck = Mask & fcNormal) {
Janek van Oirschot587747d2022-12-06 20:36:07 +00009238 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9239 // (max_exp-1))
9240 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9241 auto ExpMinusOne = MIRBuilder.buildSub(
9242 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9243 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9244 auto NormalRes =
9245 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9246 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9247 if (PartialCheck == fcNegNormal)
9248 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9249 else if (PartialCheck == fcPosNormal) {
9250 auto PosSign = MIRBuilder.buildXor(
9251 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
9252 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9253 }
9254 appendToRes(NormalRes);
9255 }
9256
9257 MIRBuilder.buildCopy(DstReg, Res);
9258 MI.eraseFromParent();
9259 return Legalized;
9260}
9261
Amara Emerson08232192020-09-26 10:02:39 -07009262LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
Kai Nackef2d0bba2024-01-26 09:11:29 -05009263 // Implement G_SELECT in terms of XOR, AND, OR.
Amara Emerson719024a2023-02-23 16:35:39 -08009264 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9265 MI.getFirst4RegLLTs();
Amara Emerson08232192020-09-26 10:02:39 -07009266
Jay Foadd57515bd2024-02-13 08:21:35 +00009267 bool IsEltPtr = DstTy.isPointerOrPointerVector();
Amara Emersonf24f4692022-09-11 16:28:44 +01009268 if (IsEltPtr) {
9269 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9270 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9271 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9272 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9273 DstTy = NewTy;
9274 }
9275
Amara Emerson87ff1562020-11-17 12:09:31 -08009276 if (MaskTy.isScalar()) {
Kai Nackef2d0bba2024-01-26 09:11:29 -05009277 // Turn the scalar condition into a vector condition mask if needed.
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009278
Amara Emerson87ff1562020-11-17 12:09:31 -08009279 Register MaskElt = MaskReg;
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009280
9281 // The condition was potentially zero extended before, but we want a sign
9282 // extended boolean.
Amara Emerson78833a42022-09-20 00:21:55 +01009283 if (MaskTy != LLT::scalar(1))
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009284 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009285
9286 // Continue the sign extension (or truncate) to match the data type.
Kai Nackef2d0bba2024-01-26 09:11:29 -05009287 MaskElt =
9288 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009289
Kai Nackef2d0bba2024-01-26 09:11:29 -05009290 if (DstTy.isVector()) {
9291 // Generate a vector splat idiom.
9292 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9293 MaskReg = ShufSplat.getReg(0);
9294 } else {
9295 MaskReg = MaskElt;
9296 }
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009297 MaskTy = DstTy;
Kai Nackef2d0bba2024-01-26 09:11:29 -05009298 } else if (!DstTy.isVector()) {
9299 // Cannot handle the case that mask is a vector and dst is a scalar.
9300 return UnableToLegalize;
Amara Emerson87ff1562020-11-17 12:09:31 -08009301 }
9302
Matt Arsenault3f2cc7c2022-04-11 21:11:26 -04009303 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
Amara Emerson08232192020-09-26 10:02:39 -07009304 return UnableToLegalize;
Amara Emerson87ff1562020-11-17 12:09:31 -08009305 }
Amara Emerson08232192020-09-26 10:02:39 -07009306
9307 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9308 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9309 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
Amara Emersonf24f4692022-09-11 16:28:44 +01009310 if (IsEltPtr) {
9311 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9312 MIRBuilder.buildIntToPtr(DstReg, Or);
9313 } else {
9314 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9315 }
Amara Emerson08232192020-09-26 10:02:39 -07009316 MI.eraseFromParent();
9317 return Legalized;
Kazu Hiratae3d3dbd332021-01-10 09:24:56 -08009318}
Christudasan Devadasan4c6ab482021-03-10 18:03:10 +05309319
9320LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9321 // Split DIVREM into individual instructions.
9322 unsigned Opcode = MI.getOpcode();
9323
9324 MIRBuilder.buildInstr(
9325 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9326 : TargetOpcode::G_UDIV,
9327 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9328 MIRBuilder.buildInstr(
9329 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9330 : TargetOpcode::G_UREM,
9331 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9332 MI.eraseFromParent();
9333 return Legalized;
9334}
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02009335
9336LegalizerHelper::LegalizeResult
9337LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
9338 // Expand %res = G_ABS %a into:
9339 // %v1 = G_ASHR %a, scalar_size-1
9340 // %v2 = G_ADD %a, %v1
9341 // %res = G_XOR %v2, %v1
9342 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9343 Register OpReg = MI.getOperand(1).getReg();
9344 auto ShiftAmt =
9345 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9346 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9347 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9348 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9349 MI.eraseFromParent();
9350 return Legalized;
9351}
9352
9353LegalizerHelper::LegalizeResult
9354LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
9355 // Expand %res = G_ABS %a into:
9356 // %v1 = G_CONSTANT 0
9357 // %v2 = G_SUB %v1, %a
9358 // %res = G_SMAX %a, %v2
9359 Register SrcReg = MI.getOperand(1).getReg();
9360 LLT Ty = MRI.getType(SrcReg);
Madhur Amilkanthwar7bb87d52024-03-21 09:54:03 +05309361 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9362 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9363 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9364 MI.eraseFromParent();
9365 return Legalized;
9366}
9367
9368LegalizerHelper::LegalizeResult
9369LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
9370 Register SrcReg = MI.getOperand(1).getReg();
9371 Register DestReg = MI.getOperand(0).getReg();
9372 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02009373 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9374 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
Madhur Amilkanthwar7bb87d52024-03-21 09:54:03 +05309375 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9376 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
Mirko Brkusanin35ef4c92021-06-03 18:09:45 +02009377 MI.eraseFromParent();
9378 return Legalized;
9379}
Jessica Paquette791006f2021-08-17 10:39:18 -07009380
Him1880748f422024-09-03 12:47:26 +01009381LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
9382 Register SrcReg = MI.getOperand(1).getReg();
9383 Register DstReg = MI.getOperand(0).getReg();
9384
9385 LLT Ty = MRI.getType(DstReg);
9386
9387 // Reset sign bit
9388 MIRBuilder.buildAnd(
9389 DstReg, SrcReg,
9390 MIRBuilder.buildConstant(
9391 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
9392
9393 MI.eraseFromParent();
9394 return Legalized;
9395}
9396
Amara Emerson95ac3d12021-08-18 00:19:58 -07009397LegalizerHelper::LegalizeResult
9398LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
9399 Register SrcReg = MI.getOperand(1).getReg();
9400 LLT SrcTy = MRI.getType(SrcReg);
9401 LLT DstTy = MRI.getType(SrcReg);
9402
9403 // The source could be a scalar if the IR type was <1 x sN>.
9404 if (SrcTy.isScalar()) {
9405 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
9406 return UnableToLegalize; // FIXME: handle extension.
9407 // This can be just a plain copy.
9408 Observer.changingInstr(MI);
9409 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
9410 Observer.changedInstr(MI);
9411 return Legalized;
9412 }
David Green28027392023-06-11 10:25:24 +01009413 return UnableToLegalize;
Amara Emerson95ac3d12021-08-18 00:19:58 -07009414}
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009415
Michael Maitland6f9cb9a72023-12-08 13:24:27 -05009416LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
9417 MachineFunction &MF = *MI.getMF();
9418 const DataLayout &DL = MIRBuilder.getDataLayout();
9419 LLVMContext &Ctx = MF.getFunction().getContext();
9420 Register ListPtr = MI.getOperand(1).getReg();
9421 LLT PtrTy = MRI.getType(ListPtr);
9422
9423 // LstPtr is a pointer to the head of the list. Get the address
9424 // of the head of the list.
9425 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
9426 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
9427 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
9428 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
9429
9430 const Align A(MI.getOperand(2).getImm());
9431 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
9432 if (A > TLI.getMinStackArgumentAlignment()) {
9433 Register AlignAmt =
9434 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
9435 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
9436 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
9437 VAList = AndDst.getReg(0);
9438 }
9439
9440 // Increment the pointer, VAList, to the next vaarg
9441 // The list should be bumped by the size of element in the current head of
9442 // list.
9443 Register Dst = MI.getOperand(0).getReg();
9444 LLT LLTTy = MRI.getType(Dst);
9445 Type *Ty = getTypeForLLT(LLTTy, Ctx);
9446 auto IncAmt =
9447 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
9448 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
9449
9450 // Store the increment VAList to the legalized pointer
9451 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
9452 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
9453 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
9454 // Load the actual argument out of the pointer VAList
9455 Align EltAlignment = DL.getABITypeAlign(Ty);
9456 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
9457 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
9458 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
9459
9460 MI.eraseFromParent();
9461 return Legalized;
9462}
9463
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009464static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
9465 // On Darwin, -Os means optimize for size without hurting performance, so
9466 // only really optimize for size when -Oz (MinSize) is used.
9467 if (MF.getTarget().getTargetTriple().isOSDarwin())
9468 return MF.getFunction().hasMinSize();
9469 return MF.getFunction().hasOptSize();
9470}
9471
// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
//
// Returns false if no lowering within `Limit` operations exists (or the
// alignment precondition fails); on success MemOps holds one LLT per emitted
// load/store, largest-first, whose byte sizes sum to Op.size() (the last
// entry may overlap the previous one when overlapping accesses are allowed).
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  // A fixed-destination-alignment memcpy can't use accesses wider than the
  // source alignment permits.
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick its preferred type first.
  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        // NOTE(review): LLT::scalar takes a bit count, so passing the byte
        // size shrinks the candidate type by 8x each iteration (64->8->1).
        Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    // Shrink the access type until it fits in the remaining bytes (or we
    // decide to emit a final overlapping access at the current width).
    while (TySize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      // bit_floor(bits - 1) yields the largest power of two strictly smaller
      // than the current width, i.e. the next-size-down scalar.
      NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        // Keep the wide type and let the final access overlap the previous
        // one; claiming TySize == Size terminates the outer loop.
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}
9538
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009539// Get a vectorized representation of the memset value operand, GISel edition.
9540static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
9541 MachineRegisterInfo &MRI = *MIB.getMRI();
9542 unsigned NumBits = Ty.getScalarSizeInBits();
Petar Avramovicd477a7c2021-09-17 11:21:55 +02009543 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009544 if (!Ty.isVector() && ValVRegAndVal) {
Jay Foad6bec3e92021-10-06 10:54:07 +01009545 APInt Scalar = ValVRegAndVal->Value.trunc(8);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009546 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
9547 return MIB.buildConstant(Ty, SplatVal).getReg(0);
9548 }
9549
9550 // Extend the byte value to the larger type, and then multiply by a magic
9551 // value 0x010101... in order to replicate it across every byte.
9552 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
9553 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
9554 return MIB.buildConstant(Ty, 0).getReg(0);
9555 }
9556
9557 LLT ExtType = Ty.getScalarType();
9558 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
9559 if (NumBits > 8) {
9560 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
9561 auto MagicMI = MIB.buildConstant(ExtType, Magic);
9562 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
9563 }
9564
9565 // For vector types create a G_BUILD_VECTOR.
9566 if (Ty.isVector())
Michael Maitland96049fc2024-03-07 09:50:29 -05009567 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009568
9569 return Val;
9570}
9571
// Lower a known-length G_MEMSET into a sequence of stores.
//
// Asks findGISelOptimalMemOpLowering for the list of store types (bounded by
// the target's getMaxStoresPerMemset limit), builds the repeated-byte value
// once at the widest type, then emits one store per type, narrowing the value
// by truncation where that is free. Stack destinations may have their frame
// object's alignment raised to allow wider stores.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
                             uint64_t KnownLen, Align Alignment,
                             bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  // The destination alignment may be raised if it is a non-fixed stack slot.
  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  // Compute the store types; bail to a libcall-style lowering if the store
  // count would exceed the target's limit.
  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return UnableToLegalize;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return UnableToLegalize;
    }

    // Derive this store's MMO from the original with the running offset.
    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return Legalized;
}
9684
9685LegalizerHelper::LegalizeResult
9686LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
9687 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9688
Amara Emerson719024a2023-02-23 16:35:39 -08009689 auto [Dst, Src, Len] = MI.getFirst3Regs();
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009690
9691 const auto *MMOIt = MI.memoperands_begin();
9692 const MachineMemOperand *MemOp = *MMOIt;
9693 bool IsVolatile = MemOp->isVolatile();
9694
9695 // See if this is a constant length copy
Petar Avramovicd477a7c2021-09-17 11:21:55 +02009696 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009697 // FIXME: support dynamically sized G_MEMCPY_INLINE
Kazu Hirata5413bf12022-06-20 11:33:56 -07009698 assert(LenVRegAndVal &&
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009699 "inline memcpy with dynamic size is not yet supported");
9700 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9701 if (KnownLen == 0) {
9702 MI.eraseFromParent();
9703 return Legalized;
9704 }
9705
9706 const auto &DstMMO = **MI.memoperands_begin();
9707 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9708 Align DstAlign = DstMMO.getBaseAlign();
9709 Align SrcAlign = SrcMMO.getBaseAlign();
9710
9711 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9712 IsVolatile);
9713}
9714
9715LegalizerHelper::LegalizeResult
9716LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
9717 uint64_t KnownLen, Align DstAlign,
9718 Align SrcAlign, bool IsVolatile) {
9719 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9720 return lowerMemcpy(MI, Dst, Src, KnownLen,
9721 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9722 IsVolatile);
9723}
9724
// Lower a known-length G_MEMCPY (or inline memcpy) into load/store pairs.
//
// Queries findGISelOptimalMemOpLowering for the access types (bounded by
// `Limit`), optionally raises a stack destination's frame-object alignment,
// then emits one load+store pair per type at a running offset. Unlike
// memmove, each store may be issued immediately after its load because the
// ranges are assumed not to overlap.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
                             uint64_t KnownLen, uint64_t Limit, Align DstAlign,
                             Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // The accesses can be no more aligned than the less-aligned side.
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
  // if the memcpy is in a tail call position.

  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      if (MaybeAlign StackAlign = DL.getStackAlignment())
        NewAlign = std::min(NewAlign, *StackAlign);

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a pair of load and stores for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store to the dest buffer
  // of that value loaded. This can result in a sequence of loads and stores
  // mixed types, depending on what the target specifies as good types to use.
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store. The offset constant is reused from the load above.
    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return Legalized;
}
9831
// Lower a known-length G_MEMMOVE into loads followed by stores.
//
// Structured like lowerMemcpy, except that ALL loads are emitted before ANY
// store so the result stays correct when the source and destination ranges
// overlap.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
                              uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                              bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  // The accesses can be no more aligned than the less-aligned side.
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in it's findOptimalMemOpLowering implementation. For now do the
  // same thing here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      if (MaybeAlign StackAlign = DL.getStackAlignment())
        NewAlign = std::min(NewAlign, *StackAlign);

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the stores.
  // Apart from that, this loop is pretty much doing the same thing as the
  // memcpy codegen function.
  unsigned CurrOffset = 0;
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      auto Offset =
          MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  // Second pass: emit the stores of the buffered values, walking the same
  // offsets again.
  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      auto Offset =
          MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return Legalized;
}
9937
9938LegalizerHelper::LegalizeResult
9939LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
9940 const unsigned Opc = MI.getOpcode();
9941 // This combine is fairly complex so it's not written with a separate
9942 // matcher function.
9943 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9944 Opc == TargetOpcode::G_MEMSET) &&
9945 "Expected memcpy like instruction");
9946
9947 auto MMOIt = MI.memoperands_begin();
9948 const MachineMemOperand *MemOp = *MMOIt;
9949
9950 Align DstAlign = MemOp->getBaseAlign();
9951 Align SrcAlign;
Amara Emerson719024a2023-02-23 16:35:39 -08009952 auto [Dst, Src, Len] = MI.getFirst3Regs();
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009953
9954 if (Opc != TargetOpcode::G_MEMSET) {
9955 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
9956 MemOp = *(++MMOIt);
9957 SrcAlign = MemOp->getBaseAlign();
9958 }
9959
9960 // See if this is a constant length copy
Petar Avramovicd477a7c2021-09-17 11:21:55 +02009961 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
Mirko Brkusanin36527cb2021-09-07 11:30:11 +02009962 if (!LenVRegAndVal)
9963 return UnableToLegalize;
9964 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9965
9966 if (KnownLen == 0) {
9967 MI.eraseFromParent();
9968 return Legalized;
9969 }
9970
9971 bool IsVolatile = MemOp->isVolatile();
9972 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9973 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9974 IsVolatile);
9975
9976 // Don't try to optimize volatile.
9977 if (IsVolatile)
9978 return UnableToLegalize;
9979
9980 if (MaxLen && KnownLen > MaxLen)
9981 return UnableToLegalize;
9982
9983 if (Opc == TargetOpcode::G_MEMCPY) {
9984 auto &MF = *MI.getParent()->getParent();
9985 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9986 bool OptSize = shouldLowerMemFuncForSize(MF);
9987 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
9988 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9989 IsVolatile);
9990 }
9991 if (Opc == TargetOpcode::G_MEMMOVE)
9992 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9993 if (Opc == TargetOpcode::G_MEMSET)
9994 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
9995 return UnableToLegalize;
9996}