[DAG]Introduce llvm::processShuffleMasks and use it for shuffles in DAG Type Legalizer.
We can process the long shuffles (working across several actual
vector registers) in the best way if we take the actual register
represantion into account. We can build more correct representation of
register shuffles, improve number of recognised buildvector sequences.
Also, same function can be used to improve the cost model for the
shuffles. in future patches.
Part of D100486
Differential Revision: https://reviews.llvm.org/D115653
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 655c248..ac0a357 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -496,6 +496,116 @@
return true;
}
+void llvm::processShuffleMasks(
+ ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
+ unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
+ function_ref<void(ArrayRef<int>, unsigned)> SingleInputAction,
+ function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
+ SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
+ // Try to perform better estimation of the permutation.
+ // 1. Split the source/destination vectors into real registers.
+ // 2. Do the mask analysis to identify which real registers are
+ // permuted.
+ int Sz = Mask.size();
+ unsigned SzDest = Sz / NumOfDestRegs;
+ unsigned SzSrc = Sz / NumOfSrcRegs;
+ for (unsigned I = 0; I < NumOfDestRegs; ++I) {
+ auto &RegMasks = Res[I];
+ RegMasks.assign(NumOfSrcRegs, {});
+ // Check that the values in dest registers are in the one src
+ // register.
+ for (unsigned K = 0; K < SzDest; ++K) {
+ int Idx = I * SzDest + K;
+ if (Idx == Sz)
+ break;
+ if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
+ continue;
+ int SrcRegIdx = Mask[Idx] / SzSrc;
+ // Add a cost of PermuteTwoSrc for each new source register permute,
+ // if we have more than one source registers.
+ if (RegMasks[SrcRegIdx].empty())
+ RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
+ RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
+ }
+ }
+ // Process split mask.
+ for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
+ auto &Dest = Res[I];
+ int NumSrcRegs =
+ count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
+ switch (NumSrcRegs) {
+ case 0:
+ // No input vectors were used!
+ NoInputAction();
+ break;
+ case 1: {
+ // Find the only mask with at least single undef mask elem.
+ auto *It =
+ find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
+ unsigned SrcReg = std::distance(Dest.begin(), It);
+ SingleInputAction(*It, SrcReg);
+ break;
+ }
+ default: {
+ // The first mask is a permutation of a single register. Since we have >2
+ // input registers to shuffle, we merge the masks for 2 first registers
+ // and generate a shuffle of 2 registers rather than the reordering of the
+ // first register and then shuffle with the second register. Next,
+ // generate the shuffles of the resulting register + the remaining
+ // registers from the list.
+ auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
+ ArrayRef<int> SecondMask) {
+ for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
+ if (SecondMask[Idx] != UndefMaskElem) {
+ assert(FirstMask[Idx] == UndefMaskElem &&
+ "Expected undefined mask element.");
+ FirstMask[Idx] = SecondMask[Idx] + VF;
+ }
+ }
+ };
+ auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
+ for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
+ if (Mask[Idx] != UndefMaskElem)
+ Mask[Idx] = Idx;
+ }
+ };
+ int SecondIdx;
+ do {
+ int FirstIdx = -1;
+ SecondIdx = -1;
+ MutableArrayRef<int> FirstMask, SecondMask;
+ for (unsigned I = 0; I < NumOfDestRegs; ++I) {
+ SmallVectorImpl<int> &RegMask = Dest[I];
+ if (RegMask.empty())
+ continue;
+
+ if (FirstIdx == SecondIdx) {
+ FirstIdx = I;
+ FirstMask = RegMask;
+ continue;
+ }
+ SecondIdx = I;
+ SecondMask = RegMask;
+ CombineMasks(FirstMask, SecondMask);
+ ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
+ NormalizeMask(FirstMask);
+ RegMask.clear();
+ SecondMask = FirstMask;
+ SecondIdx = FirstIdx;
+ }
+ if (FirstIdx != SecondIdx && SecondIdx >= 0) {
+ CombineMasks(SecondMask, FirstMask);
+ ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
+ Dest[FirstIdx].clear();
+ NormalizeMask(SecondMask);
+ }
+ } while (SecondIdx >= 0);
+ break;
+ }
+ }
+ }
+}
+
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {