| //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===// | 
 | // | 
 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
 | // See https://llvm.org/LICENSE.txt for license information. | 
 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 | // | 
 | // This file contains the X86 memory folding tables. | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 |  | 
 | #include "X86InstrFoldTables.h" | 
 | #include "X86InstrInfo.h" | 
 | #include "llvm/ADT/STLExtras.h" | 
 | #include <atomic> | 
 | #include <vector> | 
 |  | 
 | using namespace llvm; | 
 |  | 
// These tables are sorted by their RegOp value, allowing them to be binary
// searched at runtime without the need for additional storage. The enum values
// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
// makes sorting these tables a simple matter of alphabetizing them.
 | #include "X86GenFoldTables.inc" | 
 |  | 
 | // Table to map instructions safe to broadcast using a different width from the | 
 | // element width. | 
 | static const X86FoldTableEntry BroadcastSizeTable2[] = { | 
 |   { X86::VANDNPDZ128rr,        X86::VANDNPSZ128rmb,       TB_BCAST_SS }, | 
 |   { X86::VANDNPDZ256rr,        X86::VANDNPSZ256rmb,       TB_BCAST_SS }, | 
 |   { X86::VANDNPDZrr,           X86::VANDNPSZrmb,          TB_BCAST_SS }, | 
 |   { X86::VANDNPSZ128rr,        X86::VANDNPDZ128rmb,       TB_BCAST_SD }, | 
 |   { X86::VANDNPSZ256rr,        X86::VANDNPDZ256rmb,       TB_BCAST_SD }, | 
 |   { X86::VANDNPSZrr,           X86::VANDNPDZrmb,          TB_BCAST_SD }, | 
 |   { X86::VANDPDZ128rr,         X86::VANDPSZ128rmb,        TB_BCAST_SS }, | 
 |   { X86::VANDPDZ256rr,         X86::VANDPSZ256rmb,        TB_BCAST_SS }, | 
 |   { X86::VANDPDZrr,            X86::VANDPSZrmb,           TB_BCAST_SS }, | 
 |   { X86::VANDPSZ128rr,         X86::VANDPDZ128rmb,        TB_BCAST_SD }, | 
 |   { X86::VANDPSZ256rr,         X86::VANDPDZ256rmb,        TB_BCAST_SD }, | 
 |   { X86::VANDPSZrr,            X86::VANDPDZrmb,           TB_BCAST_SD }, | 
 |   { X86::VORPDZ128rr,          X86::VORPSZ128rmb,         TB_BCAST_SS }, | 
 |   { X86::VORPDZ256rr,          X86::VORPSZ256rmb,         TB_BCAST_SS }, | 
 |   { X86::VORPDZrr,             X86::VORPSZrmb,            TB_BCAST_SS }, | 
 |   { X86::VORPSZ128rr,          X86::VORPDZ128rmb,         TB_BCAST_SD }, | 
 |   { X86::VORPSZ256rr,          X86::VORPDZ256rmb,         TB_BCAST_SD }, | 
 |   { X86::VORPSZrr,             X86::VORPDZrmb,            TB_BCAST_SD }, | 
 |   { X86::VPANDDZ128rr,         X86::VPANDQZ128rmb,        TB_BCAST_Q }, | 
 |   { X86::VPANDDZ256rr,         X86::VPANDQZ256rmb,        TB_BCAST_Q }, | 
 |   { X86::VPANDDZrr,            X86::VPANDQZrmb,           TB_BCAST_Q }, | 
 |   { X86::VPANDNDZ128rr,        X86::VPANDNQZ128rmb,       TB_BCAST_Q }, | 
 |   { X86::VPANDNDZ256rr,        X86::VPANDNQZ256rmb,       TB_BCAST_Q }, | 
 |   { X86::VPANDNDZrr,           X86::VPANDNQZrmb,          TB_BCAST_Q }, | 
 |   { X86::VPANDNQZ128rr,        X86::VPANDNDZ128rmb,       TB_BCAST_D }, | 
 |   { X86::VPANDNQZ256rr,        X86::VPANDNDZ256rmb,       TB_BCAST_D }, | 
 |   { X86::VPANDNQZrr,           X86::VPANDNDZrmb,          TB_BCAST_D }, | 
 |   { X86::VPANDQZ128rr,         X86::VPANDDZ128rmb,        TB_BCAST_D }, | 
 |   { X86::VPANDQZ256rr,         X86::VPANDDZ256rmb,        TB_BCAST_D }, | 
 |   { X86::VPANDQZrr,            X86::VPANDDZrmb,           TB_BCAST_D }, | 
 |   { X86::VPORDZ128rr,          X86::VPORQZ128rmb,         TB_BCAST_Q }, | 
 |   { X86::VPORDZ256rr,          X86::VPORQZ256rmb,         TB_BCAST_Q }, | 
 |   { X86::VPORDZrr,             X86::VPORQZrmb,            TB_BCAST_Q }, | 
 |   { X86::VPORQZ128rr,          X86::VPORDZ128rmb,         TB_BCAST_D }, | 
 |   { X86::VPORQZ256rr,          X86::VPORDZ256rmb,         TB_BCAST_D }, | 
 |   { X86::VPORQZrr,             X86::VPORDZrmb,            TB_BCAST_D }, | 
 |   { X86::VPXORDZ128rr,         X86::VPXORQZ128rmb,        TB_BCAST_Q }, | 
 |   { X86::VPXORDZ256rr,         X86::VPXORQZ256rmb,        TB_BCAST_Q }, | 
 |   { X86::VPXORDZrr,            X86::VPXORQZrmb,           TB_BCAST_Q }, | 
 |   { X86::VPXORQZ128rr,         X86::VPXORDZ128rmb,        TB_BCAST_D }, | 
 |   { X86::VPXORQZ256rr,         X86::VPXORDZ256rmb,        TB_BCAST_D }, | 
 |   { X86::VPXORQZrr,            X86::VPXORDZrmb,           TB_BCAST_D }, | 
 |   { X86::VXORPDZ128rr,         X86::VXORPSZ128rmb,        TB_BCAST_SS }, | 
 |   { X86::VXORPDZ256rr,         X86::VXORPSZ256rmb,        TB_BCAST_SS }, | 
 |   { X86::VXORPDZrr,            X86::VXORPSZrmb,           TB_BCAST_SS }, | 
 |   { X86::VXORPSZ128rr,         X86::VXORPDZ128rmb,        TB_BCAST_SD }, | 
 |   { X86::VXORPSZ256rr,         X86::VXORPDZ256rmb,        TB_BCAST_SD }, | 
 |   { X86::VXORPSZrr,            X86::VXORPDZrmb,           TB_BCAST_SD }, | 
 | }; | 
 |  | 
 | static const X86FoldTableEntry BroadcastSizeTable3[] = { | 
 |   { X86::VPTERNLOGDZ128rri,    X86::VPTERNLOGQZ128rmbi,   TB_BCAST_Q }, | 
 |   { X86::VPTERNLOGDZ256rri,    X86::VPTERNLOGQZ256rmbi,   TB_BCAST_Q }, | 
 |   { X86::VPTERNLOGDZrri,       X86::VPTERNLOGQZrmbi,      TB_BCAST_Q }, | 
 |   { X86::VPTERNLOGQZ128rri,    X86::VPTERNLOGDZ128rmbi,   TB_BCAST_D }, | 
 |   { X86::VPTERNLOGQZ256rri,    X86::VPTERNLOGDZ256rmbi,   TB_BCAST_D }, | 
 |   { X86::VPTERNLOGQZrri,       X86::VPTERNLOGDZrmbi,      TB_BCAST_D }, | 
 | }; | 
 |  | 
 | static const X86FoldTableEntry * | 
 | lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) { | 
 | #ifndef NDEBUG | 
 | #define CHECK_SORTED_UNIQUE(TABLE)                                             \ | 
 |   assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted");                   \ | 
 |   assert(std::adjacent_find(std::begin(Table), std::end(Table)) ==             \ | 
 |              std::end(Table) &&                                                \ | 
 |          #TABLE " is not unique"); | 
 |  | 
 |   // Make sure the tables are sorted. | 
 |   static std::atomic<bool> FoldTablesChecked(false); | 
 |   if (!FoldTablesChecked.load(std::memory_order_relaxed)) { | 
 |     CHECK_SORTED_UNIQUE(Table2Addr) | 
 |     CHECK_SORTED_UNIQUE(Table0) | 
 |     CHECK_SORTED_UNIQUE(Table1) | 
 |     CHECK_SORTED_UNIQUE(Table2) | 
 |     CHECK_SORTED_UNIQUE(Table3) | 
 |     CHECK_SORTED_UNIQUE(Table4) | 
 |     CHECK_SORTED_UNIQUE(BroadcastTable1) | 
 |     CHECK_SORTED_UNIQUE(BroadcastTable2) | 
 |     CHECK_SORTED_UNIQUE(BroadcastTable3) | 
 |     CHECK_SORTED_UNIQUE(BroadcastTable4) | 
 |     CHECK_SORTED_UNIQUE(BroadcastSizeTable2) | 
 |     CHECK_SORTED_UNIQUE(BroadcastSizeTable3) | 
 |     FoldTablesChecked.store(true, std::memory_order_relaxed); | 
 |   } | 
 | #endif | 
 |  | 
 |   const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp); | 
 |   if (Data != Table.end() && Data->KeyOp == RegOp && | 
 |       !(Data->Flags & TB_NO_FORWARD)) | 
 |     return Data; | 
 |   return nullptr; | 
 | } | 
 |  | 
 | const X86FoldTableEntry *llvm::lookupTwoAddrFoldTable(unsigned RegOp) { | 
 |   return lookupFoldTableImpl(Table2Addr, RegOp); | 
 | } | 
 |  | 
 | const X86FoldTableEntry *llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) { | 
 |   ArrayRef<X86FoldTableEntry> FoldTable; | 
 |   if (OpNum == 0) | 
 |     FoldTable = ArrayRef(Table0); | 
 |   else if (OpNum == 1) | 
 |     FoldTable = ArrayRef(Table1); | 
 |   else if (OpNum == 2) | 
 |     FoldTable = ArrayRef(Table2); | 
 |   else if (OpNum == 3) | 
 |     FoldTable = ArrayRef(Table3); | 
 |   else if (OpNum == 4) | 
 |     FoldTable = ArrayRef(Table4); | 
 |   else | 
 |     return nullptr; | 
 |  | 
 |   return lookupFoldTableImpl(FoldTable, RegOp); | 
 | } | 
 |  | 
 | const X86FoldTableEntry *llvm::lookupBroadcastFoldTable(unsigned RegOp, | 
 |                                                         unsigned OpNum) { | 
 |   ArrayRef<X86FoldTableEntry> FoldTable; | 
 |   if (OpNum == 1) | 
 |     FoldTable = ArrayRef(BroadcastTable1); | 
 |   else if (OpNum == 2) | 
 |     FoldTable = ArrayRef(BroadcastTable2); | 
 |   else if (OpNum == 3) | 
 |     FoldTable = ArrayRef(BroadcastTable3); | 
 |   else if (OpNum == 4) | 
 |     FoldTable = ArrayRef(BroadcastTable4); | 
 |   else | 
 |     return nullptr; | 
 |  | 
 |   return lookupFoldTableImpl(FoldTable, RegOp); | 
 | } | 
 |  | 
 | namespace { | 
 |  | 
 | // This class stores the memory unfolding tables. It is instantiated as a | 
 | // function scope static variable to lazily init the unfolding table. | 
 | struct X86MemUnfoldTable { | 
 |   // Stores memory unfolding tables entries sorted by opcode. | 
 |   std::vector<X86FoldTableEntry> Table; | 
 |  | 
 |   X86MemUnfoldTable() { | 
 |     for (const X86FoldTableEntry &Entry : Table2Addr) | 
 |       // Index 0, folded load and store, no alignment requirement. | 
 |       addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); | 
 |  | 
 |     for (const X86FoldTableEntry &Entry : Table0) | 
 |       // Index 0, mix of loads and stores. | 
 |       addTableEntry(Entry, TB_INDEX_0); | 
 |  | 
 |     for (const X86FoldTableEntry &Entry : Table1) | 
 |       // Index 1, folded load | 
 |       addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD); | 
 |  | 
 |     for (const X86FoldTableEntry &Entry : Table2) | 
 |       // Index 2, folded load | 
 |       addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD); | 
 |  | 
 |     for (const X86FoldTableEntry &Entry : Table3) | 
 |       // Index 3, folded load | 
 |       addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD); | 
 |  | 
 |     for (const X86FoldTableEntry &Entry : Table4) | 
 |       // Index 4, folded load | 
 |       addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); | 
 |  | 
 |     // Broadcast tables. | 
 |     for (const X86FoldTableEntry &Entry : BroadcastTable1) | 
 |       // Index 1, folded broadcast | 
 |       addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD); | 
 |  | 
 |     for (const X86FoldTableEntry &Entry : BroadcastTable2) | 
 |       // Index 2, folded broadcast | 
 |       addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD); | 
 |  | 
 |     for (const X86FoldTableEntry &Entry : BroadcastTable3) | 
 |       // Index 3, folded broadcast | 
 |       addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD); | 
 |  | 
 |     for (const X86FoldTableEntry &Entry : BroadcastTable4) | 
 |       // Index 4, folded broadcast | 
 |       addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); | 
 |  | 
 |     // Sort the memory->reg unfold table. | 
 |     array_pod_sort(Table.begin(), Table.end()); | 
 |  | 
 |     // Now that it's sorted, ensure its unique. | 
 |     assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() && | 
 |            "Memory unfolding table is not unique!"); | 
 |   } | 
 |  | 
 |   void addTableEntry(const X86FoldTableEntry &Entry, uint16_t ExtraFlags) { | 
 |     // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it. | 
 |     if ((Entry.Flags & TB_NO_REVERSE) == 0) | 
 |       Table.push_back({Entry.DstOp, Entry.KeyOp, | 
 |                        static_cast<uint16_t>(Entry.Flags | ExtraFlags)}); | 
 |   } | 
 | }; | 
 | } // namespace | 
 |  | 
 | const X86FoldTableEntry *llvm::lookupUnfoldTable(unsigned MemOp) { | 
 |   static X86MemUnfoldTable MemUnfoldTable; | 
 |   auto &Table = MemUnfoldTable.Table; | 
 |   auto I = llvm::lower_bound(Table, MemOp); | 
 |   if (I != Table.end() && I->KeyOp == MemOp) | 
 |     return &*I; | 
 |   return nullptr; | 
 | } | 
 |  | 
 | namespace { | 
 |  | 
 | // This class stores the memory -> broadcast folding tables. It is instantiated | 
 | // as a function scope static variable to lazily init the folding table. | 
 | struct X86BroadcastFoldTable { | 
 |   // Stores memory broadcast folding tables entries sorted by opcode. | 
 |   std::vector<X86FoldTableEntry> Table; | 
 |  | 
 |   X86BroadcastFoldTable() { | 
 |     // Broadcast tables. | 
 |     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) { | 
 |       unsigned RegOp = Reg2Bcst.KeyOp; | 
 |       unsigned BcstOp = Reg2Bcst.DstOp; | 
 |       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { | 
 |         unsigned MemOp = Reg2Mem->DstOp; | 
 |         uint16_t Flags = | 
 |             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD; | 
 |         Table.push_back({MemOp, BcstOp, Flags}); | 
 |       } | 
 |     } | 
 |     for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) { | 
 |       unsigned RegOp = Reg2Bcst.KeyOp; | 
 |       unsigned BcstOp = Reg2Bcst.DstOp; | 
 |       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { | 
 |         unsigned MemOp = Reg2Mem->DstOp; | 
 |         uint16_t Flags = | 
 |             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD; | 
 |         Table.push_back({MemOp, BcstOp, Flags}); | 
 |       } | 
 |     } | 
 |  | 
 |     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) { | 
 |       unsigned RegOp = Reg2Bcst.KeyOp; | 
 |       unsigned BcstOp = Reg2Bcst.DstOp; | 
 |       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { | 
 |         unsigned MemOp = Reg2Mem->DstOp; | 
 |         uint16_t Flags = | 
 |             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD; | 
 |         Table.push_back({MemOp, BcstOp, Flags}); | 
 |       } | 
 |     } | 
 |     for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) { | 
 |       unsigned RegOp = Reg2Bcst.KeyOp; | 
 |       unsigned BcstOp = Reg2Bcst.DstOp; | 
 |       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { | 
 |         unsigned MemOp = Reg2Mem->DstOp; | 
 |         uint16_t Flags = | 
 |             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD; | 
 |         Table.push_back({MemOp, BcstOp, Flags}); | 
 |       } | 
 |     } | 
 |  | 
 |     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) { | 
 |       unsigned RegOp = Reg2Bcst.KeyOp; | 
 |       unsigned BcstOp = Reg2Bcst.DstOp; | 
 |       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) { | 
 |         unsigned MemOp = Reg2Mem->DstOp; | 
 |         uint16_t Flags = | 
 |             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | TB_FOLDED_LOAD; | 
 |         Table.push_back({MemOp, BcstOp, Flags}); | 
 |       } | 
 |     } | 
 |  | 
 |     // Sort the memory->broadcast fold table. | 
 |     array_pod_sort(Table.begin(), Table.end()); | 
 |   } | 
 | }; | 
 | } // namespace | 
 |  | 
 | bool llvm::matchBroadcastSize(const X86FoldTableEntry &Entry, | 
 |                               unsigned BroadcastBits) { | 
 |   switch (Entry.Flags & TB_BCAST_MASK) { | 
 |   case TB_BCAST_W: | 
 |   case TB_BCAST_SH: | 
 |     return BroadcastBits == 16; | 
 |   case TB_BCAST_D: | 
 |   case TB_BCAST_SS: | 
 |     return BroadcastBits == 32; | 
 |   case TB_BCAST_Q: | 
 |   case TB_BCAST_SD: | 
 |     return BroadcastBits == 64; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | const X86FoldTableEntry * | 
 | llvm::lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits) { | 
 |   static X86BroadcastFoldTable BroadcastFoldTable; | 
 |   auto &Table = BroadcastFoldTable.Table; | 
 |   for (auto I = llvm::lower_bound(Table, MemOp); | 
 |        I != Table.end() && I->KeyOp == MemOp; ++I) { | 
 |     if (matchBroadcastSize(*I, BroadcastBits)) | 
 |       return &*I; | 
 |   } | 
 |   return nullptr; | 
 | } |