blob: f99b41624a84394f027b7938064ed0949ef43e34 [file] [log] [blame]
Rui Ueyama6b5feed2015-06-24 20:40:03 +00001//===- ICF.cpp ------------------------------------------------------------===//
2//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
Rui Ueyama46ed29b2015-09-11 04:29:03 +000010// Identical COMDAT Folding is a feature to merge COMDAT sections not by
11// name (which is regular COMDAT handling) but by contents. If two COMDAT
12// sections have the same data, relocations, attributes, etc., then the two
13// are considered identical and merged by the linker. This optimization
14// makes outputs smaller.
15//
16// ICF is theoretically a problem of reducing graphs by merging as many
Rui Ueyama9be9aa82015-09-16 03:26:31 +000017// identical subgraphs as possible, if we consider sections as vertices and
Rui Ueyama46ed29b2015-09-11 04:29:03 +000018// relocations as edges. This may be a bit more complicated problem than you
19// might think. The order of processing sections matters since merging two
Rui Ueyamaca0e3672015-09-15 00:35:41 +000020// sections can make other sections, whose relocations now point to the same
Rui Ueyama46ed29b2015-09-11 04:29:03 +000021// section, mergeable. Graphs may contain cycles, which is common in COFF.
22// We need a sophisticated algorithm to do this properly and efficiently.
23//
Rui Ueyama9be9aa82015-09-16 03:26:31 +000024// What we do in this file is this. We split sections into groups. Sections
25// in the same group are considered identical.
Rui Ueyama46ed29b2015-09-11 04:29:03 +000026//
Rui Ueyama9be9aa82015-09-16 03:26:31 +000027// First, all sections are grouped by their "constant" values. Constant
Rui Ueyama437e0d22015-09-16 14:19:10 +000028// values are values that are never changed by ICF, such as section contents,
Rui Ueyama9be9aa82015-09-16 03:26:31 +000029// section name, number of relocations, type and offset of each relocation,
Rui Ueyama437e0d22015-09-16 14:19:10 +000030// etc. Because we do not care about some relocation targets in this step,
31// two sections in the same group may not be identical, but at least two
32// sections in different groups can never be identical.
Rui Ueyama9be9aa82015-09-16 03:26:31 +000033//
34// Then, we try to split each group by relocation targets. Relocations are
35// considered identical if and only if the relocation targets are in the
36// same group. Splitting a group may make more groups to be splittable,
37// because two relocations that were previously considered identical might
38// now point to different groups. We repeat this step until the convergence
39// is obtained.
40//
41// This algorithm is so-called "optimistic" algorithm described in
42// http://research.google.com/pubs/pub36912.html.
Rui Ueyama6b5feed2015-06-24 20:40:03 +000043//
44//===----------------------------------------------------------------------===//
45
46#include "Chunks.h"
Rui Ueyama791416f2015-07-30 22:57:21 +000047#include "Symbols.h"
Rui Ueyama708d7fa2015-09-18 21:06:34 +000048#include "lld/Core/Parallel.h"
Rui Ueyama791416f2015-07-30 22:57:21 +000049#include "llvm/ADT/Hashing.h"
Rui Ueyama46ed29b2015-09-11 04:29:03 +000050#include "llvm/Support/Debug.h"
51#include "llvm/Support/raw_ostream.h"
52#include <algorithm>
Rui Ueyama5197f542015-09-18 22:31:15 +000053#include <atomic>
Rui Ueyama6b5feed2015-06-24 20:40:03 +000054#include <vector>
55
Rui Ueyama791416f2015-07-30 22:57:21 +000056using namespace llvm;
57
Rui Ueyama6b5feed2015-06-24 20:40:03 +000058namespace lld {
59namespace coff {
Rui Ueyama46ed29b2015-09-11 04:29:03 +000060
Rui Ueyama437e0d22015-09-16 14:19:10 +000061typedef std::vector<SectionChunk *>::iterator ChunkIterator;
62typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *);
63
64class ICF {
65public:
Rui Ueyama0fff21f2015-09-18 21:17:44 +000066 void run(const std::vector<Chunk *> &V);
Rui Ueyama437e0d22015-09-16 14:19:10 +000067
68private:
69 static uint64_t getHash(SectionChunk *C);
70 static bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
71 static bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
Rui Ueyama0fff21f2015-09-18 21:17:44 +000072 bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq);
Rui Ueyama437e0d22015-09-16 14:19:10 +000073 bool partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq);
74
Rui Ueyama48788ff2015-09-20 01:44:44 +000075 std::atomic<uint64_t> NextID = { 1 };
Rui Ueyama437e0d22015-09-16 14:19:10 +000076};
77
78// Entry point to ICF.
79void doICF(const std::vector<Chunk *> &Chunks) {
Rui Ueyama0fff21f2015-09-18 21:17:44 +000080 ICF().run(Chunks);
Rui Ueyama46ed29b2015-09-11 04:29:03 +000081}
82
Rui Ueyama437e0d22015-09-16 14:19:10 +000083uint64_t ICF::getHash(SectionChunk *C) {
84 return hash_combine(C->getPermissions(),
85 hash_value(C->SectionName),
86 C->NumRelocs,
Rui Ueyama473e5a72015-09-25 16:50:12 +000087 C->getAlign(),
Rui Ueyama437e0d22015-09-16 14:19:10 +000088 uint32_t(C->Header->SizeOfRawData),
Rui Ueyama437e0d22015-09-16 14:19:10 +000089 C->Checksum);
Rui Ueyama9be9aa82015-09-16 03:26:31 +000090}
Rui Ueyama46ed29b2015-09-11 04:29:03 +000091
Rui Ueyama437e0d22015-09-16 14:19:10 +000092bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
Rui Ueyama89aa4742015-09-18 02:40:54 +000093 if (A->AssocChildren.size() != B->AssocChildren.size() ||
94 A->NumRelocs != B->NumRelocs) {
Rui Ueyama791416f2015-07-30 22:57:21 +000095 return false;
Rui Ueyama9be9aa82015-09-16 03:26:31 +000096 }
Rui Ueyama791416f2015-07-30 22:57:21 +000097
Rui Ueyamae2d2bfd2015-09-18 01:30:56 +000098 // Compare associative sections.
Rui Ueyama9be9aa82015-09-16 03:26:31 +000099 for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I)
100 if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID)
Rui Ueyama791416f2015-07-30 22:57:21 +0000101 return false;
102
Rui Ueyamae2d2bfd2015-09-18 01:30:56 +0000103 // Compare relocations.
Rui Ueyama791416f2015-07-30 22:57:21 +0000104 auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000105 if (R1.Type != R2.Type ||
106 R1.VirtualAddress != R2.VirtualAddress) {
Rui Ueyama791416f2015-07-30 22:57:21 +0000107 return false;
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000108 }
109 SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl();
110 SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl();
Rui Ueyama791416f2015-07-30 22:57:21 +0000111 if (B1 == B2)
112 return true;
Rui Ueyama89aa4742015-09-18 02:40:54 +0000113 if (auto *D1 = dyn_cast<DefinedRegular>(B1))
114 if (auto *D2 = dyn_cast<DefinedRegular>(B2))
115 return D1->getValue() == D2->getValue() &&
116 D1->getChunk()->GroupID == D2->getChunk()->GroupID;
117 return false;
Rui Ueyama791416f2015-07-30 22:57:21 +0000118 };
Rui Ueyamae2d2bfd2015-09-18 01:30:56 +0000119 if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
120 return false;
121
Rui Ueyama89aa4742015-09-18 02:40:54 +0000122 // Compare section attributes and contents.
123 return A->getPermissions() == B->getPermissions() &&
124 A->SectionName == B->SectionName &&
Rui Ueyama473e5a72015-09-25 16:50:12 +0000125 A->getAlign() == B->getAlign() &&
Rui Ueyama89aa4742015-09-18 02:40:54 +0000126 A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
127 A->Checksum == B->Checksum &&
128 A->getContents() == B->getContents();
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000129}
130
Rui Ueyama437e0d22015-09-16 14:19:10 +0000131bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
Rui Ueyama75cea682015-09-18 01:51:37 +0000132 // Compare associative sections.
133 for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I)
134 if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID)
135 return false;
136
137 // Compare relocations.
138 auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
139 SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl();
Rui Ueyamacc43c502015-09-20 20:19:12 +0000140 SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl();
141 if (B1 == B2)
142 return true;
143 if (auto *D1 = dyn_cast<DefinedRegular>(B1))
Rui Ueyama0fff21f2015-09-18 21:17:44 +0000144 if (auto *D2 = dyn_cast<DefinedRegular>(B2))
145 return D1->getChunk()->GroupID == D2->getChunk()->GroupID;
Rui Ueyama0fff21f2015-09-18 21:17:44 +0000146 return false;
Rui Ueyama75cea682015-09-18 01:51:37 +0000147 };
148 return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq);
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000149}
150
Rui Ueyama437e0d22015-09-16 14:19:10 +0000151bool ICF::partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq) {
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000152 bool R = false;
153 for (auto It = Begin;;) {
154 SectionChunk *Head = *It;
155 auto Bound = std::partition(It + 1, End, [&](SectionChunk *SC) {
156 return Eq(Head, SC);
157 });
158 if (Bound == End)
159 return R;
Rui Ueyama4b1d5792015-09-25 16:38:13 +0000160 uint64_t ID = NextID++;
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000161 std::for_each(It, Bound, [&](SectionChunk *SC) { SC->GroupID = ID; });
162 It = Bound;
163 R = true;
164 }
165}
166
Rui Ueyama0fff21f2015-09-18 21:17:44 +0000167bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) {
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000168 bool R = false;
Rui Ueyama0fff21f2015-09-18 21:17:44 +0000169 for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) {
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000170 SectionChunk *Head = *It;
171 auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) {
172 return SC->GroupID != Head->GroupID;
173 });
174 if (partition(It, Bound, Eq))
175 R = true;
176 It = Bound;
177 }
178 return R;
Rui Ueyama791416f2015-07-30 22:57:21 +0000179}
180
Rui Ueyama6b5feed2015-06-24 20:40:03 +0000181// Merge identical COMDAT sections.
Rui Ueyamaa7f08f12015-09-04 21:35:54 +0000182// Two sections are considered the same if their section headers,
Rui Ueyama6b5feed2015-06-24 20:40:03 +0000183// contents and relocations are all the same.
Rui Ueyama0fff21f2015-09-18 21:17:44 +0000184void ICF::run(const std::vector<Chunk *> &Vec) {
Rui Ueyama437e0d22015-09-16 14:19:10 +0000185 // Collect only mergeable sections and group by hash value.
Rui Ueyama0fff21f2015-09-18 21:17:44 +0000186 parallel_for_each(Vec.begin(), Vec.end(), [&](Chunk *C) {
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000187 if (auto *SC = dyn_cast<SectionChunk>(C)) {
Rui Ueyama72335232015-12-03 02:23:33 +0000188 bool Global = SC->Sym && SC->Sym->isExternal();
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000189 bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
Rui Ueyama72335232015-12-03 02:23:33 +0000190 if (SC->isCOMDAT() && SC->isLive() && Global && !Writable)
Rui Ueyama437e0d22015-09-16 14:19:10 +0000191 SC->GroupID = getHash(SC) | (uint64_t(1) << 63);
Rui Ueyama708d7fa2015-09-18 21:06:34 +0000192 }
193 });
Rui Ueyama317c1762015-10-26 16:20:00 +0000194 std::vector<SectionChunk *> Chunks;
Rui Ueyama0fff21f2015-09-18 21:17:44 +0000195 for (Chunk *C : Vec) {
Rui Ueyama708d7fa2015-09-18 21:06:34 +0000196 if (auto *SC = dyn_cast<SectionChunk>(C)) {
197 if (SC->GroupID) {
Rui Ueyama317c1762015-10-26 16:20:00 +0000198 Chunks.push_back(SC);
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000199 } else {
200 SC->GroupID = NextID++;
201 }
Rui Ueyama46ed29b2015-09-11 04:29:03 +0000202 }
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000203 }
204
Rui Ueyama708d7fa2015-09-18 21:06:34 +0000205 // From now on, sections in Chunks are ordered so that sections in
Rui Ueyama437e0d22015-09-16 14:19:10 +0000206 // the same group are consecutive in the vector.
Rui Ueyama317c1762015-10-26 16:20:00 +0000207 std::sort(Chunks.begin(), Chunks.end(),
208 [](SectionChunk *A, SectionChunk *B) {
209 return A->GroupID < B->GroupID;
210 });
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000211
212 // Split groups until we get a convergence.
Rui Ueyama38ea2e52015-09-16 23:55:39 +0000213 int Cnt = 1;
Rui Ueyama317c1762015-10-26 16:20:00 +0000214 forEachGroup(Chunks, equalsConstant);
Rui Ueyama708d7fa2015-09-18 21:06:34 +0000215
216 for (;;) {
Rui Ueyama317c1762015-10-26 16:20:00 +0000217 if (!forEachGroup(Chunks, equalsVariable))
Rui Ueyama708d7fa2015-09-18 21:06:34 +0000218 break;
Rui Ueyama38ea2e52015-09-16 23:55:39 +0000219 ++Cnt;
Rui Ueyama708d7fa2015-09-18 21:06:34 +0000220 }
Rui Ueyama38ea2e52015-09-16 23:55:39 +0000221 if (Config->Verbose)
222 llvm::outs() << "\nICF needed " << Cnt << " iterations.\n";
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000223
224 // Merge sections in the same group.
Rui Ueyama317c1762015-10-26 16:20:00 +0000225 for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) {
226 SectionChunk *Head = *It++;
227 auto Bound = std::find_if(It, End, [&](SectionChunk *SC) {
228 return Head->GroupID != SC->GroupID;
229 });
230 if (It == Bound)
231 continue;
232 if (Config->Verbose)
233 llvm::outs() << "Selected " << Head->getDebugName() << "\n";
234 while (It != Bound) {
235 SectionChunk *SC = *It++;
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000236 if (Config->Verbose)
Rui Ueyama317c1762015-10-26 16:20:00 +0000237 llvm::outs() << " Removed " << SC->getDebugName() << "\n";
238 Head->replace(SC);
Rui Ueyama9be9aa82015-09-16 03:26:31 +0000239 }
Rui Ueyamaa7f08f12015-09-04 21:35:54 +0000240 }
Rui Ueyama6b5feed2015-06-24 20:40:03 +0000241}
242
243} // namespace coff
244} // namespace lld