lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp - llvm-project - Git at Google

 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 ///
 /// \file Converts from in-memory normalized mach-o to in-memory Atoms.
 ///
 ///                  +------------+
 ///                  | normalized |
 ///                  +------------+
 ///                        |
 ///                        |
 ///                        v
 ///                    +-------+
 ///                    | Atoms |
 ///                    +-------+

 #include "ArchHandler.h"
 #include "Atoms.h"
 #include "File.h"
 #include "MachONormalizedFile.h"
 #include "MachONormalizedFileBinaryUtils.h"
 #include "lld/Common/LLVM.h"
 #include "lld/Core/Error.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/raw_ostream.h"

 using namespace llvm::MachO;
 using namespace lld::mach_o::normalized;

 #define DEBUG_TYPE "normalized-file-to-atoms"

 namespace lld {
 namespace mach_o {


 namespace { // anonymous


 // Builds one row of sectsToAtomType.  The last argument is an enumerator
 // name of DefinedAtom::ContentType; the macro adds the DefinedAtom::
 // qualification.
 #define ENTRY(seg, sect, type, atomType) \
   {seg, sect, type, DefinedAtom::atomType }

 /// One row of the table that maps a mach-o section (identified by segment
 /// name, section name and section type) to the content type of the atoms
 /// created from it.
 struct MachORelocatableSectionToAtomType {
   // Segment name to match; atomTypeFromSection() treats "" as a wildcard.
   StringRef                 segmentName;
   // Section name to match; atomTypeFromSection() treats "" as a wildcard.
   StringRef                 sectionName;
   // Section type to match; this field is always compared exactly.
   SectionType               sectionType;
   // Content type assigned to atoms of a matching section.
   DefinedAtom::ContentType  atomType;
 };

 // Table consulted by atomTypeFromSection().  Row order matters: the table is
 // scanned from the top and the first matching row wins, so wildcard rows
 // (empty names) only apply to sections no earlier row has claimed.  The
 // final typeUnknown row is the sentinel that ends the scan and must stay
 // last.
 const MachORelocatableSectionToAtomType sectsToAtomType[] = {
   ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
   // Never selected by atomTypeFromSection(): the typeCode row above has the
   // same key and matches first.
   // NOTE(review): presumably kept for lookups in the other direction
   // (atom type -> section) done elsewhere - confirm.
   ENTRY("__TEXT", "__text",           S_REGULAR,          typeResolver),
   ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
   ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
   ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
   ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
   ENTRY("__TEXT", "__const_coal",     S_COALESCED,        typeConstant),
   ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
   ENTRY("__TEXT", "__eh_frame",       S_REGULAR,          typeCFI),
   ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
   ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
   ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
   ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
   ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
   ENTRY("__DATA", "__datacoal_nt",    S_COALESCED,        typeData),
   ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
   ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
   ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
                                                           typeInitializerPtr),
   ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
                                                           typeTerminatorPtr),
   ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
                                                           typeGOT),
   ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
   ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
                                                           typeGOT),
   ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
   ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
                                                           typeThunkTLV),
   ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
   ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
                                                         typeTLVInitialZeroFill),
   ENTRY("__DATA", "__objc_imageinfo", S_REGULAR,          typeObjCImageInfo),
   ENTRY("__DATA", "__objc_catlist",   S_REGULAR,          typeObjC2CategoryList),
   ENTRY("",       "",                 S_INTERPOSING,      typeInterposingTuples),
   ENTRY("__LD",   "__compact_unwind", S_REGULAR,
                                                          typeCompactUnwindInfo),
   // Sentinel: terminates the scan in atomTypeFromSection().
   ENTRY("",       "",                 S_REGULAR,          typeUnknown)
 };
 #undef ENTRY


 /// Determines the DefinedAtom::ContentType for a mach-o section.
 ///
 /// The section is looked up in sectsToAtomType; the first row whose section
 /// type matches and whose segment/section names match (an empty name in the
 /// table matches anything) decides the result.
 ///
 /// \param section            the section to classify.
 /// \param customSectionName  set to true only when the deciding row used
 ///                           wildcards for both names; false otherwise.
 /// \returns the matching content type; typeCode for an unmatched section
 ///          flagged S_ATTR_PURE_INSTRUCTIONS; typeUnknown otherwise.
 DefinedAtom::ContentType atomTypeFromSection(const Section &section,
                                              bool &customSectionName) {
   customSectionName = false;

   for (const MachORelocatableSectionToAtomType &row : sectsToAtomType) {
     // The typeUnknown row marks the end of the searchable part of the table.
     if (row.atomType == DefinedAtom::typeUnknown)
       break;

     const bool anySegment = row.segmentName.empty();
     const bool anySection = row.sectionName.empty();
     const bool typeMatches = (row.sectionType == section.type);
     const bool segmentMatches =
         anySegment || row.segmentName.equals(section.segmentName);
     const bool sectionMatches =
         anySection || row.sectionName.equals(section.sectionName);

     if (typeMatches && segmentMatches && sectionMatches) {
       // A row matched purely by section type means the section's own
       // names are not one of the well-known ones.
       customSectionName = anySegment && anySection;
       return row.atomType;
     }
   }

   // No table row applies; fall back to the section attributes to spot code.
   if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
     return DefinedAtom::typeCode;

   return DefinedAtom::typeUnknown;
 }

 /// Strategies for splitting a section's content into atoms.  The strategy
 /// for a given content type is chosen by sectionParseInfo() and applied by
 /// processSection().
 enum AtomizeModel {
   // Atom boundaries come from the section's symbols
   // (see processSymboledSection()).
   atomizeAtSymbols,
   // Every atom has the content type's fixed size (its size multiple).
   atomizeFixedSize,
   // Every atom is one pointer wide: 8 bytes for 64-bit archs, otherwise 4.
   atomizePointerSize,
   // Each atom is a C string, up to and including its terminating zero byte.
   atomizeUTF8,
   // Each atom is a UTF16 string, up to and including its two zero bytes.
   atomizeUTF16,
   // Each atom is one dwarf unwind CFI (FDE or CIE): a 32-bit length field
   // plus that many bytes.
   atomizeCFI,
   // Each atom is one compact unwind entry: 32 bytes for 64-bit archs,
   // otherwise 20.
   atomizeCU,
   // Each atom is one NS/CFString object: 32 bytes for 64-bit archs,
   // otherwise 16.
   atomizeCFString
 };

 /// Reports how a section holding atoms of \p atomType is to be atomized.
 ///
 /// \param atomType      content type to look up.
 /// \param sizeMultiple  receives the value the section size must be a
 ///                      multiple of.
 /// \param scope         receives the scope given to atoms that are not
 ///                      created from symbols.
 /// \param merge         receives the merge behavior for such atoms.
 /// \param atomizeModel  receives the strategy used to split the section.
 ///
 /// Content types without a row in the table get the same answer as
 /// typeUnknown: size multiple 1, global scope, no merging, atomize at
 /// symbols.
 void sectionParseInfo(DefinedAtom::ContentType atomType,
                       unsigned int &sizeMultiple,
                       DefinedAtom::Scope &scope,
                       DefinedAtom::Merge &merge,
                       AtomizeModel &atomizeModel) {
   struct Row {
     DefinedAtom::ContentType  type;
     unsigned int              multiple;
     DefinedAtom::Scope        atomScope;
     DefinedAtom::Merge        atomMerge;
     AtomizeModel              model;
   };

   using DA = DefinedAtom;
   static const Row rows[] = {
     {DA::typeCode,              1, DA::scopeGlobal,          DA::mergeNo,
                                                              atomizeAtSymbols},
     {DA::typeData,              1, DA::scopeGlobal,          DA::mergeNo,
                                                              atomizeAtSymbols},
     {DA::typeConstData,         1, DA::scopeGlobal,          DA::mergeNo,
                                                              atomizeAtSymbols},
     {DA::typeZeroFill,          1, DA::scopeGlobal,          DA::mergeNo,
                                                              atomizeAtSymbols},
     {DA::typeConstant,          1, DA::scopeGlobal,          DA::mergeNo,
                                                              atomizeAtSymbols},
     {DA::typeCString,           1, DA::scopeLinkageUnit,     DA::mergeByContent,
                                                              atomizeUTF8},
     {DA::typeUTF16String,       1, DA::scopeLinkageUnit,     DA::mergeByContent,
                                                              atomizeUTF16},
     {DA::typeCFI,               4, DA::scopeTranslationUnit, DA::mergeNo,
                                                              atomizeCFI},
     {DA::typeLiteral4,          4, DA::scopeLinkageUnit,     DA::mergeByContent,
                                                              atomizeFixedSize},
     {DA::typeLiteral8,          8, DA::scopeLinkageUnit,     DA::mergeByContent,
                                                              atomizeFixedSize},
     {DA::typeLiteral16,        16, DA::scopeLinkageUnit,     DA::mergeByContent,
                                                              atomizeFixedSize},
     {DA::typeCFString,          4, DA::scopeLinkageUnit,     DA::mergeByContent,
                                                              atomizeCFString},
     {DA::typeInitializerPtr,    4, DA::scopeTranslationUnit, DA::mergeNo,
                                                              atomizePointerSize},
     {DA::typeTerminatorPtr,     4, DA::scopeTranslationUnit, DA::mergeNo,
                                                              atomizePointerSize},
     {DA::typeCompactUnwindInfo, 4, DA::scopeTranslationUnit, DA::mergeNo,
                                                              atomizeCU},
     {DA::typeGOT,               4, DA::scopeLinkageUnit,     DA::mergeByContent,
                                                              atomizePointerSize},
     {DA::typeObjC2CategoryList, 4, DA::scopeTranslationUnit, DA::mergeByContent,
                                                              atomizePointerSize},
     {DA::typeUnknown,           1, DA::scopeGlobal,          DA::mergeNo,
                                                              atomizeAtSymbols}
   };

   // Start from the answer for types that have no row: atomize by symbols.
   sizeMultiple = 1;
   scope        = DA::scopeGlobal;
   merge        = DA::mergeNo;
   atomizeModel = atomizeAtSymbols;

   // Overwrite the defaults with the first row describing this type, if any.
   for (const Row &row : rows) {
     if (row.type != atomType)
       continue;
     sizeMultiple = row.multiple;
     scope        = row.atomScope;
     merge        = row.atomMerge;
     atomizeModel = row.model;
     return;
   }
 }


 /// Translates the N_EXT / N_PEXT bits of a symbol into an lld atom scope.
 ///
 /// \param scope  combination of N_EXT and N_PEXT (no other bits set).
 /// \returns scopeGlobal for N_EXT alone, scopeLinkageUnit whenever N_PEXT
 ///          is set, and scopeTranslationUnit when neither bit is set.
 ///          Any other value is treated as unreachable.
 Atom::Scope atomScope(uint8_t scope) {
   if (scope == N_EXT)
     return Atom::scopeGlobal;
   if (scope == N_PEXT || scope == (N_PEXT | N_EXT))
     return Atom::scopeLinkageUnit;
   if (scope == 0)
     return Atom::scopeTranslationUnit;
   llvm_unreachable("unknown scope value!");
 }

 /// Appends to \p outSyms a pointer to every symbol in \p inSymbols that is
 /// defined in a section (N_SECT) and whose section number equals
 /// \p sectionIndex.  The relative order of \p inSymbols is preserved.  The
 /// pointers refer to elements of \p inSymbols, which must therefore outlive
 /// their use.
 void appendSymbolsInSection(
     const std::vector<lld::mach_o::normalized::Symbol> &inSymbols,
     uint32_t sectionIndex,
     SmallVector<const lld::mach_o::normalized::Symbol *, 64> &outSyms) {
   for (const auto &candidate : inSymbols) {
     // Anything that is not a section-relative definition is of no interest.
     const bool isDefinition = ((candidate.type & N_TYPE) == N_SECT);
     if (isDefinition && candidate.sect == sectionIndex)
       outSyms.push_back(&candidate);
   }
 }

 /// Creates the atom for one symbol (or for one anonymous range) of a section
 /// and adds it to \p file.
 ///
 /// \param atomType         content type of the section's atoms.
 /// \param section          section the atom lives in.
 /// \param file             file that receives the new atom.
 /// \param symbolAddr       address at which the atom starts.
 /// \param symbolName       atom name; empty for anonymous content.
 /// \param symbolDescFlags  n_desc flags of the symbol (0 if anonymous).
 /// \param symbolScope      scope for the new atom.
 /// \param nextSymbolAddr   address at which the atom ends (start of the next
 ///                         symbol, or end of the section).
 /// \param scatterable      false if the object was built without
 ///                         .subsections_via_symbols; such atoms are never
 ///                         dead-strippable.
 /// \param copyRefs         forwarded to the file's atom factories.
 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
                     MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
                     uint16_t symbolDescFlags, Atom::Scope symbolScope,
                     uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
   // The Mach-O symbol table does not record a size for each symbol.  The
   // size is instead the distance from this symbol to the next one.
   const uint64_t offset = symbolAddr - section.address;
   const uint64_t size = nextSymbolAddr - symbolAddr;
   const bool noDeadStrip =
       !scatterable || (symbolDescFlags & N_NO_DEAD_STRIP);

   // Zero-fill sections have no bytes, so they get their own kind of atom.
   if (isZeroFillSection(section.type)) {
     file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
                                 noDeadStrip, copyRefs, &section);
     return;
   }

   const bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF) != 0;
   DefinedAtom::Merge merge = DefinedAtom::mergeNo;
   if (symbolDescFlags & N_WEAK_DEF)
     merge = DefinedAtom::mergeAsWeak;

   if (atomType == DefinedAtom::typeUnknown) {
     // lld models a section by a single name, while Mach-O needs both a
     // segment and a section name.  Join them as "seg/sect".
     std::string segSectName = section.segmentName.str();
     segSectName += "/";
     segSectName += section.sectionName.str();
     file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
                                        merge, thumb, noDeadStrip, offset,
                                        size, segSectName, true, &section);
     return;
   }

   // Code symbols flagged as resolvers get the dedicated resolver type.
   const bool isResolver = (atomType == lld::DefinedAtom::typeCode) &&
                           (symbolDescFlags & N_SYMBOL_RESOLVER);
   if (isResolver)
     atomType = lld::DefinedAtom::typeResolver;

   file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
                       offset, size, thumb, noDeadStrip, copyRefs, &section);
 }

 /// Atomizes a section whose atom boundaries are given by its symbols.
 ///
 /// All defined global and local symbols of the section are collected and
 /// sorted by address.  One atom is created per symbol, extending to the
 /// next symbol (or to the end of the section).  Content before the first
 /// symbol, or the whole content of a symbol-less section, becomes an
 /// anonymous atom.  When several symbols share the section's start address,
 /// assembler generated "ltmp" symbols that are followed by another symbol
 /// at that address are skipped.
 ///
 /// \param atomType        content type for the created atoms.
 /// \param section         section to atomize; must be an element of
 ///                        normalizedFile.sections.
 /// \param normalizedFile  file providing the sections and symbols.
 /// \param file            receives the created atoms.
 /// \param scatterable     if false, the atoms are additionally chained with
 ///                        kindLayoutAfter references to keep their order.
 /// \param copyRefs        forwarded to atomFromSymbol().
 /// \returns always success.
 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
                                    const Section &section,
                                    const NormalizedFile &normalizedFile,
                                    MachOFile &file, bool scatterable,
                                    bool copyRefs) {
   // Find section's index.  Symbols refer to sections by 1-based position.
   uint32_t sectIndex = 1;
   for (auto &sect : normalizedFile.sections) {
     if (&sect == &section)
       break;
     ++sectIndex;
   }

   // Find all symbols in this section.
   SmallVector<const lld::mach_o::normalized::Symbol *, 64> symbols;
   appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
   appendSymbolsInSection(normalizedFile.localSymbols,  sectIndex, symbols);

   // Sort symbols.
   std::sort(symbols.begin(), symbols.end(),
             [](const lld::mach_o::normalized::Symbol *lhs,
                const lld::mach_o::normalized::Symbol *rhs) -> bool {
               if (lhs == rhs)
                 return false;
               // First by address.
               uint64_t lhsAddr = lhs->value;
               uint64_t rhsAddr = rhs->value;
               if (lhsAddr != rhsAddr)
                 return lhsAddr < rhsAddr;
               // If same address, one is an alias so sort by scope.
               Atom::Scope lScope = atomScope(lhs->scope);
               Atom::Scope rScope = atomScope(rhs->scope);
               if (lScope != rScope)
                 return lScope < rScope;
               // If same address and scope, see if one might be better as
               // the alias.  startswith() is used rather than front() so
               // that a symbol with an empty name is handled: front() must
               // not be called on an empty string.
               bool lPrivate = lhs->name.startswith("l");
               bool rPrivate = rhs->name.startswith("l");
               if (lPrivate != rPrivate)
                 return lPrivate;
               // If same address and scope, sort by name.
               return lhs->name < rhs->name;
             });

   // Debug logging of symbols.
   // for (const Symbol *sym : symbols)
   //  llvm::errs() << "  sym: "
   //    << llvm::format("0x%08llx ", (uint64_t)sym->value)
   //    << ", " << sym->name << "\n";

   // If section has no symbols and no content, there are no atoms.
   if (symbols.empty() && section.content.empty())
     return llvm::Error::success();

   const uint64_t sectionEnd = section.address + section.content.size();

   if (symbols.empty()) {
     // Section has no symbols, put all content in one anonymous atom.
     atomFromSymbol(atomType, section, file, section.address, StringRef(),
                    0, Atom::scopeTranslationUnit, sectionEnd,
                    scatterable, copyRefs);
   }
   else if (symbols.front()->value != section.address) {
     // Section has anonymous content before first symbol.
     atomFromSymbol(atomType, section, file, section.address, StringRef(),
                    0, Atom::scopeTranslationUnit, symbols.front()->value,
                    scatterable, copyRefs);
   }

   // Each symbol's atom ends where the next symbol starts, so the atom for a
   // symbol is only emitted once its successor is known.
   const lld::mach_o::normalized::Symbol *lastSym = nullptr;
   for (const lld::mach_o::normalized::Symbol *sym : symbols) {
     if (lastSym != nullptr) {
       // Ignore any assembler added "ltmpNNN" symbol at start of section
       // if there is another symbol at the start.
       if ((lastSym->value != sym->value)
           || lastSym->value != section.address
           || !lastSym->name.startswith("ltmp")) {
         atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
                        lastSym->desc, atomScope(lastSym->scope), sym->value,
                        scatterable, copyRefs);
       }
     }
     lastSym = sym;
   }
   // The final symbol's atom runs to the end of the section.
   if (lastSym != nullptr) {
     atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
                    lastSym->desc, atomScope(lastSym->scope), sectionEnd,
                    scatterable, copyRefs);
   }

   // If object built without .subsections_via_symbols, add reference chain.
   if (!scatterable) {
     MachODefinedAtom *prevAtom = nullptr;
     file.eachAtomInSection(section,
                            [&](MachODefinedAtom *atom, uint64_t offset)->void {
       if (prevAtom)
         prevAtom->addReference(Reference::KindNamespace::all,
                                Reference::KindArch::all,
                                Reference::kindLayoutAfter, 0, atom, 0);
       prevAtom = atom;
     });
   }

   return llvm::Error::success();
 }

 /// Splits one section of a normalized file into atoms and adds them to
 /// \p file.
 ///
 /// The way the section is split depends on \p atomType (see
 /// sectionParseInfo()).  Sections atomized at symbols are handed to
 /// processSymboledSection(); all others are cut into anonymous atoms whose
 /// size is fixed or derived from the content itself.
 ///
 /// \param atomType           content type of the section's atoms.
 /// \param section            section to atomize.
 /// \param customSectionName  if true, anonymous atoms are added to a custom
 ///                           section named "seg/sect".
 /// \param normalizedFile     file the section belongs to.
 /// \param file               receives the created atoms.
 /// \param scatterable        forwarded to processSymboledSection().
 /// \param copyRefs           forwarded to the atom factories.
 /// \returns success, or a GenericError if the section size is not a
 ///          multiple of the type's size multiple, a string is not zero
 ///          terminated, or an atom would extend past the end of the
 ///          section.
 llvm::Error processSection(DefinedAtom::ContentType atomType,
                            const Section &section,
                            bool customSectionName,
                            const NormalizedFile &normalizedFile,
                            MachOFile &file, bool scatterable,
                            bool copyRefs) {
   const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);

   // Get info on how to atomize section.
   unsigned int       sizeMultiple;
   DefinedAtom::Scope scope;
   DefinedAtom::Merge merge;
   AtomizeModel       atomizeModel;
   sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);

   // Builds an error whose message starts with "Section <seg>/<sect>".
   auto sectionError = [&](const Twine &detail) -> llvm::Error {
     return llvm::make_error<GenericError>(Twine("Section ")
                                           + section.segmentName
                                           + "/" + section.sectionName
                                           + detail);
   };

   // Validate section size.
   if ((section.content.size() % sizeMultiple) != 0)
     return sectionError(Twine(" has size (")
                         + Twine(section.content.size())
                         + ") which is not a multiple of "
                         + Twine(sizeMultiple));

   if (atomizeModel == atomizeAtSymbols) {
     // Break section up into atoms at the addresses of its symbols.
     return processSymboledSection(atomType, section, normalizedFile, file,
                                   scatterable, copyRefs);
   }

   for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
     // Bytes left in the section from the current atom's start; never zero
     // inside the loop.
     const unsigned int remaining = e - offset;
     unsigned int size = 0;
     switch (atomizeModel) {
     case atomizeFixedSize:
       // Break section up into atoms each with a fixed size.
       size = sizeMultiple;
       break;
     case atomizePointerSize:
       // Break section up into atoms each the size of a pointer.
       size = is64 ? 8 : 4;
       break;
     case atomizeUTF8:
       // Break section up into zero terminated c-strings.
       for (unsigned int i = offset; i < e; ++i) {
         if (section.content[i] == 0) {
           size = i + 1 - offset;
           break;
         }
       }
       break;
     case atomizeUTF16:
       // Break section up into zero terminated UTF16 strings.  Only look at
       // complete two byte units so that a trailing odd byte is never read
       // past the end of the content.
       for (unsigned int i = offset; e - i >= 2; i += 2) {
         if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
           size = i + 2 - offset;
           break;
         }
       }
       break;
     case atomizeCFI: {
       // Break section up into dwarf unwind CFIs (FDE or CIE).  Each starts
       // with a 32-bit length that does not count the length field itself.
       // The length is only read if it is fully inside the section, and the
       // total is computed in 64 bits so that it cannot wrap around.
       uint64_t cfiSize = 0;
       if (remaining >= 4)
         cfiSize = uint64_t(read32(&section.content[offset], isBig)) + 4;
       if (cfiSize == 0 || cfiSize > remaining) {
         return sectionError(Twine(" is malformed.  Size of CFI "
                                   "starting at offset (")
                             + Twine(offset)
                             + ") is past end of section.");
       }
       size = static_cast<unsigned int>(cfiSize);
       break;
     }
     case atomizeCU:
       // Break section up into compact unwind entries.
       size = is64 ? 32 : 20;
       break;
     case atomizeCFString:
       // Break section up into NS/CFString objects.
       size = is64 ? 32 : 16;
       break;
     case atomizeAtSymbols:
       llvm_unreachable("atomizeAtSymbols is handled before the loop");
     }
     if (size == 0) {
       return sectionError(" is malformed.  The last atom "
                           "is not zero terminated.");
     }
     // A fixed-size atom that does not fit would otherwise step the offset
     // over the end of the section and the loop would never terminate.
     if (size > remaining) {
       return sectionError(Twine(" is malformed.  Atom of size (")
                           + Twine(size)
                           + ") starting at offset ("
                           + Twine(offset)
                           + ") extends past end of section.");
     }
     if (customSectionName) {
       // Mach-O needs a segment and section name.  Concatenate those two
       // with a / separator (e.g. "seg/sect") to fit into the lld model
       // of just a section name.
       std::string segSectName = section.segmentName.str()
                                 + "/" + section.sectionName.str();
       file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
                                          merge, false, false, offset,
                                          size, segSectName, true, &section);
     } else {
       file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
                           false, false, copyRefs, &section);
     }
     offset += size;
   }
   return llvm::Error::success();
 }

 /// Returns the first section of \p normalizedFile whose address range
 /// [address, address + content size) contains \p address, or nullptr if no
 /// section does.
 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
                                           uint64_t address) {
   for (const Section &candidate : normalizedFile.sections) {
     const uint64_t begin = candidate.address;
     const uint64_t end = begin + candidate.content.size();
     if (address < begin || address >= end)
       continue;
     return &candidate;
   }
   return nullptr;
 }

 /// Finds the atom of \p file that covers address \p addr.
 ///
 /// \param normalizedFile  used to find the section containing \p addr.
 /// \param file            file whose atoms are searched.
 /// \param addr            address to look up.
 /// \param addend          receives the offset of \p addr within the found
 ///                        atom; set to 0 if no section contains \p addr.
 /// \returns the covering atom, or nullptr if no section or atom covers
 ///          \p addr.
 const MachODefinedAtom *
 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
                         uint64_t addr, Reference::Addend &addend) {
   // Give the out-parameter a defined value on every path; callers may look
   // at it before checking the returned atom.
   addend = 0;

   const Section *sect = findSectionCoveringAddress(normalizedFile, addr);
   if (!sect)
     return nullptr;

   // Zero-initialized in case the lookup below finds nothing and leaves it
   // untouched.
   uint32_t offsetInTarget = 0;
   uint64_t offsetInSect = addr - sect->address;
   auto atom =
       file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
   addend = offsetInTarget;
   return atom;
 }

 // Walks all relocations for a section in a normalized .o file and
 // creates corresponding lld::Reference objects.
 //
 // For each relocation (or pair of relocations, when the ArchHandler says
 // they belong together) the atom containing the fixup location is looked
 // up, the ArchHandler translates the relocation into a reference kind,
 // target atom and addend, and the resulting reference is added to the
 // containing atom.
 //
 // Returns success, or a GenericError describing the first relocation that
 // is out of range, has no containing atom, is an incomplete pair, or is
 // rejected by the ArchHandler.
 llvm::Error convertRelocs(const Section &section,
                           const NormalizedFile &normalizedFile,
                           bool scatterable,
                           MachOFile &file,
                           ArchHandler &handler) {
   // Utility function for ArchHandler to find atom by its address.
   // A sectIndex of 0 means "whichever section contains addr"; otherwise it
   // is the 1-based index of the section.
   auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
                          const lld::Atom **atom, Reference::Addend *addend)
                          -> llvm::Error {
     if (sectIndex > normalizedFile.sections.size())
       return llvm::make_error<GenericError>(Twine("out of range section "
                                      "index (") + Twine(sectIndex) + ")");
     const Section *sect = nullptr;
     if (sectIndex == 0) {
       sect = findSectionCoveringAddress(normalizedFile, addr);
       if (!sect)
         return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
                                        + ") is not in any section"));
     } else {
       sect = &normalizedFile.sections[sectIndex-1];
     }
     // Zero-initialized so the addend is well defined even if the lookup
     // finds no atom and leaves it untouched.
     uint32_t offsetInTarget = 0;
     uint64_t offsetInSect = addr - sect->address;
     *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
     *addend = offsetInTarget;
     return llvm::Error::success();
   };

   // Utility function for ArchHandler to find atom by its symbol index.
   // Symbol indexes count through the stabs, local, global and undefined
   // symbols, in that order.
   auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
                            -> llvm::Error {
     // Find symbol from index.
     const lld::mach_o::normalized::Symbol *sym = nullptr;
     uint32_t numStabs  = normalizedFile.stabsSymbols.size();
     uint32_t numLocal  = normalizedFile.localSymbols.size();
     uint32_t numGlobal = normalizedFile.globalSymbols.size();
     uint32_t numUndef  = normalizedFile.undefinedSymbols.size();
     // The index comes from the input file, so a bad value is reported as an
     // error instead of being left to underflow the subtractions below.
     if (symbolIndex < numStabs)
       return llvm::make_error<GenericError>(Twine("symbol index (")
                                      + Twine(symbolIndex)
                                      + ") refers to a stabs symbol");
     if (symbolIndex < numStabs+numLocal) {
       sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
     } else if (symbolIndex < numStabs+numLocal+numGlobal) {
       sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
     } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
       sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
                                              numGlobal];
     } else {
       return llvm::make_error<GenericError>(Twine("symbol index (")
                                      + Twine(symbolIndex) + ") out of range");
     }

     // Find atom from symbol.
     if ((sym->type & N_TYPE) == N_SECT) {
       // Section numbers are 1-based, so 0 is as invalid as a too large
       // number.  The cast makes the number print as a number even if the
       // field is a character-sized integer.
       if (sym->sect == 0 || sym->sect > normalizedFile.sections.size())
         return llvm::make_error<GenericError>(Twine("symbol section index (")
                                         + Twine(static_cast<unsigned>(sym->sect))
                                         + ") out of range ");
       const Section &symSection = normalizedFile.sections[sym->sect-1];
       uint64_t targetOffsetInSect = sym->value - symSection.address;
       MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
                                                             targetOffsetInSect);
       if (target) {
         *result = target;
         return llvm::Error::success();
       }
       return llvm::make_error<GenericError>("no atom found for defined symbol");
     } else if ((sym->type & N_TYPE) == N_UNDF) {
       const lld::Atom *target = file.findUndefAtom(sym->name);
       if (target) {
         *result = target;
         return llvm::Error::success();
       }
       return llvm::make_error<GenericError>("no undefined atom found for sym");
     } else {
       // Search undefs
       return llvm::make_error<GenericError>("no atom found for symbol");
     }
   };

   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
   // Use old-school iterator so that paired relocations can be grouped.
   for (auto it=section.relocations.begin(), e=section.relocations.end();
                                                                 it != e; ++it) {
     const Relocation &reloc = *it;
     // Find atom this relocation is in.  An offset equal to the section size
     // is already outside of the section, so it is rejected as well.
     if (reloc.offset >= section.content.size())
       return llvm::make_error<GenericError>(
                                     Twine("r_address (") + Twine(reloc.offset)
                                     + ") is larger than section size ("
                                     + Twine(section.content.size()) + ")");
     uint32_t offsetInAtom = 0;
     MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
                                                             reloc.offset,
                                                             &offsetInAtom);
     // Report a missing atom instead of dereferencing a null pointer below.
     if (!inAtom)
       return llvm::make_error<GenericError>(
                                     Twine("no atom found covering r_address (")
                                     + Twine(reloc.offset) + ") in section "
                                     + section.segmentName + "/"
                                     + section.sectionName);
     uint64_t fixupAddress = section.address + reloc.offset;

     const lld::Atom *target = nullptr;
     Reference::Addend addend = 0;
     Reference::KindValue kind;
     if (handler.isPairedReloc(reloc)) {
       // Handle paired relocations together.  The second half must exist;
       // check before dereferencing so that a truncated pair at the end of
       // the list is diagnosed rather than read past the end.
       ++it;
       if (it == e)
         return llvm::make_error<GenericError>(
                                     Twine("paired relocation (r_address=")
                                     + Twine::utohexstr(reloc.offset)
                                     + ", r_type=" + Twine(reloc.type)
                                     + ") in section "
                                     + section.segmentName + "/"
                                     + section.sectionName
                                     + " is missing its second relocation");
       const Relocation &reloc2 = *it;
       auto relocErr = handler.getPairReferenceInfo(
           reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
           atomByAddr, atomBySymbol, &kind, &target, &addend);
       if (relocErr) {
         // Wrap the handler's message with the details of both relocations.
         return handleErrors(std::move(relocErr),
                             [&](std::unique_ptr<GenericError> GE) {
           return llvm::make_error<GenericError>(
             Twine("bad relocation (") + GE->getMessage()
              + ") in section "
              + section.segmentName + "/" + section.sectionName
              + " (r1_address=" + Twine::utohexstr(reloc.offset)
              + ", r1_type=" + Twine(reloc.type)
              + ", r1_extern=" + Twine(reloc.isExtern)
              + ", r1_length=" + Twine((int)reloc.length)
              + ", r1_pcrel=" + Twine(reloc.pcRel)
              + (!reloc.scattered ? (Twine(", r1_symbolnum=")
                                     + Twine(reloc.symbol))
                                  : (Twine(", r1_scattered=1, r1_value=")
                                     + Twine(reloc.value)))
              + ")"
              + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
              + ", r2_type=" + Twine(reloc2.type)
              + ", r2_extern=" + Twine(reloc2.isExtern)
              + ", r2_length=" + Twine((int)reloc2.length)
              + ", r2_pcrel=" + Twine(reloc2.pcRel)
              + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
                                      + Twine(reloc2.symbol))
                                   : (Twine(", r2_scattered=1, r2_value=")
                                      + Twine(reloc2.value)))
              + ")" );
           });
       }
     }
     else {
       // Use ArchHandler to convert relocation record into information
       // needed to instantiate an lld::Reference object.
       auto relocErr = handler.getReferenceInfo(
           reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
           atomBySymbol, &kind, &target, &addend);
       if (relocErr) {
         // Wrap the handler's message with the details of the relocation.
         return handleErrors(std::move(relocErr),
                             [&](std::unique_ptr<GenericError> GE) {
           return llvm::make_error<GenericError>(
             Twine("bad relocation (") + GE->getMessage()
              + ") in section "
              + section.segmentName + "/" + section.sectionName
              + " (r_address=" + Twine::utohexstr(reloc.offset)
              + ", r_type=" + Twine(reloc.type)
              + ", r_extern=" + Twine(reloc.isExtern)
              + ", r_length=" + Twine((int)reloc.length)
              + ", r_pcrel=" + Twine(reloc.pcRel)
              + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
                                  : (Twine(", r_scattered=1, r_value=")
                                     + Twine(reloc.value)))
              + ")" );
           });
       }
     }
     // Instantiate an lld::Reference object and add to its atom.
     inAtom->addReference(Reference::KindNamespace::mach_o,
                          handler.kindArch(),
                          kind, offsetInAtom, target, addend);
   }

   return llvm::Error::success();
 }

 /// Returns true if \p section is flagged S_ATTR_DEBUG and lives in the
 /// "__DWARF" segment.
 bool isDebugInfoSection(const Section &section) {
   const bool flaggedAsDebug = (section.attributes & S_ATTR_DEBUG) != 0;
   return flaggedAsDebug && section.segmentName.equals("__DWARF");
 }

 /// Searches the defined atoms of \p file, in order, for one called \p name.
 /// Returns the first such atom, or nullptr if there is none.
 static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
   // Render the Twine once; it is compared against every atom name.
   const std::string wanted = name.str();
   for (auto *candidate : file.defined()) {
     if (candidate->name() != wanted)
       continue;
     return candidate;
   }
   return nullptr;
 }

 /// Copies \p str into memory owned by \p alloc and appends a terminating
 /// zero byte.
 ///
 /// \returns a StringRef built from the zero terminated copy, so its length
 ///          ends at the first zero byte of the copy.
 static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
   char *strCopy = alloc.Allocate<char>(str.size() + 1);
   // An empty StringRef may have a null data pointer, and memcpy must not be
   // given a null source even when the length is zero.
   if (!str.empty())
     memcpy(strCopy, str.data(), str.size());
   strCopy[str.size()] = '\0';
   return strCopy;
 }

 /// Builds the StabsDebugInfo of \p file from the stabs symbols recorded in
 /// \p normalizedFile.
 ///
 /// The symbols are walked with a two-state machine:
 ///  - in the `start` state only N_BNSYM, N_SO, N_OSO and N_GSYM are accepted;
 ///    N_BNSYM switches to `inBeginEnd` and binds the following stabs to the
 ///    atom covering the BNSYM address;
 ///  - in the `inBeginEnd` state only N_FUN and the closing N_ENSYM are
 ///    accepted; N_ENSYM switches back to `start`.
 ///
 /// \param copyRefs when true, stab strings are copied into a BumpPtrAllocator
 ///        whose ownership is handed to the file's debug info object.
 /// \returns success (also when there are no stabs at all), or a GenericError
 ///          for an unexpected stab type or a stab whose atom cannot be found.
 llvm::Error parseStabs(MachOFile &file,
                        const NormalizedFile &normalizedFile,
                        bool copyRefs) {

   if (normalizedFile.stabsSymbols.empty())
     return llvm::Error::success();

   // FIXME: Kill this off when we can move to sane yaml parsing.
   std::unique_ptr<BumpPtrAllocator> allocator;
   if (copyRefs)
     allocator = std::make_unique<BumpPtrAllocator>();

   // Yields the string to store in a stab: a private copy when copyRefs is
   // set, otherwise the string as found in the normalized file.
   auto stabString = [&](StringRef str) -> StringRef {
     return copyRefs ? copyDebugString(str, *allocator) : str;
   };

   enum { start, inBeginEnd } state = start;

   const Atom *currentAtom = nullptr;
   uint64_t currentAtomAddress = 0;
   StabsDebugInfo::StabsList stabsList;
   for (const auto &stabSym : normalizedFile.stabsSymbols) {
     Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
               stabSym.value, stabSym.name);
     switch (state) {
     case start:
       switch (static_cast<StabType>(stabSym.type)) {
       case N_BNSYM: {
         state = inBeginEnd;
         currentAtomAddress = stabSym.value;
         // Start from zero so the addend test below never reads an
         // indeterminate value, whatever the lookup does on failure.
         Reference::Addend addend = 0;
         currentAtom = findAtomCoveringAddress(normalizedFile, file,
                                               currentAtomAddress, addend);
         if (!currentAtom) {
           // FIXME: ld64 just issues a warning here - should we match that?
           return llvm::make_error<GenericError>(
                    "can't find atom for stabs BNSYM at " +
                    Twine::utohexstr(stabSym.value) + " in " + file.path());
         }
         if (addend != 0)
           return llvm::make_error<GenericError>(
                    "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
                    file.path());
         stab.atom = currentAtom;
         break;
       }
       case N_SO:
       case N_OSO:
         // Not associated with an atom, just copy.
         stab.str = stabString(stabSym.name);
         break;
       case N_GSYM: {
         // When the name contains ':', look up "_" followed by the part before
         // the colon; otherwise look up the name unchanged.
         auto colonIdx = stabSym.name.find(':');
         if (colonIdx != StringRef::npos) {
           StringRef name = stabSym.name.substr(0, colonIdx);
           currentAtom = findDefinedAtomByName(file, "_" + name);
         } else {
           currentAtom = findDefinedAtomByName(file, stabSym.name);
         }
         stab.atom = currentAtom;
         stab.str = stabString(stabSym.name);
         if (stab.atom == nullptr)
           return llvm::make_error<GenericError>(
                    "can't find atom for N_GSYM stabs '" + stabSym.name +
                    "' in " + file.path());
         break;
       }
       case N_FUN:
         return llvm::make_error<GenericError>(
                  "old-style N_FUN stab '" + stabSym.name + "' unsupported");
       default:
         return llvm::make_error<GenericError>(
                  "unrecognized stab symbol '" + stabSym.name + "'");
       }
       break;
     case inBeginEnd:
       stab.atom = currentAtom;
       switch (static_cast<StabType>(stabSym.type)) {
       case N_ENSYM:
         state = start;
         currentAtom = nullptr;
         break;
       case N_FUN:
         // Just copy the string.
         stab.str = stabString(stabSym.name);
         break;
       default:
         return llvm::make_error<GenericError>(
                  "unrecognized stab symbol '" + stabSym.name + "'");
       }
     }
     // Diagnostic output only; must not reach stderr in normal links.
     LLVM_DEBUG(llvm::dbgs() << "Adding to stabsList: " << stab << "\n");
     stabsList.push_back(stab);
   }

   file.setDebugInfo(std::make_unique<StabsDebugInfo>(std::move(stabsList)));

   // FIXME: Kill this off when we fix YAML memory ownership.
   file.debugInfo()->setAllocator(std::move(allocator));

   return llvm::Error::success();
 }

 /// Wraps the raw content of section \p S in a DataExtractor. Endianness and
 /// address size (8 or 4 bytes) are derived from the architecture of
 /// \p normalizedFile.
 static llvm::DataExtractor
 dataExtractorFromSection(const NormalizedFile &normalizedFile,
                          const Section &S) {
   const bool wide = MachOLinkingContext::is64Bit(normalizedFile.arch);
   const bool bigEndian = MachOLinkingContext::isBigEndian(normalizedFile.arch);
   const char *bytes = reinterpret_cast<const char*>(S.content.data());
   const StringRef payload(bytes, S.content.size());
   // DataExtractor wants an "is little endian" flag.
   return llvm::DataExtractor(payload, !bigEndian, wide ? 8 : 4);
 }

 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 //        inspection" code if possible.
 /// Walks the abbreviation declarations in \p abbrevData starting at offset 0
 /// and returns the offset just past the first abbreviation code equal to
 /// \p abbrCode (i.e. where that declaration's tag begins).
 ///
 /// If the data is exhausted before a matching code is seen, the walk stops
 /// and the offset reached so far is returned; reads by the caller at that
 /// offset then fail instead of this function looping forever.
 static uint64_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
                                   uint64_t abbrCode) {
   uint64_t offset = 0;
   while (true) {
     const uint64_t codeStart = offset;
     const uint64_t curCode = abbrevData.getULEB128(&offset);
     if (curCode == abbrCode)
       break;
     // A successful ULEB128 read consumes at least one byte. An unchanged
     // offset means nothing could be decoded (end of data / malformed input),
     // so every further read would return 0 without making progress.
     if (offset == codeStart)
       break;
     // Tag
     abbrevData.getULEB128(&offset);
     // DW_CHILDREN
     abbrevData.getU8(&offset);
     // Attributes: (name, form) pairs terminated by a (0, 0) pair. Bitwise OR
     // is intentional so that both values are always read.
     while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
       ;
   }
   return offset;
 }

 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 //        inspection" code if possible.
 /// Reads a string attribute value from \p infoData at \p infoOffset,
 /// advancing \p infoOffset past the value.
 ///
 /// DW_FORM_string values are read in place; DW_FORM_strp values are read as
 /// a 32-bit offset that is then resolved inside \p stringsSection. Any other
 /// form yields a GenericError.
 static Expected<const char *>
 getIndexedString(const NormalizedFile &normalizedFile,
                  llvm::dwarf::Form form, llvm::DataExtractor infoData,
                  uint64_t &infoOffset, const Section &stringsSection) {
   // Inline string: the characters live directly in the info data.
   if (form == llvm::dwarf::DW_FORM_string)
     return infoData.getCStr(&infoOffset);

   if (form == llvm::dwarf::DW_FORM_strp) {
     // Indirect string: the info data holds an offset into the string section.
     uint64_t offsetInStrings = infoData.getU32(&infoOffset);
     llvm::DataExtractor stringTable =
       dataExtractorFromSection(normalizedFile, stringsSection);
     return stringTable.getCStr(&offsetInStrings);
   }

   return llvm::make_error<GenericError>(
       "string field encoded without DW_FORM_strp");
 }

 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 //        inspection" code if possible.
 /// Reads the first compile unit header and its top-level DIE from \p info and
 /// returns the unit's DW_AT_name and DW_AT_comp_dir as a
 /// TranslationUnitSource.
 ///
 /// \param abbrev  section holding the abbreviation table used to decode the
 ///                DIE's attributes.
 /// \param strings section used to resolve DW_FORM_strp values.
 /// \param path    only used in error messages.
 /// \returns the parsed source info, or a GenericError for a reserved unit
 ///          length, a DWARF version outside 2..4, a top-level DIE that is not
 ///          a DW_TAG_compile_unit, or a string attribute in an unsupported
 ///          form.
 static llvm::Expected<TranslationUnitSource>
 readCompUnit(const NormalizedFile &normalizedFile,
              const Section &info,
              const Section &abbrev,
              const Section &strings,
              StringRef path) {
   uint64_t offset = 0;
   llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
   auto infoData = dataExtractorFromSection(normalizedFile, info);
   uint32_t length = infoData.getU32(&offset);
   if (length == llvm::dwarf::DW_LENGTH_DWARF64) {
     // The escape value is followed by the real 64-bit unit length.
     Format = llvm::dwarf::DwarfFormat::DWARF64;
     infoData.getU64(&offset);
   }
   else if (length >= llvm::dwarf::DW_LENGTH_lo_reserved)
     return llvm::make_error<GenericError>("Malformed DWARF in " + path);

   uint16_t version = infoData.getU16(&offset);

   if (version < 2 || version > 4)
     return llvm::make_error<GenericError>("Unsupported DWARF version in " +
                                           path);

   // Abbrev offset (should be zero). The field is as wide as the unit's
   // offset size: 8 bytes in DWARF64, 4 bytes in DWARF32.
   if (Format == llvm::dwarf::DwarfFormat::DWARF64)
     infoData.getU64(&offset);
   else
     infoData.getU32(&offset);
   uint8_t addrSize = infoData.getU8(&offset);

   uint32_t abbrCode = infoData.getULEB128(&offset);
   auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
   uint64_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
   uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
   if (tag != llvm::dwarf::DW_TAG_compile_unit)
     return llvm::make_error<GenericError>("top level DIE is not a compile unit");
   // DW_CHILDREN
   abbrevData.getU8(&abbrevOffset);
   llvm::dwarf::FormParams formParams = {version, addrSize, Format};
   TranslationUnitSource tu;
   // FIXME: getIndexedString() reads DW_FORM_strp as a 32-bit offset, which
   //        does not match the DWARF64 encoding.
   while (true) {
     // The attribute name precedes its form in the abbreviation table. Read
     // them in two separate statements: as operands of a single `|` their
     // evaluation order would be unspecified.
     const uint32_t name = abbrevData.getULEB128(&abbrevOffset);
     const auto form =
         static_cast<llvm::dwarf::Form>(abbrevData.getULEB128(&abbrevOffset));
     // A (0, 0) pair terminates the attribute list.
     if (name == 0 && form == 0)
       break;

     switch (name) {
     case llvm::dwarf::DW_AT_name: {
       if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
                                         strings))
         tu.name = *eName;
       else
         return eName.takeError();
       break;
     }
     case llvm::dwarf::DW_AT_comp_dir: {
       if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
                                         strings))
         tu.path = *eName;
       else
         return eName.takeError();
       break;
     }
     default:
       // Not interesting: step over the value so the next attribute is read
       // from the right place.
       llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
     }
   }
   return tu;
 }

 /// Attaches debug info to \p file.
 ///
 /// If \p normalizedFile has no __DWARF/__debug_info section the stabs symbols
 /// are parsed instead (see parseStabs). Otherwise the first compile unit is
 /// read and stored as DwarfDebugInfo. An empty __debug_info section results
 /// in no debug info and no error.
 ///
 /// \param copyRefs when true, the compile unit's name and path are copied
 ///        into an allocator owned by the debug info object.
 /// \returns success, or a GenericError when __debug_info is too small,
 ///          __debug_abbrev is missing, or the compile unit cannot be read.
 llvm::Error parseDebugInfo(MachOFile &file,
                            const NormalizedFile &normalizedFile, bool copyRefs) {

   // Find the interesting debug info sections.
   const Section *debugInfo = nullptr;
   const Section *debugAbbrev = nullptr;
   const Section *debugStrings = nullptr;

   for (auto &s : normalizedFile.sections) {
     if (s.segmentName == "__DWARF") {
       if (s.sectionName == "__debug_info")
         debugInfo = &s;
       else if (s.sectionName == "__debug_abbrev")
         debugAbbrev = &s;
       else if (s.sectionName == "__debug_str")
         debugStrings = &s;
     }
   }

   if (!debugInfo)
     return parseStabs(file, normalizedFile, copyRefs);

   if (debugInfo->content.size() == 0)
     return llvm::Error::success();

   if (debugInfo->content.size() < 12)
     return llvm::make_error<GenericError>("Malformed __debug_info section in " +
                                           file.path() + ": too small");

   if (!debugAbbrev)
     return llvm::make_error<GenericError>("Missing __debug_abbrev section in " +
                                           file.path());

   // readCompUnit() takes the string section by reference. __debug_str is
   // optional (strings may be stored inline), so substitute an empty section
   // rather than dereferencing a null pointer when it is absent.
   Section noStrings;
   const Section &strings = debugStrings ? *debugStrings : noStrings;

   if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
                                   strings, file.path())) {
     // FIXME: Kill off allocator and code under 'copyRefs' when we fix YAML
     //        memory ownership.
     std::unique_ptr<BumpPtrAllocator> allocator;
     if (copyRefs) {
       allocator = std::make_unique<BumpPtrAllocator>();
       tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
       tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
     }
     file.setDebugInfo(std::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
     if (copyRefs)
       file.debugInfo()->setAllocator(std::move(allocator));
   } else
     return tuOrErr.takeError();

   return llvm::Error::success();
 }

 /// Reads a pointer-sized value at \p addr: 64 bits when \p is64 is set,
 /// otherwise 32 bits interpreted as signed and widened to int64_t.
 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
   if (!is64) {
     // Go through a signed 32-bit temporary so the widening keeps the sign.
     const int32_t narrow = read32(addr, isBig);
     return narrow;
   }
   return read64(addr, isBig);
 }

 /// --- Augmentation String Processing ---

 /// Facts collected while parsing a CIE's augmentation string. Offsets are
 /// relative to the start of the augmentation data; ~0U means "not seen".
 struct CIEInfo {
   /// True when the augmentation string starts with 'z'.
   bool _augmentationDataPresent{false};
   /// True when the augmentation string contains "eh".
   bool _mayHaveEH{false};
   /// Offset recorded for the 'L' augmentation.
   uint32_t _offsetOfLSDA{~0U};
   /// Offset recorded for the 'P' augmentation.
   uint32_t _offsetOfPersonality{~0U};
   /// Offset recorded for the 'R' augmentation.
   uint32_t _offsetOfFDEPointerEncoding{~0U};
   /// Total augmentation data length implied by the augmentation string.
   uint32_t _augmentationDataLength{~0U};
 };

 /// Maps a CIE atom to the information parsed from its augmentation string.
 using CIEInfoMap = llvm::DenseMap<const MachODefinedAtom*, CIEInfo>;

 /// Parses the NUL-terminated CIE augmentation string at \p augStr.
 ///
 /// \param cieInfo receives the offsets (within the augmentation data) implied
 ///        by the 'L', 'P' and 'R' characters, the "eh" flag and the total
 ///        augmentation data length.
 /// \param len receives the number of bytes occupied by the string, including
 ///        its terminator.
 /// \returns success, or a GenericError when a non-empty string does not start
 ///          with 'z' or an 'e' is not followed by 'h'.
 static llvm::Error processAugmentationString(const uint8_t *augStr,
                                              CIEInfo &cieInfo,
                                              unsigned &len) {

   // Empty string: no augmentation data, only the terminator is consumed.
   if (augStr[0] == '\0') {
     len = 1;
     return llvm::Error::success();
   }

   if (augStr[0] != 'z')
     return llvm::make_error<GenericError>("expected 'z' at start of "
                                           "augmentation string");

   cieInfo._augmentationDataPresent = true;

   // Running size of the augmentation data described so far.
   uint32_t dataOffset = 0;
   uint64_t pos = 1;
   for (uint8_t ch = augStr[pos]; ch != '\0'; ch = augStr[pos]) {
     switch (ch) {
     case 'L':
       cieInfo._offsetOfLSDA = dataOffset;
       // One byte of augmentation data.
       dataOffset += 1;
       ++pos;
       break;
     case 'P':
       cieInfo._offsetOfPersonality = dataOffset;
       // One byte for the encoding followed by the pointer itself.
       // FIXME: We are assuming 4 is correct here for the pointer size as we
       // always currently use delta32ToGOT.
       dataOffset += 5;
       ++pos;
       break;
     case 'R':
       cieInfo._offsetOfFDEPointerEncoding = dataOffset;
       // One byte of augmentation data.
       dataOffset += 1;
       ++pos;
       break;
     case 'e':
       if (augStr[pos + 1] != 'h')
         return llvm::make_error<GenericError>("expected 'eh' in "
                                               "augmentation string");
       cieInfo._mayHaveEH = true;
       pos += 2;
       break;
     default:
       // Any other character is skipped without contributing data.
       ++pos;
       break;
     }
   }

   cieInfo._augmentationDataLength = dataOffset;

   len = pos + 1;
   return llvm::Error::success();
 }

 /// Parses the CIE stored in \p atom and records the result in \p cieInfos.
 ///
 /// The augmentation string is always parsed. When it declares a personality
 /// function ('P'), the remaining header fields are walked to find the offset
 /// of the personality pointer inside the atom; an existing reference is then
 /// validated against that offset, or a reference is synthesized when the atom
 /// has none.
 ///
 /// \param ehFrameSection the __eh_frame section containing \p atom.
 /// \param offset passed by the caller; added to the section address to form
 ///        the address of the personality field.
 /// \returns success, or a GenericError for a malformed augmentation string,
 ///          an augmentation length mismatch, or unexpected references.
 static llvm::Error processCIE(const NormalizedFile &normalizedFile,
                               MachOFile &file,
                               mach_o::ArchHandler &handler,
                               const Section *ehFrameSection,
                               MachODefinedAtom *atom,
                               uint64_t offset,
                               CIEInfoMap &cieInfos) {
   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
   const uint8_t *frameData = atom->rawContent().data();

   CIEInfo cieInfo;

   // A length of 0xffffffff means an extended 64-bit length follows, which
   // pushes the ID field (and everything after it) 8 bytes further.
   uint32_t size = read32(frameData, isBig);
   uint64_t cieIDField = size == 0xffffffffU
                           ? sizeof(uint32_t) + sizeof(uint64_t)
                           : sizeof(uint32_t);
   uint64_t versionField = cieIDField + sizeof(uint32_t);
   uint64_t augmentationStringField = versionField + sizeof(uint8_t);

   unsigned augmentationStringLength = 0;
   if (auto err = processAugmentationString(frameData + augmentationStringField,
                                            cieInfo, augmentationStringLength))
     return err;

   if (cieInfo._offsetOfPersonality != ~0U) {
     // If we have augmentation data for the personality function, then we may
     // need to implicitly generate its relocation.

     // Parse the EH Data field which is pointer sized.
     uint64_t EHDataField = augmentationStringField + augmentationStringLength;
     const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
     unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);

     // Parse Code Align Factor which is a ULEB128.
     uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
     unsigned lengthFieldSize = 0;
     llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);

     // Parse Data Align Factor which is a SLEB128.
     uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
     llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);

     // Parse Return Address Register which is a byte.
     uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;

     // Parse the augmentation length which is a ULEB128.
     uint64_t AugmentationLengthField = ReturnAddressField + 1;
     uint64_t AugmentationLength =
       llvm::decodeULEB128(frameData + AugmentationLengthField,
                           &lengthFieldSize);

     // The length stored in the CIE must agree with the one implied by the
     // augmentation string.
     if (AugmentationLength != cieInfo._augmentationDataLength)
       return llvm::make_error<GenericError>("CIE augmentation data length "
                                             "mismatch");

     // Get the start address of the augmentation data.
     uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;

     // Parse the personality function from the augmentation data.
     uint64_t PersonalityField =
       AugmentationDataField + cieInfo._offsetOfPersonality;

     // Parse the personality encoding.
     // FIXME: Verify that this is a 32-bit pcrel offset.
     uint64_t PersonalityFunctionField = PersonalityField + 1;

     if (atom->begin() != atom->end()) {
       // If we have an explicit relocation, then make sure it matches this
       // offset as this is where we'd expect it to be applied to.
       DefinedAtom::reference_iterator CurrentRef = atom->begin();
       if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
         return llvm::make_error<GenericError>("CIE personality reloc at "
                                               "wrong offset");

       if (++CurrentRef != atom->end())
         return llvm::make_error<GenericError>("CIE contains too many relocs");
     } else {
       // Implicitly generate the personality function reloc.  It's assumed to
       // be a delta32 offset to a GOT entry.
       // FIXME: Parse the encoding and check this.
       int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
       uint64_t funcAddress = ehFrameSection->address + offset +
                              PersonalityFunctionField;
       funcAddress += funcDelta;

       // NOTE(review): the lookup result is not checked before it is used as
       // the reference target, and 'addend' has no initializer -- confirm
       // what findAtomCoveringAddress() guarantees when nothing is found.
       const MachODefinedAtom *func = nullptr;
       Reference::Addend addend;
       func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
                                      addend);
       atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
                          handler.unwindRefToPersonalityFunctionKind(),
                          PersonalityFunctionField, func, addend);
     }
   } else if (atom->begin() != atom->end()) {
     // Otherwise, we expect there to be no relocations in this atom as the only
     // relocation would have been to the personality function.
     return llvm::make_error<GenericError>("unexpected relocation in CIE");
   }


   // Remember what was learned so that FDEs referring to this CIE can use it.
   cieInfos[atom] = std::move(cieInfo);

   return llvm::Error::success();
 }

 /// Parses the FDE stored in \p atom and makes sure it carries references to
 /// its CIE, to the function it describes and, when the CIE declares one and
 /// the FDE has augmentation data, to its LSDA.
 ///
 /// References already present on the atom are reused (after sorting);
 /// missing ones are synthesized from the addresses encoded in the FDE.
 ///
 /// \param ehFrameSection the __eh_frame section containing \p atom.
 /// \param offset passed by the caller; added to the section address to form
 ///        the addresses of the FDE's fields.
 /// \param cieInfos information about the CIEs processed so far.
 /// \returns success, or a GenericError when the FDE's CIE field does not lead
 ///          to a CIE that has already been processed.
 static llvm::Error processFDE(const NormalizedFile &normalizedFile,
                               MachOFile &file,
                               mach_o::ArchHandler &handler,
                               const Section *ehFrameSection,
                               MachODefinedAtom *atom,
                               uint64_t offset,
                               const CIEInfoMap &cieInfos) {

   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
   const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);

   // Compiler wasn't lazy and actually told us what it meant.
   // Unfortunately, the compiler may not have generated references for all of
   // [cie, func, lsda] and so we still need to parse the FDE and add references
   // for any the compiler didn't generate.
   if (atom->begin() != atom->end())
     atom->sortReferences();

   DefinedAtom::reference_iterator CurrentRef = atom->begin();

   // This helper returns the reference (if one exists) at the offset we are
   // currently processing.  It automatically increments the ref iterator if we
   // do return a ref, and reports a fatal error if we pass over a ref without
   // consuming it.
   auto currentRefGetter = [&CurrentRef,
                            &atom](uint64_t Offset)->const Reference* {
     // If there are no more refs found, then we are done.
     if (CurrentRef == atom->end())
       return nullptr;

     const Reference *Ref = *CurrentRef;

     // If we haven't reached the offset for this reference, then return that
     // we don't yet have a reference to process.
     if (Offset < Ref->offsetInAtom())
       return nullptr;

     // If the offset is equal, then we want to process this ref.
     if (Offset == Ref->offsetInAtom()) {
       ++CurrentRef;
       return Ref;
     }

     // The current ref is at an offset which is earlier than the current
     // offset, then we failed to consume it when we should have.  In this case
     // report a fatal error.
     llvm::report_fatal_error("Skipped reference when processing FDE");
   };

   // Helper to either get the reference at this current location, and verify
   // that it is of the expected type, or add a reference of that type.
   // Returns the reference target.
   auto verifyOrAddReference = [&](uint64_t targetAddress,
                                   Reference::KindValue refKind,
                                   uint64_t refAddress,
                                   bool allowsAddend)->const Atom* {
     if (auto *ref = currentRefGetter(refAddress)) {
       // The compiler already emitted a relocation for the CIE ref.  This should
       // have been converted to the correct type of reference in
       // get[Pair]ReferenceInfo().
       assert(ref->kindValue() == refKind &&
              "Incorrect EHFrame reference kind");
       return ref->target();
     }
     // Zero-initialized so that the value is well defined whatever the lookup
     // does when it finds nothing.
     Reference::Addend addend = 0;
     auto *target = findAtomCoveringAddress(normalizedFile, file,
                                            targetAddress, addend);
     atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
                        refKind, refAddress, target, addend);

     if (!allowsAddend)
       assert(!addend && "EHFrame reference cannot have addend");
     return target;
   };

   const uint8_t *startFrameData = atom->rawContent().data();
   const uint8_t *frameData = startFrameData;

   // A length of 0xffffffff means an extended 64-bit length follows, which
   // pushes the CIE field 8 bytes further.
   uint32_t size = read32(frameData, isBig);
   uint64_t cieFieldInFDE = size == 0xffffffffU
     ? sizeof(uint32_t) + sizeof(uint64_t)
     : sizeof(uint32_t);

   // Linker needs to fixup a reference from the FDE to its parent CIE (a
   // 32-bit byte offset backwards in the __eh_frame section).
   uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
   uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
   cieAddress -= cieDelta;

   auto *cieRefTarget = verifyOrAddReference(cieAddress,
                                             handler.unwindRefToCIEKind(),
                                             cieFieldInFDE, false);

   // The CIE field comes straight from the object file, so a bad value must
   // produce a diagnostic in every build mode rather than only tripping an
   // assertion.
   if (!cieRefTarget)
     return llvm::make_error<GenericError>(
         "FDE's CIE field does not point at the start of a CIE.");
   const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
   if (!cie || cie->contentType() != DefinedAtom::typeCFI)
     return llvm::make_error<GenericError>(
         "FDE's CIE field does not point at the start of a CIE.");

   auto cieInfoIt = cieInfos.find(cie);
   if (cieInfoIt == cieInfos.end())
     return llvm::make_error<GenericError>(
         "FDE refers to a CIE that has not been processed");
   const CIEInfo &cieInfo = cieInfoIt->second;

   // Linker needs to fixup reference from the FDE to the function it's
   // describing. FIXME: there are actually different ways to do this, and the
   // particular method used is specified in the CIE's augmentation fields
   // (hopefully)
   uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);

   int64_t functionFromFDE = readSPtr(is64, isBig,
                                      frameData + rangeFieldInFDE);
   uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
   rangeStart += functionFromFDE;

   verifyOrAddReference(rangeStart,
                        handler.unwindRefToFunctionKind(),
                        rangeFieldInFDE, true);

   // Handle the augmentation data if there is any.
   if (cieInfo._augmentationDataPresent) {
     // First process the augmentation data length field. It follows the two
     // pointer-sized range fields.
     uint64_t augmentationDataLengthFieldInFDE =
       rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
     unsigned lengthFieldSize = 0;
     uint64_t augmentationDataLength =
       llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
                           &lengthFieldSize);

     if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {

       // Look at the augmentation data field.
       uint64_t augmentationDataFieldInFDE =
         augmentationDataLengthFieldInFDE + lengthFieldSize;

       int64_t lsdaFromFDE = readSPtr(is64, isBig,
                                      frameData + augmentationDataFieldInFDE);
       uint64_t lsdaStart =
         ehFrameSection->address + offset + augmentationDataFieldInFDE +
         lsdaFromFDE;

       verifyOrAddReference(lsdaStart,
                            handler.unwindRefToFunctionKind(),
                            augmentationDataFieldInFDE, true);
     }
   }

   return llvm::Error::success();
 }

 /// Visits every atom of the __TEXT/__eh_frame section of \p normalizedFile
 /// and processes it as a CIE or an FDE. Processing stops doing work after the
 /// first error, which is then returned. A file without __eh_frame succeeds
 /// trivially.
 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
                                  MachOFile &file,
                                  mach_o::ArchHandler &handler) {

   // Locate the section; the first match wins.
   const Section *ehFrame = nullptr;
   for (auto &candidate : normalizedFile.sections) {
     if (candidate.segmentName != "__TEXT" ||
         candidate.sectionName != "__eh_frame")
       continue;
     ehFrame = &candidate;
     break;
   }

   // No __eh_frame so nothing to do.
   if (ehFrame == nullptr)
     return llvm::Error::success();

   CIEInfoMap parsedCIEs;
   llvm::Error firstError = llvm::Error::success();

   auto visitAtom = [&](MachODefinedAtom *atom, uint64_t offset) -> void {
     assert(atom->contentType() == DefinedAtom::typeCFI);

     // Once something went wrong, remaining atoms are left untouched.
     if (firstError)
       return;

     const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
     if (!ArchHandler::isDwarfCIE(isBig, atom)) {
       firstError = processFDE(normalizedFile, file, handler, ehFrame,
                               atom, offset, parsedCIEs);
       return;
     }
     firstError = processCIE(normalizedFile, file, handler, ehFrame,
                             atom, offset, parsedCIEs);
   };
   file.eachAtomInSection(*ehFrame, visitAtom);

   return firstError;
 }

 /// Validates the objc_image_info structure held in \p sect and records the
 /// ObjC constraint and Swift version it declares on \p file.
 ///
 /// \returns success, or a GenericError when the section is not exactly 8
 ///          bytes, its version is non-zero, or it requests garbage
 ///          collection.
 llvm::Error parseObjCImageInfo(const Section &sect,
                                const NormalizedFile &normalizedFile,
                                MachOFile &file) {

   //  struct objc_image_info  {
   //    uint32_t  version;  // initially 0
   //    uint32_t  flags;
   //  };

   // All diagnostics share the "<segment>/<section> in file <path>" prefix.
   auto complain = [&](const char *problem) -> llvm::Error {
     return llvm::make_error<GenericError>(sect.segmentName + "/" +
                                           sect.sectionName +
                                           " in file " + file.path() +
                                           problem);
   };

   ArrayRef<uint8_t> bytes = sect.content;
   if (bytes.size() != 8)
     return complain(" should be 8 bytes in size");

   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
   const uint32_t version = read32(bytes.data(), isBig);
   if (version != 0)
     return complain(" should have version=0");

   const uint32_t flags = read32(bytes.data() + 4, isBig);
   const uint32_t gcMask = MachOLinkingContext::objc_supports_gc |
                           MachOLinkingContext::objc_gc_only;
   if ((flags & gcMask) != 0)
     return complain(" uses GC.  This is not supported");

   const bool forSimulator =
     (flags & MachOLinkingContext::objc_retainReleaseForSimulator) != 0;
   if (forSimulator)
     file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
   else
     file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);

   // The Swift version is stored in bits 8..15 of the flags word.
   file.setSwiftVersion((flags >> 8) & 0xFF);

   return llvm::Error::success();
 }

 /// Creates a MachOFile for \p path, fills it from \p normalizedFile and
 /// returns it as an lld::File; any conversion error is returned instead.
 llvm::Expected<std::unique_ptr<lld::File>>
 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
               bool copyRefs) {
   auto objectFile = std::make_unique<MachOFile>(path);
   llvm::Error err =
     normalizedObjectToAtoms(objectFile.get(), normalizedFile, copyRefs);
   if (err)
     return std::move(err);
   return std::unique_ptr<File>(std::move(objectFile));
 }

 /// Creates a MachODylibFile for \p path, fills it from \p normalizedFile and
 /// returns it as an lld::File; any conversion error is returned instead.
 llvm::Expected<std::unique_ptr<lld::File>>
 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
              bool copyRefs) {
   // Instantiate SharedLibraryFile object.
   auto dylibFile = std::make_unique<MachODylibFile>(path);
   llvm::Error err =
     normalizedDylibToAtoms(dylibFile.get(), normalizedFile, copyRefs);
   if (err)
     return std::move(err);
   return std::unique_ptr<File>(std::move(dylibFile));
 }

 } // anonymous namespace

 namespace normalized {

 /// Returns true for the two sections that hold an objc_image_info struct:
 /// __OBJC/__image_info and __DATA/__objc_imageinfo.
 static bool isObjCImageInfo(const Section &sect) {
   if (sect.segmentName == "__OBJC")
     return sect.sectionName == "__image_info";
   if (sect.segmentName == "__DATA")
     return sect.sectionName == "__objc_imageinfo";
   return false;
 }

 /// Populates \p file with the atoms and references described by
 /// \p normalizedFile.
 ///
 /// In order: atoms are created from every non-debug section (ObjC image info
 /// sections are parsed instead of atomized), undefined symbols become
 /// undefined or tentative-definition atoms, relocations are converted to
 /// references, arch-specific and __eh_frame references are added,
 /// data-in-code entries are turned into transition references, file
 /// attributes are cached, references are sorted, and finally debug info is
 /// parsed.
 ///
 /// \param copyRefs forwarded to the atom-creation and debug-info helpers.
 /// \returns success, or the first error reported by any of the steps.
 llvm::Error
 normalizedObjectToAtoms(MachOFile *file,
                         const NormalizedFile &normalizedFile,
                         bool copyRefs) {
   LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
                           << file->path() << "\n");
   bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);

   // Create atoms from each section.
   for (auto &sect : normalizedFile.sections) {

     // Debug-info sections do not become atoms; they are consumed by
     // parseDebugInfo() at the end of this function.
     if (isDebugInfoSection(sect))
       continue;

     // If the file contains an objc_image_info struct, then we should parse the
     // ObjC flags and Swift version.
     if (isObjCImageInfo(sect)) {
       if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
         return ec;
       // We then skip adding atoms for this section as we use the ObjCPass to
       // re-emit this data after it has been aggregated for all files.
       continue;
     }

     bool customSectionName;
     DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
                                                             customSectionName);
     if (auto ec =  processSection(atomType, sect, customSectionName,
                                   normalizedFile, *file, scatterable, copyRefs))
       return ec;
   }
   // Create atoms from undefined symbols.
   for (auto &sym : normalizedFile.undefinedSymbols) {
     // Undefined symbols with n_value != 0 are actually tentative definitions.
     if (sym.value == Hex64(0)) {
       file->addUndefinedAtom(sym.name, copyRefs);
     } else {
       // The log2 alignment occupies only the low four bits of the high byte
       // of n_desc (GET_COMM_ALIGN). Masking also keeps the shift count in
       // range for descriptors with stray high bits.
       file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
                                 DefinedAtom::Alignment(
                                     1 << ((sym.desc >> 8) & 0x0f)),
                                 copyRefs);
     }
   }

   // Convert mach-o relocations to References
   std::unique_ptr<mach_o::ArchHandler> handler
                                      = ArchHandler::create(normalizedFile.arch);
   for (auto &sect : normalizedFile.sections) {
     if (isDebugInfoSection(sect))
       continue;
     if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
                                        *file, *handler))
       return ec;
   }

   // Add additional arch-specific References
   file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
     handler->addAdditionalReferences(*atom);
   });

   // Each __eh_frame section needs references to both __text (the function we're
   // providing unwind info for) and itself (FDE -> CIE). These aren't
   // represented in the relocations on some architectures, so we have to add
   // them back in manually there.
   if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
     return ec;

   // Process mach-o data-in-code regions array. That information is encoded in
   // atoms as References at each transition point.
   unsigned nextIndex = 0;
   for (const DataInCode &entry : normalizedFile.dataInCode) {
     ++nextIndex;
     const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
     if (!s) {
       return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
                                                   + Twine(entry.offset)
                                                   + ") is not in any section"));
     }
     uint64_t offsetInSect = entry.offset - s->address;
     uint32_t offsetInAtom = 0;
     MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
                                                            &offsetInAtom);
     // The entry comes from the object file; report an address that no atom
     // covers instead of dereferencing a null atom below.
     if (!atom) {
       return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
                                                   + Twine(entry.offset)
                                                   + ") is not in any atom"));
     }
     if (offsetInAtom + entry.length > atom->size()) {
       return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
                                                   "(offset="
                                                   + Twine(entry.offset)
                                                   + ", length="
                                                   + Twine(entry.length)
                                                   + ") crosses atom boundary."));
     }
     // Add reference that marks start of data-in-code.
     atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
                        handler->dataInCodeTransitionStart(*atom),
                        offsetInAtom, atom, entry.kind);

     // Peek at next entry, if it starts where this one ends, skip ending ref.
     if (nextIndex < normalizedFile.dataInCode.size()) {
       const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
       if (nextEntry.offset == (entry.offset + entry.length))
         continue;
     }

     // If data goes to end of function, skip ending ref.
     if ((offsetInAtom + entry.length) == atom->size())
       continue;

     // Add reference that marks end of data-in-code.
     atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
                        handler->dataInCodeTransitionEnd(*atom),
                        offsetInAtom+entry.length, atom, 0);
   }

   // Cache some attributes on the file for use later.
   file->setFlags(normalizedFile.flags);
   file->setArch(normalizedFile.arch);
   file->setOS(normalizedFile.os);
   file->setMinVersion(normalizedFile.minOSverson);
   file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);

   // Sort references in each atom to their canonical order.
   for (const DefinedAtom* defAtom : file->defined()) {
     reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
   }

   if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
     return err;

   return llvm::Error::success();
 }

 /// Fills in a MachODylibFile from the normalized form of a dylib.
 ///
 /// Transfers the install name and the compatibility/current versions, then
 /// registers every exported symbol and every re-exported dependent dylib with
 /// \p file.
 ///
 /// \param file            dylib file object to populate.
 /// \param normalizedFile  normalized mach-o data to read from.
 /// \param copyRefs        forwarded to addExportedSymbol() for names taken
 ///                        from the global symbol table; names taken from the
 ///                        export info always pass true instead.
 /// \returns llvm::Error::success(); this function has no failure path.
 llvm::Error
 normalizedDylibToAtoms(MachODylibFile *file,
                        const NormalizedFile &normalizedFile,
                        bool copyRefs) {
   file->setInstallName(normalizedFile.installName);
   file->setCompatVersion(normalizedFile.compatVersion);
   file->setCurrentVersion(normalizedFile.currentVersion);

   // Register the exported symbols. The exports trie, when present, takes
   // precedence over the traditional symbol table.
   const bool haveExportsTrie = !normalizedFile.exportInfo.empty();
   if (haveExportsTrie) {
     for (const Export &entry : normalizedFile.exportInfo) {
       const bool isWeak =
           (entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) != 0;
       // StringRefs from export iterator are ephemeral, so force copy.
       file->addExportedSymbol(entry.name, isWeak, true);
     }
   } else {
     for (const auto &global : normalizedFile.globalSymbols) {
       assert((global.scope & N_EXT) && "only expect external symbols here");
       const bool isWeak = (global.desc & N_WEAK_DEF) != 0;
       file->addExportedSymbol(global.name, isWeak, copyRefs);
     }
   }

   // Register the re-exported dylibs; every other kind of dependent dylib is
   // skipped.
   for (const DependentDylib &dylib : normalizedFile.dependentDylibs) {
     if (dylib.kind != llvm::MachO::LC_REEXPORT_DYLIB)
       continue;
     file->addReExportedDylib(dylib.path);
   }

   return llvm::Error::success();
 }

 /// Reverse lookup from an atom content type to the section that holds it.
 ///
 /// Walks sectsToAtomType up to its terminating typeUnknown entry and uses the
 /// first entry whose atom type equals \p atomType and whose segment and
 /// section names are both non-empty.
 ///
 /// \param atomType      content type to look up.
 /// \param segmentName   [out] segment name of the matching entry.
 /// \param sectionName   [out] section name of the matching entry.
 /// \param sectionType   [out] section type of the matching entry.
 /// \param sectionAttrs  [out] S_ATTR_PURE_INSTRUCTIONS for typeCode,
 ///                      otherwise 0.
 /// \param relocsToDefinedCanBeImplicit  [out] true only for typeCFI.
 ///
 /// Reaches llvm_unreachable if no table entry matches.
 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
                                           StringRef &segmentName,
                                           StringRef &sectionName,
                                           SectionType &sectionType,
                                           SectionAttr &sectionAttrs,
                                           bool &relocsToDefinedCanBeImplicit) {
   for (const MachORelocatableSectionToAtomType *entry = sectsToAtomType;
        entry->atomType != DefinedAtom::typeUnknown; ++entry) {
     // Wild carded entries (an empty segment or section name) are ignored
     // for reverse lookups.
     const bool usable = entry->atomType == atomType &&
                         !entry->segmentName.empty() &&
                         !entry->sectionName.empty();
     if (!usable)
       continue;

     segmentName = entry->segmentName;
     sectionName = entry->sectionName;
     sectionType = entry->sectionType;

     if (atomType == DefinedAtom::typeCode)
       sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
     else
       sectionAttrs = 0;

     relocsToDefinedCanBeImplicit = (atomType == DefinedAtom::typeCFI);
     return;
   }
   llvm_unreachable("content type not yet supported");
 }

 /// Builds an lld::File from a normalized mach-o file.
 ///
 /// Dispatches on the normalized file type: MH_DYLIB and MH_DYLIB_STUB are
 /// handed to dylibToAtoms(), MH_OBJECT is handed to objectToAtoms().
 ///
 /// \param normalizedFile  normalized mach-o data to convert.
 /// \param path            forwarded unchanged to the selected routine.
 /// \param copyRefs        forwarded unchanged to the selected routine.
 /// \returns the result of the selected routine, or a GenericError when the
 ///          file type is none of the three handled above.
 llvm::Expected<std::unique_ptr<lld::File>>
 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
                   bool copyRefs) {
   switch (normalizedFile.fileType) {
   case MH_DYLIB:
   case MH_DYLIB_STUB:
     return dylibToAtoms(normalizedFile, path, copyRefs);
   case MH_OBJECT:
     return objectToAtoms(normalizedFile, path, copyRefs);
   default:
     // The file type is a field of the normalized file, i.e. data rather than
     // a program invariant. Report it through the Expected return value
     // instead of llvm_unreachable, which is undefined behavior in builds
     // without assertions.
     return llvm::make_error<GenericError>(
         Twine("unhandled MachO file type!"));
   }
 }

 } // namespace normalized
 } // namespace mach_o
 } // namespace lld