| # -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80: |
| # ===----------------------------------------------------------------------===## |
| # |
| # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| # |
| # ===----------------------------------------------------------------------===## |
| """ |
| extract - A set of functions that extract symbol lists from shared libraries. |
| """ |
| import os.path |
| from os import environ |
| import re |
| import shutil |
| import subprocess |
| import sys |
| |
| from libcxx.sym_check import util |
| |
# Symbol names that the extractors in this module drop from their results.
extract_ignore_names = [
    "_init",
    "_fini",
]
| |
| |
class NMExtractor(object):
    """
    NMExtractor - Pull symbol lists out of libraries by running nm.
    """

    @staticmethod
    def find_tool():
        """
        Look up the nm executable on PATH and return its path (None when it
        cannot be found).
        """
        return shutil.which("nm")

    def __init__(self, static_lib):
        """
        Resolve the nm executable and build the flag list passed to it on
        every extraction. Prints an error and exits with status 1 when nm
        is not available.
        """
        nm_path = self.find_tool()
        self.nm_exe = nm_path
        if nm_path is None:
            # Nothing to run: report and bail out.
            print("ERROR: Could not find nm")
            sys.exit(1)
        self.static_lib = static_lib
        nm_flags = ["-P", "-g"]
        # AIX nm demangles symbols by default, so suppress that.
        if sys.platform.startswith("aix"):
            nm_flags.append("-C")
        self.flags = nm_flags

    def extract(self, lib):
        """
        Run nm on lib, parse each non-blank output line, keep the wanted
        symbols, and hand the de-duplicated, sorted result to
        util.read_syms_from_list.
        """
        command = [self.nm_exe]
        command.extend(self.flags)
        command.append(lib)
        text = subprocess.check_output(command).decode()
        # Symbols are dicts; their repr() strings are used so they can be
        # made unique and sorted.
        unique_reprs = set()
        for line in text.splitlines():
            if not line.strip():
                continue
            parsed = self._extract_sym(line)
            if self._want_sym(parsed):
                unique_reprs.add(repr(parsed))
        # Turn the sorted strings back into symbols.
        return util.read_syms_from_list(sorted(unique_reprs))

    def _extract_sym(self, sym_str):
        """
        Parse one line of nm output into a symbol dict, or return None when
        the line has fewer than two whitespace-separated columns.
        """
        columns = sym_str.split()
        if len(columns) < 2:
            return None
        raw_name, type_letter = columns[0], columns[1]
        parsed = self._transform_sym_type(
            {
                "name": raw_name.replace("@@", "@"),
                "type": type_letter,
                "is_defined": type_letter.lower() != "u",
            }
        )
        # Only OBJECT symbols record a size; it is read from the fourth
        # column as hexadecimal.
        if parsed["type"] == "OBJECT" and len(columns) > 3:
            parsed["size"] = int(columns[3], 16)
        return parsed

    @staticmethod
    def _want_sym(sym):
        """
        Return True when sym is a parsed symbol that should be kept.
        """
        if sym is None or len(sym) < 2:
            return False
        name = sym["name"]
        if name in extract_ignore_names:
            return False
        if sym["type"] in ["t", "b", "r", "d", "w"]:
            return False
        return name not in ["__bss_start", "_end", "_edata"]

    @staticmethod
    def _transform_sym_type(sym):
        """
        Rewrite the single-letter nm type in sym to FUNC or OBJECT. Letters
        outside both groups are left as they are. The dict is modified in
        place and also returned.
        """
        letter = sym["type"]
        if letter in ("T", "W"):
            sym["type"] = "FUNC"
        elif letter in ("B", "D", "R", "V", "S"):
            sym["type"] = "OBJECT"
        return sym
| |
| |
class ReadElfExtractor(object):
    """
    ReadElfExtractor - Extract symbol lists from libraries using readelf.
    """

    @staticmethod
    def find_tool():
        """
        Search for the readelf executable and return the path, or None when
        it is not on PATH.
        """
        return shutil.which("readelf")

    def __init__(self, static_lib):
        """
        Initialize the readelf executable and flags that will be used to
        extract symbols from shared libraries.

        Prints an error and exits with status 1 when readelf cannot be
        found. Raises AssertionError when static_lib is truthy.
        """
        self.tool = self.find_tool()
        if self.tool is None:
            # ERROR no readelf found
            print("ERROR: Could not find readelf")
            sys.exit(1)
        # TODO: Support readelf for reading symbols from archives
        # The text is the assert *message* (after the comma) so that it is
        # reported when the check fails.
        assert not static_lib, "ReadElf does not yet support static libs"
        self.flags = ["--wide", "--symbols"]

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts.
        """
        cmd = [self.tool] + self.flags + [lib]
        out = subprocess.check_output(cmd).decode()
        dyn_syms = self.get_dynsym_table(out)
        return self.process_syms(dyn_syms)

    @staticmethod
    def _parse_size(field):
        """
        Convert the size column of a symbol row to an int. Plain decimal is
        the normal form; a value carrying a 0x prefix is read as hex
        (NOTE(review): GNU readelf is understood to switch to 0x-prefixed
        hex for sizes wider than five decimal digits - confirm).
        """
        if field[:2].lower() == "0x":
            return int(field, 16)
        return int(field)

    def process_syms(self, sym_list):
        """
        Turn rows of the readelf symbol table into symbol dicts.

        Blank rows, rows with only 7 columns (no name column), NOTYPE
        symbols and names listed in extract_ignore_names are skipped.
        FUNC symbols do not keep their size. Raises AssertionError on a row
        with an unexpected column count or symbol type.
        """
        new_syms = []
        for s in sym_list:
            parts = s.split()
            if not parts:
                continue
            assert len(parts) in (7, 8, 9), "unexpected readelf row: %r" % s
            if len(parts) == 7:
                # No name column, nothing to record.
                continue
            new_sym = {
                "name": parts[7],
                "size": self._parse_size(parts[2]),
                "type": parts[3],
                "is_defined": (parts[6] != "UND"),
            }
            assert new_sym["type"] in ["OBJECT", "FUNC", "NOTYPE", "TLS"], (
                "unexpected symbol type: %r" % new_sym["type"]
            )
            if new_sym["name"] in extract_ignore_names:
                continue
            if new_sym["type"] == "NOTYPE":
                continue
            if new_sym["type"] == "FUNC":
                del new_sym["size"]
            new_syms.append(new_sym)
        return new_syms

    def get_dynsym_table(self, out):
        """
        Return the lines of readelf output that form the body of the
        .dynsym table: everything after the table heading and the column
        header line, up to and including the first blank line, or up to the
        next "Symbol table (" heading, or to the end of the output.

        Raises AssertionError when no .dynsym heading is present.
        """
        lines = out.splitlines()
        start = -1
        end = -1
        for i, line in enumerate(lines):
            # Accept both GNU and ELF Tool Chain readelf format. Some versions
            # of ELF Tool Chain readelf use ( ) around the symbol table name
            # instead of ' ', and omit the blank line before the heading.
            if re.match(r"Symbol table ['(].dynsym[')]", line):
                # Skip the heading itself and the column header below it.
                start = i + 2
            elif start != -1 and end == -1:
                if not line.strip():
                    end = i + 1
                if line.startswith("Symbol table ("):
                    end = i
        assert start != -1, "no .dynsym symbol table found in readelf output"
        if end == -1:
            end = len(lines)
        return lines[start:end]
| |
| |
class AIXDumpExtractor(object):
    """
    AIXDumpExtractor - Extract symbol lists from libraries using AIX dump.
    """

    @staticmethod
    def find_tool():
        """
        Search for the dump executable and return the path, or None when it
        is not on PATH.
        """
        return shutil.which("dump")

    def __init__(self, static_lib):
        """
        Initialize the dump executable and flags that will be used to
        extract symbols from shared libraries.

        Raises AssertionError when static_lib is truthy. Prints an error and
        exits with status 1 when dump cannot be found.
        """
        # TODO: Support dump for reading symbols from static libraries
        # The text is the assert *message* (after the comma) so that it is
        # reported when the check fails.
        assert not static_lib, "static libs not yet supported with dump"
        self.tool = self.find_tool()
        if self.tool is None:
            print("ERROR: Could not find dump")
            sys.exit(1)
        self.flags = ["-n", "-v"]
        # The OBJECT_MODE environment variable picks the -X flag; any value
        # other than "32" or "64" (including unset) selects -X32_64.
        object_mode = environ.get("OBJECT_MODE")
        if object_mode == "32":
            self.flags += ["-X32"]
        elif object_mode == "64":
            self.flags += ["-X64"]
        else:
            self.flags += ["-X32_64"]

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts.
        """
        cmd = [self.tool] + self.flags + [lib]
        out = subprocess.check_output(cmd).decode()
        loader_syms = self.get_loader_symbol_table(out)
        return self.process_syms(loader_syms)

    def process_syms(self, sym_list):
        """
        Turn rows of the dump loader symbol table into symbol dicts.

        Blank rows, rows with only 7 columns (no name column) and names
        listed in extract_ignore_names are skipped. A symbol whose fifth
        column is "DS" is recorded as FUNC, anything else as OBJECT. Raises
        AssertionError on a row that has neither 7 nor 8 columns.
        """
        new_syms = []
        for s in sym_list:
            parts = s.split()
            if not parts:
                continue
            assert len(parts) in (7, 8), "unexpected dump row: %r" % s
            if len(parts) == 7:
                # No name column, nothing to record.
                continue
            new_sym = {
                "name": parts[7],
                "type": "FUNC" if parts[4] == "DS" else "OBJECT",
                "is_defined": (parts[5] != "EXTref"),
                "storage_mapping_class": parts[4],
                "import_export": parts[3],
            }
            if new_sym["name"] in extract_ignore_names:
                continue
            new_syms.append(new_sym)
        return new_syms

    def get_loader_symbol_table(self, out):
        """
        Return, as a list, the lines of dump output that start with a
        bracketed decimal index such as "[12]".
        """
        entry = re.compile(r"^\[[0-9]+\]")
        # A list (rather than a one-shot iterator) can be walked repeatedly.
        return [line for line in out.splitlines() if entry.match(line)]

    @staticmethod
    def is_shared_lib(lib):
        """
        Check for the shared object flag in XCOFF headers of the input file or
        library archive.

        Returns True when the output of "dump -X32_64 -ov" contains SHROBJ.
        Prints an error and exits with status 1 when dump cannot be found.
        """
        dump = AIXDumpExtractor.find_tool()
        if dump is None:
            print("ERROR: Could not find dump")
            sys.exit(1)
        cmd = [dump, "-X32_64", "-ov", lib]
        out = subprocess.check_output(cmd).decode()
        return "SHROBJ" in out
| |
| |
def is_static_library(lib_file):
    """
    Report whether lib_file is a static library (True) or a shared one
    (False).

    On AIX the answer comes from AIXDumpExtractor.is_shared_lib; everywhere
    else a file is static exactly when its extension is ".a".
    """
    if not sys.platform.startswith("aix"):
        extension = os.path.splitext(lib_file)[1]
        return extension == ".a"
    # An AIX library could be both, but for simplicity assume it isn't.
    return not AIXDumpExtractor.is_shared_lib(lib_file)
| |
| |
def extract_symbols(lib_file, static_lib=None):
    """
    Extract and return the symbols of a static or dynamic library.

    When static_lib is None it is determined with is_static_library. The
    extractor is AIXDumpExtractor on AIX, ReadElfExtractor when readelf is
    available and the library is not static, and NMExtractor otherwise. The
    chosen extractor filters and formats the symbols it returns.
    """
    if static_lib is None:
        static_lib = is_static_library(lib_file)
    # Pick the extractor class first, then build and run it in one place.
    if sys.platform.startswith("aix"):
        extractor_cls = AIXDumpExtractor
    elif ReadElfExtractor.find_tool() and not static_lib:
        extractor_cls = ReadElfExtractor
    else:
        extractor_cls = NMExtractor
    return extractor_cls(static_lib=static_lib).extract(lib_file)