From 764c21f7529df70a19f1e1cb33bb9ece28e0bf8f Mon Sep 17 00:00:00 2001 From: Yunlian Jiang Date: Wed, 4 May 2016 11:09:44 -0700 Subject: Add debug fission support. This added debug fission support. It tries to find the dwp file from the debug dir /usr/lib/debug/*/debug and read symbols from them. Most of this patch comes from https://critique.corp.google.com/#review/52048295 and some fixes after that. The elf_reader.cc comes from TOT google code. I just removed some google dependency. Current problems from this patch 1: Some type mismatch: from uint8_t * to char *. 2: Some hack to find the .dwp file. (replace .debug with .dwp) BUG=chromium:604440 R=dehao@google.com, ivanpe@chromium.org Review URL: https://codereview.chromium.org/1884283002 . --- src/common/dwarf/elf_reader.cc | 1258 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1258 insertions(+) create mode 100644 src/common/dwarf/elf_reader.cc (limited to 'src/common/dwarf/elf_reader.cc') diff --git a/src/common/dwarf/elf_reader.cc b/src/common/dwarf/elf_reader.cc new file mode 100644 index 00000000..81683141 --- /dev/null +++ b/src/common/dwarf/elf_reader.cc @@ -0,0 +1,1258 @@ +// Copyright 2005 Google Inc. All Rights Reserved. +// Author: chatham@google.com (Andrew Chatham) +// Author: satorux@google.com (Satoru Takabayashi) +// +// Code for reading in ELF files. +// +// For information on the ELF format, see +// http://www.x86.org/ftp/manuals/tools/elf.pdf +// +// I also liked: +// http://www.caldera.com/developers/gabi/1998-04-29/contents.html +// +// A note about types: When dealing with the file format, we use types +// like Elf32_Word, but in the public interfaces we treat all +// addresses as uint64. As a result, we should be able to symbolize +// 64-bit binaries from a 32-bit process (which we don't do, +// anyway). size_t should therefore be avoided, except where required +// by things like mmap(). +// +// Although most of this code can deal with arbitrary ELF files of +// either word size, the public ElfReader interface only examines +// files loaded into the current address space, which must all match +// __WORDSIZE. This code cannot handle ELF files with a non-native +// byte ordering. +// +// TODO(chatham): It would be nice if we could accomplish this task +// without using malloc(), so we could use it as the process is dying. + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // needed for pread() +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "zlib.h" + +#include "elf_reader.h" +//#include "using_std_string.h" +// EM_AARCH64 is not defined by elf.h of GRTE v3 on x86. +// TODO(dougkwan): Remove this when v17 is retired. +#if !defined(EM_AARCH64) +#define EM_AARCH64 183 /* ARM AARCH64 */ +#endif + +// TODO(dthomson): Can be removed once all Java code is using the Google3 +// launcher. We need to avoid processing PLT functions as it causes memory +// fragmentation in malloc, which is fixed in tcmalloc - and if the Google3 +// launcher is used the JVM will then use tcmalloc. b/13735638 +//DEFINE_bool(elfreader_process_dynsyms, true, +// "Activate PLT function processing"); + +using std::string; +using std::vector; + +namespace { + +// The lowest bit of an ARM symbol value is used to indicate a Thumb address. +const int kARMThumbBitOffset = 0; + +// Converts an ARM Thumb symbol value to a true aligned address value. +template +T AdjustARMThumbSymbolValue(const T& symbol_table_value) { + return symbol_table_value & ~(1 << kARMThumbBitOffset); +} + +// Names of PLT-related sections. +const char kElfPLTRelSectionName[] = ".rel.plt"; // Use Rel struct. +const char kElfPLTRelaSectionName[] = ".rela.plt"; // Use Rela struct. +const char kElfPLTSectionName[] = ".plt"; +const char kElfDynSymSectionName[] = ".dynsym"; + +const int kX86PLTCodeSize = 0x10; // Size of one x86 PLT function in bytes. +const int kARMPLTCodeSize = 0xc; +const int kAARCH64PLTCodeSize = 0x10; + +const int kX86PLT0Size = 0x10; // Size of the special PLT0 entry. +const int kARMPLT0Size = 0x14; +const int kAARCH64PLT0Size = 0x20; + +// Suffix for PLT functions when it needs to be explicitly identified as such. +const char kPLTFunctionSuffix[] = "@plt"; + +} // namespace + +namespace dwarf2reader { + +template class ElfReaderImpl; + +// 32-bit and 64-bit ELF files are processed exactly the same, except +// for various field sizes. Elf32 and Elf64 encompass all of the +// differences between the two formats, and all format-specific code +// in this file is templated on one of them. +class Elf32 { + public: + typedef Elf32_Ehdr Ehdr; + typedef Elf32_Shdr Shdr; + typedef Elf32_Phdr Phdr; + typedef Elf32_Word Word; + typedef Elf32_Sym Sym; + typedef Elf32_Rel Rel; + typedef Elf32_Rela Rela; + + // What should be in the EI_CLASS header. + static const int kElfClass = ELFCLASS32; + + // Given a symbol pointer, return the binding type (eg STB_WEAK). + static char Bind(const Elf32_Sym *sym) { + return ELF32_ST_BIND(sym->st_info); + } + // Given a symbol pointer, return the symbol type (eg STT_FUNC). + static char Type(const Elf32_Sym *sym) { + return ELF32_ST_TYPE(sym->st_info); + } + + // Extract the symbol index from the r_info field of a relocation. + static int r_sym(const Elf32_Word r_info) { + return ELF32_R_SYM(r_info); + } +}; + + +class Elf64 { + public: + typedef Elf64_Ehdr Ehdr; + typedef Elf64_Shdr Shdr; + typedef Elf64_Phdr Phdr; + typedef Elf64_Word Word; + typedef Elf64_Sym Sym; + typedef Elf64_Rel Rel; + typedef Elf64_Rela Rela; + + // What should be in the EI_CLASS header. + static const int kElfClass = ELFCLASS64; + + static char Bind(const Elf64_Sym *sym) { + return ELF64_ST_BIND(sym->st_info); + } + static char Type(const Elf64_Sym *sym) { + return ELF64_ST_TYPE(sym->st_info); + } + static int r_sym(const Elf64_Xword r_info) { + return ELF64_R_SYM(r_info); + } +}; + + +// ElfSectionReader mmaps a section of an ELF file ("section" is ELF +// terminology). The ElfReaderImpl object providing the section header +// must exist for the lifetime of this object. +// +// The motivation for mmaping individual sections of the file is that +// many Google executables are large enough when unstripped that we +// have to worry about running out of virtual address space. +// +// For compressed sections we have no choice but to allocate memory. +template +class ElfSectionReader { + public: + ElfSectionReader(const char *name, const string &path, int fd, + const typename ElfArch::Shdr §ion_header) + : contents_aligned_(NULL), + contents_(NULL), + header_(section_header) { + // Back up to the beginning of the page we're interested in. + const size_t additional = header_.sh_offset % getpagesize(); + const size_t offset_aligned = header_.sh_offset - additional; + section_size_ = header_.sh_size; + size_aligned_ = section_size_ + additional; + // If the section has been stripped or is empty, do not attempt + // to process its contents. + if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0) + return; + contents_aligned_ = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED, + fd, offset_aligned); + // Set where the offset really should begin. + contents_ = reinterpret_cast(contents_aligned_) + + (header_.sh_offset - offset_aligned); + + // Check for and handle any compressed contents. + //if (strncmp(name, ".zdebug_", strlen(".zdebug_")) == 0) + // DecompressZlibContents(); + // TODO(saugustine): Add support for proposed elf-section flag + // "SHF_COMPRESS". + } + + ~ElfSectionReader() { + if (contents_aligned_ != NULL) + munmap(contents_aligned_, size_aligned_); + else + delete[] contents_; + } + + // Return the section header for this section. + typename ElfArch::Shdr const &header() const { return header_; } + + // Return memory at the given offset within this section. + const char *GetOffset(typename ElfArch::Word bytes) const { + return contents_ + bytes; + } + + const char *contents() const { return contents_; } + size_t section_size() const { return section_size_; } + + private: + // page-aligned file contents + void *contents_aligned_; + // contents as usable by the client. For non-compressed sections, + // pointer within contents_aligned_ to where the section data + // begins; for compressed sections, pointer to the decompressed + // data. + char *contents_; + // size of contents_aligned_ + size_t size_aligned_; + // size of contents. + size_t section_size_; + const typename ElfArch::Shdr header_; +}; + +// An iterator over symbols in a given section. It handles walking +// through the entries in the specified section and mapping symbol +// entries to their names in the appropriate string table (in +// another section). +template +class SymbolIterator { + public: + SymbolIterator(ElfReaderImpl *reader, + typename ElfArch::Word section_type) + : symbol_section_(reader->GetSectionByType(section_type)), + string_section_(NULL), + num_symbols_in_section_(0), + symbol_within_section_(0) { + + // If this section type doesn't exist, leave + // num_symbols_in_section_ as zero, so this iterator is already + // done(). + if (symbol_section_ != NULL) { + num_symbols_in_section_ = symbol_section_->header().sh_size / + symbol_section_->header().sh_entsize; + + // Symbol sections have sh_link set to the section number of + // the string section containing the symbol names. + string_section_ = reader->GetSection(symbol_section_->header().sh_link); + } + } + + // Return true iff we have passed all symbols in this section. + bool done() const { + return symbol_within_section_ >= num_symbols_in_section_; + } + + // Advance to the next symbol in this section. + // REQUIRES: !done() + void Next() { ++symbol_within_section_; } + + // Return a pointer to the current symbol. + // REQUIRES: !done() + const typename ElfArch::Sym *GetSymbol() const { + return reinterpret_cast( + symbol_section_->GetOffset(symbol_within_section_ * + symbol_section_->header().sh_entsize)); + } + + // Return the name of the current symbol, NULL if it has none. + // REQUIRES: !done() + const char *GetSymbolName() const { + int name_offset = GetSymbol()->st_name; + if (name_offset == 0) + return NULL; + return string_section_->GetOffset(name_offset); + } + + int GetCurrentSymbolIndex() const { + return symbol_within_section_; + } + + private: + const ElfSectionReader *const symbol_section_; + const ElfSectionReader *string_section_; + int num_symbols_in_section_; + int symbol_within_section_; +}; + + +// Copied from strings/strutil.h. Per chatham, +// this library should not depend on strings. + +static inline bool MyHasSuffixString(const string& str, const string& suffix) { + int len = str.length(); + int suflen = suffix.length(); + return (suflen <= len) && (str.compare(len-suflen, suflen, suffix) == 0); +} + + +// ElfReader loads an ELF binary and can provide information about its +// contents. It is most useful for matching addresses to function +// names. It does not understand debugging formats (eg dwarf2), so it +// can't print line numbers. It takes a path to an elf file and a +// readable file descriptor for that file, which it does not assume +// ownership of. +template +class ElfReaderImpl { + public: + explicit ElfReaderImpl(const string &path, int fd) + : path_(path), + fd_(fd), + section_headers_(NULL), + program_headers_(NULL), + opd_section_(NULL), + base_for_text_(0), + plts_supported_(false), + plt_code_size_(0), + plt0_size_(0), + visited_relocation_entries_(false) { + string error; + is_dwp_ = MyHasSuffixString(path, ".dwp"); + ParseHeaders(fd, path); + // Currently we need some extra information for PowerPC64 binaries + // including a way to read the .opd section for function descriptors and a + // way to find the linked base for function symbols. + if (header_.e_machine == EM_PPC64) { + // "opd_section_" must always be checked for NULL before use. + opd_section_ = GetSectionInfoByName(".opd", &opd_info_); + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + const char *name = GetSectionName(section_headers_[k].sh_name); + if (strncmp(name, ".text", strlen(".text")) == 0) { + base_for_text_ = + section_headers_[k].sh_addr - section_headers_[k].sh_offset; + break; + } + } + } + // Turn on PLTs. + if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) { + plt_code_size_ = kX86PLTCodeSize; + plt0_size_ = kX86PLT0Size; + plts_supported_ = true; + } else if (header_.e_machine == EM_ARM) { + plt_code_size_ = kARMPLTCodeSize; + plt0_size_ = kARMPLT0Size; + plts_supported_ = true; + } else if (header_.e_machine == EM_AARCH64) { + plt_code_size_ = kAARCH64PLTCodeSize; + plt0_size_ = kAARCH64PLT0Size; + plts_supported_ = true; + } + } + + ~ElfReaderImpl() { + for (unsigned int i = 0u; i < sections_.size(); ++i) + delete sections_[i]; + delete [] section_headers_; + delete [] program_headers_; + } + + // Examine the headers of the file and return whether the file looks + // like an ELF file for this architecture. Takes an already-open + // file descriptor for the candidate file, reading in the prologue + // to see if the ELF file appears to match the current + // architecture. If error is non-NULL, it will be set with a reason + // in case of failure. + static bool IsArchElfFile(int fd, string *error) { + unsigned char header[EI_NIDENT]; + if (pread(fd, header, sizeof(header), 0) != sizeof(header)) { + if (error != NULL) *error = "Could not read header"; + return false; + } + + if (memcmp(header, ELFMAG, SELFMAG) != 0) { + if (error != NULL) *error = "Missing ELF magic"; + return false; + } + + if (header[EI_CLASS] != ElfArch::kElfClass) { + if (error != NULL) *error = "Different word size"; + return false; + } + + int endian = 0; + if (header[EI_DATA] == ELFDATA2LSB) + endian = __LITTLE_ENDIAN; + else if (header[EI_DATA] == ELFDATA2MSB) + endian = __BIG_ENDIAN; + if (endian != __BYTE_ORDER) { + if (error != NULL) *error = "Different byte order"; + return false; + } + + return true; + } + + // Return true if we can use this symbol in Address-to-Symbol map. + bool CanUseSymbol(const char *name, const typename ElfArch::Sym *sym) { + // For now we only save FUNC and NOTYPE symbols. For now we just + // care about functions, but some functions written in assembler + // don't have a proper ELF type attached to them, so we store + // NOTYPE symbols as well. The remaining significant type is + // OBJECT (eg global variables), which represent about 25% of + // the symbols in a typical google3 binary. + if (ElfArch::Type(sym) != STT_FUNC && + ElfArch::Type(sym) != STT_NOTYPE) { + return false; + } + + // Target specific filtering. + switch (header_.e_machine) { + case EM_AARCH64: + case EM_ARM: + // Filter out '$x' special local symbols used by tools + return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL; + case EM_X86_64: + // Filter out read-only constants like .LC123. + return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL; + default: + return true; + } + } + + // Iterate over the symbols in a section, either SHT_DYNSYM or + // SHT_SYMTAB. Add all symbols to the given SymbolMap. + /* + void GetSymbolPositions(SymbolMap *symbols, + typename ElfArch::Word section_type, + uint64 mem_offset, + uint64 file_offset) { + // This map is used to filter out "nested" functions. + // See comment below. + AddrToSymMap addr_to_sym_map; + for (SymbolIterator it(this, section_type); + !it.done(); it.Next()) { + const char *name = it.GetSymbolName(); + if (name == NULL) + continue; + const typename ElfArch::Sym *sym = it.GetSymbol(); + if (CanUseSymbol(name, sym)) { + const int sec = sym->st_shndx; + + // We don't support special section indices. The most common + // is SHN_ABS, for absolute symbols used deep in the bowels of + // glibc. Also ignore any undefined symbols. + if (sec == SHN_UNDEF || + (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) { + continue; + } + + const typename ElfArch::Shdr& hdr = section_headers_[sec]; + + // Adjust for difference between where we expected to mmap + // this section, and where it was actually mmapped. + const int64 expected_base = hdr.sh_addr - hdr.sh_offset; + const int64 real_base = mem_offset - file_offset; + const int64 adjust = real_base - expected_base; + + uint64 start = sym->st_value + adjust; + + // Adjust function symbols for PowerPC64 by dereferencing and adjusting + // the function descriptor to get the function address. + if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) { + const uint64 opd_addr = + AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); + // Only adjust the returned value if the function address was found. + if (opd_addr != sym->st_value) { + const int64 adjust_function_symbols = + real_base - base_for_text_; + start = opd_addr + adjust_function_symbols; + } + } + + addr_to_sym_map.push_back(std::make_pair(start, sym)); + } + } + std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter); + addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(), + addr_to_sym_map.end(), &AddrToSymEquals), + addr_to_sym_map.end()); + + // Squeeze out any "nested functions". + // Nested functions are not allowed in C, but libc plays tricks. + // + // For example, here is disassembly of /lib64/tls/libc-2.3.5.so: + // 0x00000000000aa380 : cmpl $0x0,0x2781b9(%rip) + // 0x00000000000aa387 : jne 0xaa39b + // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax + // 0x00000000000aa390 <__read_nocancel+7>: syscall + // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax + // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef + // 0x00000000000aa39a <__read_nocancel+17>: retq + // 0x00000000000aa39b : sub $0x28,%rsp + // 0x00000000000aa39f : mov %rdi,0x8(%rsp) + // ... + // Without removing __read_nocancel, symbolizer will return NULL + // given e.g. 0xaa39f (because the lower bound is __read_nocancel, + // but 0xaa39f is beyond its end. + if (addr_to_sym_map.empty()) { + return; + } + const ElfSectionReader *const symbol_section = + this->GetSectionByType(section_type); + const ElfSectionReader *const string_section = + this->GetSection(symbol_section->header().sh_link); + + typename AddrToSymMap::iterator curr = addr_to_sym_map.begin(); + // Always insert the first symbol. + symbols->AddSymbol(string_section->GetOffset(curr->second->st_name), + curr->first, curr->second->st_size); + typename AddrToSymMap::iterator prev = curr++; + for (; curr != addr_to_sym_map.end(); ++curr) { + const uint64 prev_addr = prev->first; + const uint64 curr_addr = curr->first; + const typename ElfArch::Sym *const prev_sym = prev->second; + const typename ElfArch::Sym *const curr_sym = curr->second; + if (prev_addr + prev_sym->st_size <= curr_addr || + // The next condition is true if two symbols overlap like this: + // + // Previous symbol |----------------------------| + // Current symbol |-------------------------------| + // + // These symbols are not found in google3 codebase, but in + // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so. + // + // 0619e040 00000046 t CardTableModRefBS::write_region_work() + // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work() + // + // We allow overlapped symbols rather than ignore these. + // Due to the way SymbolMap::GetSymbolAtPosition() works, + // lookup for any address in [curr_addr, curr_addr + its size) + // (e.g. 0619e071) will produce the current symbol, + // which is the desired outcome. + prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) { + const char *name = string_section->GetOffset(curr_sym->st_name); + symbols->AddSymbol(name, curr_addr, curr_sym->st_size); + prev = curr; + } else { + // Current symbol is "nested" inside previous one like this: + // + // Previous symbol |----------------------------| + // Current symbol |---------------------| + // + // This happens within glibc, e.g. __read_nocancel is nested + // "inside" __read. Ignore "inner" symbol. + //DCHECK_LE(curr_addr + curr_sym->st_size, + // prev_addr + prev_sym->st_size); + ; + } + } + } +*/ + + void VisitSymbols(typename ElfArch::Word section_type, + ElfReader::SymbolSink *sink) { + VisitSymbols(section_type, sink, -1, -1, false); + } + + void VisitSymbols(typename ElfArch::Word section_type, + ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type, + bool get_raw_symbol_values) { + for (SymbolIterator it(this, section_type); + !it.done(); it.Next()) { + const char *name = it.GetSymbolName(); + if (!name) continue; + const typename ElfArch::Sym *sym = it.GetSymbol(); + if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) && + (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) { + typename ElfArch::Sym symbol = *sym; + // Add a PLT symbol in addition to the main undefined symbol. + // Only do this for SHT_DYNSYM, because PLT symbols are dynamic. + int symbol_index = it.GetCurrentSymbolIndex(); + // TODO(dthomson): Can be removed once all Java code is using the + // Google3 launcher. + if (section_type == SHT_DYNSYM && + static_cast(symbol_index) < symbols_plt_offsets_.size() && + symbols_plt_offsets_[symbol_index] != 0) { + string plt_name = string(name) + kPLTFunctionSuffix; + if (plt_function_names_[symbol_index].empty()) { + plt_function_names_[symbol_index] = plt_name; + } else if (plt_function_names_[symbol_index] != plt_name) { + ; + } + sink->AddSymbol(plt_function_names_[symbol_index].c_str(), + symbols_plt_offsets_[it.GetCurrentSymbolIndex()], + plt_code_size_); + } + if (!get_raw_symbol_values) + AdjustSymbolValue(&symbol); + sink->AddSymbol(name, symbol.st_value, symbol.st_size); + } + } + } + + void VisitRelocationEntries() { + if (visited_relocation_entries_) { + return; + } + visited_relocation_entries_ = true; + + if (!plts_supported_) { + return; + } + // First determine if PLTs exist. If not, then there is nothing to do. + ElfReader::SectionInfo plt_section_info; + const char* plt_section = + GetSectionInfoByName(kElfPLTSectionName, &plt_section_info); + if (!plt_section) { + return; + } + if (plt_section_info.size == 0) { + return; + } + + // The PLTs could be referenced by either a Rel or Rela (Rel with Addend) + // section. + ElfReader::SectionInfo rel_section_info; + ElfReader::SectionInfo rela_section_info; + const char* rel_section = + GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info); + const char* rela_section = + GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info); + + const typename ElfArch::Rel* rel = + reinterpret_cast(rel_section); + const typename ElfArch::Rela* rela = + reinterpret_cast(rela_section); + + if (!rel_section && !rela_section) { + return; + } + + // Use either Rel or Rela section, depending on which one exists. + size_t section_size = rel_section ? rel_section_info.size + : rela_section_info.size; + size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel) + : sizeof(typename ElfArch::Rela); + + // Determine the number of entries in the dynamic symbol table. + ElfReader::SectionInfo dynsym_section_info; + const char* dynsym_section = + GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info); + // The dynsym section might not exist, or it might be empty. In either case + // there is nothing to be done so return. + if (!dynsym_section || dynsym_section_info.size == 0) { + return; + } + size_t num_dynamic_symbols = + dynsym_section_info.size / dynsym_section_info.entsize; + symbols_plt_offsets_.resize(num_dynamic_symbols, 0); + + // TODO(dthomson): Can be removed once all Java code is using the + // Google3 launcher. + // Make storage room for PLT function name strings. + plt_function_names_.resize(num_dynamic_symbols); + + for (size_t i = 0; i < section_size / entry_size; ++i) { + // Determine symbol index from the |r_info| field. + int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info + : rela[i].r_info); + if (static_cast(sym_index) >= symbols_plt_offsets_.size()) { + continue; + } + symbols_plt_offsets_[sym_index] = + plt_section_info.addr + plt0_size_ + i * plt_code_size_; + } + } + + // Return an ElfSectionReader for the first section of the given + // type by iterating through all section headers. Returns NULL if + // the section type is not found. + const ElfSectionReader *GetSectionByType( + typename ElfArch::Word section_type) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + if (section_headers_[k].sh_type == section_type) { + return GetSection(k); + } + } + return NULL; + } + + // Return the name of section "shndx". Returns NULL if the section + // is not found. + const char *GetSectionNameByIndex(int shndx) { + return GetSectionName(section_headers_[shndx].sh_name); + } + + // Return a pointer to section "shndx", and store the size in + // "size". Returns NULL if the section is not found. + const char *GetSectionContentsByIndex(int shndx, size_t *size) { + const ElfSectionReader *section = GetSection(shndx); + if (section != NULL) { + *size = section->section_size(); + return section->contents(); + } + return NULL; + } + + // Return a pointer to the first section of the given name by + // iterating through all section headers, and store the size in + // "size". Returns NULL if the section name is not found. + const char *GetSectionContentsByName(const string §ion_name, + size_t *size) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + // When searching for sections in a .dwp file, the sections + // we're looking for will always be at the end of the section + // table, so reverse the direction of iteration. + int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; + const char *name = GetSectionName(section_headers_[shndx].sh_name); + if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { + const ElfSectionReader *section = GetSection(shndx); + if (section == NULL) { + return NULL; + } else { + *size = section->section_size(); + return section->contents(); + } + } + } + return NULL; + } + + // This is like GetSectionContentsByName() but it returns a lot of extra + // information about the section. + const char *GetSectionInfoByName(const string §ion_name, + ElfReader::SectionInfo *info) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + // When searching for sections in a .dwp file, the sections + // we're looking for will always be at the end of the section + // table, so reverse the direction of iteration. + int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; + const char *name = GetSectionName(section_headers_[shndx].sh_name); + if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { + const ElfSectionReader *section = GetSection(shndx); + if (section == NULL) { + return NULL; + } else { + info->type = section->header().sh_type; + info->flags = section->header().sh_flags; + info->addr = section->header().sh_addr; + info->offset = section->header().sh_offset; + info->size = section->header().sh_size; + info->link = section->header().sh_link; + info->info = section->header().sh_info; + info->addralign = section->header().sh_addralign; + info->entsize = section->header().sh_entsize; + return section->contents(); + } + } + } + return NULL; + } + + // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD + // segments are present. This is the address an ELF image was linked + // (by static linker) to be loaded at. Usually (but not always) 0 for + // shared libraries and position-independent executables. + uint64 VaddrOfFirstLoadSegment() const { + // Relocatable objects (of type ET_REL) do not have LOAD segments. + if (header_.e_type == ET_REL) { + return 0; + } + for (int i = 0; i < GetNumProgramHeaders(); ++i) { + if (program_headers_[i].p_type == PT_LOAD) { + return program_headers_[i].p_vaddr; + } + } + return 0; + } + + // According to the LSB ("ELF special sections"), sections with debug + // info are prefixed by ".debug". The names are not specified, but they + // look like ".debug_line", ".debug_info", etc. + bool HasDebugSections() { + // Debug sections are likely to be near the end, so reverse the + // direction of iteration. + for (int k = GetNumSections() - 1; k >= 0; --k) { + const char *name = GetSectionName(section_headers_[k].sh_name); + if (strncmp(name, ".debug", strlen(".debug")) == 0) return true; + if (strncmp(name, ".zdebug", strlen(".zdebug")) == 0) return true; + } + return false; + } + + bool IsDynamicSharedObject() const { + return header_.e_type == ET_DYN; + } + + // Return the number of sections. + uint64_t GetNumSections() const { + if (HasManySections()) + return first_section_header_.sh_size; + return header_.e_shnum; + } + + private: + typedef vector > AddrToSymMap; + + static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs, + const typename AddrToSymMap::value_type& rhs) { + return lhs.first < rhs.first; + } + + static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs, + const typename AddrToSymMap::value_type& rhs) { + return lhs.first == rhs.first; + } + + // Does this ELF file have too many sections to fit in the program header? + bool HasManySections() const { + return header_.e_shnum == SHN_UNDEF; + } + + // Return the number of program headers. + int GetNumProgramHeaders() const { + if (HasManySections() && header_.e_phnum == 0xffff && + first_section_header_.sh_info != 0) + return first_section_header_.sh_info; + return header_.e_phnum; + } + + // Return the index of the string table. + int GetStringTableIndex() const { + if (HasManySections()) { + if (header_.e_shstrndx == 0xffff) + return first_section_header_.sh_link; + else if (header_.e_shstrndx >= GetNumSections()) + return 0; + } + return header_.e_shstrndx; + } + + // Given an offset into the section header string table, return the + // section name. + const char *GetSectionName(typename ElfArch::Word sh_name) { + const ElfSectionReader *shstrtab = + GetSection(GetStringTableIndex()); + if (shstrtab != NULL) { + return shstrtab->GetOffset(sh_name); + } + return NULL; + } + + // Return an ElfSectionReader for the given section. The reader will + // be freed when this object is destroyed. + const ElfSectionReader *GetSection(int num) { + const char *name; + // Hard-coding the name for the section-name string table prevents + // infinite recursion. + if (num == GetStringTableIndex()) + name = ".shstrtab"; + else + name = GetSectionNameByIndex(num); + ElfSectionReader *& reader = sections_[num]; + if (reader == NULL) + reader = new ElfSectionReader(name, path_, fd_, + section_headers_[num]); + return reader; + } + + // Parse out the overall header information from the file and assert + // that it looks sane. This contains information like the magic + // number and target architecture. + bool ParseHeaders(int fd, const string &path) { + // Read in the global ELF header. + if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) { + return false; + } + + // Must be an executable, dynamic shared object or relocatable object + if (header_.e_type != ET_EXEC && + header_.e_type != ET_DYN && + header_.e_type != ET_REL) { + return false; + } + // Need a section header. + if (header_.e_shoff == 0) { + return false; + } + + if (header_.e_shnum == SHN_UNDEF) { + // The number of sections in the program header is only a 16-bit value. In + // the event of overflow (greater than SHN_LORESERVE sections), e_shnum + // will read SHN_UNDEF and the true number of section header table entries + // is found in the sh_size field of the first section header. + // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html + if (pread(fd, &first_section_header_, sizeof(first_section_header_), + header_.e_shoff) != sizeof(first_section_header_)) { + return false; + } + } + + // Dynamically allocate enough space to store the section headers + // and read them out of the file. + const int section_headers_size = + GetNumSections() * sizeof(*section_headers_); + section_headers_ = new typename ElfArch::Shdr[section_headers_size]; + if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) != + section_headers_size) { + return false; + } + + // Dynamically allocate enough space to store the program headers + // and read them out of the file. + //const int program_headers_size = + // GetNumProgramHeaders() * sizeof(*program_headers_); + program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()]; + + // Presize the sections array for efficiency. + sections_.resize(GetNumSections(), NULL); + return true; + } + + // Given the "value" of a function descriptor return the address of the + // function (i.e. the dereferenced value). Otherwise return "value". + uint64 AdjustPPC64FunctionDescriptorSymbolValue(uint64 value) { + if (opd_section_ != NULL && + opd_info_.addr <= value && + value < opd_info_.addr + opd_info_.size) { + uint64 offset = value - opd_info_.addr; + return (*reinterpret_cast(opd_section_ + offset)); + } + return value; + } + + void AdjustSymbolValue(typename ElfArch::Sym* sym) { + switch (header_.e_machine) { + case EM_ARM: + // For ARM architecture, if the LSB of the function symbol offset is set, + // it indicates a Thumb function. This bit should not be taken literally. + // Clear it. + if (ElfArch::Type(sym) == STT_FUNC) + sym->st_value = AdjustARMThumbSymbolValue(sym->st_value); + break; + case EM_386: + // No adjustment needed for Intel x86 architecture. However, explicitly + // define this case as we use it quite often. + break; + case EM_PPC64: + // PowerPC64 currently has function descriptors as part of the ABI. + // Function symbols need to be adjusted accordingly. + if (ElfArch::Type(sym) == STT_FUNC) + sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); + break; + default: + break; + } + } + + friend class SymbolIterator; + + // The file we're reading. + const string path_; + // Open file descriptor for path_. Not owned by this object. + const int fd_; + + // The global header of the ELF file. + typename ElfArch::Ehdr header_; + + // The header of the first section. This may be used to supplement the ELF + // file header. + typename ElfArch::Shdr first_section_header_; + + // Array of GetNumSections() section headers, allocated when we read + // in the global header. + typename ElfArch::Shdr *section_headers_; + + // Array of GetNumProgramHeaders() program headers, allocated when we read + // in the global header. + typename ElfArch::Phdr *program_headers_; + + // An array of pointers to ElfSectionReaders. Sections are + // mmaped as they're needed and not released until this object is + // destroyed. + vector*> sections_; + + // For PowerPC64 we need to keep track of function descriptors when looking up + // values for funtion symbols values. Function descriptors are kept in the + // .opd section and are dereferenced to find the function address. + ElfReader::SectionInfo opd_info_; + const char *opd_section_; // Must be checked for NULL before use. + int64 base_for_text_; + + // Read PLT-related sections for the current architecture. + bool plts_supported_; + // Code size of each PLT function for the current architecture. + size_t plt_code_size_; + // Size of the special first entry in the .plt section that calls the runtime + // loader resolution routine, and that all other entries jump to when doing + // lazy symbol binding. + size_t plt0_size_; + + // Maps a dynamic symbol index to a PLT offset. + // The vector entry index is the dynamic symbol index. + std::vector symbols_plt_offsets_; + + // Container for PLT function name strings. These strings are passed by + // reference to SymbolSink::AddSymbol() so they need to be stored somewhere. + std::vector plt_function_names_; + + bool visited_relocation_entries_; + + // True if this is a .dwp file. + bool is_dwp_; +}; + +ElfReader::ElfReader(const string &path) + : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) { + // linux 2.6.XX kernel can show deleted files like this: + // /var/run/nscd/dbYLJYaE (deleted) + // and the kernel-supplied vdso and vsyscall mappings like this: + // [vdso] + // [vsyscall] + if (MyHasSuffixString(path, " (deleted)")) + return; + if (path == "[vdso]") + return; + if (path == "[vsyscall]") + return; + + fd_ = open(path.c_str(), O_RDONLY); +} + +ElfReader::~ElfReader() { + if (fd_ != -1) + close(fd_); + if (impl32_ != NULL) + delete impl32_; + if (impl64_ != NULL) + delete impl64_; +} + + +// The only word-size specific part of this file is IsNativeElfFile(). +#if __WORDSIZE == 32 +#define NATIVE_ELF_ARCH Elf32 +#elif __WORDSIZE == 64 +#define NATIVE_ELF_ARCH Elf64 +#else +#error "Invalid word size" +#endif + +template +static bool IsElfFile(const int fd, const string &path) { + if (fd < 0) + return false; + if (!ElfReaderImpl::IsArchElfFile(fd, NULL)) { + // No error message here. IsElfFile gets called many times. + return false; + } + return true; +} + +bool ElfReader::IsNativeElfFile() const { + return IsElfFile(fd_, path_); +} + +bool ElfReader::IsElf32File() const { + return IsElfFile(fd_, path_); +} + +bool ElfReader::IsElf64File() const { + return IsElfFile(fd_, path_); +} + +/* +void ElfReader::AddSymbols(SymbolMap *symbols, + uint64 mem_offset, uint64 file_offset, + uint64 length) { + if (fd_ < 0) + return; + // TODO(chatham): Actually use the information about file offset and + // the length of the mapped section. On some machines the data + // section gets mapped as executable, and we'll end up reading the + // file twice and getting some of the offsets wrong. + if (IsElf32File()) { + GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB, + mem_offset, file_offset); + GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM, + mem_offset, file_offset); + } else if (IsElf64File()) { + GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB, + mem_offset, file_offset); + GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM, + mem_offset, file_offset); + } +} +*/ + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink) { + VisitSymbols(sink, -1, -1); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type) { + VisitSymbols(sink, symbol_binding, symbol_type, false); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type, + bool get_raw_symbol_values) { + if (IsElf32File()) { + GetImpl32()->VisitRelocationEntries(); + GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + } else if (IsElf64File()) { + GetImpl64()->VisitRelocationEntries(); + GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + } +} + +uint64 ElfReader::VaddrOfFirstLoadSegment() { + if (IsElf32File()) { + return GetImpl32()->VaddrOfFirstLoadSegment(); + } else if (IsElf64File()) { + return GetImpl64()->VaddrOfFirstLoadSegment(); + } else { + return 0; + } +} + +const char *ElfReader::GetSectionName(int shndx) { + if (shndx < 0 || static_cast(shndx) >= GetNumSections()) return NULL; + if (IsElf32File()) { + return GetImpl32()->GetSectionNameByIndex(shndx); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionNameByIndex(shndx); + } else { + return NULL; + } +} + +uint64 ElfReader::GetNumSections() { + if (IsElf32File()) { + return GetImpl32()->GetNumSections(); + } else if (IsElf64File()) { + return GetImpl64()->GetNumSections(); + } else { + return 0; + } +} + +const char *ElfReader::GetSectionByIndex(int shndx, size_t *size) { + if (IsElf32File()) { + return GetImpl32()->GetSectionContentsByIndex(shndx, size); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionContentsByIndex(shndx, size); + } else { + return NULL; + } +} + +const char *ElfReader::GetSectionByName(const string §ion_name, + size_t *size) { + if (IsElf32File()) { + return GetImpl32()->GetSectionContentsByName(section_name, size); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionContentsByName(section_name, size); + } else { + return NULL; + } +} + +const char *ElfReader::GetSectionInfoByName(const string §ion_name, + SectionInfo *info) { + if (IsElf32File()) { + return GetImpl32()->GetSectionInfoByName(section_name, info); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionInfoByName(section_name, info); + } else { + return NULL; + } +} + +bool ElfReader::SectionNamesMatch(const string &name, const string &sh_name) { + if ((name.find(".debug_", 0) == 0) && (sh_name.find(".zdebug_", 0) == 0)) { + const string name_suffix(name, strlen(".debug_")); + const string sh_name_suffix(sh_name, strlen(".zdebug_")); + return name_suffix == sh_name_suffix; + } + return name == sh_name; +} + +bool ElfReader::IsDynamicSharedObject() { + if (IsElf32File()) { + return GetImpl32()->IsDynamicSharedObject(); + } else if (IsElf64File()) { + return GetImpl64()->IsDynamicSharedObject(); + } else { + return false; + } +} + +ElfReaderImpl *ElfReader::GetImpl32() { + if (impl32_ == NULL) { + impl32_ = new ElfReaderImpl(path_, fd_); + } + return impl32_; +} + +ElfReaderImpl *ElfReader::GetImpl64() { + if (impl64_ == NULL) { + impl64_ = new ElfReaderImpl(path_, fd_); + } + return impl64_; +} + +// Return true if file is an ELF binary of ElfArch, with unstripped +// debug info (debug_only=true) or symbol table (debug_only=false). +// Otherwise, return false. +template +static bool IsNonStrippedELFBinaryImpl(const string &path, const int fd, + bool debug_only) { + if (!ElfReaderImpl::IsArchElfFile(fd, NULL)) return false; + ElfReaderImpl elf_reader(path, fd); + return debug_only ? + elf_reader.HasDebugSections() + : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL); +} + +// Helper for the IsNon[Debug]StrippedELFBinary functions. +static bool IsNonStrippedELFBinaryHelper(const string &path, + bool debug_only) { + const int fd = open(path.c_str(), O_RDONLY); + if (fd == -1) { + return false; + } + + if (IsNonStrippedELFBinaryImpl(path, fd, debug_only) || + IsNonStrippedELFBinaryImpl(path, fd, debug_only)) { + close(fd); + return true; + } + close(fd); + return false; +} + +bool ElfReader::IsNonStrippedELFBinary(const string &path) { + return IsNonStrippedELFBinaryHelper(path, false); +} + +bool ElfReader::IsNonDebugStrippedELFBinary(const string &path) { + return IsNonStrippedELFBinaryHelper(path, true); +} +} // namespace dwarf2reader -- cgit v1.2.1