From 4116671cbff9e99fbd834a1b2cdd174226b78c7c Mon Sep 17 00:00:00 2001 From: "ted.mielczarek" Date: Wed, 18 Jul 2012 17:55:08 +0000 Subject: Rework dump_symbols.cc using templates and traits classes to handle cross-word-size symbol dumping R=mark at https://breakpad.appspot.com/393002/ git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@987 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/common/linux/dump_symbols.cc | 452 +++++++++++++++--------------- src/common/linux/dump_symbols_unittest.cc | 8 +- src/common/linux/elfutils-inl.h | 74 +++++ src/common/linux/elfutils.cc | 59 ++-- src/common/linux/elfutils.h | 31 ++ 5 files changed, 371 insertions(+), 253 deletions(-) create mode 100644 src/common/linux/elfutils-inl.h diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 019a3a6c..8ed78a87 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -57,6 +57,8 @@ #include "common/dwarf_cfi_to_module.h" #include "common/dwarf_cu_to_module.h" #include "common/dwarf_line_to_module.h" +#include "common/linux/elfutils.h" +#include "common/linux/elfutils-inl.h" #include "common/linux/elf_symbols_to_module.h" #include "common/linux/file_id.h" #include "common/module.h" @@ -70,6 +72,12 @@ namespace { using google_breakpad::DwarfCFIToModule; using google_breakpad::DwarfCUToModule; using google_breakpad::DwarfLineToModule; +using google_breakpad::ElfClass; +using google_breakpad::ElfClass32; +using google_breakpad::ElfClass64; +using google_breakpad::FindElfSectionByName; +using google_breakpad::GetOffset; +using google_breakpad::IsValidElf; using google_breakpad::Module; using google_breakpad::StabsToModule; @@ -130,24 +138,15 @@ class MmapWrapper { size_t size_; }; - -// Fix offset into virtual address by adding the mapped base into offsets. -// Make life easier when want to find something by offset. -static void FixAddress(void *obj_base) { - ElfW(Addr) base = reinterpret_cast(obj_base); - ElfW(Ehdr) *elf_header = static_cast(obj_base); - elf_header->e_phoff += base; - elf_header->e_shoff += base; - ElfW(Shdr) *sections = reinterpret_cast(elf_header->e_shoff); - for (int i = 0; i < elf_header->e_shnum; ++i) - sections[i].sh_offset += base; -} - // Find the preferred loading address of the binary. -static ElfW(Addr) GetLoadingAddress(const ElfW(Phdr) *program_headers, - int nheader) { +template +typename ElfClass::Addr GetLoadingAddress( + const typename ElfClass::Phdr* program_headers, + int nheader) { + typedef typename ElfClass::Phdr Phdr; + for (int i = 0; i < nheader; ++i) { - const ElfW(Phdr) &header = program_headers[i]; + const Phdr& header = program_headers[i]; // For executable, it is the PT_LOAD segment with offset to zero. if (header.p_type == PT_LOAD && header.p_offset == 0) @@ -157,57 +156,22 @@ static ElfW(Addr) GetLoadingAddress(const ElfW(Phdr) *program_headers, return 0; } -static bool IsValidElf(const ElfW(Ehdr) *elf_header) { - return memcmp(elf_header, ELFMAG, SELFMAG) == 0; -} - -static const ElfW(Shdr) *FindSectionByName(const char *name, - const ElfW(Shdr) *sections, - const ElfW(Shdr) *section_names, - int nsection) { - assert(name != NULL); - assert(sections != NULL); - assert(nsection > 0); - - int name_len = strlen(name); - if (name_len == 0) - return NULL; - - // Find the end of the section name section, to make sure that - // comparisons don't run off the end of the section. - const char *names_end = - reinterpret_cast(section_names->sh_offset + section_names->sh_size); - - for (int i = 0; i < nsection; ++i) { - const char *section_name = - reinterpret_cast(section_names->sh_offset + sections[i].sh_name); - if (names_end - section_name >= name_len + 1 && - strcmp(name, section_name) == 0) { - if (sections[i].sh_type == SHT_NOBITS) { - fprintf(stderr, - "Section %s found, but ignored because type=SHT_NOBITS.\n", - name); - return NULL; - } - return sections + i; - } - } - return NULL; -} - -static bool LoadStabs(const ElfW(Ehdr) *elf_header, - const ElfW(Shdr) *stab_section, - const ElfW(Shdr) *stabstr_section, - const bool big_endian, - Module *module) { +template +bool LoadStabs(const typename ElfClass::Ehdr* elf_header, + const typename ElfClass::Shdr* stab_section, + const typename ElfClass::Shdr* stabstr_section, + const bool big_endian, + Module* module) { // A callback object to handle data from the STABS reader. StabsToModule handler(module); // Find the addresses of the STABS data, and create a STABS reader object. // On Linux, STABS entries always have 32-bit values, regardless of the // address size of the architecture whose code they're describing, and // the strings are always "unitized". - uint8_t *stabs = reinterpret_cast(stab_section->sh_offset); - uint8_t *stabstr = reinterpret_cast(stabstr_section->sh_offset); + const uint8_t* stabs = + GetOffset(elf_header, stab_section->sh_offset); + const uint8_t* stabstr = + GetOffset(elf_header, stabstr_section->sh_offset); google_breakpad::StabsReader reader(stabs, stab_section->sh_size, stabstr, stabstr_section->sh_size, big_endian, 4, true, &handler); @@ -236,10 +200,13 @@ class DumperLineToModule: public DwarfCUToModule::LineToModuleFunctor { dwarf2reader::ByteReader *byte_reader_; }; -static bool LoadDwarf(const string &dwarf_filename, - const ElfW(Ehdr) *elf_header, - const bool big_endian, - Module *module) { +template +bool LoadDwarf(const string& dwarf_filename, + const typename ElfClass::Ehdr* elf_header, + const bool big_endian, + Module* module) { + typedef typename ElfClass::Shdr Shdr; + const dwarf2reader::Endianness endianness = big_endian ? dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; dwarf2reader::ByteReader byte_reader(endianness); @@ -248,15 +215,17 @@ static bool LoadDwarf(const string &dwarf_filename, DwarfCUToModule::FileContext file_context(dwarf_filename, module); // Build a map of the ELF file's sections. - const ElfW(Shdr) *sections - = reinterpret_cast(elf_header->e_shoff); + const Shdr* sections = + GetOffset(elf_header, elf_header->e_shoff); int num_sections = elf_header->e_shnum; - const ElfW(Shdr) *section_names = sections + elf_header->e_shstrndx; + const Shdr* section_names = sections + elf_header->e_shstrndx; for (int i = 0; i < num_sections; i++) { - const ElfW(Shdr) *section = §ions[i]; - string name = reinterpret_cast(section_names->sh_offset + - section->sh_name); - const char *contents = reinterpret_cast(section->sh_offset); + const Shdr* section = §ions[i]; + string name = GetOffset(elf_header, + section_names->sh_offset) + + section->sh_name; + const char* contents = GetOffset(elf_header, + section->sh_offset); uint64 length = section->sh_size; file_context.section_map[name] = std::make_pair(contents, length); } @@ -265,16 +234,16 @@ static bool LoadDwarf(const string &dwarf_filename, DumperLineToModule line_to_module(&byte_reader); std::pair debug_info_section = file_context.section_map[".debug_info"]; - // We should never have been called if the file doesn't have a + // This should never have been called if the file doesn't have a // .debug_info section. assert(debug_info_section.first); uint64 debug_info_length = debug_info_section.second; for (uint64 offset = 0; offset < debug_info_length;) { // Make a handler for the root DIE that populates MODULE with the - // data we find. + // data that was found. DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset); DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter); - // Make a Dwarf2Handler that drives our DIEHandler. + // Make a Dwarf2Handler that drives the DIEHandler. dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); // Make a DWARF parser for the compilation unit at OFFSET. dwarf2reader::CompilationUnit reader(file_context.section_map, @@ -290,10 +259,11 @@ static bool LoadDwarf(const string &dwarf_filename, // Fill REGISTER_NAMES with the register names appropriate to the // machine architecture given in HEADER, indexed by the register // numbers used in DWARF call frame information. Return true on -// success, or false if we don't recognize HEADER's machine -// architecture. -static bool DwarfCFIRegisterNames(const ElfW(Ehdr) *elf_header, - std::vector *register_names) { +// success, or false if HEADER's machine architecture is not +// supported. +template +bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, + std::vector* register_names) { switch (elf_header->e_machine) { case EM_386: *register_names = DwarfCFIToModule::RegisterNames::I386(); @@ -309,19 +279,20 @@ static bool DwarfCFIRegisterNames(const ElfW(Ehdr) *elf_header, } } -static bool LoadDwarfCFI(const string &dwarf_filename, - const ElfW(Ehdr) *elf_header, - const char *section_name, - const ElfW(Shdr) *section, - const bool eh_frame, - const ElfW(Shdr) *got_section, - const ElfW(Shdr) *text_section, - const bool big_endian, - Module *module) { +template +bool LoadDwarfCFI(const string& dwarf_filename, + const typename ElfClass::Ehdr* elf_header, + const char* section_name, + const typename ElfClass::Shdr* section, + const bool eh_frame, + const typename ElfClass::Shdr* got_section, + const typename ElfClass::Shdr* text_section, + const bool big_endian, + Module* module) { // Find the appropriate set of register names for this file's // architecture. std::vector register_names; - if (!DwarfCFIRegisterNames(elf_header, ®ister_names)) { + if (!DwarfCFIRegisterNames(elf_header, ®ister_names)) { fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" " cannot convert DWARF call frame information\n", dwarf_filename.c_str(), elf_header->e_machine); @@ -332,25 +303,17 @@ static bool LoadDwarfCFI(const string &dwarf_filename, dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; // Find the call frame information and its size. - const char *cfi = reinterpret_cast(section->sh_offset); + const char* cfi = + GetOffset(elf_header, section->sh_offset); size_t cfi_size = section->sh_size; // Plug together the parser, handler, and their entourages. DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name); DwarfCFIToModule handler(module, register_names, &module_reporter); dwarf2reader::ByteReader byte_reader(endianness); - // Since we're using the ElfW macro, we're not actually capable of - // processing both ELF32 and ELF64 files with the same program; that - // would take a bit more work. But this will work out well enough. - if (elf_header->e_ident[EI_CLASS] == ELFCLASS32) - byte_reader.SetAddressSize(4); - else if (elf_header->e_ident[EI_CLASS] == ELFCLASS64) - byte_reader.SetAddressSize(8); - else { - fprintf(stderr, "%s: bad file class in ELF header: %d\n", - dwarf_filename.c_str(), elf_header->e_ident[EI_CLASS]); - return false; - } + + byte_reader.SetAddressSize(ElfClass::kAddrSize); + // Provide the base addresses for .eh_frame encoded pointers, if // possible. byte_reader.SetCFIDataBase(section->sh_addr, cfi); @@ -368,8 +331,8 @@ static bool LoadDwarfCFI(const string &dwarf_filename, return true; } -bool LoadELF(const string &obj_file, MmapWrapper* map_wrapper, - ElfW(Ehdr) **elf_header) { +bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, + void** elf_header) { int obj_fd = open(obj_file.c_str(), O_RDONLY); if (obj_fd < 0) { fprintf(stderr, "Failed to open ELF file '%s': %s\n", @@ -391,7 +354,7 @@ bool LoadELF(const string &obj_file, MmapWrapper* map_wrapper, return false; } map_wrapper->set(obj_base, st.st_size); - *elf_header = reinterpret_cast(obj_base); + *elf_header = obj_base; if (!IsValidElf(*elf_header)) { fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); return false; @@ -400,7 +363,9 @@ bool LoadELF(const string &obj_file, MmapWrapper* map_wrapper, } // Get the endianness of ELF_HEADER. If it's invalid, return false. -bool ElfEndianness(const ElfW(Ehdr) *elf_header, bool *big_endian) { +template +bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, + bool* big_endian) { if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { *big_endian = false; return true; @@ -417,17 +382,18 @@ bool ElfEndianness(const ElfW(Ehdr) *elf_header, bool *big_endian) { // Read the .gnu_debuglink and get the debug file name. If anything goes // wrong, return an empty string. -static string ReadDebugLink(const ElfW(Shdr) *debuglink_section, - const string &obj_file, - const string &debug_dir) { - char *debuglink = reinterpret_cast(debuglink_section->sh_offset); +template +string ReadDebugLink(const char* debuglink, + size_t debuglink_size, + const string& obj_file, + const string& debug_dir) { size_t debuglink_len = strlen(debuglink) + 5; // '\0' + CRC32. debuglink_len = 4 * ((debuglink_len + 3) / 4); // Round to nearest 4 bytes. // Sanity check. - if (debuglink_len != debuglink_section->sh_size) { + if (debuglink_len != debuglink_size) { fprintf(stderr, "Mismatched .gnu_debuglink string / section size: " - "%zx %zx\n", debuglink_len, debuglink_section->sh_size); + "%zx %zx\n", debuglink_len, debuglink_size); return ""; } @@ -448,18 +414,21 @@ static string ReadDebugLink(const ElfW(Shdr) *debuglink_section, // // LoadSymbolsInfo // -// Holds the state between the two calls to LoadSymbols() in case we have to -// follow the .gnu_debuglink section and load debug information from a +// Holds the state between the two calls to LoadSymbols() in case it's necessary +// to follow the .gnu_debuglink section and load debug information from a // different file. // +template class LoadSymbolsInfo { public: + typedef typename ElfClass::Addr Addr; + explicit LoadSymbolsInfo(const string &dbg_dir) : debug_dir_(dbg_dir), has_loading_addr_(false) {} - // Keeps track of which sections have been loaded so we don't accidentally - // load it twice from two different files. + // Keeps track of which sections have been loaded so sections don't + // accidentally get loaded twice from two different files. void LoadedSection(const string §ion) { if (loaded_sections_.count(section) == 0) { loaded_sections_.insert(section); @@ -469,9 +438,9 @@ class LoadSymbolsInfo { } } - // We expect the ELF file and linked debug file to have the same preferred + // The ELF file and linked debug file are expected to have the same preferred // loading address. - void set_loading_addr(ElfW(Addr) addr, const string &filename) { + void set_loading_addr(Addr addr, const string &filename) { if (!has_loading_addr_) { loading_addr_ = addr; loaded_file_ = filename; @@ -506,48 +475,55 @@ class LoadSymbolsInfo { bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. - ElfW(Addr) loading_addr_; // Saves the preferred loading address from the - // first call to LoadSymbols(). + Addr loading_addr_; // Saves the preferred loading address from the + // first call to LoadSymbols(). string loaded_file_; // Name of the file loaded from the first call to - // LoadSymbols(). + // LoadSymbols(). std::set loaded_sections_; // Tracks the Loaded ELF sections - // between calls to LoadSymbols(). + // between calls to LoadSymbols(). }; -static bool LoadSymbols(const string &obj_file, - const bool big_endian, - ElfW(Ehdr) *elf_header, - const bool read_gnu_debug_link, - LoadSymbolsInfo *info, - Module *module) { - // Translate all offsets in section headers into address. - FixAddress(elf_header); - ElfW(Addr) loading_addr = GetLoadingAddress( - reinterpret_cast(elf_header->e_phoff), +template +bool LoadSymbols(const string& obj_file, + const bool big_endian, + const typename ElfClass::Ehdr* elf_header, + const bool read_gnu_debug_link, + LoadSymbolsInfo* info, + Module* module) { + typedef typename ElfClass::Addr Addr; + typedef typename ElfClass::Phdr Phdr; + typedef typename ElfClass::Shdr Shdr; + + Addr loading_addr = GetLoadingAddress( + GetOffset(elf_header, elf_header->e_phoff), elf_header->e_phnum); module->SetLoadAddress(loading_addr); info->set_loading_addr(loading_addr, obj_file); - const ElfW(Shdr) *sections = - reinterpret_cast(elf_header->e_shoff); - const ElfW(Shdr) *section_names = sections + elf_header->e_shstrndx; + const Shdr* sections = + GetOffset(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset(elf_header, section_names->sh_offset); + const char *names_end = names + section_names->sh_size; bool found_debug_info_section = false; bool found_usable_info = false; // Look for STABS debugging information, and load it if present. - const ElfW(Shdr) *stab_section - = FindSectionByName(".stab", sections, section_names, - elf_header->e_shnum); + const Shdr* stab_section = + FindElfSectionByName(".stab", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); if (stab_section) { - const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections; + const Shdr* stabstr_section = stab_section->sh_link + sections; if (stabstr_section) { found_debug_info_section = true; found_usable_info = true; info->LoadedSection(".stab"); - if (!LoadStabs(elf_header, stab_section, stabstr_section, big_endian, - module)) { + if (!LoadStabs(elf_header, stab_section, stabstr_section, + big_endian, module)) { fprintf(stderr, "%s: \".stab\" section found, but failed to load STABS" " debugging information\n", obj_file.c_str()); } @@ -555,52 +531,60 @@ static bool LoadSymbols(const string &obj_file, } // Look for DWARF debugging information, and load it if present. - const ElfW(Shdr) *dwarf_section - = FindSectionByName(".debug_info", sections, section_names, - elf_header->e_shnum); + const Shdr* dwarf_section = + FindElfSectionByName(".debug_info", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); if (dwarf_section) { found_debug_info_section = true; found_usable_info = true; info->LoadedSection(".debug_info"); - if (!LoadDwarf(obj_file, elf_header, big_endian, module)) + if (!LoadDwarf(obj_file, elf_header, big_endian, module)) fprintf(stderr, "%s: \".debug_info\" section found, but failed to load " "DWARF debugging information\n", obj_file.c_str()); } // Dwarf Call Frame Information (CFI) is actually independent from // the other DWARF debugging information, and can be used alone. - const ElfW(Shdr) *dwarf_cfi_section = - FindSectionByName(".debug_frame", sections, section_names, - elf_header->e_shnum); + const Shdr* dwarf_cfi_section = + FindElfSectionByName(".debug_frame", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); if (dwarf_cfi_section) { // Ignore the return value of this function; even without call frame // information, the other debugging information could be perfectly // useful. info->LoadedSection(".debug_frame"); bool result = - LoadDwarfCFI(obj_file, elf_header, ".debug_frame", - dwarf_cfi_section, false, 0, 0, big_endian, module); + LoadDwarfCFI(obj_file, elf_header, ".debug_frame", + dwarf_cfi_section, false, 0, 0, big_endian, + module); found_usable_info = found_usable_info || result; } // Linux C++ exception handling information can also provide // unwinding data. - const ElfW(Shdr) *eh_frame_section = - FindSectionByName(".eh_frame", sections, section_names, - elf_header->e_shnum); + const Shdr* eh_frame_section = + FindElfSectionByName(".eh_frame", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); if (eh_frame_section) { // Pointers in .eh_frame data may be relative to the base addresses of // certain sections. Provide those sections if present. - const ElfW(Shdr) *got_section = - FindSectionByName(".got", sections, section_names, elf_header->e_shnum); - const ElfW(Shdr) *text_section = - FindSectionByName(".text", sections, section_names, - elf_header->e_shnum); + const Shdr* got_section = + FindElfSectionByName(".got", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); + const Shdr* text_section = + FindElfSectionByName(".text", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); info->LoadedSection(".eh_frame"); // As above, ignore the return value of this function. bool result = - LoadDwarfCFI(obj_file, elf_header, ".eh_frame", eh_frame_section, true, - got_section, text_section, big_endian, module); + LoadDwarfCFI(obj_file, elf_header, ".eh_frame", + eh_frame_section, true, + got_section, text_section, big_endian, module); found_usable_info = found_usable_info || result; } @@ -609,15 +593,21 @@ static bool LoadSymbols(const string &obj_file, " (no \".stab\" or \".debug_info\" sections)\n", obj_file.c_str()); - // Failed, but maybe we can find a .gnu_debuglink section? + // Failed, but maybe there's a .gnu_debuglink section? if (read_gnu_debug_link) { - const ElfW(Shdr) *gnu_debuglink_section - = FindSectionByName(".gnu_debuglink", sections, section_names, - elf_header->e_shnum); + const Shdr* gnu_debuglink_section + = FindElfSectionByName(".gnu_debuglink", SHT_PROGBITS, + sections, names, + names_end, elf_header->e_shnum); if (gnu_debuglink_section) { if (!info->debug_dir().empty()) { - string debuglink_file = - ReadDebugLink(gnu_debuglink_section, obj_file, info->debug_dir()); + const char* debuglink_contents = + GetOffset(elf_header, + gnu_debuglink_section->sh_offset); + string debuglink_file + = ReadDebugLink(debuglink_contents, + gnu_debuglink_section->sh_size, + obj_file, info->debug_dir()); info->set_debuglink_file(debuglink_file); } else { fprintf(stderr, ".gnu_debuglink section found in '%s', " @@ -630,31 +620,29 @@ static bool LoadSymbols(const string &obj_file, } else { // The caller doesn't want to consult .gnu_debuglink. // See if there are export symbols available. - const ElfW(Shdr) *dynsym_section = - FindSectionByName(".dynsym", sections, section_names, - elf_header->e_shnum); - const ElfW(Shdr) *dynstr_section = - FindSectionByName(".dynstr", sections, section_names, - elf_header->e_shnum); + const Shdr* dynsym_section = + FindElfSectionByName(".dynsym", SHT_DYNSYM, + sections, names, names_end, + elf_header->e_shnum); + const Shdr* dynstr_section = + FindElfSectionByName(".dynstr", SHT_STRTAB, + sections, names, names_end, + elf_header->e_shnum); if (dynsym_section && dynstr_section) { info->LoadedSection(".dynsym"); - fprintf(stderr, "Have .dynsym + .dynstr\n"); - uint8_t* dynsyms = - reinterpret_cast(dynsym_section->sh_offset); - uint8_t* dynstrs = - reinterpret_cast(dynstr_section->sh_offset); + const uint8_t* dynsyms = + GetOffset(elf_header, dynsym_section->sh_offset); + const uint8_t* dynstrs = + GetOffset(elf_header, dynstr_section->sh_offset); bool result = - ELFSymbolsToModule(dynsyms, - dynsym_section->sh_size, - dynstrs, - dynstr_section->sh_size, - big_endian, - // This could change to something more useful - // when support for dumping cross-architecture - // symbols is finished. - sizeof(ElfW(Addr)), - module); + ELFSymbolsToModule(dynsyms, + dynsym_section->sh_size, + dynstrs, + dynstr_section->sh_size, + big_endian, + ElfClass::kAddrSize, + module); found_usable_info = found_usable_info || result; } @@ -673,8 +661,10 @@ static bool LoadSymbols(const string &obj_file, // Return the breakpad symbol file identifier for the architecture of // ELF_HEADER. -const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) { - ElfW(Half) arch = elf_header->e_machine; +template +const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { + typedef typename ElfClass::Half Half; + Half arch = elf_header->e_machine; switch (arch) { case EM_386: return "x86"; case EM_ARM: return "arm"; @@ -703,8 +693,7 @@ string FormatIdentifier(unsigned char identifier[16]) { id_no_dash += identifier_str[i]; // Add an extra "0" by the end. PDB files on Windows have an 'age' // number appended to the end of the file identifier; this isn't - // really used or necessary on other platforms, but let's preserve - // the pattern. + // really used or necessary on other platforms, but be consistent. id_no_dash += '0'; return id_no_dash; } @@ -719,24 +708,14 @@ string BaseFileName(const string &filename) { return base; } -} // namespace - -namespace google_breakpad { - -// Not explicitly exported, but not static so it can be used in unit tests. -// Ideally obj_file would be const, but internally this code does write -// to some ELF header fields to make its work simpler. -bool WriteSymbolFileInternal(uint8_t* obj_file, - const string &obj_filename, - const string &debug_dir, +template +bool WriteSymbolFileElfClass(const typename ElfClass::Ehdr* elf_header, + const string& obj_filename, + const string& debug_dir, bool cfi, - std::ostream &sym_stream) { - ElfW(Ehdr) *elf_header = reinterpret_cast(obj_file); - - if (!IsValidElf(elf_header)) { - fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); - return false; - } + std::ostream& sym_stream) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Shdr Shdr; unsigned char identifier[16]; if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header, @@ -746,7 +725,7 @@ bool WriteSymbolFileInternal(uint8_t* obj_file, return false; } - const char *architecture = ElfArchitecture(elf_header); + const char *architecture = ElfArchitecture(elf_header); if (!architecture) { fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", obj_filename.c_str(), elf_header->e_machine); @@ -755,17 +734,17 @@ bool WriteSymbolFileInternal(uint8_t* obj_file, // Figure out what endianness this file is. bool big_endian; - if (!ElfEndianness(elf_header, &big_endian)) + if (!ElfEndianness(elf_header, &big_endian)) return false; string name = BaseFileName(obj_filename); string os = "Linux"; string id = FormatIdentifier(identifier); - LoadSymbolsInfo info(debug_dir); + LoadSymbolsInfo info(debug_dir); Module module(name, os, architecture, id); - if (!LoadSymbols(obj_filename, big_endian, elf_header, !debug_dir.empty(), - &info, &module)) { + if (!LoadSymbols(obj_filename, big_endian, elf_header, + !debug_dir.empty(), &info, &module)) { const string debuglink_file = info.debuglink_file(); if (debuglink_file.empty()) return false; @@ -773,11 +752,13 @@ bool WriteSymbolFileInternal(uint8_t* obj_file, // Load debuglink ELF file. fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); MmapWrapper debug_map_wrapper; - ElfW(Ehdr) *debug_elf_header = NULL; - if (!LoadELF(debuglink_file, &debug_map_wrapper, &debug_elf_header)) + Ehdr* debug_elf_header = NULL; + if (!LoadELF(debuglink_file, &debug_map_wrapper, + reinterpret_cast(&debug_elf_header))) return false; // Sanity checks to make sure everything matches up. - const char *debug_architecture = ElfArchitecture(debug_elf_header); + const char *debug_architecture = + ElfArchitecture(debug_elf_header); if (!debug_architecture) { fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", debuglink_file.c_str(), debug_elf_header->e_machine); @@ -792,7 +773,7 @@ bool WriteSymbolFileInternal(uint8_t* obj_file, } bool debug_big_endian; - if (!ElfEndianness(debug_elf_header, &debug_big_endian)) + if (!ElfEndianness(debug_elf_header, &debug_big_endian)) return false; if (debug_big_endian != big_endian) { fprintf(stderr, "%s and %s does not match in endianness\n", @@ -800,8 +781,8 @@ bool WriteSymbolFileInternal(uint8_t* obj_file, return false; } - if (!LoadSymbols(debuglink_file, debug_big_endian, debug_elf_header, - false, &info, &module)) { + if (!LoadSymbols(debuglink_file, debug_big_endian, + debug_elf_header, false, &info, &module)) { return false; } } @@ -811,12 +792,43 @@ bool WriteSymbolFileInternal(uint8_t* obj_file, return true; } +} // namespace + +namespace google_breakpad { + +// Not explicitly exported, but not static so it can be used in unit tests. +bool WriteSymbolFileInternal(const uint8_t* obj_file, + const string& obj_filename, + const string& debug_dir, + bool cfi, + std::ostream& sym_stream) { + + if (!IsValidElf(obj_file)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); + return false; + } + + int elfclass = ElfClass(obj_file); + if (elfclass == ELFCLASS32) { + return WriteSymbolFileElfClass( + reinterpret_cast(obj_file), obj_filename, debug_dir, + cfi, sym_stream); + } + if (elfclass == ELFCLASS64) { + return WriteSymbolFileElfClass( + reinterpret_cast(obj_file), obj_filename, debug_dir, + cfi, sym_stream); + } + + return false; +} + bool WriteSymbolFile(const string &obj_file, const string &debug_dir, bool cfi, std::ostream &sym_stream) { MmapWrapper map_wrapper; - ElfW(Ehdr) *elf_header = NULL; + void* elf_header = NULL; if (!LoadELF(obj_file, &map_wrapper, &elf_header)) return false; diff --git a/src/common/linux/dump_symbols_unittest.cc b/src/common/linux/dump_symbols_unittest.cc index 2c4f0e65..aec6720c 100644 --- a/src/common/linux/dump_symbols_unittest.cc +++ b/src/common/linux/dump_symbols_unittest.cc @@ -45,7 +45,7 @@ #include "common/using_std_string.h" namespace google_breakpad { -bool WriteSymbolFileInternal(uint8_t* obj_file, +bool WriteSymbolFileInternal(const uint8_t* obj_file, const string &obj_filename, const string &debug_dir, bool cfi, @@ -89,9 +89,6 @@ TEST_F(DumpSymbols, Invalid) { s)); } -// TODO(ted): Fix the dump_symbols code to deal with cross-word-size -// ELF files. -#if __ELF_NATIVE_CLASS == 32 TEST_F(DumpSymbols, SimplePublic32) { ELF elf(EM_386, ELFCLASS32, kLittleEndian); // Zero out text section for simplicity. @@ -126,9 +123,7 @@ TEST_F(DumpSymbols, SimplePublic32) { "PUBLIC 1000 0 superfunc\n", s.str()); } -#endif -#if __ELF_NATIVE_CLASS == 64 TEST_F(DumpSymbols, SimplePublic64) { ELF elf(EM_X86_64, ELFCLASS64, kLittleEndian); // Zero out text section for simplicity. @@ -163,4 +158,3 @@ TEST_F(DumpSymbols, SimplePublic64) { "PUBLIC 1000 0 superfunc\n", s.str()); } -#endif diff --git a/src/common/linux/elfutils-inl.h b/src/common/linux/elfutils-inl.h new file mode 100644 index 00000000..e56b37a9 --- /dev/null +++ b/src/common/linux/elfutils-inl.h @@ -0,0 +1,74 @@ +// Copyright (c) 2012, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_LINUX_ELFUTILS_INL_H__ +#define COMMON_LINUX_ELFUTILS_INL_H__ + +#include "common/linux/linux_libc_support.h" +#include "elfutils.h" + +namespace google_breakpad { + +template +const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset) { + return reinterpret_cast(reinterpret_cast(elf_header) + + offset); +} + +template +const typename ElfClass::Shdr* FindElfSectionByName( + const char* name, + typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, + const char* section_names, + const char* names_end, + int nsection) { + assert(name != NULL); + assert(sections != NULL); + assert(nsection > 0); + + int name_len = my_strlen(name); + if (name_len == 0) + return NULL; + + for (int i = 0; i < nsection; ++i) { + const char* section_name = section_names + sections[i].sh_name; + if (sections[i].sh_type == section_type && + names_end - section_name >= name_len + 1 && + my_strcmp(name, section_name) == 0) { + return sections + i; + } + } + return NULL; +} + +} // namespace google_breakpad + +#endif // COMMON_LINUX_ELFUTILS_INL_H__ diff --git a/src/common/linux/elfutils.cc b/src/common/linux/elfutils.cc index 2fd5c1ff..ee2f4ac0 100644 --- a/src/common/linux/elfutils.cc +++ b/src/common/linux/elfutils.cc @@ -33,6 +33,7 @@ #include #include "common/linux/linux_libc_support.h" +#include "common/linux/elfutils-inl.h" namespace google_breakpad { @@ -41,7 +42,7 @@ namespace { template void FindElfClassSection(const char *elf_base, const char *section_name, - uint32_t section_type, + typename ElfClass::Word section_type, const void **section_start, int *section_size) { typedef typename ElfClass::Ehdr Ehdr; @@ -53,27 +54,21 @@ void FindElfClassSection(const char *elf_base, assert(my_strncmp(elf_base, ELFMAG, SELFMAG) == 0); - int name_len = my_strlen(section_name); - const Ehdr* elf_header = reinterpret_cast(elf_base); assert(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); const Shdr* sections = - reinterpret_cast(elf_base + elf_header->e_shoff); - const Shdr* string_section = sections + elf_header->e_shstrndx; - - const Shdr* section = NULL; - for (int i = 0; i < elf_header->e_shnum; ++i) { - if (sections[i].sh_type == section_type) { - const char* current_section_name = (char*)(elf_base + - string_section->sh_offset + - sections[i].sh_name); - if (!my_strncmp(current_section_name, section_name, name_len)) { - section = §ions[i]; - break; - } - } - } + GetOffset(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset(elf_header, section_names->sh_offset); + const char *names_end = names + section_names->sh_size; + + const Shdr* section = + FindElfSectionByName(section_name, section_type, + sections, names, names_end, + elf_header->e_shnum); + if (section != NULL && section->sh_size > 0) { *section_start = elf_base + section->sh_offset; *section_size = section->sh_size; @@ -82,6 +77,18 @@ void FindElfClassSection(const char *elf_base, } // namespace +bool IsValidElf(const void* elf_base) { + return my_strncmp(reinterpret_cast(elf_base), + ELFMAG, SELFMAG) == 0; +} + +int ElfClass(const void* elf_base) { + const ElfW(Ehdr)* elf_header = + reinterpret_cast(elf_base); + + return elf_header->e_ident[EI_CLASS]; +} + bool FindElfSection(const void *elf_mapped_base, const char *section_name, uint32_t section_type, @@ -95,22 +102,22 @@ bool FindElfSection(const void *elf_mapped_base, *section_start = NULL; *section_size = 0; - const char* elf_base = - static_cast(elf_mapped_base); - const ElfW(Ehdr)* elf_header = - reinterpret_cast(elf_base); - if (my_strncmp(elf_base, ELFMAG, SELFMAG) != 0) + if (!IsValidElf(elf_mapped_base)) return false; + int cls = ElfClass(elf_mapped_base); if (elfclass) { - *elfclass = elf_header->e_ident[EI_CLASS]; + *elfclass = cls; } - if (elf_header->e_ident[EI_CLASS] == ELFCLASS32) { + const char* elf_base = + static_cast(elf_mapped_base); + + if (cls == ELFCLASS32) { FindElfClassSection(elf_base, section_name, section_type, section_start, section_size); return *section_start != NULL; - } else if (elf_header->e_ident[EI_CLASS] == ELFCLASS64) { + } else if (cls == ELFCLASS64) { FindElfClassSection(elf_base, section_name, section_type, section_start, section_size); return *section_start != NULL; diff --git a/src/common/linux/elfutils.h b/src/common/linux/elfutils.h index 30f2af3f..95105966 100644 --- a/src/common/linux/elfutils.h +++ b/src/common/linux/elfutils.h @@ -52,7 +52,11 @@ struct ElfClass32 { typedef Elf32_Nhdr Nhdr; typedef Elf32_Phdr Phdr; typedef Elf32_Shdr Shdr; + typedef Elf32_Half Half; + typedef Elf32_Off Off; + typedef Elf32_Word Word; static const int kClass = ELFCLASS32; + static const size_t kAddrSize = sizeof(Elf32_Addr); }; struct ElfClass64 { @@ -61,9 +65,16 @@ struct ElfClass64 { typedef Elf64_Nhdr Nhdr; typedef Elf64_Phdr Phdr; typedef Elf64_Shdr Shdr; + typedef Elf64_Half Half; + typedef Elf64_Off Off; + typedef Elf64_Word Word; static const int kClass = ELFCLASS64; + static const size_t kAddrSize = sizeof(Elf64_Addr); }; +bool IsValidElf(const void* elf_header); +int ElfClass(const void* elf_base); + // Attempt to find a section named |section_name| of type |section_type| // in the ELF binary data at |elf_mapped_base|. On success, returns true // and sets |*section_start| to point to the start of the section data, @@ -76,6 +87,26 @@ bool FindElfSection(const void *elf_mapped_base, int *section_size, int *elfclass); +// Internal helper method, exposed for convenience for callers +// that already have more info. +template +const typename ElfClass::Shdr* +FindElfSectionByName(const char* name, + typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, + const char* section_names, + const char* names_end, + int nsection); + +// Convert an offset from an Elf header into a pointer to the mapped +// address in the current process. Takes an extra template parameter +// to specify the return type to avoid having to dynamic_cast the +// result. +template +const T* +GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset); + } // namespace google_breakpad #endif // COMMON_LINUX_ELFUTILS_H__ -- cgit v1.2.1