// Copyright (c) 2006, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/linux/dump_symbols.h" #include "common/linux/file_id.h" #include "common/linux/module.h" #include "common/linux/stabs_reader.h" // This namespace contains helper functions. 
namespace {

using google_breakpad::Module;
using std::vector;

// Stab section name.
static const char *kStabName = ".stab";

// Return the demangled form of MANGLED, or a copy of MANGLED itself if
// the demangler reports failure.
// Demangle using abi call.
// Older GCC may not support it.
static std::string Demangle(const std::string &mangled) {
  int status = 0;
  char *demangled = abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &status);
  if (status == 0 && demangled != NULL) {
    // The demangler hands us a malloc'ed buffer; copy it and release it.
    std::string str(demangled);
    free(demangled);
    return str;
  }
  return std::string(mangled);
}

// Fix offset into virtual address by adding the mapped base into offsets.
// Make life easier when want to find something by offset.
//
// OBJ_BASE is the address of the ELF header. This rewrites e_phoff,
// e_shoff and every section header's sh_offset in place, so the memory
// at OBJ_BASE must be writable. No bounds checking is done on the
// offsets found in the headers.
static void FixAddress(void *obj_base) {
  ElfW(Addr) base = reinterpret_cast<ElfW(Addr)>(obj_base);
  ElfW(Ehdr) *elf_header = static_cast<ElfW(Ehdr) *>(obj_base);
  elf_header->e_phoff += base;
  elf_header->e_shoff += base;
  // e_shoff has just been turned into an address, so it can be used
  // directly as a pointer to the section header table.
  ElfW(Shdr) *sections = reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
  for (int i = 0; i < elf_header->e_shnum; ++i)
    sections[i].sh_offset += base;
}

// Find the preferred loading address of the binary.
// PROGRAM_HEADERS points to NHEADER program headers.
static ElfW(Addr) GetLoadingAddress(const ElfW(Phdr) *program_headers,
                                    int nheader) {
  for (int i = 0; i < nheader; ++i) {
    const ElfW(Phdr) &header = program_headers[i];
    // For executable, it is the PT_LOAD segment with offset to zero.
    if (header.p_type == PT_LOAD &&
        header.p_offset == 0)
      return header.p_vaddr;
  }
  // For other types of ELF, return 0.
  return 0;
}

// Return true if ELF_HEADER starts with the ELF magic bytes.
static bool IsValidElf(const ElfW(Ehdr) *elf_header) {
  return memcmp(elf_header, ELFMAG, SELFMAG) == 0;
}

// Return the first of the NSECTION section headers in SECTIONS whose
// name is exactly NAME, or NULL if there is none (or NAME is empty).
// STRTAB is the header of the section holding the section names; its
// sh_offset, like those in SECTIONS, must already be an address (see
// FixAddress).
static const ElfW(Shdr) *FindSectionByName(const char *name,
                                           const ElfW(Shdr) *sections,
                                           const ElfW(Shdr) *strtab,
                                           int nsection) {
  assert(name != NULL);
  assert(sections != NULL);
  assert(nsection > 0);

  if (name[0] == '\0')
    return NULL;

  for (int i = 0; i < nsection; ++i) {
    const char *section_name =
        reinterpret_cast<const char *>(strtab->sh_offset + sections[i].sh_name);
    // Compare whole names: a prefix comparison would let ".stab" match
    // ".stabstr" or ".stab.index" if one of those came first.
    if (strcmp(name, section_name) == 0)
      return sections + i;
  }
  return NULL;
}

// Our handler class for STABS data.
class DumpStabsHandler: public google_breakpad::StabsHandler { public: DumpStabsHandler(Module *module) : module_(module), comp_unit_base_address_(0), current_function_(NULL), current_source_file_(NULL), current_source_file_name_(NULL) { } bool StartCompilationUnit(const char *name, uint64_t address, const char *build_directory); bool EndCompilationUnit(uint64_t address); bool StartFunction(const std::string &name, uint64_t address); bool EndFunction(uint64_t address); bool Line(uint64_t address, const char *name, int number); // Do any final processing necessary to make module_ contain all the // data provided by the STABS reader. // // Because STABS does not provide reliable size information for // functions and lines, we need to make a pass over the data after // processing all the STABS to compute those sizes. We take care of // that here. void Finalize(); private: // An arbitrary, but very large, size to use for functions whose // size we can't compute properly. static const uint64_t kFallbackSize = 0x10000000; // The module we're contributing debugging info to. Module *module_; // The functions we've generated so far. We don't add these to // module_ as we parse them. Instead, we wait until we've computed // their ending address, and their lines' ending addresses. // // We could just stick them in module_ from the outset, but if // module_ already contains data gathered from other debugging // formats, that would complicate the size computation. vector functions_; // Boundary addresses. STABS doesn't necessarily supply sizes for // functions and lines, so we need to compute them ourselves by // finding the next object. vector boundaries_; // The base address of the current compilation unit. We use this to // recognize functions we should omit from the symbol file. (If you // know the details of why we omit these, please patch this // comment.) Module::Address comp_unit_base_address_; // The function we're currently contributing lines to. 
Module::Function *current_function_; // The last Module::File we got a line number in. Module::File *current_source_file_; // The pointer in the .stabstr section of the name that // current_source_file_ is built from. This allows us to quickly // recognize when the current line is in the same file as the // previous one (which it usually is). const char *current_source_file_name_; }; bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address, const char *build_directory) { assert(! comp_unit_base_address_); current_source_file_name_ = name; current_source_file_ = module_->FindFile(name); comp_unit_base_address_ = address; boundaries_.push_back(static_cast(address)); return true; } bool DumpStabsHandler::EndCompilationUnit(uint64_t address) { assert(comp_unit_base_address_); comp_unit_base_address_ = 0; current_source_file_ = NULL; current_source_file_name_ = NULL; if (address) boundaries_.push_back(static_cast(address)); return true; } bool DumpStabsHandler::StartFunction(const std::string &name, uint64_t address) { assert(! current_function_); Module::Function *f = new Module::Function; f->name_ = Demangle(name); f->address_ = address; f->size_ = 0; // We compute this in DumpStabsHandler::Finalize(). f->parameter_size_ = 0; // We don't provide this information. current_function_ = f; boundaries_.push_back(static_cast(address)); return true; } bool DumpStabsHandler::EndFunction(uint64_t address) { assert(current_function_); // Functions in this compilation unit should have address bigger // than the compilation unit's starting address. There may be a lot // of duplicated entries for functions in the STABS data; only one // entry can meet this requirement. // // (I don't really understand the above comment; just bringing it // along from the previous code, and leaving the behaivor unchanged. // If you know the whole story, please patch this comment. 
--jimb) if (current_function_->address_ >= comp_unit_base_address_) functions_.push_back(current_function_); else delete current_function_; current_function_ = NULL; if (address) boundaries_.push_back(static_cast(address)); return true; } bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) { assert(current_function_); assert(current_source_file_); if (name != current_source_file_name_) { current_source_file_ = module_->FindFile(name); current_source_file_name_ = name; } Module::Line line; line.address_ = address; line.size_ = 0; // We compute this in DumpStabsHandler::Finalize(). line.file_ = current_source_file_; line.number_ = number; current_function_->lines_.push_back(line); return true; } void DumpStabsHandler::Finalize() { // Sort our boundary list, so we can search it quickly. sort(boundaries_.begin(), boundaries_.end()); // Sort all functions by address, just for neatness. sort(functions_.begin(), functions_.end(), Module::Function::CompareByAddress); for (vector::iterator func_it = functions_.begin(); func_it != functions_.end(); func_it++) { Module::Function *f = *func_it; // Compute the function f's size. vector::iterator boundary = std::upper_bound(boundaries_.begin(), boundaries_.end(), f->address_); if (boundary != boundaries_.end()) f->size_ = *boundary - f->address_; else // If this is the last function in the module, and the STABS // reader was unable to give us its ending address, then assign // it a bogus, very large value. This will happen at most once // per module: since we've added all functions' addresses to the // boundary table, only one can be the last. f->size_ = kFallbackSize; // Compute sizes for each of the function f's lines --- if it has any. if (! 
f->lines_.empty()) { stable_sort(f->lines_.begin(), f->lines_.end(), Module::Line::CompareByAddress); vector::iterator last_line = f->lines_.end() - 1; for (vector::iterator line_it = f->lines_.begin(); line_it != last_line; line_it++) line_it[0].size_ = line_it[1].address_ - line_it[0].address_; // Compute the size of the last line from f's end address. last_line->size_ = (f->address_ + f->size_) - last_line->address_; } } // Now that everything has a size, add our functions to the module, and // dispose of our private list. module_->AddFunctions(functions_.begin(), functions_.end()); functions_.clear(); } static bool LoadSymbols(const ElfW(Shdr) *stab_section, const ElfW(Shdr) *stabstr_section, Module *module) { if (stab_section == NULL || stabstr_section == NULL) return false; // A callback object to handle data from the STABS reader. DumpStabsHandler handler(module); // Find the addresses of the STABS data, and create a STABS reader object. uint8_t *stabs = reinterpret_cast(stab_section->sh_offset); uint8_t *stabstr = reinterpret_cast(stabstr_section->sh_offset); google_breakpad::StabsReader reader(stabs, stab_section->sh_size, stabstr, stabstr_section->sh_size, &handler); // Read the STABS data, and do post-processing. if (! reader.Process()) return false; handler.Finalize(); return true; } static bool LoadSymbols(ElfW(Ehdr) *elf_header, Module *module) { // Translate all offsets in section headers into address. 
FixAddress(elf_header); ElfW(Addr) loading_addr = GetLoadingAddress( reinterpret_cast(elf_header->e_phoff), elf_header->e_phnum); module->SetLoadAddress(loading_addr); const ElfW(Shdr) *sections = reinterpret_cast(elf_header->e_shoff); const ElfW(Shdr) *strtab = sections + elf_header->e_shstrndx; const ElfW(Shdr) *stab_section = FindSectionByName(kStabName, sections, strtab, elf_header->e_shnum); if (stab_section == NULL) { fprintf(stderr, "Stab section not found.\n"); return false; } const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections; // Load symbols. return LoadSymbols(stab_section, stabstr_section, module); } // // FDWrapper // // Wrapper class to make sure opened file is closed. // class FDWrapper { public: explicit FDWrapper(int fd) : fd_(fd) { } ~FDWrapper() { if (fd_ != -1) close(fd_); } int get() { return fd_; } int release() { int fd = fd_; fd_ = -1; return fd; } private: int fd_; }; // // MmapWrapper // // Wrapper class to make sure mapped regions are unmapped. // class MmapWrapper { public: MmapWrapper(void *mapped_address, size_t mapped_size) : base_(mapped_address), size_(mapped_size) { } ~MmapWrapper() { if (base_ != NULL) { assert(size_ > 0); munmap(base_, size_); } } void release() { base_ = NULL; size_ = 0; } private: void *base_; size_t size_; }; // Return the breakpad symbol file identifier for the architecture of // ELF_HEADER. const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) { ElfW(Half) arch = elf_header->e_machine; if (arch == EM_386) return "x86"; else if (arch == EM_X86_64) return "x86_64"; else return NULL; } // Format the Elf file identifier in IDENTIFIER as a UUID with the // dashes removed. 
std::string FormatIdentifier(unsigned char identifier[16]) { char identifier_str[40]; google_breakpad::FileID::ConvertIdentifierToString( identifier, identifier_str, sizeof(identifier_str)); std::string id_no_dash; for (int i = 0; identifier_str[i] != '\0'; ++i) if (identifier_str[i] != '-') id_no_dash += identifier_str[i]; // Add an extra "0" by the end. PDB files on Windows have an 'age' // number appended to the end of the file identifier; this isn't // really used or necessary on other platforms, but let's preserve // the pattern. id_no_dash += '0'; return id_no_dash; } // Return the non-directory portion of FILENAME: the portion after the // last slash, or the whole filename if there are no slashes. std::string BaseFileName(const std::string &filename) { // Lots of copies! basename's behavior is less than ideal. char *c_filename = strdup(filename.c_str()); std::string base = basename(c_filename); free(c_filename); return base; } } // namespace namespace google_breakpad { bool DumpSymbols::WriteSymbolFile(const std::string &obj_file, FILE *sym_file) { int obj_fd = open(obj_file.c_str(), O_RDONLY); if (obj_fd < 0) return false; FDWrapper obj_fd_wrapper(obj_fd); struct stat st; if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) return false; void *obj_base = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0); if (obj_base == MAP_FAILED) return false; MmapWrapper map_wrapper(obj_base, st.st_size); ElfW(Ehdr) *elf_header = reinterpret_cast(obj_base); if (!IsValidElf(elf_header)) return false; unsigned char identifier[16]; google_breakpad::FileID file_id(obj_file.c_str()); if (! file_id.ElfFileIdentifier(identifier)) return false; const char *architecture = ElfArchitecture(elf_header); if (! 
architecture) return false; std::string name = BaseFileName(obj_file); std::string os = "Linux"; std::string id = FormatIdentifier(identifier); Module module(name, os, architecture, id); if (!LoadSymbols(elf_header, &module)) return false; if (!module.Write(sym_file)) return false; return true; } } // namespace google_breakpad