diff options
author | jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e> | 2009-08-07 19:28:45 +0000 |
---|---|---|
committer | jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e> | 2009-08-07 19:28:45 +0000 |
commit | eab03fdb72a77dfd71db028e6e5676e734bdc443 (patch) | |
tree | 4e3c53bf0edcf6024695f6f876d54deb51c4b23e /src/common | |
parent | Add files left behind by previous commit. (diff) | |
download | breakpad-eab03fdb72a77dfd71db028e6e5676e734bdc443.tar.xz |
Linux dumper: Move the data structures representing the breakpad data into their own class.
src/common/linux/module.h defines a new class, google_breakpad::Module,
that can represent the contents of a breakpad symbol file. Module::Write
writes a well-formed symbol file to the given stream.
src/common/linux/dump_symbols.cc can now lose its symbol-file-writing
code, and change DumpStabsHandler to populate a Module object, rather
than the old SymbolInfo/SourceFileInfo/... collection of types.
The code to compute function and line sizes, even in the absence of
reliable size data in STABS, is moved into a new Finalize method of
DumpStabsHandler, which is responsible for completing the Module's
contents.
a=jimblandy
r=nealsid
git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@380 4c0a9323-5329-0410-9bdc-e9ce6186880e
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/linux/dump_symbols.cc | 662 | ||||
-rw-r--r-- | src/common/linux/module.cc | 167 | ||||
-rw-r--r-- | src/common/linux/module.h | 191 |
3 files changed, 574 insertions, 446 deletions
diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index a87ab97e..3f6fbb08 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -41,128 +41,34 @@ #include <unistd.h> #include <algorithm> -#include <string> +#include <cstring> #include <functional> #include <list> -#include <vector> #include <map> -#include <string.h> +#include <string> +#include <vector> -#include "common/linux/stabs_reader.h" #include "common/linux/dump_symbols.h" #include "common/linux/file_id.h" #include "common/linux/guid_creator.h" +#include "common/linux/module.h" +#include "common/linux/stabs_reader.h" #include "processor/scoped_ptr.h" // This namespace contains helper functions. namespace { -struct SourceFileInfo; - -// Infomation of a line. -struct LineInfo { - // Offset from start of the function. - // Load from stab symbol. - ElfW(Off) rva_to_func; - // Offset from base of the loading binary. - ElfW(Off) rva_to_base; - // Size of the line. - // It is the difference of the starting address of the line and starting - // address of the next N_SLINE, N_FUN or N_SO. - uint32_t size; - // Line number. - uint32_t line_num; - // The source file this line belongs to. - SourceFileInfo *file; -}; - -typedef std::list<struct LineInfo> LineInfoList; - -// Information of a function. -struct FuncInfo { - // Name of the function. - std::string name; - // Offset from the base of the loading address. - ElfW(Off) rva_to_base; - // Virtual address of the function. - // Load from stab symbol. - ElfW(Addr) addr; - // Size of the function. - // It is the difference of the starting address of the function and starting - // address of the next N_FUN or N_SO. - uint32_t size; - // Total size of stack parameters. - uint32_t stack_param_size; - // Line information array. - LineInfoList line_info; -}; - -typedef std::list<struct FuncInfo> FuncInfoList; - -// Information of a source file. -struct SourceFileInfo { - // Name of the source file. 
- const char *name; - // Starting address of the source file. - ElfW(Addr) addr; - // Id of the source file. - int source_id; - // Functions information. - FuncInfoList func_info; -}; - -// A simple std::list of pointers to SourceFileInfo structures, that -// owns the structures pointed to: destroying the list destroys them, -// as well. -class SourceFileInfoList : public std::list<SourceFileInfo *> { - public: - ~SourceFileInfoList() { - for (iterator it = this->begin(); it != this->end(); it++) - delete *it; - } -}; - -typedef std::map<const char *, SourceFileInfo *> NameToFileMap; - -// Information of a symbol table. -// This is the root of all types of symbol. -struct SymbolInfo { - // The main files used in this module. This does not include header - // files; it includes only files that were provided as the primary - // source file for the compilation unit. In STABS, these are files - // named in 'N_SO' entries. - SourceFileInfoList main_files; - - // Map from file names to source file structures. Note that this - // map's keys are compared as pointers, not strings, so if the same - // name appears at two different addresses in stabstr, the map will - // treat that as two different names. If the linker didn't unify - // names in .stabstr (which it does), this would result in duplicate - // FILE lines, which is benign. - NameToFileMap name_to_file; - - // An array of some addresses at which a file boundary occurs. - // - // The STABS information describing a compilation unit gives the - // unit's start address, but not its ending address or size. Those - // must be inferred by finding the start address of the next file. - // For the last compilation unit, or when one compilation unit ends - // before the next one starts, STABS includes an N_SO entry whose - // filename is the empty string; such an entry's address serves - // simply to mark the end of the preceding compilation unit. 
Rather - // than create FuncInfoList for such entries, we record their - // addresses here. These are not necessarily sorted. - std::vector<ElfW(Addr)> file_boundaries; -}; +using google_breakpad::Module; +using std::vector; // Stab section name. static const char *kStabName = ".stab"; // Demangle using abi call. // Older GCC may not support it. -static std::string Demangle(const char *mangled) { +static std::string Demangle(const std::string &mangled) { int status = 0; - char *demangled = abi::__cxa_demangle(mangled, NULL, NULL, &status); + char *demangled = abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &status); if (status == 0 && demangled != NULL) { std::string str(demangled); free(demangled); @@ -222,141 +128,15 @@ static const ElfW(Shdr) *FindSectionByName(const char *name, return NULL; } -// Return the SourceFileInfo for the file named NAME in SYMBOLS, as -// recorden in the name_to_file map. If none exists, create a new -// one. -// -// If the file is a main file, it is the caller's responsibility to -// set its address and add it to the list of main files. -// -// When creating a new file, this function does not make a copy of -// NAME; NAME must stay alive for as long as the symbol table does. -static SourceFileInfo *FindSourceFileInfo(SymbolInfo *symbols, - const char *name) { - SourceFileInfo **map_entry = &symbols->name_to_file[name]; - SourceFileInfo *file; - if (*map_entry) - file = *map_entry; - else { - file = new SourceFileInfo; - file->name = name; - file->source_id = -1; - file->addr = 0; - *map_entry = file; - } - return file; -} - -// Compute size and rva information based on symbols loaded from stab section. -static bool ComputeSizeAndRVA(ElfW(Addr) loading_addr, - struct SymbolInfo *symbols) { - SourceFileInfoList::iterator file_it; - FuncInfoList::iterator func_it; - LineInfoList::iterator line_it; - - // A table of all the addresses at which files and functions start - // or end. 
We build this from the file boundary list and our lists - // of files and functions, sort it, and then use it to find the ends - // of functions and source lines for which we have no size - // information. - std::vector<ElfW(Addr)> boundaries = symbols->file_boundaries; - for (file_it = symbols->main_files.begin(); - file_it != symbols->main_files.end(); file_it++) { - boundaries.push_back((*file_it)->addr); - for (func_it = (*file_it)->func_info.begin(); - func_it != (*file_it)->func_info.end(); func_it++) - boundaries.push_back(func_it->addr); - } - std::sort(boundaries.begin(), boundaries.end()); - - int no_next_addr_count = 0; - for (file_it = symbols->main_files.begin(); - file_it != symbols->main_files.end(); file_it++) { - for (func_it = (*file_it)->func_info.begin(); - func_it != (*file_it)->func_info.end(); func_it++) { - struct FuncInfo &func_info = *func_it; - assert(func_info.addr >= loading_addr); - func_info.rva_to_base = func_info.addr - loading_addr; - func_info.size = 0; - std::vector<ElfW(Addr)>::iterator boundary - = std::upper_bound(boundaries.begin(), boundaries.end(), - func_info.addr); - ElfW(Addr) next_addr = (boundary == boundaries.end()) ? 0 : *boundary; - // I've noticed functions with an address bigger than any other functions - // and source files modules, this is probably the last function in the - // module, due to limitions of Linux stab symbol, it is impossible to get - // the exact size of this kind of function, thus we give it a default - // very big value. This should be safe since this is the last function. - // But it is a ugly hack..... - // The following code can reproduce the case: - // template<class T> - // void Foo(T value) { - // } - // - // int main(void) { - // Foo(10); - // Foo(std::string("hello")); - // return 0; - // } - // TODO(liuli): Find a better solution. 
- static const int kDefaultSize = 0x10000000; - if (next_addr != 0) { - func_info.size = next_addr - func_info.addr; - } else { - if (no_next_addr_count > 1) { - fprintf(stderr, "Got more than one funtion without the \ - following symbol. Igore this function.\n"); - fprintf(stderr, "The dumped symbol may not correct.\n"); - assert(!"This should not happen!\n"); - func_info.size = 0; - continue; - } - - no_next_addr_count++; - func_info.size = kDefaultSize; - } - // Compute line size. - for (line_it = func_info.line_info.begin(); - line_it != func_info.line_info.end(); line_it++) { - struct LineInfo &line_info = *line_it; - LineInfoList::iterator next_line_it = line_it; - next_line_it++; - line_info.size = 0; - if (next_line_it != func_info.line_info.end()) { - line_info.size = - next_line_it->rva_to_func - line_info.rva_to_func; - } else { - // The last line in the function. - // If we can find a function or source file symbol immediately - // following the line, we can get the size of the line by computing - // the difference of the next address to the starting address of this - // line. - // Otherwise, we need to set a default big enough value. This occurs - // mostly because the this function is the last one in the module. - if (next_addr != 0) { - ElfW(Off) next_addr_offset = next_addr - func_info.addr; - line_info.size = next_addr_offset - line_info.rva_to_func; - } else { - line_info.size = kDefaultSize; - } - } - line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base; - } // for each line. - } // for each function. - } // for each source file. - return true; -} - // Our handler class for STABS data. 
class DumpStabsHandler: public google_breakpad::StabsHandler { public: - DumpStabsHandler(struct SymbolInfo *symbols, ElfW(Addr) loading_addr): - symbols_(symbols), - loading_addr_(loading_addr), - current_comp_unit_(NULL), - current_source_file_(NULL) { - current_function_.addr = 0; - } + DumpStabsHandler(Module *module) : + module_(module), + comp_unit_base_address_(0), + current_function_(NULL), + current_source_file_(NULL), + current_source_file_name_(NULL) { } bool StartCompilationUnit(const char *name, uint64_t address, const char *build_directory); @@ -365,112 +145,195 @@ class DumpStabsHandler: public google_breakpad::StabsHandler { bool EndFunction(uint64_t address); bool Line(uint64_t address, const char *name, int number); + // Do any final processing necessary to make module_ contain all the + // data provided by the STABS reader. + // + // Because STABS does not provide reliable size information for + // functions and lines, we need to make a pass over the data after + // processing all the STABS to compute those sizes. We take care of + // that here. + void Finalize(); + private: - // The symbol info we're contributing to. - struct SymbolInfo *symbols_; - // The address at which this module gets loaded. - ElfW(Addr) loading_addr_; + // An arbitrary, but very large, size to use for functions whose + // size we can't compute properly. + static const uint64_t kFallbackSize = 0x10000000; - // The main file we're currently contributing functions/lines to. - struct SourceFileInfo *current_comp_unit_; + // The module we're contributing debugging info to. + Module *module_; + + // The functions we've generated so far. We don't add these to + // module_ as we parse them. Instead, we wait until we've computed + // their ending address, and their lines' ending addresses. + // + // We could just stick them in module_ from the outset, but if + // module_ already contains data gathered from other debugging + // formats, that would complicate the size computation. 
+ vector<Module::Function *> functions_; + + // Boundary addresses. STABS doesn't necessarily supply sizes for + // functions and lines, so we need to compute them ourselves by + // finding the next object. + vector<Module::Address> boundaries_; + + // The base address of the current compilation unit. We use this to + // recognize functions we should omit from the symbol file. (If you + // know the details of why we omit these, please patch this + // comment.) + Module::Address comp_unit_base_address_; // The function we're currently contributing lines to. - // FIXME: This gets copied, along with all its lines. Should be a pointer. - struct FuncInfo current_function_; - - // The SourceFileInfo structure for the last file we got a line - // number in. Instead of hashing on the name ('s address) on every - // line, we just check whether the name is the same as this file's - // (which it usually is). - SourceFileInfo *current_source_file_; + Module::Function *current_function_; + + // The last Module::File we got a line number in. + Module::File *current_source_file_; + + // The pointer in the .stabstr section of the name that + // current_source_file_ is built from. This allows us to quickly + // recognize when the current line is in the same file as the + // previous one (which it usually is). + const char *current_source_file_name_; }; bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address, const char *build_directory) { - assert(! current_comp_unit_); - current_comp_unit_ = FindSourceFileInfo(symbols_, name); - current_source_file_ = current_comp_unit_; - // Add it to the list; use ADDR to tell whether we've already done so. - if (! current_comp_unit_->addr) - symbols_->main_files.push_back(current_comp_unit_); - current_comp_unit_->addr = address; + assert(! 
comp_unit_base_address_); + current_source_file_name_ = name; + current_source_file_ = module_->FindFile(name); + comp_unit_base_address_ = address; + boundaries_.push_back(static_cast<Module::Address>(address)); return true; } bool DumpStabsHandler::EndCompilationUnit(uint64_t address) { - assert(current_comp_unit_); - // We compute everything's size later. - symbols_->file_boundaries.push_back(address); - current_comp_unit_ = NULL; + assert(comp_unit_base_address_); + comp_unit_base_address_ = 0; current_source_file_ = NULL; + current_source_file_name_ = NULL; + if (address) + boundaries_.push_back(static_cast<Module::Address>(address)); return true; } bool DumpStabsHandler::StartFunction(const std::string &name, uint64_t address) { - assert(! current_function_.addr); - current_function_.name = name; - current_function_.rva_to_base = 0; - current_function_.addr = address; - current_function_.size = 0; - current_function_.stack_param_size = 0; - current_function_.line_info.clear(); + assert(! current_function_); + Module::Function *f = new Module::Function; + f->name_ = Demangle(name); + f->address_ = address; + f->size_ = 0; // We compute this in DumpStabsHandler::Finalize(). + f->parameter_size_ = 0; // We don't provide this information. + current_function_ = f; + boundaries_.push_back(static_cast<Module::Address>(address)); return true; } bool DumpStabsHandler::EndFunction(uint64_t address) { - assert(current_function_.addr); - if (current_function_.addr >= current_comp_unit_->addr) - // This is a big copy, then free. Should use a pointer. - current_comp_unit_->func_info.push_back(current_function_); - current_function_.addr = 0; - current_function_.line_info.clear(); + assert(current_function_); + // Functions in this compilation unit should have address bigger + // than the compilation unit's starting address. There may be a lot + // of duplicated entries for functions in the STABS data; only one + // entry can meet this requirement. 
+ // + // (I don't really understand the above comment; just bringing it + // along from the previous code, and leaving the behaivor unchanged. + // If you know the whole story, please patch this comment. --jimb) + if (current_function_->address_ >= comp_unit_base_address_) + functions_.push_back(current_function_); + else + delete current_function_; + current_function_ = NULL; + if (address) + boundaries_.push_back(static_cast<Module::Address>(address)); return true; } bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) { - assert(current_function_.addr); + assert(current_function_); assert(current_source_file_); - if (name != current_source_file_->name) - current_source_file_ = FindSourceFileInfo(symbols_, name); - struct LineInfo line; - // FIXME: might as well set rva_to_base directly. - line.rva_to_func = address - current_function_.addr; - line.file = current_source_file_; - line.line_num = number; - line.size = 0; - line.rva_to_base = 0; - current_function_.line_info.push_back(line); + if (name != current_source_file_name_) { + current_source_file_ = module_->FindFile(name); + current_source_file_name_ = name; + } + Module::Line line; + line.address_ = address; + line.size_ = 0; // We compute this in DumpStabsHandler::Finalize(). + line.file_ = current_source_file_; + line.number_ = number; + current_function_->lines_.push_back(line); return true; } +void DumpStabsHandler::Finalize() { + // Sort our boundary list, so we can search it quickly. + sort(boundaries_.begin(), boundaries_.end()); + // Sort all functions by address, just for neatness. + sort(functions_.begin(), functions_.end(), + Module::Function::CompareByAddress); + for (vector<Module::Function *>::iterator func_it = functions_.begin(); + func_it != functions_.end(); + func_it++) { + Module::Function *f = *func_it; + // Compute the function f's size. 
+ vector<Module::Address>::iterator boundary + = std::upper_bound(boundaries_.begin(), boundaries_.end(), f->address_); + if (boundary != boundaries_.end()) + f->size_ = *boundary - f->address_; + else + // If this is the last function in the module, and the STABS + // reader was unable to give us its ending address, then assign + // it a bogus, very large value. This will happen at most once + // per module: since we've added all functions' addresses to the + // boundary table, only one can be the last. + f->size_ = kFallbackSize; + + // Compute sizes for each of the function f's lines --- if it has any. + if (! f->lines_.empty()) { + stable_sort(f->lines_.begin(), f->lines_.end(), + Module::Line::CompareByAddress); + vector<Module::Line>::iterator last_line = f->lines_.end() - 1; + for (vector<Module::Line>::iterator line_it = f->lines_.begin(); + line_it != last_line; line_it++) + line_it[0].size_ = line_it[1].address_ - line_it[0].address_; + // Compute the size of the last line from f's end address. + last_line->size_ = (f->address_ + f->size_) - last_line->address_; + } + } + // Now that everything has a size, add our functions to the module, and + // dispose of our private list. + module_->AddFunctions(functions_.begin(), functions_.end()); + functions_.clear(); +} + static bool LoadSymbols(const ElfW(Shdr) *stab_section, const ElfW(Shdr) *stabstr_section, - ElfW(Addr) loading_addr, - struct SymbolInfo *symbols) { + Module *module) { if (stab_section == NULL || stabstr_section == NULL) return false; + // A callback object to handle data from the STABS reader. + DumpStabsHandler handler(module); + // Find the addresses of the STABS data, and create a STABS reader object. 
uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset); uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset); - DumpStabsHandler handler(symbols, loading_addr); google_breakpad::StabsReader reader(stabs, stab_section->sh_size, stabstr, stabstr_section->sh_size, &handler); + // Read the STABS data, and do post-processing. if (! reader.Process()) return false; - - // Second pass, compute the size of functions and lines. - return ComputeSizeAndRVA(loading_addr, symbols); + handler.Finalize(); + return true; } -static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) { +static bool LoadSymbols(ElfW(Ehdr) *elf_header, Module *module) { // Translate all offsets in section headers into address. FixAddress(elf_header); ElfW(Addr) loading_addr = GetLoadingAddress( reinterpret_cast<ElfW(Phdr) *>(elf_header->e_phoff), elf_header->e_phnum); + module->SetLoadAddress(loading_addr); const ElfW(Shdr) *sections = reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff); @@ -484,153 +347,7 @@ static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) { const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections; // Load symbols. 
- return LoadSymbols(stab_section, stabstr_section, loading_addr, symbols); -} - -static bool WriteModuleInfo(FILE *file, - ElfW(Half) arch, - const std::string &obj_file) { - const char *arch_name = NULL; - if (arch == EM_386) - arch_name = "x86"; - else if (arch == EM_X86_64) - arch_name = "x86_64"; - else - return false; - - unsigned char identifier[16]; - google_breakpad::FileID file_id(obj_file.c_str()); - if (file_id.ElfFileIdentifier(identifier)) { - char identifier_str[40]; - file_id.ConvertIdentifierToString(identifier, - identifier_str, sizeof(identifier_str)); - char id_no_dash[40]; - int id_no_dash_len = 0; - memset(id_no_dash, 0, sizeof(id_no_dash)); - for (int i = 0; identifier_str[i] != '\0'; ++i) - if (identifier_str[i] != '-') - id_no_dash[id_no_dash_len++] = identifier_str[i]; - // Add an extra "0" by the end. - id_no_dash[id_no_dash_len++] = '0'; - std::string filename = obj_file; - size_t slash_pos = obj_file.find_last_of("/"); - if (slash_pos != std::string::npos) - filename = obj_file.substr(slash_pos + 1); - return 0 <= fprintf(file, "MODULE Linux %s %s %s\n", arch_name, - id_no_dash, filename.c_str()); - } - return false; -} - -// Set *INCLUDED_FILES to the list of included files in SYMBOLS, -// ordered appropriately for output. Included files should appear in -// the order in which they are first referenced by source line info. -// Assign these files source id numbers starting with NEXT_SOURCE_ID. -// -// Note that the name_to_file map may contain #included files that are -// unreferenced; these are the result of LoadFuncSymbols omitting -// functions from the list whose addresses fall outside the address -// range of the file that contains them. 
-static void CollectIncludedFiles(const struct SymbolInfo &symbols, - std::vector<SourceFileInfo *> *included_files, - int next_source_id) { - for (SourceFileInfoList::const_iterator file_it = symbols.main_files.begin(); - file_it != symbols.main_files.end(); file_it++) { - for (FuncInfoList::const_iterator func_it = (*file_it)->func_info.begin(); - func_it != (*file_it)->func_info.end(); func_it++) { - for (LineInfoList::const_iterator line_it = func_it->line_info.begin(); - line_it != func_it->line_info.end(); line_it++) { - SourceFileInfo *file = line_it->file; - if (file->source_id == -1) { - file->source_id = next_source_id++; - // Here we use the source id as a mark, ensuring that each - // file appears in the list only once. - included_files->push_back(file); - } - } - } - } -} - -// Write 'FILE' lines for all source files in SYMBOLS to FILE. We -// assign source id numbers to files here. -static bool WriteSourceFileInfo(FILE *file, struct SymbolInfo &symbols) { - int next_source_id = 0; - // Assign source id numbers to main files, and write them out to the file. - for (SourceFileInfoList::iterator file_it = symbols.main_files.begin(); - file_it != symbols.main_files.end(); file_it++) { - SourceFileInfo *file_info = *file_it; - assert(file_info->addr); - // We only output 'FILE' lines for main files if their names - // contain '.'. The extensionless C++ header files are #included, - // not main files, so it wouldn't affect them. If you know the - // story, please patch this comment. - if (strchr(file_info->name, '.')) { - file_info->source_id = next_source_id++; - if (0 > fprintf(file, "FILE %d %s\n", - file_info->source_id, file_info->name)) - return false; - } - } - // Compute the list of included files, and write them out. - // Can't use SourceFileInfoList here, because that owns the files it - // points to. 
- std::vector<SourceFileInfo *> included_files; - std::vector<SourceFileInfo *>::const_iterator file_it; - CollectIncludedFiles(symbols, &included_files, next_source_id); - for (file_it = included_files.begin(); file_it != included_files.end(); - file_it++) { - if (0 > fprintf(file, "FILE %d %s\n", - (*file_it)->source_id, (*file_it)->name)) - return false; - } - return true; -} - -static bool WriteOneFunction(FILE *file, - const struct FuncInfo &func_info){ - std::string func_name = Demangle(func_info.name.c_str()); - - if (func_info.size <= 0) - return true; - - if (0 <= fprintf(file, "FUNC %lx %lx %d %s\n", - (unsigned long) func_info.rva_to_base, - (unsigned long) func_info.size, - func_info.stack_param_size, - func_name.c_str())) { - for (LineInfoList::const_iterator it = func_info.line_info.begin(); - it != func_info.line_info.end(); it++) { - const struct LineInfo &line_info = *it; - if (0 > fprintf(file, "%lx %lx %d %d\n", - (unsigned long) line_info.rva_to_base, - (unsigned long) line_info.size, - line_info.line_num, - line_info.file->source_id)) - return false; - } - return true; - } - return false; -} - -static bool WriteFunctionInfo(FILE *file, const struct SymbolInfo &symbols) { - for (SourceFileInfoList::const_iterator it = symbols.main_files.begin(); - it != symbols.main_files.end(); it++) { - const struct SourceFileInfo &file_info = **it; - for (FuncInfoList::const_iterator fiIt = file_info.func_info.begin(); - fiIt != file_info.func_info.end(); fiIt++) { - const struct FuncInfo &func_info = *fiIt; - if (!WriteOneFunction(file, func_info)) - return false; - } - } - return true; -} - -static bool DumpStabSymbols(FILE *file, struct SymbolInfo &symbols) { - return WriteSourceFileInfo(file, symbols) && - WriteFunctionInfo(file, symbols); + return LoadSymbols(stab_section, stabstr_section, module); } // @@ -685,6 +402,48 @@ class MmapWrapper { size_t size_; }; +// Return the breakpad symbol file identifier for the architecture of +// ELF_HEADER. 
+const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) { + ElfW(Half) arch = elf_header->e_machine; + if (arch == EM_386) + return "x86"; + else if (arch == EM_X86_64) + return "x86_64"; + else + return NULL; +} + +// Format the Elf file identifier in IDENTIFIER as a UUID with the +// dashes removed. +std::string FormatIdentifier(unsigned char identifier[16]) { + char identifier_str[40]; + google_breakpad::FileID::ConvertIdentifierToString( + identifier, + identifier_str, + sizeof(identifier_str)); + std::string id_no_dash; + for (int i = 0; identifier_str[i] != '\0'; ++i) + if (identifier_str[i] != '-') + id_no_dash += identifier_str[i]; + // Add an extra "0" by the end. PDB files on Windows have an 'age' + // number appended to the end of the file identifier; this isn't + // really used or necessary on other platforms, but let's preserve + // the pattern. + id_no_dash += '0'; + return id_no_dash; +} + +// Return the non-directory portion of FILENAME: the portion after the +// last slash, or the whole filename if there are no slashes. +std::string BaseFileName(const std::string &filename) { + // Lots of copies! basename's behavior is less than ideal. + char *c_filename = strdup(filename.c_str()); + std::string base = basename(c_filename); + free(c_filename); + return base; +} + } // namespace namespace google_breakpad { @@ -706,16 +465,27 @@ bool DumpSymbols::WriteSymbolFile(const std::string &obj_file, ElfW(Ehdr) *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_base); if (!IsValidElf(elf_header)) return false; - struct SymbolInfo symbols; - if (!LoadSymbols(elf_header, &symbols)) - return false; - // Write to symbol file. - if (WriteModuleInfo(sym_file, elf_header->e_machine, obj_file) && - DumpStabSymbols(sym_file, symbols)) - return true; + unsigned char identifier[16]; + google_breakpad::FileID file_id(obj_file.c_str()); + if (! file_id.ElfFileIdentifier(identifier)) + return false; + + const char *architecture = ElfArchitecture(elf_header); + if (! 
architecture) + return false; - return false; + std::string name = BaseFileName(obj_file); + std::string os = "Linux"; + std::string id = FormatIdentifier(identifier); + + Module module(name, os, architecture, id); + if (!LoadSymbols(elf_header, &module)) + return false; + if (!module.Write(sym_file)) + return false; + + return true; } } // namespace google_breakpad diff --git a/src/common/linux/module.cc b/src/common/linux/module.cc new file mode 100644 index 00000000..69bec9cd --- /dev/null +++ b/src/common/linux/module.cc @@ -0,0 +1,167 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <cerrno> +#include <cstring> +#include "common/linux/module.h" + +namespace google_breakpad { + +Module::Module(const string &name, const string &os, + const string &architecture, const string &id) : + name_(name), + os_(os), + architecture_(architecture), + id_(id), + load_address_(0) { } + +Module::~Module() { + for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); it++) + delete it->second; + for (vector<Function *>::iterator it = functions_.begin(); + it != functions_.end(); it++) + delete *it; +} + +void Module::SetLoadAddress(Address address) { + load_address_ = address; +} + +void Module::AddFunction(Function *function) { + functions_.push_back(function); +} + +void Module::AddFunctions(vector<Function *>::iterator begin, + vector<Function *>::iterator end) { + functions_.insert(functions_.end(), begin, end); +} + +Module::File *Module::FindFile(const string &name) { + // A tricky bit here. The key of each map entry needs to be a + // pointer to the entry's File's name string. This means that we + // can't do the initial lookup with any operation that would create + // an empty entry for us if the name isn't found (like, say, + // operator[] or insert do), because such a created entry's key will + // be a pointer the string passed as our argument. Since the key of + // a map's value type is const, we can't fix it up once we've + // created our file. 
lower_bound does the lookup without doing an + // insertion, and returns a good hint iterator to pass to insert. + // Our "destiny" is where we belong, whether we're there or not now. + FileByNameMap::iterator destiny = files_.lower_bound(&name); + if (destiny == files_.end() + || *destiny->first != name) { // Repeated string comparison, boo hoo. + File *file = new File; + file->name_ = name; + file->source_id_ = -1; + destiny = files_.insert(destiny, + FileByNameMap::value_type(&file->name_, file)); + } + return destiny->second; +} + +Module::File *Module::FindFile(const char *name) { + string name_string = name; + return FindFile(name_string); +} + +void Module::AssignSourceIds() { + // First, give every source file an id of -1. + for (FileByNameMap::iterator file_it = files_.begin(); + file_it != files_.end(); file_it++) + file_it->second->source_id_ = -1; + + // Next, mark all files actually cited by our functions' line number + // info, by setting each one's source id to zero. + for (vector<Function *>::const_iterator func_it = functions_.begin(); + func_it != functions_.end(); func_it++) { + Function *func = *func_it; + for (vector<Line>::iterator line_it = func->lines_.begin(); + line_it != func->lines_.end(); line_it++) + line_it->file_->source_id_ = 0; + } + + // Finally, assign source ids to those files that have been marked. + // We could have just assigned source id numbers while traversing + // the line numbers, but doing it this way numbers the files in + // lexicographical order by name, which is neat. + int next_source_id = 0; + for (FileByNameMap::iterator file_it = files_.begin(); + file_it != files_.end(); file_it++) + if (! 
file_it->second->source_id_) + file_it->second->source_id_ = next_source_id++; +} + +bool Module::ReportError() { + fprintf(stderr, "error writing symbol file: %s\n", + strerror (errno)); + return false; +} + +bool Module::Write(FILE *stream) { + if (0 > fprintf(stream, "MODULE %s %s %s %s\n", + os_.c_str(), architecture_.c_str(), id_.c_str(), + name_.c_str())) + return ReportError(); + + // Write out files. + AssignSourceIds(); + for (FileByNameMap::iterator file_it = files_.begin(); + file_it != files_.end(); file_it++) { + File *file = file_it->second; + if (file->source_id_ >= 0) { + if (0 > fprintf(stream, "FILE %d %s\n", + file->source_id_, file->name_.c_str())) + return ReportError(); + } + } + + // Write out functions and their lines. + for (vector<Function *>::const_iterator func_it = functions_.begin(); + func_it != functions_.end(); func_it++) { + Function *func = *func_it; + if (0 > fprintf(stream, "FUNC %lx %lx %lu %s\n", + (unsigned long) (func->address_ - load_address_), + (unsigned long) func->size_, + (unsigned long) func->parameter_size_, + func->name_.c_str())) + return ReportError(); + for (vector<Line>::iterator line_it = func->lines_.begin(); + line_it != func->lines_.end(); line_it++) + if (0 > fprintf(stream, "%lx %lx %d %d\n", + (unsigned long) (line_it->address_ - load_address_), + (unsigned long) line_it->size_, + line_it->number_, + line_it->file_->source_id_)) + return ReportError(); + } + + return true; +} + +} // namespace google_breakpad diff --git a/src/common/linux/module.h b/src/common/linux/module.h new file mode 100644 index 00000000..f3a6c199 --- /dev/null +++ b/src/common/linux/module.h @@ -0,0 +1,191 @@ +// Copyright (c) 2009, Google Inc. -*- mode: c++ -*- +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// module.h: defines google_breakpad::Module, for writing breakpad symbol files + +#ifndef COMMON_LINUX_MODULE_H__ +#define COMMON_LINUX_MODULE_H__ + +#include <map> +#include <string> +#include <vector> +#include <cstdio> + +#include "google_breakpad/common/breakpad_types.h" + +namespace google_breakpad { + +using std::string; +using std::vector; +using std::map; + +// A Module represents the contents of a module, and supports methods +// for adding information produced by parsing STABS or DWARF data +// --- possibly both from the same file --- and then writing out the +// unified contents as a Breakpad-format symbol file. +class Module { + public: + // The type of addresses and sizes in a symbol table. + typedef u_int64_t Address; + struct File; + struct Function; + struct Line; + + // Addresses appearing in File, Function, and Line structures are + // absolute, not relative to the the module's load address. That + // is, if the module were loaded at its nominal load address, the + // addresses would be correct. + + // A source file. + struct File { + // The name of the source file. + string name_; + + // The file's source id. The Write member function clears this + // field and assigns source ids a fresh, so any value placed here + // before calling Write will be lost. + int source_id_; + }; + + // A function. + struct Function { + // For sorting by address. (Not style-guide compliant, but it's + // stupid not to put this in the struct.) + static bool CompareByAddress(const Function *x, const Function *y) { + return x->address_ < y->address_; + } + + // The function's name. + string name_; + + // The start address and length of the function's code. + Address address_, size_; + + // The function's parameter size. + Address parameter_size_; + + // Source lines belonging to this function, sorted by increasing + // address. + vector<Line> lines_; + }; + + // A source line. + struct Line { + // For sorting by address. 
(Not style-guide compliant, but it's + // stupid not to put this in the struct.) + static bool CompareByAddress(const Module::Line &x, const Module::Line &y) { + return x.address_ < y.address_; + } + + Address address_, size_; // The address and size of the line's code. + File *file_; // The source file. + int number_; // The source line number. + }; + + // Create a new module with the given name, operating system, + // architecture, and ID string. + Module(const string &name, const string &os, const string &architecture, + const string &id); + ~Module(); + + // Set the module's load address to LOAD_ADDRESS; addresses given + // for functions and lines will be written to the Breakpad symbol + // file as offsets from this address. Construction initializes this + // module's load address to zero: addresses written to the symbol + // file will be the same as they appear in the File and Line + // structures. + void SetLoadAddress(Address load_address); + + // Add FUNCTION to the module. + // Destroying this module frees all Function objects that have been + // added with this function. + void AddFunction(Function *function); + + // Add all the functions in [BEGIN,END) to the module. + // Destroying this module frees all Function objects that have been + // added with this function. + void AddFunctions(vector<Function *>::iterator begin, + vector<Function *>::iterator end); + + // If this module has a file named NAME, return a pointer to a + // pointer to it. If it has none, then create one and return a + // pointer to the new file. + // Destroying this module frees all File objects that have been created + // using this function, or with Insert. + File *FindFile(const string &name); + File *FindFile(const char *name); + + // Write this module to STREAM in the breakpad symbol format. + // Return true if all goes well, or false if an error occurs. 
This + // method writes out a header based on the values given to the + // constructor, writes the source files added via Insert and + // FindFile, and then the functions added via Insert, along with + // their lines. + bool Write(FILE *stream); + +private: + + // Assign source id numbers to this modules' files that functions' + // line number data actually refers to. Set the source id numbers + // for all other files to -1. We do this before writing out the + // symbol file, omitting any unused files. + void AssignSourceIds(); + + // Report an error that has occurred writing the symbol file, using + // errno to find the appropriate cause. Return false. + static bool ReportError(); + + // Module header entries. + string name_, os_, architecture_, id_; + + // The module's nominal load address. Addresses for functions and + // lines are absolute, assuming the module is loaded at this + // address. + Address load_address_; + + // Relation for maps whose keys are strings shared with some other + // structure. + struct CompareStringPtrs { + bool operator()(const string *x, const string *y) { return *x < *y; }; + }; + + // A map from filenames to File structures. The map's keys are + // pointers to the Files' names. + typedef map<const string *, File *, CompareStringPtrs> FileByNameMap; + + // The module owns all the files and functions that have been added + // to it; destroying the module frees the Files and Functions these + // point to. + FileByNameMap files_; // This module's source files. + vector<Function *> functions_; // This module's functions. +}; + +} // namespace google_breakpad + +#endif // COMMON_LINUX_MODULE_H__ |