about | summary | refs | log | tree | commit | diff
path: root/src/common
diff options
context:
space:
mode:
author: jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e> 2009-08-07 19:28:45 +0000
committer: jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e> 2009-08-07 19:28:45 +0000
commit: eab03fdb72a77dfd71db028e6e5676e734bdc443 (patch)
tree: 4e3c53bf0edcf6024695f6f876d54deb51c4b23e /src/common
parent: Add files left behind by previous commit. (diff)
download: breakpad-eab03fdb72a77dfd71db028e6e5676e734bdc443.tar.xz
Linux dumper: Move the data structures representing the breakpad data into their own class.
src/common/linux/module.h defines a new class, google_breakpad::Module, that can represent the contents of a breakpad symbol file. Module::Write writes a well-formed symbol file to the given stream.

src/common/linux/dump_symbols.cc can now lose its symbol-file-writing code, and change DumpStabsHandler to populate a Module object, rather than the old SymbolInfo/SourceFileInfo/... collection of types.

The code to compute function and line sizes, even in the absence of reliable size data in STABS, is moved into a new Finalize method of DumpStabsHandler, which is responsible for completing the Module's contents.

a=jimblandy r=nealsid

git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@380 4c0a9323-5329-0410-9bdc-e9ce6186880e
Diffstat (limited to 'src/common')
-rw-r--r-- src/common/linux/dump_symbols.cc 662
-rw-r--r-- src/common/linux/module.cc 167
-rw-r--r-- src/common/linux/module.h 191
3 files changed, 574 insertions, 446 deletions
diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc
index a87ab97e..3f6fbb08 100644
--- a/src/common/linux/dump_symbols.cc
+++ b/src/common/linux/dump_symbols.cc
@@ -41,128 +41,34 @@
#include <unistd.h>
#include <algorithm>
-#include <string>
+#include <cstring>
#include <functional>
#include <list>
-#include <vector>
#include <map>
-#include <string.h>
+#include <string>
+#include <vector>
-#include "common/linux/stabs_reader.h"
#include "common/linux/dump_symbols.h"
#include "common/linux/file_id.h"
#include "common/linux/guid_creator.h"
+#include "common/linux/module.h"
+#include "common/linux/stabs_reader.h"
#include "processor/scoped_ptr.h"
// This namespace contains helper functions.
namespace {
-struct SourceFileInfo;
-
-// Infomation of a line.
-struct LineInfo {
- // Offset from start of the function.
- // Load from stab symbol.
- ElfW(Off) rva_to_func;
- // Offset from base of the loading binary.
- ElfW(Off) rva_to_base;
- // Size of the line.
- // It is the difference of the starting address of the line and starting
- // address of the next N_SLINE, N_FUN or N_SO.
- uint32_t size;
- // Line number.
- uint32_t line_num;
- // The source file this line belongs to.
- SourceFileInfo *file;
-};
-
-typedef std::list<struct LineInfo> LineInfoList;
-
-// Information of a function.
-struct FuncInfo {
- // Name of the function.
- std::string name;
- // Offset from the base of the loading address.
- ElfW(Off) rva_to_base;
- // Virtual address of the function.
- // Load from stab symbol.
- ElfW(Addr) addr;
- // Size of the function.
- // It is the difference of the starting address of the function and starting
- // address of the next N_FUN or N_SO.
- uint32_t size;
- // Total size of stack parameters.
- uint32_t stack_param_size;
- // Line information array.
- LineInfoList line_info;
-};
-
-typedef std::list<struct FuncInfo> FuncInfoList;
-
-// Information of a source file.
-struct SourceFileInfo {
- // Name of the source file.
- const char *name;
- // Starting address of the source file.
- ElfW(Addr) addr;
- // Id of the source file.
- int source_id;
- // Functions information.
- FuncInfoList func_info;
-};
-
-// A simple std::list of pointers to SourceFileInfo structures, that
-// owns the structures pointed to: destroying the list destroys them,
-// as well.
-class SourceFileInfoList : public std::list<SourceFileInfo *> {
- public:
- ~SourceFileInfoList() {
- for (iterator it = this->begin(); it != this->end(); it++)
- delete *it;
- }
-};
-
-typedef std::map<const char *, SourceFileInfo *> NameToFileMap;
-
-// Information of a symbol table.
-// This is the root of all types of symbol.
-struct SymbolInfo {
- // The main files used in this module. This does not include header
- // files; it includes only files that were provided as the primary
- // source file for the compilation unit. In STABS, these are files
- // named in 'N_SO' entries.
- SourceFileInfoList main_files;
-
- // Map from file names to source file structures. Note that this
- // map's keys are compared as pointers, not strings, so if the same
- // name appears at two different addresses in stabstr, the map will
- // treat that as two different names. If the linker didn't unify
- // names in .stabstr (which it does), this would result in duplicate
- // FILE lines, which is benign.
- NameToFileMap name_to_file;
-
- // An array of some addresses at which a file boundary occurs.
- //
- // The STABS information describing a compilation unit gives the
- // unit's start address, but not its ending address or size. Those
- // must be inferred by finding the start address of the next file.
- // For the last compilation unit, or when one compilation unit ends
- // before the next one starts, STABS includes an N_SO entry whose
- // filename is the empty string; such an entry's address serves
- // simply to mark the end of the preceding compilation unit. Rather
- // than create FuncInfoList for such entries, we record their
- // addresses here. These are not necessarily sorted.
- std::vector<ElfW(Addr)> file_boundaries;
-};
+using google_breakpad::Module;
+using std::vector;
// Stab section name.
static const char *kStabName = ".stab";
// Demangle using abi call.
// Older GCC may not support it.
-static std::string Demangle(const char *mangled) {
+static std::string Demangle(const std::string &mangled) {
int status = 0;
- char *demangled = abi::__cxa_demangle(mangled, NULL, NULL, &status);
+ char *demangled = abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &status);
if (status == 0 && demangled != NULL) {
std::string str(demangled);
free(demangled);
@@ -222,141 +128,15 @@ static const ElfW(Shdr) *FindSectionByName(const char *name,
return NULL;
}
-// Return the SourceFileInfo for the file named NAME in SYMBOLS, as
-// recorden in the name_to_file map. If none exists, create a new
-// one.
-//
-// If the file is a main file, it is the caller's responsibility to
-// set its address and add it to the list of main files.
-//
-// When creating a new file, this function does not make a copy of
-// NAME; NAME must stay alive for as long as the symbol table does.
-static SourceFileInfo *FindSourceFileInfo(SymbolInfo *symbols,
- const char *name) {
- SourceFileInfo **map_entry = &symbols->name_to_file[name];
- SourceFileInfo *file;
- if (*map_entry)
- file = *map_entry;
- else {
- file = new SourceFileInfo;
- file->name = name;
- file->source_id = -1;
- file->addr = 0;
- *map_entry = file;
- }
- return file;
-}
-
-// Compute size and rva information based on symbols loaded from stab section.
-static bool ComputeSizeAndRVA(ElfW(Addr) loading_addr,
- struct SymbolInfo *symbols) {
- SourceFileInfoList::iterator file_it;
- FuncInfoList::iterator func_it;
- LineInfoList::iterator line_it;
-
- // A table of all the addresses at which files and functions start
- // or end. We build this from the file boundary list and our lists
- // of files and functions, sort it, and then use it to find the ends
- // of functions and source lines for which we have no size
- // information.
- std::vector<ElfW(Addr)> boundaries = symbols->file_boundaries;
- for (file_it = symbols->main_files.begin();
- file_it != symbols->main_files.end(); file_it++) {
- boundaries.push_back((*file_it)->addr);
- for (func_it = (*file_it)->func_info.begin();
- func_it != (*file_it)->func_info.end(); func_it++)
- boundaries.push_back(func_it->addr);
- }
- std::sort(boundaries.begin(), boundaries.end());
-
- int no_next_addr_count = 0;
- for (file_it = symbols->main_files.begin();
- file_it != symbols->main_files.end(); file_it++) {
- for (func_it = (*file_it)->func_info.begin();
- func_it != (*file_it)->func_info.end(); func_it++) {
- struct FuncInfo &func_info = *func_it;
- assert(func_info.addr >= loading_addr);
- func_info.rva_to_base = func_info.addr - loading_addr;
- func_info.size = 0;
- std::vector<ElfW(Addr)>::iterator boundary
- = std::upper_bound(boundaries.begin(), boundaries.end(),
- func_info.addr);
- ElfW(Addr) next_addr = (boundary == boundaries.end()) ? 0 : *boundary;
- // I've noticed functions with an address bigger than any other functions
- // and source files modules, this is probably the last function in the
- // module, due to limitions of Linux stab symbol, it is impossible to get
- // the exact size of this kind of function, thus we give it a default
- // very big value. This should be safe since this is the last function.
- // But it is a ugly hack.....
- // The following code can reproduce the case:
- // template<class T>
- // void Foo(T value) {
- // }
- //
- // int main(void) {
- // Foo(10);
- // Foo(std::string("hello"));
- // return 0;
- // }
- // TODO(liuli): Find a better solution.
- static const int kDefaultSize = 0x10000000;
- if (next_addr != 0) {
- func_info.size = next_addr - func_info.addr;
- } else {
- if (no_next_addr_count > 1) {
- fprintf(stderr, "Got more than one funtion without the \
- following symbol. Igore this function.\n");
- fprintf(stderr, "The dumped symbol may not correct.\n");
- assert(!"This should not happen!\n");
- func_info.size = 0;
- continue;
- }
-
- no_next_addr_count++;
- func_info.size = kDefaultSize;
- }
- // Compute line size.
- for (line_it = func_info.line_info.begin();
- line_it != func_info.line_info.end(); line_it++) {
- struct LineInfo &line_info = *line_it;
- LineInfoList::iterator next_line_it = line_it;
- next_line_it++;
- line_info.size = 0;
- if (next_line_it != func_info.line_info.end()) {
- line_info.size =
- next_line_it->rva_to_func - line_info.rva_to_func;
- } else {
- // The last line in the function.
- // If we can find a function or source file symbol immediately
- // following the line, we can get the size of the line by computing
- // the difference of the next address to the starting address of this
- // line.
- // Otherwise, we need to set a default big enough value. This occurs
- // mostly because the this function is the last one in the module.
- if (next_addr != 0) {
- ElfW(Off) next_addr_offset = next_addr - func_info.addr;
- line_info.size = next_addr_offset - line_info.rva_to_func;
- } else {
- line_info.size = kDefaultSize;
- }
- }
- line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base;
- } // for each line.
- } // for each function.
- } // for each source file.
- return true;
-}
-
// Our handler class for STABS data.
class DumpStabsHandler: public google_breakpad::StabsHandler {
public:
- DumpStabsHandler(struct SymbolInfo *symbols, ElfW(Addr) loading_addr):
- symbols_(symbols),
- loading_addr_(loading_addr),
- current_comp_unit_(NULL),
- current_source_file_(NULL) {
- current_function_.addr = 0;
- }
+ DumpStabsHandler(Module *module) :
+ module_(module),
+ comp_unit_base_address_(0),
+ current_function_(NULL),
+ current_source_file_(NULL),
+ current_source_file_name_(NULL) { }
bool StartCompilationUnit(const char *name, uint64_t address,
const char *build_directory);
@@ -365,112 +145,195 @@ class DumpStabsHandler: public google_breakpad::StabsHandler {
bool EndFunction(uint64_t address);
bool Line(uint64_t address, const char *name, int number);
+ // Do any final processing necessary to make module_ contain all the
+ // data provided by the STABS reader.
+ //
+ // Because STABS does not provide reliable size information for
+ // functions and lines, we need to make a pass over the data after
+ // processing all the STABS to compute those sizes. We take care of
+ // that here.
+ void Finalize();
+
private:
- // The symbol info we're contributing to.
- struct SymbolInfo *symbols_;
- // The address at which this module gets loaded.
- ElfW(Addr) loading_addr_;
+ // An arbitrary, but very large, size to use for functions whose
+ // size we can't compute properly.
+ static const uint64_t kFallbackSize = 0x10000000;
- // The main file we're currently contributing functions/lines to.
- struct SourceFileInfo *current_comp_unit_;
+ // The module we're contributing debugging info to.
+ Module *module_;
+
+ // The functions we've generated so far. We don't add these to
+ // module_ as we parse them. Instead, we wait until we've computed
+ // their ending address, and their lines' ending addresses.
+ //
+ // We could just stick them in module_ from the outset, but if
+ // module_ already contains data gathered from other debugging
+ // formats, that would complicate the size computation.
+ vector<Module::Function *> functions_;
+
+ // Boundary addresses. STABS doesn't necessarily supply sizes for
+ // functions and lines, so we need to compute them ourselves by
+ // finding the next object.
+ vector<Module::Address> boundaries_;
+
+ // The base address of the current compilation unit. We use this to
+ // recognize functions we should omit from the symbol file. (If you
+ // know the details of why we omit these, please patch this
+ // comment.)
+ Module::Address comp_unit_base_address_;
// The function we're currently contributing lines to.
- // FIXME: This gets copied, along with all its lines. Should be a pointer.
- struct FuncInfo current_function_;
-
- // The SourceFileInfo structure for the last file we got a line
- // number in. Instead of hashing on the name ('s address) on every
- // line, we just check whether the name is the same as this file's
- // (which it usually is).
- SourceFileInfo *current_source_file_;
+ Module::Function *current_function_;
+
+ // The last Module::File we got a line number in.
+ Module::File *current_source_file_;
+
+ // The pointer in the .stabstr section of the name that
+ // current_source_file_ is built from. This allows us to quickly
+ // recognize when the current line is in the same file as the
+ // previous one (which it usually is).
+ const char *current_source_file_name_;
};
bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address,
const char *build_directory) {
- assert(! current_comp_unit_);
- current_comp_unit_ = FindSourceFileInfo(symbols_, name);
- current_source_file_ = current_comp_unit_;
- // Add it to the list; use ADDR to tell whether we've already done so.
- if (! current_comp_unit_->addr)
- symbols_->main_files.push_back(current_comp_unit_);
- current_comp_unit_->addr = address;
+ assert(! comp_unit_base_address_);
+ current_source_file_name_ = name;
+ current_source_file_ = module_->FindFile(name);
+ comp_unit_base_address_ = address;
+ boundaries_.push_back(static_cast<Module::Address>(address));
return true;
}
bool DumpStabsHandler::EndCompilationUnit(uint64_t address) {
- assert(current_comp_unit_);
- // We compute everything's size later.
- symbols_->file_boundaries.push_back(address);
- current_comp_unit_ = NULL;
+ assert(comp_unit_base_address_);
+ comp_unit_base_address_ = 0;
current_source_file_ = NULL;
+ current_source_file_name_ = NULL;
+ if (address)
+ boundaries_.push_back(static_cast<Module::Address>(address));
return true;
}
bool DumpStabsHandler::StartFunction(const std::string &name,
uint64_t address) {
- assert(! current_function_.addr);
- current_function_.name = name;
- current_function_.rva_to_base = 0;
- current_function_.addr = address;
- current_function_.size = 0;
- current_function_.stack_param_size = 0;
- current_function_.line_info.clear();
+ assert(! current_function_);
+ Module::Function *f = new Module::Function;
+ f->name_ = Demangle(name);
+ f->address_ = address;
+ f->size_ = 0; // We compute this in DumpStabsHandler::Finalize().
+ f->parameter_size_ = 0; // We don't provide this information.
+ current_function_ = f;
+ boundaries_.push_back(static_cast<Module::Address>(address));
return true;
}
bool DumpStabsHandler::EndFunction(uint64_t address) {
- assert(current_function_.addr);
- if (current_function_.addr >= current_comp_unit_->addr)
- // This is a big copy, then free. Should use a pointer.
- current_comp_unit_->func_info.push_back(current_function_);
- current_function_.addr = 0;
- current_function_.line_info.clear();
+ assert(current_function_);
+ // Functions in this compilation unit should have address bigger
+ // than the compilation unit's starting address. There may be a lot
+ // of duplicated entries for functions in the STABS data; only one
+ // entry can meet this requirement.
+ //
+ // (I don't really understand the above comment; just bringing it
+ // along from the previous code, and leaving the behavior unchanged.
+ // If you know the whole story, please patch this comment. --jimb)
+ if (current_function_->address_ >= comp_unit_base_address_)
+ functions_.push_back(current_function_);
+ else
+ delete current_function_;
+ current_function_ = NULL;
+ if (address)
+ boundaries_.push_back(static_cast<Module::Address>(address));
return true;
}
bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) {
- assert(current_function_.addr);
+ assert(current_function_);
assert(current_source_file_);
- if (name != current_source_file_->name)
- current_source_file_ = FindSourceFileInfo(symbols_, name);
- struct LineInfo line;
- // FIXME: might as well set rva_to_base directly.
- line.rva_to_func = address - current_function_.addr;
- line.file = current_source_file_;
- line.line_num = number;
- line.size = 0;
- line.rva_to_base = 0;
- current_function_.line_info.push_back(line);
+ if (name != current_source_file_name_) {
+ current_source_file_ = module_->FindFile(name);
+ current_source_file_name_ = name;
+ }
+ Module::Line line;
+ line.address_ = address;
+ line.size_ = 0; // We compute this in DumpStabsHandler::Finalize().
+ line.file_ = current_source_file_;
+ line.number_ = number;
+ current_function_->lines_.push_back(line);
return true;
}
+void DumpStabsHandler::Finalize() {
+ // Sort our boundary list, so we can search it quickly.
+ sort(boundaries_.begin(), boundaries_.end());
+ // Sort all functions by address, just for neatness.
+ sort(functions_.begin(), functions_.end(),
+ Module::Function::CompareByAddress);
+ for (vector<Module::Function *>::iterator func_it = functions_.begin();
+ func_it != functions_.end();
+ func_it++) {
+ Module::Function *f = *func_it;
+ // Compute the function f's size.
+ vector<Module::Address>::iterator boundary
+ = std::upper_bound(boundaries_.begin(), boundaries_.end(), f->address_);
+ if (boundary != boundaries_.end())
+ f->size_ = *boundary - f->address_;
+ else
+ // If this is the last function in the module, and the STABS
+ // reader was unable to give us its ending address, then assign
+ // it a bogus, very large value. This will happen at most once
+ // per module: since we've added all functions' addresses to the
+ // boundary table, only one can be the last.
+ f->size_ = kFallbackSize;
+
+ // Compute sizes for each of the function f's lines --- if it has any.
+ if (! f->lines_.empty()) {
+ stable_sort(f->lines_.begin(), f->lines_.end(),
+ Module::Line::CompareByAddress);
+ vector<Module::Line>::iterator last_line = f->lines_.end() - 1;
+ for (vector<Module::Line>::iterator line_it = f->lines_.begin();
+ line_it != last_line; line_it++)
+ line_it[0].size_ = line_it[1].address_ - line_it[0].address_;
+ // Compute the size of the last line from f's end address.
+ last_line->size_ = (f->address_ + f->size_) - last_line->address_;
+ }
+ }
+ // Now that everything has a size, add our functions to the module, and
+ // dispose of our private list.
+ module_->AddFunctions(functions_.begin(), functions_.end());
+ functions_.clear();
+}
+
static bool LoadSymbols(const ElfW(Shdr) *stab_section,
const ElfW(Shdr) *stabstr_section,
- ElfW(Addr) loading_addr,
- struct SymbolInfo *symbols) {
+ Module *module) {
if (stab_section == NULL || stabstr_section == NULL)
return false;
+ // A callback object to handle data from the STABS reader.
+ DumpStabsHandler handler(module);
+ // Find the addresses of the STABS data, and create a STABS reader object.
uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset);
uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset);
- DumpStabsHandler handler(symbols, loading_addr);
google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
stabstr, stabstr_section->sh_size,
&handler);
+ // Read the STABS data, and do post-processing.
if (! reader.Process())
return false;
-
- // Second pass, compute the size of functions and lines.
- return ComputeSizeAndRVA(loading_addr, symbols);
+ handler.Finalize();
+ return true;
}
-static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) {
+static bool LoadSymbols(ElfW(Ehdr) *elf_header, Module *module) {
// Translate all offsets in section headers into address.
FixAddress(elf_header);
ElfW(Addr) loading_addr = GetLoadingAddress(
reinterpret_cast<ElfW(Phdr) *>(elf_header->e_phoff),
elf_header->e_phnum);
+ module->SetLoadAddress(loading_addr);
const ElfW(Shdr) *sections =
reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
@@ -484,153 +347,7 @@ static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) {
const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections;
// Load symbols.
- return LoadSymbols(stab_section, stabstr_section, loading_addr, symbols);
-}
-
-static bool WriteModuleInfo(FILE *file,
- ElfW(Half) arch,
- const std::string &obj_file) {
- const char *arch_name = NULL;
- if (arch == EM_386)
- arch_name = "x86";
- else if (arch == EM_X86_64)
- arch_name = "x86_64";
- else
- return false;
-
- unsigned char identifier[16];
- google_breakpad::FileID file_id(obj_file.c_str());
- if (file_id.ElfFileIdentifier(identifier)) {
- char identifier_str[40];
- file_id.ConvertIdentifierToString(identifier,
- identifier_str, sizeof(identifier_str));
- char id_no_dash[40];
- int id_no_dash_len = 0;
- memset(id_no_dash, 0, sizeof(id_no_dash));
- for (int i = 0; identifier_str[i] != '\0'; ++i)
- if (identifier_str[i] != '-')
- id_no_dash[id_no_dash_len++] = identifier_str[i];
- // Add an extra "0" by the end.
- id_no_dash[id_no_dash_len++] = '0';
- std::string filename = obj_file;
- size_t slash_pos = obj_file.find_last_of("/");
- if (slash_pos != std::string::npos)
- filename = obj_file.substr(slash_pos + 1);
- return 0 <= fprintf(file, "MODULE Linux %s %s %s\n", arch_name,
- id_no_dash, filename.c_str());
- }
- return false;
-}
-
-// Set *INCLUDED_FILES to the list of included files in SYMBOLS,
-// ordered appropriately for output. Included files should appear in
-// the order in which they are first referenced by source line info.
-// Assign these files source id numbers starting with NEXT_SOURCE_ID.
-//
-// Note that the name_to_file map may contain #included files that are
-// unreferenced; these are the result of LoadFuncSymbols omitting
-// functions from the list whose addresses fall outside the address
-// range of the file that contains them.
-static void CollectIncludedFiles(const struct SymbolInfo &symbols,
- std::vector<SourceFileInfo *> *included_files,
- int next_source_id) {
- for (SourceFileInfoList::const_iterator file_it = symbols.main_files.begin();
- file_it != symbols.main_files.end(); file_it++) {
- for (FuncInfoList::const_iterator func_it = (*file_it)->func_info.begin();
- func_it != (*file_it)->func_info.end(); func_it++) {
- for (LineInfoList::const_iterator line_it = func_it->line_info.begin();
- line_it != func_it->line_info.end(); line_it++) {
- SourceFileInfo *file = line_it->file;
- if (file->source_id == -1) {
- file->source_id = next_source_id++;
- // Here we use the source id as a mark, ensuring that each
- // file appears in the list only once.
- included_files->push_back(file);
- }
- }
- }
- }
-}
-
-// Write 'FILE' lines for all source files in SYMBOLS to FILE. We
-// assign source id numbers to files here.
-static bool WriteSourceFileInfo(FILE *file, struct SymbolInfo &symbols) {
- int next_source_id = 0;
- // Assign source id numbers to main files, and write them out to the file.
- for (SourceFileInfoList::iterator file_it = symbols.main_files.begin();
- file_it != symbols.main_files.end(); file_it++) {
- SourceFileInfo *file_info = *file_it;
- assert(file_info->addr);
- // We only output 'FILE' lines for main files if their names
- // contain '.'. The extensionless C++ header files are #included,
- // not main files, so it wouldn't affect them. If you know the
- // story, please patch this comment.
- if (strchr(file_info->name, '.')) {
- file_info->source_id = next_source_id++;
- if (0 > fprintf(file, "FILE %d %s\n",
- file_info->source_id, file_info->name))
- return false;
- }
- }
- // Compute the list of included files, and write them out.
- // Can't use SourceFileInfoList here, because that owns the files it
- // points to.
- std::vector<SourceFileInfo *> included_files;
- std::vector<SourceFileInfo *>::const_iterator file_it;
- CollectIncludedFiles(symbols, &included_files, next_source_id);
- for (file_it = included_files.begin(); file_it != included_files.end();
- file_it++) {
- if (0 > fprintf(file, "FILE %d %s\n",
- (*file_it)->source_id, (*file_it)->name))
- return false;
- }
- return true;
-}
-
-static bool WriteOneFunction(FILE *file,
- const struct FuncInfo &func_info){
- std::string func_name = Demangle(func_info.name.c_str());
-
- if (func_info.size <= 0)
- return true;
-
- if (0 <= fprintf(file, "FUNC %lx %lx %d %s\n",
- (unsigned long) func_info.rva_to_base,
- (unsigned long) func_info.size,
- func_info.stack_param_size,
- func_name.c_str())) {
- for (LineInfoList::const_iterator it = func_info.line_info.begin();
- it != func_info.line_info.end(); it++) {
- const struct LineInfo &line_info = *it;
- if (0 > fprintf(file, "%lx %lx %d %d\n",
- (unsigned long) line_info.rva_to_base,
- (unsigned long) line_info.size,
- line_info.line_num,
- line_info.file->source_id))
- return false;
- }
- return true;
- }
- return false;
-}
-
-static bool WriteFunctionInfo(FILE *file, const struct SymbolInfo &symbols) {
- for (SourceFileInfoList::const_iterator it = symbols.main_files.begin();
- it != symbols.main_files.end(); it++) {
- const struct SourceFileInfo &file_info = **it;
- for (FuncInfoList::const_iterator fiIt = file_info.func_info.begin();
- fiIt != file_info.func_info.end(); fiIt++) {
- const struct FuncInfo &func_info = *fiIt;
- if (!WriteOneFunction(file, func_info))
- return false;
- }
- }
- return true;
-}
-
-static bool DumpStabSymbols(FILE *file, struct SymbolInfo &symbols) {
- return WriteSourceFileInfo(file, symbols) &&
- WriteFunctionInfo(file, symbols);
+ return LoadSymbols(stab_section, stabstr_section, module);
}
//
@@ -685,6 +402,48 @@ class MmapWrapper {
size_t size_;
};
+// Return the breakpad symbol file identifier for the architecture of
+// ELF_HEADER.
+const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) {
+ ElfW(Half) arch = elf_header->e_machine;
+ if (arch == EM_386)
+ return "x86";
+ else if (arch == EM_X86_64)
+ return "x86_64";
+ else
+ return NULL;
+}
+
+// Format the Elf file identifier in IDENTIFIER as a UUID with the
+// dashes removed.
+std::string FormatIdentifier(unsigned char identifier[16]) {
+ char identifier_str[40];
+ google_breakpad::FileID::ConvertIdentifierToString(
+ identifier,
+ identifier_str,
+ sizeof(identifier_str));
+ std::string id_no_dash;
+ for (int i = 0; identifier_str[i] != '\0'; ++i)
+ if (identifier_str[i] != '-')
+ id_no_dash += identifier_str[i];
+ // Add an extra "0" at the end. PDB files on Windows have an 'age'
+ // number appended to the end of the file identifier; this isn't
+ // really used or necessary on other platforms, but let's preserve
+ // the pattern.
+ id_no_dash += '0';
+ return id_no_dash;
+}
+
+// Return the non-directory portion of FILENAME: the portion after the
+// last slash, or the whole filename if there are no slashes.
+std::string BaseFileName(const std::string &filename) {
+ // Lots of copies! basename's behavior is less than ideal.
+ char *c_filename = strdup(filename.c_str());
+ std::string base = basename(c_filename);
+ free(c_filename);
+ return base;
+}
+
} // namespace
namespace google_breakpad {
@@ -706,16 +465,27 @@ bool DumpSymbols::WriteSymbolFile(const std::string &obj_file,
ElfW(Ehdr) *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_base);
if (!IsValidElf(elf_header))
return false;
- struct SymbolInfo symbols;
- if (!LoadSymbols(elf_header, &symbols))
- return false;
- // Write to symbol file.
- if (WriteModuleInfo(sym_file, elf_header->e_machine, obj_file) &&
- DumpStabSymbols(sym_file, symbols))
- return true;
+ unsigned char identifier[16];
+ google_breakpad::FileID file_id(obj_file.c_str());
+ if (! file_id.ElfFileIdentifier(identifier))
+ return false;
+
+ const char *architecture = ElfArchitecture(elf_header);
+ if (! architecture)
+ return false;
- return false;
+ std::string name = BaseFileName(obj_file);
+ std::string os = "Linux";
+ std::string id = FormatIdentifier(identifier);
+
+ Module module(name, os, architecture, id);
+ if (!LoadSymbols(elf_header, &module))
+ return false;
+ if (!module.Write(sym_file))
+ return false;
+
+ return true;
}
} // namespace google_breakpad
diff --git a/src/common/linux/module.cc b/src/common/linux/module.cc
new file mode 100644
index 00000000..69bec9cd
--- /dev/null
+++ b/src/common/linux/module.cc
@@ -0,0 +1,167 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cerrno>
+#include <cstring>
+#include "common/linux/module.h"
+
+namespace google_breakpad {
+
+// Build a module from its four header fields.  The load address
+// starts out as zero.
+Module::Module(const string &name, const string &os,
+               const string &architecture, const string &id)
+    : name_(name),
+      os_(os),
+      architecture_(architecture),
+      id_(id),
+      load_address_(0) {
+}
+
+// Delete every File and Function this module holds a pointer to.
+Module::~Module() {
+  FileByNameMap::iterator file_pos = files_.begin();
+  while (file_pos != files_.end()) {
+    delete file_pos->second;
+    ++file_pos;
+  }
+
+  vector<Function *>::iterator func_pos = functions_.begin();
+  while (func_pos != functions_.end()) {
+    delete *func_pos;
+    ++func_pos;
+  }
+}
+
+// Record ADDRESS as this module's load address.  Write subtracts it
+// from every function and line address it prints.
+void Module::SetLoadAddress(Address address) {
+  this->load_address_ = address;
+}
+
+// Append FUNCTION to this module's list of functions.  The module's
+// destructor deletes it.
+void Module::AddFunction(Function *function) {
+  functions_.insert(functions_.end(), function);
+}
+
+// Append each function in [BEGIN,END) to this module's list of
+// functions, preserving their order.  The module's destructor
+// deletes them.
+void Module::AddFunctions(vector<Function *>::iterator begin,
+                          vector<Function *>::iterator end) {
+  for (vector<Function *>::iterator cursor = begin; cursor != end; ++cursor)
+    functions_.push_back(*cursor);
+}
+
+Module::File *Module::FindFile(const string &name) {
+  // Every key in files_ must point at the name string stored inside
+  // the File that the entry maps to.  So the lookup must not use an
+  // operation that inserts on a miss (operator[] or insert): the
+  // entry it created would be keyed by a pointer to our argument, and
+  // a map entry's key is const, so it could not be repaired later.
+  // lower_bound only searches, and the iterator it returns doubles as
+  // the insertion hint for the entry we may have to add.
+  FileByNameMap::iterator spot = files_.lower_bound(&name);
+
+  // lower_bound yields the first entry not less than NAME; one more
+  // string comparison tells us whether that entry is NAME itself.
+  if (spot != files_.end() && *spot->first == name)
+    return spot->second;
+
+  // No such file yet: create it, keyed by its own name_ member.
+  File *created = new File;
+  created->name_ = name;
+  created->source_id_ = -1;
+  spot = files_.insert(spot,
+                       FileByNameMap::value_type(&created->name_, created));
+  return spot->second;
+}
+
+// Convenience overload: look up (or create) the file named by the
+// C string NAME by way of the std::string overload.
+Module::File *Module::FindFile(const char *name) {
+  return FindFile(string(name));
+}
+
+void Module::AssignSourceIds() {
+  // Pass 1: mark every file in the map as unused (-1).
+  for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) {
+    File *file = it->second;
+    file->source_id_ = -1;
+  }
+
+  // Pass 2: flag each file that some function's line data cites, by
+  // giving it a source id of zero.
+  for (vector<Function *>::size_type f = 0; f < functions_.size(); ++f) {
+    vector<Line> &lines = functions_[f]->lines_;
+    for (vector<Line>::size_type l = 0; l < lines.size(); ++l)
+      lines[l].file_->source_id_ = 0;
+  }
+
+  // Pass 3: hand out consecutive ids to the flagged files.  Numbering
+  // here, rather than during pass 2, walks the map in key order, so
+  // ids follow the lexicographical order of the file names.
+  int next_id = 0;
+  for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) {
+    File *file = it->second;
+    if (file->source_id_ == 0)
+      file->source_id_ = next_id++;
+  }
+}
+
+// Print a message to stderr describing the current errno value, and
+// return false so callers can write "return ReportError();".
+bool Module::ReportError() {
+  const char *reason = strerror(errno);
+  fprintf(stderr, "error writing symbol file: %s\n", reason);
+  return false;
+}
+
+// Write this module to STREAM as a Breakpad symbol file: a MODULE
+// header line, a FILE line for each source file that some line record
+// refers to, and then each function's FUNC line followed by its line
+// records.  Function and line addresses are written as offsets from
+// the load address.  If any write fails, report the error via
+// ReportError and return false; otherwise return true.
+bool Module::Write(FILE *stream) {
+  if (0 > fprintf(stream, "MODULE %s %s %s %s\n",
+                  os_.c_str(), architecture_.c_str(), id_.c_str(),
+                  name_.c_str()))
+    return ReportError();
+
+  // Write out files.  AssignSourceIds leaves -1 in every file that no
+  // line refers to; those files are omitted.
+  AssignSourceIds();
+  for (FileByNameMap::iterator file_it = files_.begin();
+       file_it != files_.end(); ++file_it) {
+    const File *file = file_it->second;
+    if (file->source_id_ < 0)
+      continue;
+    if (0 > fprintf(stream, "FILE %d %s\n",
+                    file->source_id_, file->name_.c_str()))
+      return ReportError();
+  }
+
+  // Write out functions and their lines.  Address is a 64-bit type,
+  // so values are printed as unsigned long long; unsigned long is only
+  // 32 bits wide on ILP32 targets and would truncate them.  The FUNC
+  // parameter size is written in hexadecimal, like the address and
+  // size fields, as the symbol file format expects.
+  for (vector<Function *>::const_iterator func_it = functions_.begin();
+       func_it != functions_.end(); ++func_it) {
+    const Function *func = *func_it;
+    if (0 > fprintf(stream, "FUNC %llx %llx %llx %s\n",
+                    static_cast<unsigned long long>(func->address_
+                                                    - load_address_),
+                    static_cast<unsigned long long>(func->size_),
+                    static_cast<unsigned long long>(func->parameter_size_),
+                    func->name_.c_str()))
+      return ReportError();
+
+    // Line records: address and size in hex, then the line number and
+    // the source id of the line's file in decimal.
+    for (vector<Line>::const_iterator line_it = func->lines_.begin();
+         line_it != func->lines_.end(); ++line_it)
+      if (0 > fprintf(stream, "%llx %llx %d %d\n",
+                      static_cast<unsigned long long>(line_it->address_
+                                                      - load_address_),
+                      static_cast<unsigned long long>(line_it->size_),
+                      line_it->number_,
+                      line_it->file_->source_id_))
+        return ReportError();
+  }
+
+  return true;
+}
+
+} // namespace google_breakpad
diff --git a/src/common/linux/module.h b/src/common/linux/module.h
new file mode 100644
index 00000000..f3a6c199
--- /dev/null
+++ b/src/common/linux/module.h
@@ -0,0 +1,191 @@
+// Copyright (c) 2009, Google Inc. -*- mode: c++ -*-
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// module.h: defines google_breakpad::Module, for writing breakpad symbol files
+
+#ifndef COMMON_LINUX_MODULE_H__
+#define COMMON_LINUX_MODULE_H__
+
+#include <map>
+#include <string>
+#include <vector>
+#include <cstdio>
+
+#include "google_breakpad/common/breakpad_types.h"
+
+namespace google_breakpad {
+
+using std::string;
+using std::vector;
+using std::map;
+
+// A Module represents the contents of a module, and supports methods
+// for adding information produced by parsing STABS or DWARF data
+// --- possibly both from the same file --- and then writing out the
+// unified contents as a Breakpad-format symbol file.
+//
+// NOTE: a Module's destructor deletes the Files and Functions it
+// holds pointers to, and this class declares no copy constructor or
+// assignment operator.  A copied Module would therefore share those
+// pointers with the original; pass Modules by pointer or reference.
+class Module {
+ public:
+  // The type of addresses and sizes in a symbol table.
+  typedef u_int64_t Address;
+  struct File;
+  struct Function;
+  struct Line;
+
+  // Addresses appearing in Function and Line structures are
+  // absolute, not relative to the module's load address.  That
+  // is, if the module were loaded at its nominal load address, the
+  // addresses would be correct.
+
+  // A source file.
+  struct File {
+    // The name of the source file.
+    string name_;
+
+    // The file's source id.  The Write member function clears this
+    // field and assigns source ids afresh, so any value placed here
+    // before calling Write will be lost.
+    int source_id_;
+  };
+
+  // A function.
+  struct Function {
+    // For sorting by address.  (Not style-guide compliant, but it's
+    // stupid not to put this in the struct.)
+    static bool CompareByAddress(const Function *x, const Function *y) {
+      return x->address_ < y->address_;
+    }
+
+    // The function's name.
+    string name_;
+
+    // The start address and length of the function's code.
+    Address address_, size_;
+
+    // The function's parameter size.
+    Address parameter_size_;
+
+    // Source lines belonging to this function, sorted by increasing
+    // address.
+    vector<Line> lines_;
+  };
+
+  // A source line.
+  struct Line {
+    // For sorting by address.  (Not style-guide compliant, but it's
+    // stupid not to put this in the struct.)
+    static bool CompareByAddress(const Module::Line &x, const Module::Line &y) {
+      return x.address_ < y.address_;
+    }
+
+    Address address_, size_;  // The address and size of the line's code.
+    File *file_;              // The source file.
+    int number_;              // The source line number.
+  };
+
+  // Create a new module with the given name, operating system,
+  // architecture, and ID string.
+  Module(const string &name, const string &os, const string &architecture,
+         const string &id);
+  ~Module();
+
+  // Set the module's load address to LOAD_ADDRESS; addresses given
+  // for functions and lines will be written to the Breakpad symbol
+  // file as offsets from this address.  Construction initializes this
+  // module's load address to zero: addresses written to the symbol
+  // file will be the same as they appear in the Function and Line
+  // structures.
+  void SetLoadAddress(Address load_address);
+
+  // Add FUNCTION to the module.
+  // Destroying this module frees all Function objects that have been
+  // added with this function.
+  void AddFunction(Function *function);
+
+  // Add all the functions in [BEGIN,END) to the module.
+  // Destroying this module frees all Function objects that have been
+  // added with this function.
+  void AddFunctions(vector<Function *>::iterator begin,
+                    vector<Function *>::iterator end);
+
+  // If this module has a file named NAME, return a pointer to it.
+  // If it has none, then create one and return a pointer to the new
+  // file.
+  // Destroying this module frees all File objects that have been
+  // created using this function.
+  File *FindFile(const string &name);
+  File *FindFile(const char *name);
+
+  // Write this module to STREAM in the breakpad symbol format.
+  // Return true if all goes well, or false if an error occurs.  This
+  // method writes out a header based on the values given to the
+  // constructor, writes those source files created via FindFile that
+  // some function's line data refers to, and then the functions added
+  // via AddFunction and AddFunctions, along with their lines.
+  bool Write(FILE *stream);
+
+ private:
+
+  // Assign source id numbers to those of this module's files that
+  // functions' line number data actually refers to.  Set the source
+  // id numbers for all other files to -1.  We do this before writing
+  // out the symbol file, omitting any unused files.
+  void AssignSourceIds();
+
+  // Report an error that has occurred writing the symbol file, using
+  // errno to find the appropriate cause.  Return false.
+  static bool ReportError();
+
+  // Module header entries.
+  string name_, os_, architecture_, id_;
+
+  // The module's nominal load address.  Addresses for functions and
+  // lines are absolute, assuming the module is loaded at this
+  // address.
+  Address load_address_;
+
+  // Relation for maps whose keys are strings shared with some other
+  // structure.  Orders the pointers by the strings they point to.
+  struct CompareStringPtrs {
+    // const, so that a map can call its comparison object through a
+    // const reference.
+    bool operator()(const string *x, const string *y) const {
+      return *x < *y;
+    }
+  };
+
+  // A map from filenames to File structures.  The map's keys are
+  // pointers to the Files' names.
+  typedef map<const string *, File *, CompareStringPtrs> FileByNameMap;
+
+  // The module owns all the files and functions that have been added
+  // to it; destroying the module frees the Files and Functions these
+  // point to.
+  FileByNameMap files_;           // This module's source files.
+  vector<Function *> functions_;  // This module's functions.
+};
+
+}  // namespace google_breakpad
+
+#endif // COMMON_LINUX_MODULE_H__