Linux dumper: Move the data structures representing the breakpad data into their own class.

src/common/linux/module.h defines a new class, google_breakpad::Module, that can represent the contents of a breakpad symbol file. Module::Write writes a well-formed symbol file to the given stream. src/common/linux/dump_symbols.cc can now lose its symbol-file-writing code, and change DumpStabsHandler to populate a Module object, rather than the old SymbolInfo/SourceFileInfo/... collection of types. The code to compute function and line sizes, even in the absence of reliable size data in STABS, is moved into a new Finalize method of DumpStabsHandler, which is responsible for completing the Module's contents. a=jimblandy r=nealsid git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@380 4c0a9323-5329-0410-9bdc-e9ce6186880e
author: jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e> 2009-08-07 19:28:45 +0000
committer: jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e> 2009-08-07 19:28:45 +0000
commit: eab03fdb72a77dfd71db028e6e5676e734bdc443 (patch)
tree: 4e3c53bf0edcf6024695f6f876d54deb51c4b23e /src/common
parent: Add files left behind by previous commit. (diff)
download: breakpad-eab03fdb72a77dfd71db028e6e5676e734bdc443.tar.xz
3 files changed, 574 insertions, 446 deletions
diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc
index a87ab97e..3f6fbb08 100644
--- a/src/common/linux/dump_symbols.cc
+++ b/src/common/linux/dump_symbols.cc
@@ -41,128 +41,34 @@
 #include <unistd.h>
 #include <algorithm>
 
-#include <string>
+#include <cstring>
 #include <functional>
 #include <list>
-#include <vector>
 #include <map>
-#include <string.h>
+#include <string>
+#include <vector>
 
-#include "common/linux/stabs_reader.h"
 #include "common/linux/dump_symbols.h"
 #include "common/linux/file_id.h"
 #include "common/linux/guid_creator.h"
+#include "common/linux/module.h"
+#include "common/linux/stabs_reader.h"
 #include "processor/scoped_ptr.h"
 
 // This namespace contains helper functions.
 namespace {
 
-struct SourceFileInfo;
-
-// Infomation of a line.
-struct LineInfo {
-  // Offset from start of the function.
-  // Load from stab symbol.
-  ElfW(Off) rva_to_func;
-  // Offset from base of the loading binary.
-  ElfW(Off) rva_to_base;
-  // Size of the line.
-  // It is the difference of the starting address of the line and starting
-  // address of the next N_SLINE, N_FUN or N_SO.
-  uint32_t size;
-  // Line number.
-  uint32_t line_num;
-  // The source file this line belongs to.
-  SourceFileInfo *file;
-};
-
-typedef std::list<struct LineInfo> LineInfoList;
-
-// Information of a function.
-struct FuncInfo {
-  // Name of the function.
-  std::string name;
-  // Offset from the base of the loading address.
-  ElfW(Off) rva_to_base;
-  // Virtual address of the function.
-  // Load from stab symbol.
-  ElfW(Addr) addr;
-  // Size of the function.
-  // It is the difference of the starting address of the function and starting
-  // address of the next N_FUN or N_SO.
-  uint32_t size;
-  // Total size of stack parameters.
-  uint32_t stack_param_size;
-  // Line information array.
-  LineInfoList line_info;
-};
-
-typedef std::list<struct FuncInfo> FuncInfoList;
-
-// Information of a source file.
-struct SourceFileInfo {
-  // Name of the source file.
-  const char *name;
-  // Starting address of the source file.
-  ElfW(Addr) addr;
-  // Id of the source file.
-  int source_id;
-  // Functions information.
-  FuncInfoList func_info;
-};
-
-// A simple std::list of pointers to SourceFileInfo structures, that
-// owns the structures pointed to: destroying the list destroys them,
-// as well.
-class SourceFileInfoList : public std::list<SourceFileInfo *> {
- public:
-  ~SourceFileInfoList() {
-    for (iterator it = this->begin(); it != this->end(); it++)
-      delete *it;
-  }
-};
-
-typedef std::map<const char *, SourceFileInfo *> NameToFileMap;
-
-// Information of a symbol table.
-// This is the root of all types of symbol.
-struct SymbolInfo {
-  // The main files used in this module.  This does not include header
-  // files; it includes only files that were provided as the primary
-  // source file for the compilation unit.  In STABS, these are files
-  // named in 'N_SO' entries.
-  SourceFileInfoList main_files;
-
-  // Map from file names to source file structures.  Note that this
-  // map's keys are compared as pointers, not strings, so if the same
-  // name appears at two different addresses in stabstr, the map will
-  // treat that as two different names.  If the linker didn't unify
-  // names in .stabstr (which it does), this would result in duplicate
-  // FILE lines, which is benign.
-  NameToFileMap name_to_file;
-
-  // An array of some addresses at which a file boundary occurs.
-  //
-  // The STABS information describing a compilation unit gives the
-  // unit's start address, but not its ending address or size.  Those
-  // must be inferred by finding the start address of the next file.
-  // For the last compilation unit, or when one compilation unit ends
-  // before the next one starts, STABS includes an N_SO entry whose
-  // filename is the empty string; such an entry's address serves
-  // simply to mark the end of the preceding compilation unit.  Rather
-  // than create FuncInfoList for such entries, we record their
-  // addresses here.  These are not necessarily sorted.
-  std::vector<ElfW(Addr)> file_boundaries;
-};
+using google_breakpad::Module;
+using std::vector;
 
 // Stab section name.
 static const char *kStabName = ".stab";
 
 // Demangle using abi call.
 // Older GCC may not support it.
-static std::string Demangle(const char *mangled) {
+static std::string Demangle(const std::string &mangled) {
   int status = 0;
-  char *demangled = abi::__cxa_demangle(mangled, NULL, NULL, &status);
+  char *demangled = abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &status);
   if (status == 0 && demangled != NULL) {
     std::string str(demangled);
     free(demangled);
@@ -222,141 +128,15 @@ static const ElfW(Shdr) *FindSectionByName(const char *name,
   return NULL;
 }
 
-// Return the SourceFileInfo for the file named NAME in SYMBOLS, as
-// recorden in the name_to_file map.  If none exists, create a new
-// one.
-//
-// If the file is a main file, it is the caller's responsibility to
-// set its address and add it to the list of main files.
-//
-// When creating a new file, this function does not make a copy of
-// NAME; NAME must stay alive for as long as the symbol table does.
-static SourceFileInfo *FindSourceFileInfo(SymbolInfo *symbols,
-                                          const char *name) {
-  SourceFileInfo **map_entry = &symbols->name_to_file[name];
-  SourceFileInfo *file;
-  if (*map_entry)
-    file = *map_entry;
-  else {
-    file = new SourceFileInfo;
-    file->name = name;
-    file->source_id = -1;
-    file->addr = 0;
-    *map_entry = file;
-  }
-  return file;
-}
-
-// Compute size and rva information based on symbols loaded from stab section.
-static bool ComputeSizeAndRVA(ElfW(Addr) loading_addr,
-                              struct SymbolInfo *symbols) {
-  SourceFileInfoList::iterator file_it;
-  FuncInfoList::iterator func_it;
-  LineInfoList::iterator line_it;
-
-  // A table of all the addresses at which files and functions start
-  // or end.  We build this from the file boundary list and our lists
-  // of files and functions, sort it, and then use it to find the ends
-  // of functions and source lines for which we have no size
-  // information.
-  std::vector<ElfW(Addr)> boundaries = symbols->file_boundaries;
-  for (file_it = symbols->main_files.begin();
-       file_it != symbols->main_files.end(); file_it++) {
-    boundaries.push_back((*file_it)->addr);
-    for (func_it = (*file_it)->func_info.begin();
-         func_it != (*file_it)->func_info.end(); func_it++)
-      boundaries.push_back(func_it->addr);
-  }
-  std::sort(boundaries.begin(), boundaries.end());
-
-  int no_next_addr_count = 0;
-  for (file_it = symbols->main_files.begin();
-       file_it != symbols->main_files.end(); file_it++) {
-    for (func_it = (*file_it)->func_info.begin();
-         func_it != (*file_it)->func_info.end(); func_it++) {
-      struct FuncInfo &func_info = *func_it;
-      assert(func_info.addr >= loading_addr);
-      func_info.rva_to_base = func_info.addr - loading_addr;
-      func_info.size = 0;
-      std::vector<ElfW(Addr)>::iterator boundary
-        = std::upper_bound(boundaries.begin(), boundaries.end(),
-                           func_info.addr);
-      ElfW(Addr) next_addr = (boundary == boundaries.end()) ? 0 : *boundary;
-      // I've noticed functions with an address bigger than any other functions
-      // and source files modules, this is probably the last function in the
-      // module, due to limitions of Linux stab symbol, it is impossible to get
-      // the exact size of this kind of function, thus we give it a default
-      // very big value. This should be safe since this is the last function.
-      // But it is a ugly hack.....
-      // The following code can reproduce the case:
-      // template<class T>
-      // void Foo(T value) {
-      // }
-      //
-      // int main(void) {
-      //   Foo(10);
-      //   Foo(std::string("hello"));
-      //   return 0;
-      // }
-      // TODO(liuli): Find a better solution.
-      static const int kDefaultSize = 0x10000000;
-      if (next_addr != 0) {
-        func_info.size = next_addr - func_info.addr;
-      } else {
-        if (no_next_addr_count > 1) {
-          fprintf(stderr, "Got more than one funtion without the \
-                  following symbol. Igore this function.\n");
-          fprintf(stderr, "The dumped symbol may not correct.\n");
-          assert(!"This should not happen!\n");
-          func_info.size = 0;
-          continue;
-        }
-
-        no_next_addr_count++;
-        func_info.size = kDefaultSize;
-      }
-      // Compute line size.
-      for (line_it = func_info.line_info.begin(); 
-	   line_it != func_info.line_info.end(); line_it++) {
-        struct LineInfo &line_info = *line_it;
-	LineInfoList::iterator next_line_it = line_it;
-	next_line_it++;
-        line_info.size = 0;
-        if (next_line_it != func_info.line_info.end()) {
-          line_info.size =
-            next_line_it->rva_to_func - line_info.rva_to_func;
-        } else {
-          // The last line in the function.
-          // If we can find a function or source file symbol immediately
-          // following the line, we can get the size of the line by computing
-          // the difference of the next address to the starting address of this
-          // line.
-          // Otherwise, we need to set a default big enough value. This occurs
-          // mostly because the this function is the last one in the module.
-          if (next_addr != 0) {
-            ElfW(Off) next_addr_offset = next_addr - func_info.addr;
-            line_info.size = next_addr_offset - line_info.rva_to_func;
-          } else {
-            line_info.size = kDefaultSize;
-          }
-        }
-        line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base;
-      }  // for each line.
-    }  // for each function.
-  } // for each source file.
-  return true;
-}
-
 // Our handler class for STABS data.
 class DumpStabsHandler: public google_breakpad::StabsHandler {
  public:
-  DumpStabsHandler(struct SymbolInfo *symbols, ElfW(Addr) loading_addr):
-      symbols_(symbols),
-      loading_addr_(loading_addr),
-      current_comp_unit_(NULL),
-      current_source_file_(NULL) {
-    current_function_.addr = 0;
-  }
+  DumpStabsHandler(Module *module) :
+      module_(module),
+      comp_unit_base_address_(0),
+      current_function_(NULL),
+      current_source_file_(NULL),
+      current_source_file_name_(NULL) { }
 
   bool StartCompilationUnit(const char *name, uint64_t address,
                             const char *build_directory);
@@ -365,112 +145,195 @@ class DumpStabsHandler: public google_breakpad::StabsHandler {
   bool EndFunction(uint64_t address);
   bool Line(uint64_t address, const char *name, int number);
 
+  // Do any final processing necessary to make module_ contain all the
+  // data provided by the STABS reader.
+  //
+  // Because STABS does not provide reliable size information for
+  // functions and lines, we need to make a pass over the data after
+  // processing all the STABS to compute those sizes.  We take care of
+  // that here.
+  void Finalize();
+
  private:
-  // The symbol info we're contributing to.
-  struct SymbolInfo *symbols_;
 
-  // The address at which this module gets loaded.
-  ElfW(Addr) loading_addr_;
+  // An arbitrary, but very large, size to use for functions whose
+  // size we can't compute properly.
+  static const uint64_t kFallbackSize = 0x10000000;
 
-  // The main file we're currently contributing functions/lines to.
-  struct SourceFileInfo *current_comp_unit_;
+  // The module we're contributing debugging info to.
+  Module *module_;
+
+  // The functions we've generated so far.  We don't add these to
+  // module_ as we parse them.  Instead, we wait until we've computed
+  // their ending address, and their lines' ending addresses.
+  //
+  // We could just stick them in module_ from the outset, but if
+  // module_ already contains data gathered from other debugging
+  // formats, that would complicate the size computation.
+  vector<Module::Function *> functions_;
+
+  // Boundary addresses.  STABS doesn't necessarily supply sizes for
+  // functions and lines, so we need to compute them ourselves by
+  // finding the next object.
+  vector<Module::Address> boundaries_;
+
+  // The base address of the current compilation unit.  We use this to
+  // recognize functions we should omit from the symbol file.  (If you
+  // know the details of why we omit these, please patch this
+  // comment.)
+  Module::Address comp_unit_base_address_;
 
   // The function we're currently contributing lines to.
-  // FIXME: This gets copied, along with all its lines.  Should be a pointer.
-  struct FuncInfo current_function_;
-
-  // The SourceFileInfo structure for the last file we got a line
-  // number in.  Instead of hashing on the name ('s address) on every
-  // line, we just check whether the name is the same as this file's
-  // (which it usually is).
-  SourceFileInfo *current_source_file_;
+  Module::Function *current_function_;
+
+  // The last Module::File we got a line number in.
+  Module::File *current_source_file_;
+
+  // The pointer in the .stabstr section of the name that
+  // current_source_file_ is built from.  This allows us to quickly
+  // recognize when the current line is in the same file as the
+  // previous one (which it usually is).
+  const char *current_source_file_name_;
 };
     
 bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address,
                                             const char *build_directory) {
-  assert(! current_comp_unit_);
-  current_comp_unit_ = FindSourceFileInfo(symbols_, name);
-  current_source_file_ = current_comp_unit_;
-  // Add it to the list; use ADDR to tell whether we've already done so.
-  if (! current_comp_unit_->addr)
-    symbols_->main_files.push_back(current_comp_unit_);
-  current_comp_unit_->addr = address;
+  assert(! comp_unit_base_address_);
+  current_source_file_name_ = name;
+  current_source_file_ = module_->FindFile(name);
+  comp_unit_base_address_ = address;
+  boundaries_.push_back(static_cast<Module::Address>(address));
   return true;
 }
 
 bool DumpStabsHandler::EndCompilationUnit(uint64_t address) {
-  assert(current_comp_unit_);
-  // We compute everything's size later.
-  symbols_->file_boundaries.push_back(address);
-  current_comp_unit_ = NULL;
+  assert(comp_unit_base_address_);
+  comp_unit_base_address_ = 0;
   current_source_file_ = NULL;
+  current_source_file_name_ = NULL;
+  if (address)
+    boundaries_.push_back(static_cast<Module::Address>(address));
   return true;
 }
 
 bool DumpStabsHandler::StartFunction(const std::string &name,
                                      uint64_t address) {
-  assert(! current_function_.addr);
-  current_function_.name = name;
-  current_function_.rva_to_base = 0;
-  current_function_.addr = address;
-  current_function_.size = 0;
-  current_function_.stack_param_size = 0;
-  current_function_.line_info.clear();
+  assert(! current_function_);
+  Module::Function *f = new Module::Function;
+  f->name_ = Demangle(name);
+  f->address_ = address;
+  f->size_ = 0;           // We compute this in DumpStabsHandler::Finalize().
+  f->parameter_size_ = 0; // We don't provide this information.
+  current_function_ = f;
+  boundaries_.push_back(static_cast<Module::Address>(address));
   return true;
 }
 
 bool DumpStabsHandler::EndFunction(uint64_t address) {
-  assert(current_function_.addr);
-  if (current_function_.addr >= current_comp_unit_->addr)
-    // This is a big copy, then free.  Should use a pointer.
-    current_comp_unit_->func_info.push_back(current_function_);
-  current_function_.addr = 0;
-  current_function_.line_info.clear();
+  assert(current_function_);
+  // Functions in this compilation unit should have address bigger
+  // than the compilation unit's starting address.  There may be a lot
+  // of duplicated entries for functions in the STABS data; only one
+  // entry can meet this requirement.
+  //
+  // (I don't really understand the above comment; just bringing it
+  // along from the previous code, and leaving the behavior unchanged.
+  // If you know the whole story, please patch this comment.  --jimb)
+  if (current_function_->address_ >= comp_unit_base_address_)
+    functions_.push_back(current_function_);
+  else
+    delete current_function_;
+  current_function_ = NULL;
+  if (address)
+    boundaries_.push_back(static_cast<Module::Address>(address));
   return true;
 }
 
 bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) {
-  assert(current_function_.addr);
+  assert(current_function_);
   assert(current_source_file_);
-  if (name != current_source_file_->name)
-    current_source_file_ = FindSourceFileInfo(symbols_, name);
-  struct LineInfo line;
-  // FIXME: might as well set rva_to_base directly.
-  line.rva_to_func = address - current_function_.addr;
-  line.file = current_source_file_;
-  line.line_num = number;
-  line.size = 0;
-  line.rva_to_base = 0;
-  current_function_.line_info.push_back(line);
+  if (name != current_source_file_name_) {
+    current_source_file_ = module_->FindFile(name);
+    current_source_file_name_ = name;
+  }
+  Module::Line line;
+  line.address_ = address;
+  line.size_ = 0;  // We compute this in DumpStabsHandler::Finalize().
+  line.file_ = current_source_file_;
+  line.number_ = number;
+  current_function_->lines_.push_back(line);
   return true;
 }
 
+void DumpStabsHandler::Finalize() {
+  // Sort our boundary list, so we can search it quickly.
+  sort(boundaries_.begin(), boundaries_.end());
+  // Sort all functions by address, just for neatness.
+  sort(functions_.begin(), functions_.end(),
+       Module::Function::CompareByAddress);
+  for (vector<Module::Function *>::iterator func_it = functions_.begin();
+       func_it != functions_.end();
+       func_it++) {
+    Module::Function *f = *func_it;
+    // Compute the function f's size.
+    vector<Module::Address>::iterator boundary
+        = std::upper_bound(boundaries_.begin(), boundaries_.end(), f->address_);
+    if (boundary != boundaries_.end())
+      f->size_ = *boundary - f->address_;
+    else
+      // If this is the last function in the module, and the STABS
+      // reader was unable to give us its ending address, then assign
+      // it a bogus, very large value.  This will happen at most once
+      // per module: since we've added all functions' addresses to the
+      // boundary table, only one can be the last.
+      f->size_ = kFallbackSize;
+
+    // Compute sizes for each of the function f's lines --- if it has any.
+    if (! f->lines_.empty()) {
+      stable_sort(f->lines_.begin(), f->lines_.end(),
+                  Module::Line::CompareByAddress);
+      vector<Module::Line>::iterator last_line = f->lines_.end() - 1;
+      for (vector<Module::Line>::iterator line_it = f->lines_.begin();
+           line_it != last_line; line_it++)
+        line_it[0].size_ = line_it[1].address_ - line_it[0].address_;
+      // Compute the size of the last line from f's end address.
+      last_line->size_ = (f->address_ + f->size_) - last_line->address_;
+    }
+  }
+  // Now that everything has a size, add our functions to the module, and
+  // dispose of our private list.
+  module_->AddFunctions(functions_.begin(), functions_.end());
+  functions_.clear();
+}
+
 static bool LoadSymbols(const ElfW(Shdr) *stab_section,
                         const ElfW(Shdr) *stabstr_section,
-                        ElfW(Addr) loading_addr,
-                        struct SymbolInfo *symbols) {
+                        Module *module) {
   if (stab_section == NULL || stabstr_section == NULL)
     return false;
 
+  // A callback object to handle data from the STABS reader.
+  DumpStabsHandler handler(module);
+  // Find the addresses of the STABS data, and create a STABS reader object.
   uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset);
   uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset);
-  DumpStabsHandler handler(symbols, loading_addr);
   google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
                                       stabstr, stabstr_section->sh_size,
                                       &handler);
+  // Read the STABS data, and do post-processing.
   if (! reader.Process())
     return false;
-
-  // Second pass, compute the size of functions and lines.
-  return ComputeSizeAndRVA(loading_addr, symbols);
+  handler.Finalize();
+  return true;
 }
 
-static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) {
+static bool LoadSymbols(ElfW(Ehdr) *elf_header, Module *module) {
   // Translate all offsets in section headers into address.
   FixAddress(elf_header);
   ElfW(Addr) loading_addr = GetLoadingAddress(
       reinterpret_cast<ElfW(Phdr) *>(elf_header->e_phoff),
       elf_header->e_phnum);
+  module->SetLoadAddress(loading_addr);
 
   const ElfW(Shdr) *sections =
     reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
@@ -484,153 +347,7 @@ static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) {
   const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections;
 
   // Load symbols.
-  return LoadSymbols(stab_section, stabstr_section, loading_addr, symbols);
-}
-
-static bool WriteModuleInfo(FILE *file,
-                            ElfW(Half) arch,
-                            const std::string &obj_file) {
-  const char *arch_name = NULL;
-  if (arch == EM_386)
-    arch_name = "x86";
-  else if (arch == EM_X86_64)
-    arch_name = "x86_64";
-  else
-    return false;
-
-  unsigned char identifier[16];
-  google_breakpad::FileID file_id(obj_file.c_str());
-  if (file_id.ElfFileIdentifier(identifier)) {
-    char identifier_str[40];
-    file_id.ConvertIdentifierToString(identifier,
-                                      identifier_str, sizeof(identifier_str));
-    char id_no_dash[40];
-    int id_no_dash_len = 0;
-    memset(id_no_dash, 0, sizeof(id_no_dash));
-    for (int i = 0; identifier_str[i] != '\0'; ++i)
-      if (identifier_str[i] != '-')
-        id_no_dash[id_no_dash_len++] = identifier_str[i];
-    // Add an extra "0" by the end.
-    id_no_dash[id_no_dash_len++] = '0';
-    std::string filename = obj_file;
-    size_t slash_pos = obj_file.find_last_of("/");
-    if (slash_pos != std::string::npos)
-      filename = obj_file.substr(slash_pos + 1);
-    return 0 <= fprintf(file, "MODULE Linux %s %s %s\n", arch_name,
-                        id_no_dash, filename.c_str());
-  }
-  return false;
-}
-
-// Set *INCLUDED_FILES to the list of included files in SYMBOLS,
-// ordered appropriately for output.  Included files should appear in
-// the order in which they are first referenced by source line info.
-// Assign these files source id numbers starting with NEXT_SOURCE_ID.
-//
-// Note that the name_to_file map may contain #included files that are
-// unreferenced; these are the result of LoadFuncSymbols omitting
-// functions from the list whose addresses fall outside the address
-// range of the file that contains them.
-static void CollectIncludedFiles(const struct SymbolInfo &symbols,
-                                 std::vector<SourceFileInfo *> *included_files,
-                                 int next_source_id) {
-  for (SourceFileInfoList::const_iterator file_it = symbols.main_files.begin();
-       file_it != symbols.main_files.end(); file_it++) {
-    for (FuncInfoList::const_iterator func_it = (*file_it)->func_info.begin();
-         func_it != (*file_it)->func_info.end(); func_it++) {
-      for (LineInfoList::const_iterator line_it = func_it->line_info.begin();
-           line_it != func_it->line_info.end(); line_it++) {
-        SourceFileInfo *file = line_it->file;
-        if (file->source_id == -1) {
-          file->source_id = next_source_id++;
-          // Here we use the source id as a mark, ensuring that each
-          // file appears in the list only once.
-          included_files->push_back(file);
-        }
-      }
-    }
-  }
-}
-
-// Write 'FILE' lines for all source files in SYMBOLS to FILE.  We
-// assign source id numbers to files here.
-static bool WriteSourceFileInfo(FILE *file, struct SymbolInfo &symbols) {
-  int next_source_id = 0;
-  // Assign source id numbers to main files, and write them out to the file.
-  for (SourceFileInfoList::iterator file_it = symbols.main_files.begin();
-       file_it != symbols.main_files.end(); file_it++) {
-    SourceFileInfo *file_info = *file_it;
-    assert(file_info->addr);
-    // We only output 'FILE' lines for main files if their names
-    // contain '.'.  The extensionless C++ header files are #included,
-    // not main files, so it wouldn't affect them.  If you know the
-    // story, please patch this comment.
-    if (strchr(file_info->name, '.')) {
-      file_info->source_id = next_source_id++;
-      if (0 > fprintf(file, "FILE %d %s\n",
-                      file_info->source_id, file_info->name))
-        return false;
-    }
-  }
-  // Compute the list of included files, and write them out.
-  // Can't use SourceFileInfoList here, because that owns the files it
-  // points to.
-  std::vector<SourceFileInfo *> included_files;
-  std::vector<SourceFileInfo *>::const_iterator file_it;
-  CollectIncludedFiles(symbols, &included_files, next_source_id);
-  for (file_it = included_files.begin(); file_it != included_files.end();
-       file_it++) {
-    if (0 > fprintf(file, "FILE %d %s\n",
-                    (*file_it)->source_id, (*file_it)->name))
-      return false;
-  }
-  return true;
-}
-
-static bool WriteOneFunction(FILE *file,
-                             const struct FuncInfo &func_info){
-  std::string func_name = Demangle(func_info.name.c_str());
-
-  if (func_info.size <= 0)
-    return true;
-
-  if (0 <= fprintf(file, "FUNC %lx %lx %d %s\n",
-                   (unsigned long) func_info.rva_to_base,
-                   (unsigned long) func_info.size,
-                   func_info.stack_param_size,
-                   func_name.c_str())) {
-    for (LineInfoList::const_iterator it = func_info.line_info.begin();
-	 it != func_info.line_info.end(); it++) {
-      const struct LineInfo &line_info = *it;
-      if (0 > fprintf(file, "%lx %lx %d %d\n",
-                      (unsigned long) line_info.rva_to_base,
-                      (unsigned long) line_info.size,
-                      line_info.line_num,
-                      line_info.file->source_id))
-        return false;
-    }
-    return true;
-  }
-  return false;
-}
-
-static bool WriteFunctionInfo(FILE *file, const struct SymbolInfo &symbols) {
-  for (SourceFileInfoList::const_iterator it = symbols.main_files.begin();
-       it != symbols.main_files.end(); it++) {
-    const struct SourceFileInfo &file_info = **it;
-    for (FuncInfoList::const_iterator fiIt = file_info.func_info.begin(); 
-	 fiIt != file_info.func_info.end(); fiIt++) {
-      const struct FuncInfo &func_info = *fiIt;
-      if (!WriteOneFunction(file, func_info))
-        return false;
-    }
-  }
-  return true;
-}
-
-static bool DumpStabSymbols(FILE *file, struct SymbolInfo &symbols) {
-  return WriteSourceFileInfo(file, symbols) &&
-    WriteFunctionInfo(file, symbols);
+  return LoadSymbols(stab_section, stabstr_section, module);
 }
 
 //
@@ -685,6 +402,48 @@ class MmapWrapper {
    size_t size_;
 };
 
+// Return the breakpad symbol file identifier for the architecture of
+// ELF_HEADER.
+const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) {
+  ElfW(Half) arch = elf_header->e_machine;
+  if (arch == EM_386)
+    return "x86";
+  else if (arch == EM_X86_64)
+    return "x86_64";
+  else
+    return NULL;
+}
+
+// Format the Elf file identifier in IDENTIFIER as a UUID with the
+// dashes removed.
+std::string FormatIdentifier(unsigned char identifier[16]) {
+  char identifier_str[40];
+  google_breakpad::FileID::ConvertIdentifierToString(
+      identifier,
+      identifier_str,
+      sizeof(identifier_str));
+  std::string id_no_dash;
+  for (int i = 0; identifier_str[i] != '\0'; ++i)
+    if (identifier_str[i] != '-')
+      id_no_dash += identifier_str[i];
+  // Add an extra "0" at the end.  PDB files on Windows have an 'age'
+  // number appended to the end of the file identifier; this isn't
+  // really used or necessary on other platforms, but let's preserve
+  // the pattern.
+  id_no_dash += '0';
+  return id_no_dash;
+}
+
+// Return the non-directory portion of FILENAME: the portion after the
+// last slash, or the whole filename if there are no slashes.
+std::string BaseFileName(const std::string &filename) {
+  // Lots of copies!  basename's behavior is less than ideal.
+  char *c_filename = strdup(filename.c_str());
+  std::string base = basename(c_filename);
+  free(c_filename);
+  return base;
+}
+
 }  // namespace
 
 namespace google_breakpad {
@@ -706,16 +465,27 @@ bool DumpSymbols::WriteSymbolFile(const std::string &obj_file,
   ElfW(Ehdr) *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_base);
   if (!IsValidElf(elf_header))
     return false;
-  struct SymbolInfo symbols;
 
-  if (!LoadSymbols(elf_header, &symbols))
-     return false;
-  // Write to symbol file.
-  if (WriteModuleInfo(sym_file, elf_header->e_machine, obj_file) &&
-      DumpStabSymbols(sym_file, symbols))
-    return true;
+  unsigned char identifier[16];
+  google_breakpad::FileID file_id(obj_file.c_str());
+  if (! file_id.ElfFileIdentifier(identifier))
+    return false;
+
+  const char *architecture = ElfArchitecture(elf_header);
+  if (! architecture)
+    return false;
 
-  return false;
+  std::string name = BaseFileName(obj_file);
+  std::string os = "Linux";
+  std::string id = FormatIdentifier(identifier);
+
+  Module module(name, os, architecture, id);
+  if (!LoadSymbols(elf_header, &module))
+    return false;
+  if (!module.Write(sym_file))
+    return false;
+
+  return true;
 }
 
 }  // namespace google_breakpad
diff --git a/src/common/linux/module.cc b/src/common/linux/module.cc
new file mode 100644
index 00000000..69bec9cd
--- /dev/null
+++ b/src/common/linux/module.cc
@@ -0,0 +1,167 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cerrno>
+#include <cstring>
+#include "common/linux/module.h"
+
+namespace google_breakpad {
+
+// Record the header fields given by the caller.  The load address
+// starts out as zero until SetLoadAddress says otherwise.
+Module::Module(const string &name, const string &os,
+               const string &architecture, const string &id)
+    : name_(name), os_(os), architecture_(architecture), id_(id),
+      load_address_(0) {
+}
+
+// Free every File and Function this module holds.
+Module::~Module() {
+  for (FileByNameMap::iterator f = files_.begin(); f != files_.end(); ++f)
+    delete f->second;
+  for (size_t i = 0; i < functions_.size(); ++i)
+    delete functions_[i];
+}
+
+void Module::SetLoadAddress(Address address) {
+  load_address_ = address;  // Write subtracts this from addresses.
+}
+
+void Module::AddFunction(Function *function) {
+  functions_.push_back(function);  // The destructor deletes it later.
+}
+
+void Module::AddFunctions(vector<Function *>::iterator begin,
+                          vector<Function *>::iterator end) {
+  functions_.insert(functions_.end(), begin, end);  // Append, keeping order.
+}
+
+Module::File *Module::FindFile(const string &name) {
+  // Each map key must point at the name string stored inside the
+  // entry's own File, never at our argument.  An operation that
+  // creates a missing entry during lookup (operator[] or insert)
+  // would make the key a pointer to the string passed as NAME, and
+  // because the key of a map's value type is const, it could not be
+  // repointed afterwards.  So look up without inserting:
+  // lower_bound returns either the existing entry or the position
+  // where a new one belongs, which serves as a hint for insert.
+  FileByNameMap::iterator position = files_.lower_bound(&name);
+  const bool found = position != files_.end() && *position->first == name;
+  if (found)
+    return position->second;
+
+  // Absent: create the File first, so that the new entry's key can
+  // point at the File's own copy of the name.
+  File *created = new File;
+  created->name_ = name;
+  created->source_id_ = -1;
+  FileByNameMap::value_type entry(&created->name_, created);
+  return files_.insert(position, entry)->second;
+}
+
+Module::File *Module::FindFile(const char *name) {
+  // Convert to a string and defer to the string overload.
+  return FindFile(string(name));
+}
+
+void Module::AssignSourceIds() {
+  // Pass 1: reset every known file to -1, meaning "unused".
+  for (FileByNameMap::iterator f = files_.begin(); f != files_.end(); ++f)
+    f->second->source_id_ = -1;
+
+  // Pass 2: flag each file that some function's line data refers to
+  // by giving it a provisional id of zero.
+  for (size_t i = 0; i < functions_.size(); ++i) {
+    vector<Line> &lines = functions_[i]->lines_;
+    for (size_t j = 0; j < lines.size(); ++j)
+      lines[j].file_->source_id_ = 0;
+  }
+
+  // Pass 3: walk the map once more and hand out consecutive ids,
+  // starting at zero, to the flagged files only.  Numbering could
+  // have happened during pass 2, but because the map is ordered by
+  // file name, doing it here yields ids in lexicographical order of
+  // the names.
+  int id = 0;
+  for (FileByNameMap::iterator f = files_.begin(); f != files_.end(); ++f) {
+    File *file = f->second;
+    if (file->source_id_ == 0) {
+      file->source_id_ = id++;
+    }
+  }
+}
+
+// Print the message for the current errno to stderr; always false.
+bool Module::ReportError() {
+  fprintf(stderr, "error writing symbol file: %s\n", strerror(errno));
+  return false;
+}
+
+// Write this module to STREAM in Breakpad symbol format: a MODULE
+// line, a FILE line per cited source file, then each function's FUNC
+// line and line records.  Report any write error and return false.
+bool Module::Write(FILE *stream) {
+  if (0 > fprintf(stream, "MODULE %s %s %s %s\n", os_.c_str(),
+                  architecture_.c_str(), id_.c_str(), name_.c_str()))
+    return ReportError();
+
+  // Write out files.  Files no line refers to keep id -1; skip them.
+  AssignSourceIds();
+  for (FileByNameMap::iterator file_it = files_.begin();
+       file_it != files_.end(); ++file_it) {
+    File *file = file_it->second;
+    if (file->source_id_ >= 0 &&
+        0 > fprintf(stream, "FILE %d %s\n",
+                    file->source_id_, file->name_.c_str()))
+      return ReportError();
+  }
+
+  // Write out functions and their lines.  Address is 64 bits wide; go
+  // through unsigned long long, as unsigned long is 32 bits on ILP32.
+  for (vector<Function *>::const_iterator func_it = functions_.begin();
+       func_it != functions_.end(); ++func_it) {
+    Function *func = *func_it;
+    if (0 > fprintf(stream, "FUNC %llx %llx %llu %s\n",
+                    (unsigned long long) (func->address_ - load_address_),
+                    (unsigned long long) func->size_,
+                    (unsigned long long) func->parameter_size_,
+                    func->name_.c_str()))
+      return ReportError();
+    for (vector<Line>::iterator line_it = func->lines_.begin();
+         line_it != func->lines_.end(); ++line_it)
+      if (0 > fprintf(stream, "%llx %llx %d %d\n",
+                      (unsigned long long) (line_it->address_ - load_address_),
+                      (unsigned long long) line_it->size_,
+                      line_it->number_, line_it->file_->source_id_))
+        return ReportError();
+  }
+  return true;
+}
+
+} // namespace google_breakpad
diff --git a/src/common/linux/module.h b/src/common/linux/module.h
new file mode 100644
index 00000000..f3a6c199
--- /dev/null
+++ b/src/common/linux/module.h
@@ -0,0 +1,191 @@
+// Copyright (c) 2009, Google Inc.             -*- mode: c++ -*-
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// module.h: defines google_breakpad::Module, for writing breakpad symbol files
+
+#ifndef COMMON_LINUX_MODULE_H__
+#define COMMON_LINUX_MODULE_H__
+
+#include <map>
+#include <string>
+#include <vector>
+#include <cstdio>
+
+#include "google_breakpad/common/breakpad_types.h"
+
+namespace google_breakpad {
+
+using std::string;
+using std::vector;
+using std::map;
+
+// A Module represents the contents of a module, and supports methods
+// for adding information produced by parsing STABS or DWARF data
+// --- possibly both from the same file --- and then writing out the
+// unified contents as a Breakpad-format symbol file.
+class Module {
+ public:
+  // The type of addresses and sizes in a symbol table.
+  typedef u_int64_t Address;
+  struct File;
+  struct Function;
+  struct Line;
+
+  // Addresses appearing in Function and Line structures are
+  // absolute, not relative to the module's load address.  That
+  // is, if the module were loaded at its nominal load address, the
+  // addresses would be correct.
+
+  // A source file.
+  struct File {
+    // The name of the source file.
+    string name_;
+
+    // The file's source id.  The Write member function clears this
+    // field and assigns source ids afresh, so any value placed here
+    // before calling Write will be lost.
+    int source_id_;
+  };
+
+  // A function.
+  struct Function {
+    // For sorting by address.  (Not style-guide compliant, but it's
+    // stupid not to put this in the struct.)
+    static bool CompareByAddress(const Function *x, const Function *y) {
+      return x->address_ < y->address_;
+    }
+
+    // The function's name.
+    string name_;
+
+    // The start address and length of the function's code.
+    Address address_, size_;
+
+    // The function's parameter size.
+    Address parameter_size_;
+
+    // Source lines belonging to this function, sorted by increasing
+    // address.
+    vector<Line> lines_;
+  };
+
+  // A source line.
+  struct Line {
+    // For sorting by address.  (Not style-guide compliant, but it's
+    // stupid not to put this in the struct.)
+    static bool CompareByAddress(const Module::Line &x, const Module::Line &y) {
+      return x.address_ < y.address_;
+    }
+
+    Address address_, size_;    // The address and size of the line's code.
+    File *file_;                // The source file.
+    int number_;                // The source line number.
+  };
+
+  // Create a new module with the given name, operating system,
+  // architecture, and ID string.
+  Module(const string &name, const string &os, const string &architecture,
+         const string &id);
+  ~Module();
+
+  // Set the module's load address to LOAD_ADDRESS; addresses given
+  // for functions and lines will be written to the Breakpad symbol
+  // file as offsets from this address.  Construction initializes this
+  // module's load address to zero: addresses written to the symbol
+  // file will be the same as they appear in the Function and Line
+  // structures.
+  void SetLoadAddress(Address load_address);
+
+  // Add FUNCTION to the module.
+  // Destroying this module frees all Function objects that have been
+  // added with this function.
+  void AddFunction(Function *function);
+
+  // Add all the functions in [BEGIN,END) to the module.
+  // Destroying this module frees all Function objects that have been
+  // added with this function.
+  void AddFunctions(vector<Function *>::iterator begin,
+                    vector<Function *>::iterator end);
+
+  // If this module has a file named NAME, return a pointer to it.  If
+  // it has none, create one and return a pointer to the new file.
+  // Destroying this module frees all File objects that have been
+  // created using this function.
+  File *FindFile(const string &name);
+  File *FindFile(const char *name);
+
+  // Write this module to STREAM in the breakpad symbol format.
+  // Return true if all goes well, or false if an error occurs.  This
+  // method writes out a header based on the values given to the
+  // constructor, the files created by FindFile that line data refers
+  // to, and then the functions added via AddFunction(s) and their lines.
+  bool Write(FILE *stream);
+
+ private:
+  // Not copyable: a copy would free the same Files and Functions twice.
+  Module(const Module &);
+  void operator=(const Module &);
+
+  // Assign source id numbers to this module's files that functions'
+  // line number data actually refers to.  Set the source id numbers
+  // for all other files to -1.  We do this before writing out the
+  // symbol file, omitting any unused files.
+  void AssignSourceIds();
+
+  // Report an error that has occurred writing the symbol file, using
+  // errno to find the appropriate cause.  Return false.
+  static bool ReportError();
+
+  // Module header entries.
+  string name_, os_, architecture_, id_;
+
+  // The module's nominal load address; function and line addresses
+  // are absolute, assuming the module is loaded here.
+  Address load_address_;
+
+  // Relation for maps whose keys are strings shared with some other
+  // structure.
+  struct CompareStringPtrs {
+    bool operator()(const string *x, const string *y) const { return *x < *y; }
+  };
+
+  // A map from filenames to File structures.  The map's keys are
+  // pointers to the Files' names.
+  typedef map<const string *, File *, CompareStringPtrs> FileByNameMap;
+
+  // The module owns all the files and functions that have been added
+  // to it; destroying the module frees the Files and Functions these
+  // point to.
+  FileByNameMap files_;                 // This module's source files.
+  vector<Function *> functions_;        // This module's functions.
+};
+
+} // namespace google_breakpad
+
+#endif  // COMMON_LINUX_MODULE_H__
author	jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e>	2009-08-07 19:28:45 +0000
committer	jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e>	2009-08-07 19:28:45 +0000
commit	eab03fdb72a77dfd71db028e6e5676e734bdc443 (patch)
tree	4e3c53bf0edcf6024695f6f876d54deb51c4b23e /src/common
parent	Add files left behind by previous commit. (diff)
download	breakpad-eab03fdb72a77dfd71db028e6e5676e734bdc443.tar.xz