From 786275e7195761228374d7c0f5ff02403c7e1ef8 Mon Sep 17 00:00:00 2001 From: jimblandy Date: Sat, 17 Jul 2010 15:14:30 +0000 Subject: Breakpad Linux/Mac symbol dumper: Share duplicate strings that arise in DWARF data. This patch avoids allocating many copies of identical strings appearing in debugging information. Without this patch, running dump_syms on Mozilla's libxul.so (with 173MiB of debugging information) has a peak resident set of around 450MiB. With this patch, the peak is around 365MiB. a=jimblandy, r=mark git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@626 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/common/dwarf_cfi_to_module.cc | 19 ++++++++++++++----- src/common/dwarf_cfi_to_module.h | 23 ++++++++++++++++++++++- src/common/dwarf_cu_to_module.cc | 27 ++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/common/dwarf_cfi_to_module.cc b/src/common/dwarf_cfi_to_module.cc index 611cecd5..ed0b406d 100644 --- a/src/common/dwarf_cfi_to_module.cc +++ b/src/common/dwarf_cfi_to_module.cc @@ -117,7 +117,7 @@ bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length, // address on entry to the function. So establish an initial .ra // rule citing the return address register. 
if (return_address_ < register_names_.size()) - entry_->initial_rules[".ra"] = register_names_[return_address_]; + entry_->initial_rules[ra_name_] = register_names_[return_address_]; return true; } @@ -126,11 +126,11 @@ string DwarfCFIToModule::RegisterName(int i) { assert(entry_); if (i < 0) { assert(i == kCFARegister); - return ".cfa"; + return cfa_name_; } unsigned reg = i; if (reg == return_address_) - return ".ra"; + return ra_name_; if (0 <= reg && reg < register_names_.size()) return register_names_[reg]; @@ -144,12 +144,21 @@ string DwarfCFIToModule::RegisterName(int i) { void DwarfCFIToModule::Record(Module::Address address, int reg, const string &rule) { assert(entry_); + + // Place the name in our global set of strings, and then use the string + // from the set. Even though the assignment looks like a copy, all the + // major std::string implementations use reference counting internally, + // so the effect is to have all our data structures share copies of rules + // whenever possible. Since register names are drawn from a + // vector<string>, register names are already shared. + string shared_rule = *common_strings_.insert(rule).first; + // Is this one of this entry's initial rules? if (address == entry_->address) - entry_->initial_rules[RegisterName(reg)] = rule; + entry_->initial_rules[RegisterName(reg)] = shared_rule; // File it under the appropriate address.
else - entry_->rule_changes[address][RegisterName(reg)] = rule; + entry_->rule_changes[address][RegisterName(reg)] = shared_rule; } bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) { diff --git a/src/common/dwarf_cfi_to_module.h b/src/common/dwarf_cfi_to_module.h index dc11ec53..d29a796c 100644 --- a/src/common/dwarf_cfi_to_module.h +++ b/src/common/dwarf_cfi_to_module.h @@ -40,7 +40,9 @@ #define COMMON_LINUX_DWARF_CFI_TO_MODULE_H #include +#include +#include #include #include @@ -51,6 +53,7 @@ namespace google_breakpad { using dwarf2reader::CallFrameInfo; using google_breakpad::Module; +using std::set; using std::string; using std::vector; @@ -124,7 +127,8 @@ class DwarfCFIToModule: public CallFrameInfo::Handler { DwarfCFIToModule(Module *module, const vector<string> &register_names, Reporter *reporter) : module_(module), register_names_(register_names), reporter_(reporter), - entry_(NULL), return_address_(-1) { } + entry_(NULL), return_address_(-1), cfa_name_(".cfa"), ra_name_(".ra") { + } virtual ~DwarfCFIToModule() { delete entry_; } virtual bool Entry(size_t offset, uint64 address, uint64 length, @@ -168,6 +172,23 @@ class DwarfCFIToModule: public CallFrameInfo::Handler { // The return address column for that entry. unsigned return_address_; + + // The names of the return address and canonical frame address. Putting + // these here instead of using string literals allows us to share their + // texts in reference-counted std::string implementations (all the + // popular ones). Many, many rules cite these strings. + string cfa_name_, ra_name_; + + // A set of strings used by this CFI. Before storing a string in one of + // our data structures, insert it into this set, and then use the string + // from the set.
+ // + // Because std::string uses reference counting internally, simply using + // strings from this set, even if passed by value, assigned, or held + // directly in structures and containers (map, for example), + // causes those strings to share a single instance of each distinct piece + // of text. + set<string> common_strings_; }; } // namespace google_breakpad diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc index 85d1d8c4..9b538283 100644 --- a/src/common/dwarf_cu_to_module.cc +++ b/src/common/dwarf_cu_to_module.cc @@ -36,12 +36,16 @@ #include #include +#include +#include #include "common/dwarf_line_to_module.h" namespace google_breakpad { using std::map; +using std::pair; +using std::set; using std::vector; // Data provided by a DWARF specification DIE. @@ -83,6 +87,17 @@ typedef map AbstractOriginByOffset; // Data global to the DWARF-bearing file that is private to the // DWARF-to-Module process. struct DwarfCUToModule::FilePrivate { + // A set of strings used in this CU. Before storing a string in one of + // our data structures, insert it into this set, and then use the string + // from the set. + // + // Because std::string uses reference counting internally, simply using + // strings from this set, even if passed by value, assigned, or held + // directly in structures and containers (map, for example), + // causes those strings to share a single instance of each distinct piece + // of text. + set<string> common_strings; + // A map from offsets of DIEs within the .debug_info section to // Specifications describing those DIEs. Specification references can // cross compilation unit boundaries. @@ -256,7 +271,17 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( enum DwarfForm form, const string &data) { switch (attr) { - case dwarf2reader::DW_AT_name: name_attribute_ = data; break; + case dwarf2reader::DW_AT_name: { + // Place the name in our global set of strings, and then use the + // string from the set.
Even though the assignment looks like a copy, + // all the major std::string implementations use reference counting + // internally, so the effect is to have all our data structures share + // copies of strings whenever possible. + pair<set<string>::iterator, bool> result = + cu_context_->file_context->file_private->common_strings.insert(data); + name_attribute_ = *result.first; + break; + } default: break; } } -- cgit v1.2.1