aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author jimblandy <jimblandy@4c0a9323-5329-0410-9bdc-e9ce6186880e> 2010-07-17 15:14:30 +0000
committer jimblandy <jimblandy@4c0a9323-5329-0410-9bdc-e9ce6186880e> 2010-07-17 15:14:30 +0000
commit 786275e7195761228374d7c0f5ff02403c7e1ef8 (patch)
tree 2d38496971ca3c7eacbbfbb3e240f7b64b4b5a5f /src
parent Breakpad Linux dumper: Don't map file into memory a second time just to compu... (diff)
download breakpad-786275e7195761228374d7c0f5ff02403c7e1ef8.tar.xz
Breakpad Linux/Mac symbol dumper: Share duplicate strings that arise in DWARF data.
This patch avoids allocating many copies of identical strings appearing in debugging information. Without this patch, running dump_syms on Mozilla's libxul.so (with 173MiB of debugging information) has a peak resident set of around 450MiB. With this patch, the peak is around 365MiB. a=jimblandy, r=mark git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@626 4c0a9323-5329-0410-9bdc-e9ce6186880e
Diffstat (limited to 'src')
-rw-r--r-- src/common/dwarf_cfi_to_module.cc 19
-rw-r--r-- src/common/dwarf_cfi_to_module.h 23
-rw-r--r-- src/common/dwarf_cu_to_module.cc 27
3 files changed, 62 insertions, 7 deletions
diff --git a/src/common/dwarf_cfi_to_module.cc b/src/common/dwarf_cfi_to_module.cc
index 611cecd5..ed0b406d 100644
--- a/src/common/dwarf_cfi_to_module.cc
+++ b/src/common/dwarf_cfi_to_module.cc
@@ -117,7 +117,7 @@ bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length,
// address on entry to the function. So establish an initial .ra
// rule citing the return address register.
if (return_address_ < register_names_.size())
- entry_->initial_rules[".ra"] = register_names_[return_address_];
+ entry_->initial_rules[ra_name_] = register_names_[return_address_];
return true;
}
@@ -126,11 +126,11 @@ string DwarfCFIToModule::RegisterName(int i) {
assert(entry_);
if (i < 0) {
assert(i == kCFARegister);
- return ".cfa";
+ return cfa_name_;
}
unsigned reg = i;
if (reg == return_address_)
- return ".ra";
+ return ra_name_;
if (0 <= reg && reg < register_names_.size())
return register_names_[reg];
@@ -144,12 +144,21 @@ string DwarfCFIToModule::RegisterName(int i) {
void DwarfCFIToModule::Record(Module::Address address, int reg,
const string &rule) {
assert(entry_);
+
+ // Place the name in our global set of strings, and then use the string
+ // from the set. Even though the assignment looks like a copy, all the
+ // major std::string implementations use reference counting internally,
+ // so the effect is to have all our data structures share copies of rules
+ // whenever possible. Since register names are drawn from a
+ // vector<string>, register names are already shared.
+ string shared_rule = *common_strings_.insert(rule).first;
+
// Is this one of this entry's initial rules?
if (address == entry_->address)
- entry_->initial_rules[RegisterName(reg)] = rule;
+ entry_->initial_rules[RegisterName(reg)] = shared_rule;
// File it under the appropriate address.
else
- entry_->rule_changes[address][RegisterName(reg)] = rule;
+ entry_->rule_changes[address][RegisterName(reg)] = shared_rule;
}
bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) {
diff --git a/src/common/dwarf_cfi_to_module.h b/src/common/dwarf_cfi_to_module.h
index dc11ec53..d29a796c 100644
--- a/src/common/dwarf_cfi_to_module.h
+++ b/src/common/dwarf_cfi_to_module.h
@@ -40,7 +40,9 @@
#define COMMON_LINUX_DWARF_CFI_TO_MODULE_H
#include <assert.h>
+#include <stdio.h>
+#include <set>
#include <string>
#include <vector>
@@ -51,6 +53,7 @@ namespace google_breakpad {
using dwarf2reader::CallFrameInfo;
using google_breakpad::Module;
+using std::set;
using std::string;
using std::vector;
@@ -124,7 +127,8 @@ class DwarfCFIToModule: public CallFrameInfo::Handler {
DwarfCFIToModule(Module *module, const vector<string> &register_names,
Reporter *reporter)
: module_(module), register_names_(register_names), reporter_(reporter),
- entry_(NULL), return_address_(-1) { }
+ entry_(NULL), return_address_(-1), cfa_name_(".cfa"), ra_name_(".ra") {
+ }
virtual ~DwarfCFIToModule() { delete entry_; }
virtual bool Entry(size_t offset, uint64 address, uint64 length,
@@ -168,6 +172,23 @@ class DwarfCFIToModule: public CallFrameInfo::Handler {
// The return address column for that entry.
unsigned return_address_;
+
+ // The names of the return address and canonical frame address. Putting
+ // these here instead of using string literals allows us to share their
+ // texts in reference-counted std::string implementations (all the
+ // popular ones). Many, many rules cite these strings.
+ string cfa_name_, ra_name_;
+
+ // A set of strings used by this CFI. Before storing a string in one of
+ // our data structures, insert it into this set, and then use the string
+ // from the set.
+ //
+ // Because std::string uses reference counting internally, simply using
+ // strings from this set, even if passed by value, assigned, or held
+ // directly in structures and containers (map<string, ...>, for example),
+ // causes those strings to share a single instance of each distinct piece
+ // of text.
+ set<string> common_strings_;
};
} // namespace google_breakpad
diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc
index 85d1d8c4..9b538283 100644
--- a/src/common/dwarf_cu_to_module.cc
+++ b/src/common/dwarf_cu_to_module.cc
@@ -36,12 +36,16 @@
#include <assert.h>
#include <algorithm>
+#include <set>
+#include <utility>
#include "common/dwarf_line_to_module.h"
namespace google_breakpad {
using std::map;
+using std::pair;
+using std::set;
using std::vector;
// Data provided by a DWARF specification DIE.
@@ -83,6 +87,17 @@ typedef map<uint64, AbstractOrigin> AbstractOriginByOffset;
// Data global to the DWARF-bearing file that is private to the
// DWARF-to-Module process.
struct DwarfCUToModule::FilePrivate {
+ // A set of strings used in this CU. Before storing a string in one of
+ // our data structures, insert it into this set, and then use the string
+ // from the set.
+ //
+ // Because std::string uses reference counting internally, simply using
+ // strings from this set, even if passed by value, assigned, or held
+ // directly in structures and containers (map<string, ...>, for example),
+ // causes those strings to share a single instance of each distinct piece
+ // of text.
+ set<string> common_strings;
+
// A map from offsets of DIEs within the .debug_info section to
// Specifications describing those DIEs. Specification references can
// cross compilation unit boundaries.
@@ -256,7 +271,17 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
enum DwarfForm form,
const string &data) {
switch (attr) {
- case dwarf2reader::DW_AT_name: name_attribute_ = data; break;
+ case dwarf2reader::DW_AT_name: {
+ // Place the name in our global set of strings, and then use the
+ // string from the set. Even though the assignment looks like a copy,
+ // all the major std::string implementations use reference counting
+ // internally, so the effect is to have all our data structures share
+ // copies of strings whenever possible.
+ pair<set<string>::iterator, bool> result =
+ cu_context_->file_context->file_private->common_strings.insert(data);
+ name_attribute_ = *result.first;
+ break;
+ }
default: break;
}
}