From 862c9f47efe9ea2ff80c4acc94f5595b487878b8 Mon Sep 17 00:00:00 2001 From: Christopher Grant Date: Tue, 29 Oct 2019 14:56:38 -0400 Subject: linux, dump_syms: Filter module entries outside specified ranges Partitioned libraries generated with lld and llvm-objcopy currently contain a superset of debug information, beyond what applies to the library itself. This is because objcopy cannot split up debug information by partition - instead, it places a copy of all debug information into each partition. In lieu of potential future support for lld or objcopy becoming able to split up debug information, let dump_syms do the next best thing: - Find the address ranges of all PT_LOAD segments in the lib. - Supply these to the Module being generated. - Filter additions to the Module based on these ranges. Bug: 990190 Change-Id: Ib5f279f42e3f6ea79eed9665efbcc23c3c5d25dc Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1884699 Reviewed-by: Joshua Peraza --- src/common/linux/dump_symbols.cc | 25 ++++++++++++++++++++ src/common/module.cc | 29 +++++++++++++++++++++++ src/common/module.h | 16 +++++++++++++ src/common/module_unittest.cc | 50 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) (limited to 'src/common') diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 660f133e..e561ad94 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -182,6 +182,23 @@ typename ElfClass::Addr GetLoadingAddress( return 0; } +// Find the set of address ranges for all PT_LOAD segments. 
+template +vector GetPtLoadSegmentRanges( + const typename ElfClass::Phdr* program_headers, + int nheader) { + typedef typename ElfClass::Phdr Phdr; + vector ranges; + + for (int i = 0; i < nheader; ++i) { + const Phdr& header = program_headers[i]; + if (header.p_type == PT_LOAD) { + ranges.push_back(Module::Range(header.p_vaddr, header.p_memsz)); + } + } + return ranges; +} + #ifndef NO_STABS_SUPPORT template bool LoadStabs(const typename ElfClass::Ehdr* elf_header, @@ -649,6 +666,14 @@ bool LoadSymbols(const string& obj_file, module->SetLoadAddress(loading_addr); info->set_loading_addr(loading_addr, obj_file); + // Allow filtering of extraneous debug information in partitioned libraries. + // Such libraries contain debug information for all libraries extracted from + // the same combined library, implying extensive duplication. + vector address_ranges = GetPtLoadSegmentRanges( + GetOffset(elf_header, elf_header->e_phoff), + elf_header->e_phnum); + module->SetAddressRanges(address_ranges); + const Shdr* sections = GetOffset(elf_header, elf_header->e_shoff); const Shdr* section_names = sections + elf_header->e_shstrndx; diff --git a/src/common/module.cc b/src/common/module.cc index dc4f957e..aff22127 100644 --- a/src/common/module.cc +++ b/src/common/module.cc @@ -76,11 +76,19 @@ void Module::SetLoadAddress(Address address) { load_address_ = address; } +void Module::SetAddressRanges(const vector& ranges) { + address_ranges_ = ranges; +} + void Module::AddFunction(Function *function) { // FUNC lines must not hold an empty name, so catch the problem early if // callers try to add one. assert(!function->name.empty()); + if (!AddressIsInModule(function->address)) { + return; + } + // FUNCs are better than PUBLICs as they come with sizes, so remove an extern // with the same address if present. 
Extern ext(function->address); @@ -123,10 +131,18 @@ void Module::AddFunctions(vector::iterator begin, } void Module::AddStackFrameEntry(StackFrameEntry *stack_frame_entry) { + if (!AddressIsInModule(stack_frame_entry->address)) { + return; + } + stack_frame_entries_.push_back(stack_frame_entry); } void Module::AddExtern(Extern *ext) { + if (!AddressIsInModule(ext->address)) { + return; + } + std::pair ret = externs_.insert(ext); if (!ret.second) { // Free the duplicate that was not inserted because this Module @@ -232,6 +248,19 @@ bool Module::WriteRuleMap(const RuleMap &rule_map, std::ostream &stream) { return stream.good(); } +bool Module::AddressIsInModule(Address address) const { + if (address_ranges_.empty()) { + return true; + } + for (const auto& segment : address_ranges_) { + if (address >= segment.address && + address < segment.address + segment.size) { + return true; + } + } + return false; +} + bool Module::Write(std::ostream &stream, SymbolData symbol_data) { stream << "MODULE " << os_ << " " << architecture_ << " " << id_ << " " << name_ << "\n"; diff --git a/src/common/module.h b/src/common/module.h index 7b1a0db0..db8dabd5 100644 --- a/src/common/module.h +++ b/src/common/module.h @@ -205,6 +205,14 @@ class Module { // Write is used. void SetLoadAddress(Address load_address); + // Sets address filtering on elements added to the module. This allows + // libraries with extraneous debug symbols to generate symbol files containing + // only relevant symbols. For example, an LLD-generated partition library may + // contain debug information pertaining to all partitions derived from a + // single "combined" library. Filtering applies only to elements added after + // this method is called. + void SetAddressRanges(const vector& ranges); + // Add FUNCTION to the module. FUNCTION's name must not be empty. // This module owns all Function objects added with this function: // destroying the module destroys them as well. 
@@ -302,6 +310,10 @@ class Module { // if an error occurs, return false, and leave errno set. static bool WriteRuleMap(const RuleMap &rule_map, std::ostream &stream); + // Returns true if the specified address resides within a specified address + // range, or if no ranges have been specified. + bool AddressIsInModule(Address address) const; + // Module header entries. string name_, os_, architecture_, id_, code_id_; @@ -310,6 +322,10 @@ class Module { // address. Address load_address_; + // The set of valid address ranges of the module. If specified, attempts to + // add elements residing outside these ranges will be silently filtered. + vector address_ranges_; + // Relation for maps whose keys are strings shared with some other // structure. struct CompareStringPtrs { diff --git a/src/common/module_unittest.cc b/src/common/module_unittest.cc index 819fa035..b6770c5e 100644 --- a/src/common/module_unittest.cc +++ b/src/common/module_unittest.cc @@ -564,3 +564,53 @@ TEST(Construct, FunctionsAndThumbExternsWithSameAddress) { "PUBLIC cc00 0 arm_func\n", contents.c_str()); } + +TEST(Write, OutOfRangeAddresses) { + stringstream s; + Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID); + + // Specify an allowed address range, representing a PT_LOAD segment in a + // module. + vector address_ranges = { + Module::Range(0x2000ULL, 0x1000ULL), + }; + m.SetAddressRanges(address_ranges); + + // Add three stack frames (one lower, one in, and one higher than the allowed + // address range). Only the middle frame should be captured. 
+ Module::StackFrameEntry* entry1 = new Module::StackFrameEntry(); + entry1->address = 0x1000ULL; + entry1->size = 0x100ULL; + m.AddStackFrameEntry(entry1); + Module::StackFrameEntry* entry2 = new Module::StackFrameEntry(); + entry2->address = 0x2000ULL; + entry2->size = 0x100ULL; + m.AddStackFrameEntry(entry2); + Module::StackFrameEntry* entry3 = new Module::StackFrameEntry(); + entry3->address = 0x3000ULL; + entry3->size = 0x100ULL; + m.AddStackFrameEntry(entry3); + + // Add a function outside the allowed range. + Module::File* file = m.FindFile("file_name.cc"); + Module::Function* function = new Module::Function( + "function_name", 0x4000ULL); + Module::Range range(0x4000ULL, 0x1000ULL); + function->ranges.push_back(range); + function->parameter_size = 0x100ULL; + Module::Line line = { 0x4000ULL, 0x100ULL, file, 67519080 }; + function->lines.push_back(line); + m.AddFunction(function); + + // Add an extern outside the allowed range. + Module::Extern* extern1 = new Module::Extern(0x5000ULL); + extern1->name = "_xyz"; + m.AddExtern(extern1); + + m.Write(s, ALL_SYMBOL_DATA); + + EXPECT_STREQ("MODULE os-name architecture id-string name with spaces\n" + "STACK CFI INIT 2000 100 \n", + s.str().c_str()); + +} -- cgit v1.2.1