diff options
author | Yunlian Jiang <yunlian@google.com> | 2016-05-04 11:09:44 -0700 |
---|---|---|
committer | Yunlian Jiang <yunlian@google.com> | 2016-05-04 11:09:44 -0700 |
commit | 764c21f7529df70a19f1e1cb33bb9ece28e0bf8f (patch) | |
tree | d71aaba9a7d47419c05ab7af184e117b5a115592 /src/common | |
parent | macho: fix printf type mismatches (diff) | |
download | breakpad-764c21f7529df70a19f1e1cb33bb9ece28e0bf8f.tar.xz |
Add debug fission support.
This added debug fission support.
It tries to find the dwp file from the debug dir /usr/lib/debug/*/debug
and read symbols from them.
Most of this patch comes from
https://critique.corp.google.com/#review/52048295
and some fixes after that.
The elf_reader.cc comes from TOT google code. I just
removed some google dependency.
Current problems from this patch
1: Some type mismatch: from uint8_t * to char *.
2: Some hack to find the .dwp file. (replace .debug with .dwp)
BUG=chromium:604440
R=dehao@google.com, ivanpe@chromium.org
Review URL: https://codereview.chromium.org/1884283002 .
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/common.gyp | 2 | ||||
-rw-r--r-- | src/common/dwarf/bytereader.cc | 4 | ||||
-rw-r--r-- | src/common/dwarf/bytereader.h | 1 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2enums.h | 29 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2reader.cc | 436 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2reader.h | 386 | ||||
-rw-r--r-- | src/common/dwarf/elf_reader.cc | 1258 | ||||
-rw-r--r-- | src/common/dwarf/elf_reader.h | 166 | ||||
-rw-r--r-- | src/common/linux/dump_symbols.cc | 3 | ||||
-rw-r--r-- | src/common/mac/dump_syms.cc | 3 |
10 files changed, 2183 insertions, 105 deletions
diff --git a/src/common/common.gyp b/src/common/common.gyp index 177c0938..08772bf7 100644 --- a/src/common/common.gyp +++ b/src/common/common.gyp @@ -75,6 +75,8 @@ 'dwarf/dwarf2reader.cc', 'dwarf/dwarf2reader.h', 'dwarf/dwarf2reader_test_common.h', + 'dwarf/elf_reader.cc', + 'dwarf/elf_reader.h', 'dwarf/functioninfo.cc', 'dwarf/functioninfo.h', 'dwarf/line_state_machine.h', diff --git a/src/common/dwarf/bytereader.cc b/src/common/dwarf/bytereader.cc index 3ccbcad6..14b43adb 100644 --- a/src/common/dwarf/bytereader.cc +++ b/src/common/dwarf/bytereader.cc @@ -243,4 +243,8 @@ uint64 ByteReader::ReadEncodedPointer(const uint8_t *buffer, return pointer; } +Endianness ByteReader::GetEndianness() const { + return endian_; +} + } // namespace dwarf2reader diff --git a/src/common/dwarf/bytereader.h b/src/common/dwarf/bytereader.h index cf583094..59d43034 100644 --- a/src/common/dwarf/bytereader.h +++ b/src/common/dwarf/bytereader.h @@ -280,6 +280,7 @@ class ByteReader { DwarfPointerEncoding encoding, size_t *len) const; + Endianness GetEndianness() const; private: // Function pointer type for our address and offset readers. diff --git a/src/common/dwarf/dwarf2enums.h b/src/common/dwarf/dwarf2enums.h index 5565d66e..6b8a7245 100644 --- a/src/common/dwarf/dwarf2enums.h +++ b/src/common/dwarf/dwarf2enums.h @@ -149,7 +149,10 @@ enum DwarfForm { DW_FORM_sec_offset = 0x17, DW_FORM_exprloc = 0x18, DW_FORM_flag_present = 0x19, - DW_FORM_ref_sig8 = 0x20 + DW_FORM_ref_sig8 = 0x20, + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. + DW_FORM_GNU_addr_index = 0x1f01, + DW_FORM_GNU_str_index = 0x1f02 }; // Attribute names and codes @@ -264,6 +267,13 @@ enum DwarfAttribute { DW_AT_body_begin = 0x2105, DW_AT_body_end = 0x2106, DW_AT_GNU_vector = 0x2107, + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. + DW_AT_GNU_dwo_name = 0x2130, + DW_AT_GNU_dwo_id = 0x2131, + DW_AT_GNU_ranges_base = 0x2132, + DW_AT_GNU_addr_base = 0x2133, + DW_AT_GNU_pubnames = 0x2134, + DW_AT_GNU_pubtypes = 0x2135, // VMS extensions. DW_AT_VMS_rtnbeg_pd_address = 0x2201, // UPC extension. @@ -491,7 +501,22 @@ enum DwarfOpcode { DW_OP_lo_user =0xe0, DW_OP_hi_user =0xff, // GNU extensions - DW_OP_GNU_push_tls_address =0xe0 + DW_OP_GNU_push_tls_address =0xe0, + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. + DW_OP_GNU_addr_index =0xfb, + DW_OP_GNU_const_index =0xfc +}; + +// Section identifiers for DWP files +enum DwarfSectionId { + DW_SECT_INFO = 1, + DW_SECT_TYPES = 2, + DW_SECT_ABBREV = 3, + DW_SECT_LINE = 4, + DW_SECT_LOC = 5, + DW_SECT_STR_OFFSETS = 6, + DW_SECT_MACINFO = 7, + DW_SECT_MACRO = 8 }; // Source languages. These are values for DW_AT_language. diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc index fcd29b8c..003ed7b1 100644 --- a/src/common/dwarf/dwarf2reader.cc +++ b/src/common/dwarf/dwarf2reader.cc @@ -44,6 +44,8 @@ #include <string> #include <utility> +#include <sys/stat.h> + #include "common/dwarf/bytereader-inl.h" #include "common/dwarf/bytereader.h" #include "common/dwarf/line_state_machine.h" @@ -51,11 +53,38 @@ namespace dwarf2reader { -CompilationUnit::CompilationUnit(const SectionMap& sections, uint64 offset, +CompilationUnit::CompilationUnit(const string& path, + const SectionMap& sections, uint64 offset, ByteReader* reader, Dwarf2Handler* handler) - : offset_from_section_start_(offset), reader_(reader), - sections_(sections), handler_(handler), abbrevs_(NULL), - string_buffer_(NULL), string_buffer_length_(0) {} + : path_(path), offset_from_section_start_(offset), reader_(reader), + sections_(sections), handler_(handler), abbrevs_(), + string_buffer_(NULL), string_buffer_length_(0), + str_offsets_buffer_(NULL), str_offsets_buffer_length_(0), + addr_buffer_(NULL), addr_buffer_length_(0), + is_split_dwarf_(false), dwo_id_(0), dwo_name_(), + skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0), + have_checked_for_dwp_(false), dwp_path_(), + dwp_byte_reader_(), dwp_reader_() {} + +// Initialize a compilation unit from a .dwo or .dwp file. +// In this case, we need the .debug_addr section from the +// executable file that contains the corresponding skeleton +// compilation unit. We also inherit the Dwarf2Handler from +// the executable file, and call it as if we were still +// processing the original compilation unit. + +void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer, + uint64 addr_buffer_length, + uint64 addr_base, + uint64 ranges_base, + uint64 dwo_id) { + is_split_dwarf_ = true; + addr_buffer_ = addr_buffer; + addr_buffer_length_ = addr_buffer_length; + addr_base_ = addr_base; + ranges_base_ = ranges_base; + skeleton_dwo_id_ = dwo_id; +} // Read a DWARF2/3 abbreviation section. // Each abbrev consists of a abbreviation number, a tag, a byte @@ -174,6 +203,8 @@ const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start, return start + strlen(reinterpret_cast<const char *>(start)) + 1; case DW_FORM_udata: case DW_FORM_ref_udata: + case DW_FORM_GNU_str_index: + case DW_FORM_GNU_addr_index: reader_->ReadUnsignedLEB128(start, &len); return start + len; @@ -296,9 +327,31 @@ uint64 CompilationUnit::Start() { string_buffer_length_ = iter->second.second; } + // Set the string offsets section if we have one. + iter = sections_.find(".debug_str_offsets"); + if (iter != sections_.end()) { + str_offsets_buffer_ = iter->second.first; + str_offsets_buffer_length_ = iter->second.second; + } + + // Set the address section if we have one. + iter = sections_.find(".debug_addr"); + if (iter != sections_.end()) { + addr_buffer_ = iter->second.first; + addr_buffer_length_ = iter->second.second; + } + // Now that we have our abbreviations, start processing DIE's. ProcessDIEs(); + // If this is a skeleton compilation unit generated with split DWARF, + // and the client needs the full debug info, we need to find the full + // compilation unit in a .dwo or .dwp file. + if (!is_split_dwarf_ + && dwo_name_ != NULL + && handler_->NeedSplitDebugInfo()) + ProcessSplitDwarf(); + return ourlength; } @@ -320,48 +373,46 @@ const uint8_t *CompilationUnit::ProcessAttribute( return ProcessAttribute(dieoffset, start, attr, form); case DW_FORM_flag_present: - handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 1); + ProcessAttributeUnsigned(dieoffset, attr, form, 1); return start; case DW_FORM_data1: case DW_FORM_flag: - handler_->ProcessAttributeUnsigned(dieoffset, attr, form, - reader_->ReadOneByte(start)); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOneByte(start)); return start + 1; case DW_FORM_data2: - handler_->ProcessAttributeUnsigned(dieoffset, attr, form, - reader_->ReadTwoBytes(start)); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadTwoBytes(start)); return start + 2; case DW_FORM_data4: - handler_->ProcessAttributeUnsigned(dieoffset, attr, form, - reader_->ReadFourBytes(start)); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadFourBytes(start)); return start + 4; case DW_FORM_data8: - handler_->ProcessAttributeUnsigned(dieoffset, attr, form, - reader_->ReadEightBytes(start)); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadEightBytes(start)); return start + 8; case DW_FORM_string: { const char *str = reinterpret_cast<const char *>(start); - handler_->ProcessAttributeString(dieoffset, attr, form, - str); + ProcessAttributeString(dieoffset, attr, form, str); return start + strlen(str) + 1; } case DW_FORM_udata: - handler_->ProcessAttributeUnsigned(dieoffset, attr, form, - reader_->ReadUnsignedLEB128(start, - &len)); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, &len)); return start + len; case DW_FORM_sdata: - handler_->ProcessAttributeSigned(dieoffset, attr, form, - reader_->ReadSignedLEB128(start, &len)); + ProcessAttributeSigned(dieoffset, attr, form, + reader_->ReadSignedLEB128(start, &len)); return start + len; case DW_FORM_addr: - handler_->ProcessAttributeUnsigned(dieoffset, attr, form, - reader_->ReadAddress(start)); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(start)); return start + reader_->AddressSize(); case DW_FORM_sec_offset: - handler_->ProcessAttributeUnsigned(dieoffset, attr, form, - reader_->ReadOffset(start)); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOffset(start)); return start + reader_->OffsetSize(); case DW_FORM_ref1: @@ -441,10 +492,32 @@ const uint8_t *CompilationUnit::ProcessAttribute( assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_); const char *str = reinterpret_cast<const char *>(string_buffer_ + offset); - handler_->ProcessAttributeString(dieoffset, attr, form, - str); + ProcessAttributeString(dieoffset, attr, form, str); return start + reader_->OffsetSize(); } + + case DW_FORM_GNU_str_index: { + uint64 str_index = reader_->ReadUnsignedLEB128(start, &len); + const uint8_t* offset_ptr = + str_offsets_buffer_ + str_index * reader_->OffsetSize(); + const uint64 offset = reader_->ReadOffset(offset_ptr); + if (offset >= string_buffer_length_) { + return NULL; + } + + const char* str = reinterpret_cast<const char *>(string_buffer_) + offset; + ProcessAttributeString(dieoffset, attr, form, str); + return start + len; + break; + } + case DW_FORM_GNU_addr_index: { + uint64 addr_index = reader_->ReadUnsignedLEB128(start, &len); + const uint8_t* addr_ptr = + addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize(); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(addr_ptr)); + return start + len; + } } fprintf(stderr, "Unhandled form type\n"); return NULL; @@ -458,6 +531,16 @@ const uint8_t *CompilationUnit::ProcessDIE(uint64 dieoffset, i++) { start = ProcessAttribute(dieoffset, start, i->first, i->second); } + + // If this is a compilation unit in a split DWARF object, verify that + // the dwo_id matches. If it does not match, we will ignore this + // compilation unit. + if (abbrev.tag == DW_TAG_compile_unit + && is_split_dwarf_ + && dwo_id_ != skeleton_dwo_id_) { + return NULL; + } + return start; } @@ -515,6 +598,307 @@ void CompilationUnit::ProcessDIEs() { } } +// Check for a valid ELF file and return the Address size. +// Returns 0 if not a valid ELF file. +inline int GetElfWidth(const ElfReader& elf) { + if (elf.IsElf32File()) + return 4; + if (elf.IsElf64File()) + return 8; + return 0; +} + +void CompilationUnit::ProcessSplitDwarf() { + struct stat statbuf; + if (!have_checked_for_dwp_) { + // Look for a .dwp file in the same directory as the executable. + have_checked_for_dwp_ = true; + string dwp_suffix(".dwp"); + dwp_path_ = path_ + dwp_suffix; + if (stat(dwp_path_.c_str(), &statbuf) != 0) { + // Fall back to a split .debug file in the same directory. + string debug_suffix(".debug"); + dwp_path_ = path_; + size_t found = path_.rfind(debug_suffix); + if (found + debug_suffix.length() == path_.length()) + dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix); + } + if (stat(dwp_path_.c_str(), &statbuf) == 0) { + ElfReader* elf = new ElfReader(dwp_path_); + int width = GetElfWidth(*elf); + if (width != 0) { + dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness())); + dwp_byte_reader_->SetAddressSize(width); + dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf)); + dwp_reader_->Initialize(); + } else { + delete elf; + } + } + } + bool found_in_dwp = false; + if (dwp_reader_ != NULL) { + // If we have a .dwp file, read the debug sections for the requested CU. + SectionMap sections; + dwp_reader_->ReadDebugSectionsForCU(dwo_id_, §ions); + if (!sections.empty()) { + found_in_dwp = true; + CompilationUnit dwp_comp_unit(dwp_path_, sections, 0, + dwp_byte_reader_.get(), handler_); + dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_, + ranges_base_, dwo_id_); + dwp_comp_unit.Start(); + } + } + if (!found_in_dwp) { + // If no .dwp file, try to open the .dwo file. + if (stat(dwo_name_, &statbuf) == 0) { + ElfReader elf(dwo_name_); + int width = GetElfWidth(elf); + if (width != 0) { + ByteReader reader(ENDIANNESS_LITTLE); + reader.SetAddressSize(width); + SectionMap sections; + ReadDebugSectionsFromDwo(&elf, §ions); + CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader, + handler_); + dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, + addr_base_, ranges_base_, dwo_id_); + dwo_comp_unit.Start(); + } + } + } +} + +void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader, + SectionMap* sections) { + static const char* const section_names[] = { + ".debug_abbrev", + ".debug_info", + ".debug_str_offsets", + ".debug_str" + }; + for (unsigned int i = 0u; + i < sizeof(section_names)/sizeof(*(section_names)); ++i) { + string base_name = section_names[i]; + string dwo_name = base_name + ".dwo"; + size_t section_size; + const char* section_data = elf_reader->GetSectionByName(dwo_name, + §ion_size); + if (section_data != NULL) + sections->insert(std::make_pair( + base_name, std::make_pair( + reinterpret_cast<const uint8_t *>(section_data), + section_size))); + } +} + +DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader) + : elf_reader_(elf_reader), byte_reader_(byte_reader), + cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL), + string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0), + nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL), + offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL), + abbrev_size_(0), info_data_(NULL), info_size_(0), + str_offsets_data_(NULL), str_offsets_size_(0) {} + +DwpReader::~DwpReader() { + if (elf_reader_) delete elf_reader_; +} + +void DwpReader::Initialize() { + cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index", + &cu_index_size_); + if (cu_index_ == NULL) { + return; + } + // The .debug_str.dwo section is shared by all CUs in the file. + string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo", + &string_buffer_size_); + + version_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_)); + + if (version_ == 1) { + nslots_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_) + + 3 * sizeof(uint32)); + phash_ = cu_index_ + 4 * sizeof(uint32); + pindex_ = phash_ + nslots_ * sizeof(uint64); + shndx_pool_ = pindex_ + nslots_ * sizeof(uint32); + if (shndx_pool_ >= cu_index_ + cu_index_size_) { + version_ = 0; + } + } else if (version_ == 2) { + ncolumns_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_) + sizeof(uint32)); + nunits_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_) + 2 * sizeof(uint32)); + nslots_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_) + 3 * sizeof(uint32)); + phash_ = cu_index_ + 4 * sizeof(uint32); + pindex_ = phash_ + nslots_ * sizeof(uint64); + offset_table_ = pindex_ + nslots_ * sizeof(uint32); + size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32); + abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo", + &abbrev_size_); + info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_); + str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo", + &str_offsets_size_); + if (size_table_ >= cu_index_ + cu_index_size_) { + version_ = 0; + } + } +} + +void DwpReader::ReadDebugSectionsForCU(uint64 dwo_id, + SectionMap* sections) { + if (version_ == 1) { + int slot = LookupCU(dwo_id); + if (slot == -1) { + return; + } + + // The index table points to the section index pool, where we + // can read a list of section indexes for the debug sections + // for the CU whose dwo_id we are looking for. + int index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(pindex_) + + slot * sizeof(uint32)); + const char* shndx_list = shndx_pool_ + index * sizeof(uint32); + for (;;) { + if (shndx_list >= cu_index_ + cu_index_size_) { + version_ = 0; + return; + } + unsigned int shndx = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(shndx_list)); + shndx_list += sizeof(uint32); + if (shndx == 0) + break; + const char* section_name = elf_reader_->GetSectionName(shndx); + size_t section_size; + const char* section_data; + // We're only interested in these four debug sections. + // The section names in the .dwo file end with ".dwo", but we + // add them to the sections table with their normal names. + if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_abbrev", + std::make_pair(reinterpret_cast<const uint8_t *> (section_data), + section_size))); + } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_info", + std::make_pair(reinterpret_cast<const uint8_t *> (section_data), + section_size))); + } else if (!strncmp(section_name, ".debug_str_offsets", + strlen(".debug_str_offsets"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_str_offsets", + std::make_pair(reinterpret_cast<const uint8_t *> (section_data), + section_size))); + } + } + sections->insert(std::make_pair( + ".debug_str", + std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_), + string_buffer_size_))); + } else if (version_ == 2) { + uint32 index = LookupCUv2(dwo_id); + if (index == 0) { + return; + } + + // The index points to a row in each of the section offsets table + // and the section size table, where we can read the offsets and sizes + // of the contributions to each debug section from the CU whose dwo_id + // we are looking for. Row 0 of the section offsets table has the + // section ids for each column of the table. The size table begins + // with row 1. + const char* id_row = offset_table_; + const char* offset_row = offset_table_ + + index * ncolumns_ * sizeof(uint32); + const char* size_row = + size_table_ + (index - 1) * ncolumns_ * sizeof(uint32); + if (size_row + ncolumns_ * sizeof(uint32) > cu_index_ + cu_index_size_) { + version_ = 0; + return; + } + for (unsigned int col = 0u; col < ncolumns_; ++col) { + uint32 section_id = + byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t *>(id_row) + + col * sizeof(uint32)); + uint32 offset = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(offset_row) + + col * sizeof(uint32)); + uint32 size = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(size_row) + col * sizeof(uint32)); + if (section_id == DW_SECT_ABBREV) { + sections->insert(std::make_pair( + ".debug_abbrev", + std::make_pair(reinterpret_cast<const uint8_t *> (abbrev_data_) + + offset, size))); + } else if (section_id == DW_SECT_INFO) { + sections->insert(std::make_pair( + ".debug_info", + std::make_pair(reinterpret_cast<const uint8_t *> (info_data_) + + offset, size))); + } else if (section_id == DW_SECT_STR_OFFSETS) { + sections->insert(std::make_pair( + ".debug_str_offsets", + std::make_pair(reinterpret_cast<const uint8_t *> (str_offsets_data_) + + offset, size))); + } + } + sections->insert(std::make_pair( + ".debug_str", + std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_), + string_buffer_size_))); + } +} + +int DwpReader::LookupCU(uint64 dwo_id) { + uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1); + uint64 probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64)); + if (probe != 0 && probe != dwo_id) { + uint32 secondary_hash = + (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1; + do { + slot = (slot + secondary_hash) & (nslots_ - 1); + probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64)); + } while (probe != 0 && probe != dwo_id); + } + if (probe == 0) + return -1; + return slot; +} + +uint32 DwpReader::LookupCUv2(uint64 dwo_id) { + uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1); + uint64 probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64)); + uint32 index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32)); + if (index != 0 && probe != dwo_id) { + uint32 secondary_hash = + (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1; + do { + slot = (slot + secondary_hash) & (nslots_ - 1); + probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64)); + index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32)); + } while (index != 0 && probe != dwo_id); + } + return index; +} + LineInfo::LineInfo(const uint8_t *buffer, uint64 buffer_length, ByteReader* reader, LineInfoHandler* handler): handler_(handler), reader_(reader), buffer_(buffer) { diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h index 1f5b37a0..064c42bc 100644 --- a/src/common/dwarf/dwarf2reader.h +++ b/src/common/dwarf/dwarf2reader.h @@ -47,16 +47,19 @@ #include <string> #include <utility> #include <vector> +#include <memory> #include "common/dwarf/bytereader.h" #include "common/dwarf/dwarf2enums.h" #include "common/dwarf/types.h" #include "common/using_std_string.h" +#include "common/dwarf/elf_reader.h" namespace dwarf2reader { struct LineStateMachine; class Dwarf2Handler; class LineInfoHandler; +class DwpReader; // This maps from a string naming a section to a pair containing a // the data for the section, and the size of the section. @@ -184,6 +187,106 @@ class LineInfoHandler { uint32 file_num, uint32 line_num, uint32 column_num) { } }; +// This class is the main interface between the reader and the +// client. The virtual functions inside this get called for +// interesting events that happen during DWARF2 reading. +// The default implementation skips everything. +class Dwarf2Handler { + public: + Dwarf2Handler() { } + + virtual ~Dwarf2Handler() { } + + // Start to process a compilation unit at OFFSET from the beginning of the + // .debug_info section. Return false if you would like to skip this + // compilation unit. + virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version) { return false; } + + // When processing a skeleton compilation unit, resulting from a split + // DWARF compilation, once the skeleton debug info has been read, + // the reader will call this function to ask the client if it needs + // the full debug info from the .dwo or .dwp file. Return true if + // you need it, or false to skip processing the split debug info. + virtual bool NeedSplitDebugInfo() { return true; } + + // Start to process a split compilation unit at OFFSET from the beginning of + // the debug_info section in the .dwp/.dwo file. Return false if you would + // like to skip this compilation unit. + virtual bool StartSplitCompilationUnit(uint64 offset, + uint64 cu_length) { return false; } + + // Start to process a DIE at OFFSET from the beginning of the .debug_info + // section. Return false if you would like to skip this DIE. + virtual bool StartDIE(uint64 offset, enum DwarfTag tag) { return false; } + + // Called when we have an attribute with unsigned data to give to our + // handler. The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { } + + // Called when we have an attribute with signed data to give to our handler. + // The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeSigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { } + + // Called when we have an attribute whose value is a reference to + // another DIE. The attribute belongs to the DIE at OFFSET from the + // beginning of the .debug_info section. Its name is ATTR, its form + // is FORM, and the offset of the DIE being referred to from the + // beginning of the .debug_info section is DATA. + virtual void ProcessAttributeReference(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { } + + // Called when we have an attribute with a buffer of data to give to our + // handler. The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, DATA points to + // the buffer's contents, and its length in bytes is LENGTH. The buffer is + // owned by the caller, not the callee, and may not persist for very long. + // If you want the data to be available later, it needs to be copied. + virtual void ProcessAttributeBuffer(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const uint8_t *data, + uint64 len) { } + + // Called when we have an attribute with string data to give to our handler. + // The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string& data) { } + + // Called when we have an attribute whose value is the 64-bit signature + // of a type unit in the .debug_types section. OFFSET is the offset of + // the DIE whose attribute we're reporting. ATTR and FORM are the + // attribute's name and form. SIGNATURE is the type unit's signature. + virtual void ProcessAttributeSignature(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 signature) { } + + // Called when finished processing the DIE at OFFSET. + // Because DWARF2/3 specifies a tree of DIEs, you may get starts + // before ends of the previous DIE, as we process children before + // ending the parent. + virtual void EndDIE(uint64 offset) { } + +}; + // The base of DWARF2/3 debug info is a DIE (Debugging Information // Entry. // DWARF groups DIE's into a tree and calls the root of this tree a @@ -225,12 +328,21 @@ class CompilationUnit { // Initialize a compilation unit. This requires a map of sections, // the offset of this compilation unit in the .debug_info section, a // ByteReader, and a Dwarf2Handler class to call callbacks in. - CompilationUnit(const SectionMap& sections, uint64 offset, + CompilationUnit(const string& path, const SectionMap& sections, uint64 offset, ByteReader* reader, Dwarf2Handler* handler); virtual ~CompilationUnit() { if (abbrevs_) delete abbrevs_; } + // Initialize a compilation unit from a .dwo or .dwp file. + // In this case, we need the .debug_addr section from the + // executable file that contains the corresponding skeleton + // compilation unit. We also inherit the Dwarf2Handler from + // the executable file, and call it as if we were still + // processing the original compilation unit. + void SetSplitDwarf(const uint8_t* addr_buffer, uint64 addr_buffer_length, + uint64 addr_base, uint64 ranges_base, uint64 dwo_id); + // Begin reading a Dwarf2 compilation unit, and calling the // callbacks in the Dwarf2Handler @@ -281,6 +393,73 @@ class CompilationUnit { enum DwarfAttribute attr, enum DwarfForm form); + // Called when we have an attribute with unsigned data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + // If we see a DW_AT_GNU_dwo_id attribute, save the value so that + // we can find the debug info in a .dwo or .dwp file. + void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + if (attr == DW_AT_GNU_dwo_id) { + dwo_id_ = data; + } + else if (attr == DW_AT_GNU_addr_base) { + addr_base_ = data; + } + else if (attr == DW_AT_GNU_ranges_base) { + ranges_base_ = data; + } + // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5, + // that base will apply to DW_AT_ranges attributes in the + // skeleton CU as well as in the .dwo/.dwp files. + else if (attr == DW_AT_ranges && is_split_dwarf_) { + data += ranges_base_; + } + handler_->ProcessAttributeUnsigned(offset, attr, form, data); + } + + // Called when we have an attribute with signed data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + void ProcessAttributeSigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { + handler_->ProcessAttributeSigned(offset, attr, form, data); + } + + // Called when we have an attribute with a buffer of data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA, and the + // length of the buffer is LENGTH. + void ProcessAttributeBuffer(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const uint8_t* data, + uint64 len) { + handler_->ProcessAttributeBuffer(offset, attr, form, data, len); + } + + // Called when we have an attribute with string data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + // If we see a DW_AT_GNU_dwo_name attribute, save the value so + // that we can find the debug info in a .dwo or .dwp file. + void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char* data) { + if (attr == DW_AT_GNU_dwo_name) + dwo_name_ = data; + handler_->ProcessAttributeString(offset, attr, form, data); + } + // Processes all DIEs for this compilation unit void ProcessDIEs(); @@ -292,6 +471,16 @@ class CompilationUnit { // new place to position the stream to. const uint8_t *SkipAttribute(const uint8_t *start, enum DwarfForm form); + // Process the actual debug information in a split DWARF file. + void ProcessSplitDwarf(); + + // Read the debug sections from a .dwo file. + void ReadDebugSectionsFromDwo(ElfReader* elf_reader, + SectionMap* sections); + + // Path of the file containing the debug information. + const string path_; + // Offset from section start is the offset of this compilation unit // from the beginning of the .debug_info section. uint64 offset_from_section_start_; @@ -322,94 +511,141 @@ class CompilationUnit { // ProcessAttribute, which is in the hot path for DWARF2 reading. const uint8_t *string_buffer_; uint64 string_buffer_length_; -}; -// This class is the main interface between the reader and the -// client. The virtual functions inside this get called for -// interesting events that happen during DWARF2 reading. -// The default implementation skips everything. + // String offsets section buffer and length, if we have a string offsets + // section (.debug_str_offsets or .debug_str_offsets.dwo). + const uint8_t* str_offsets_buffer_; + uint64 str_offsets_buffer_length_; -class Dwarf2Handler { + // Address section buffer and length, if we have an address section + // (.debug_addr). + const uint8_t* addr_buffer_; + uint64 addr_buffer_length_; + + // Flag indicating whether this compilation unit is part of a .dwo + // or .dwp file. If true, we are reading this unit because a + // skeleton compilation unit in an executable file had a + // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. + // In a .dwo file, we expect the string offsets section to + // have a ".dwo" suffix, and we will use the ".debug_addr" section + // associated with the skeleton compilation unit. + bool is_split_dwarf_; + + // The value of the DW_AT_GNU_dwo_id attribute, if any. + uint64 dwo_id_; + + // The value of the DW_AT_GNU_dwo_name attribute, if any. + const char* dwo_name_; + + // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute + // from the skeleton CU. + uint64 skeleton_dwo_id_; + + // The value of the DW_AT_GNU_ranges_base attribute, if any. + uint64 ranges_base_; + + // The value of the DW_AT_GNU_addr_base attribute, if any. + uint64 addr_base_; + + // True if we have already looked for a .dwp file. + bool have_checked_for_dwp_; + + // Path to the .dwp file. + string dwp_path_; + + // ByteReader for the DWP file. + std::unique_ptr<ByteReader> dwp_byte_reader_; + + // DWP reader. + std::unique_ptr<DwpReader> dwp_reader_; +}; + +// A Reader for a .dwp file. Supports the fetching of DWARF debug +// info for a given dwo_id. +// +// There are two versions of .dwp files. In both versions, the +// .dwp file is an ELF file containing only debug sections. +// In Version 1, the file contains many copies of each debug +// section, one for each .dwo file that is packaged in the .dwp +// file, and the .debug_cu_index section maps from the dwo_id +// to a set of section indexes. In Version 2, the file contains +// one of each debug section, and the .debug_cu_index section +// maps from the dwo_id to a set of offsets and lengths that +// identify each .dwo file's contribution to the larger sections. + +class DwpReader { public: - Dwarf2Handler() { } + DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader); - virtual ~Dwarf2Handler() { } + ~DwpReader(); - // Start to process a compilation unit at OFFSET from the beginning of the - // .debug_info section. Return false if you would like to skip this - // compilation unit. - virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, - uint8 offset_size, uint64 cu_length, - uint8 dwarf_version) { return false; } + // Read the CU index and initialize data members. + void Initialize(); - // Start to process a DIE at OFFSET from the beginning of the .debug_info - // section. Return false if you would like to skip this DIE. - virtual bool StartDIE(uint64 offset, enum DwarfTag tag) { return false; } + // Read the debug sections for the given dwo_id. + void ReadDebugSectionsForCU(uint64 dwo_id, SectionMap* sections); - // Called when we have an attribute with unsigned data to give to our - // handler. The attribute is for the DIE at OFFSET from the beginning of the - // .debug_info section. Its name is ATTR, its form is FORM, and its value is - // DATA. - virtual void ProcessAttributeUnsigned(uint64 offset, - enum DwarfAttribute attr, - enum DwarfForm form, - uint64 data) { } + private: + // Search a v1 hash table for "dwo_id". Returns the slot index + // where the dwo_id was found, or -1 if it was not found. + int LookupCU(uint64 dwo_id); - // Called when we have an attribute with signed data to give to our handler. - // The attribute is for the DIE at OFFSET from the beginning of the - // .debug_info section. Its name is ATTR, its form is FORM, and its value is - // DATA. - virtual void ProcessAttributeSigned(uint64 offset, - enum DwarfAttribute attr, - enum DwarfForm form, - int64 data) { } + // Search a v2 hash table for "dwo_id". Returns the row index + // in the offsets and sizes tables, or 0 if it was not found. + uint32 LookupCUv2(uint64 dwo_id); - // Called when we have an attribute whose value is a reference to - // another DIE. The attribute belongs to the DIE at OFFSET from the - // beginning of the .debug_info section. Its name is ATTR, its form - // is FORM, and the offset of the DIE being referred to from the - // beginning of the .debug_info section is DATA. - virtual void ProcessAttributeReference(uint64 offset, - enum DwarfAttribute attr, - enum DwarfForm form, - uint64 data) { } + // The ELF reader for the .dwp file. + ElfReader* elf_reader_; - // Called when we have an attribute with a buffer of data to give to our - // handler. The attribute is for the DIE at OFFSET from the beginning of the - // .debug_info section. Its name is ATTR, its form is FORM, DATA points to - // the buffer's contents, and its length in bytes is LENGTH. The buffer is - // owned by the caller, not the callee, and may not persist for very long. - // If you want the data to be available later, it needs to be copied. - virtual void ProcessAttributeBuffer(uint64 offset, - enum DwarfAttribute attr, - enum DwarfForm form, - const uint8_t *data, - uint64 len) { } + // The ByteReader for the .dwp file. + const ByteReader& byte_reader_; - // Called when we have an attribute with string data to give to our handler. - // The attribute is for the DIE at OFFSET from the beginning of the - // .debug_info section. Its name is ATTR, its form is FORM, and its value is - // DATA. - virtual void ProcessAttributeString(uint64 offset, - enum DwarfAttribute attr, - enum DwarfForm form, - const string& data) { } + // Pointer to the .debug_cu_index section. + const char* cu_index_; - // Called when we have an attribute whose value is the 64-bit signature - // of a type unit in the .debug_types section. OFFSET is the offset of - // the DIE whose attribute we're reporting. ATTR and FORM are the - // attribute's name and form. SIGNATURE is the type unit's signature. - virtual void ProcessAttributeSignature(uint64 offset, - enum DwarfAttribute attr, - enum DwarfForm form, - uint64 signature) { } + // Size of the .debug_cu_index section. + size_t cu_index_size_; - // Called when finished processing the DIE at OFFSET. - // Because DWARF2/3 specifies a tree of DIEs, you may get starts - // before ends of the previous DIE, as we process children before - // ending the parent. - virtual void EndDIE(uint64 offset) { } + // Pointer to the .debug_str.dwo section. + const char* string_buffer_; + + // Size of the .debug_str.dwo section. + size_t string_buffer_size_; + + // Version of the .dwp file. We support versions 1 and 2 currently. + int version_; + + // Number of columns in the section tables (version 2). + unsigned int ncolumns_; + + // Number of units in the section tables (version 2). + unsigned int nunits_; + + // Number of slots in the hash table. + unsigned int nslots_; + + // Pointer to the beginning of the hash table. + const char* phash_; + + // Pointer to the beginning of the index table. + const char* pindex_; + + // Pointer to the beginning of the section index pool (version 1). + const char* shndx_pool_; + + // Pointer to the beginning of the section offset table (version 2). + const char* offset_table_; + + // Pointer to the beginning of the section size table (version 2). + const char* size_table_; + // Contents of the sections of interest (version 2). + const char* abbrev_data_; + size_t abbrev_size_; + const char* info_data_; + size_t info_size_; + const char* str_offsets_data_; + size_t str_offsets_size_; }; // This class is a reader for DWARF's Call Frame Information. CFI diff --git a/src/common/dwarf/elf_reader.cc b/src/common/dwarf/elf_reader.cc new file mode 100644 index 00000000..81683141 --- /dev/null +++ b/src/common/dwarf/elf_reader.cc @@ -0,0 +1,1258 @@ +// Copyright 2005 Google Inc. All Rights Reserved. +// Author: chatham@google.com (Andrew Chatham) +// Author: satorux@google.com (Satoru Takabayashi) +// +// Code for reading in ELF files. +// +// For information on the ELF format, see +// http://www.x86.org/ftp/manuals/tools/elf.pdf +// +// I also liked: +// http://www.caldera.com/developers/gabi/1998-04-29/contents.html +// +// A note about types: When dealing with the file format, we use types +// like Elf32_Word, but in the public interfaces we treat all +// addresses as uint64. As a result, we should be able to symbolize +// 64-bit binaries from a 32-bit process (which we don't do, +// anyway). size_t should therefore be avoided, except where required +// by things like mmap(). +// +// Although most of this code can deal with arbitrary ELF files of +// either word size, the public ElfReader interface only examines +// files loaded into the current address space, which must all match +// __WORDSIZE. This code cannot handle ELF files with a non-native +// byte ordering. +// +// TODO(chatham): It would be nice if we could accomplish this task +// without using malloc(), so we could use it as the process is dying. + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // needed for pread() +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#include <elf.h> +#include <string.h> + +#include <algorithm> +#include <map> +#include <string> +#include <vector> +#include "zlib.h" + +#include "elf_reader.h" +//#include "using_std_string.h" +// EM_AARCH64 is not defined by elf.h of GRTE v3 on x86. +// TODO(dougkwan): Remove this when v17 is retired. +#if !defined(EM_AARCH64) +#define EM_AARCH64 183 /* ARM AARCH64 */ +#endif + +// TODO(dthomson): Can be removed once all Java code is using the Google3 +// launcher. We need to avoid processing PLT functions as it causes memory +// fragmentation in malloc, which is fixed in tcmalloc - and if the Google3 +// launcher is used the JVM will then use tcmalloc. b/13735638 +//DEFINE_bool(elfreader_process_dynsyms, true, +// "Activate PLT function processing"); + +using std::string; +using std::vector; + +namespace { + +// The lowest bit of an ARM symbol value is used to indicate a Thumb address. +const int kARMThumbBitOffset = 0; + +// Converts an ARM Thumb symbol value to a true aligned address value. +template <typename T> +T AdjustARMThumbSymbolValue(const T& symbol_table_value) { + return symbol_table_value & ~(1 << kARMThumbBitOffset); +} + +// Names of PLT-related sections. +const char kElfPLTRelSectionName[] = ".rel.plt"; // Use Rel struct. +const char kElfPLTRelaSectionName[] = ".rela.plt"; // Use Rela struct. +const char kElfPLTSectionName[] = ".plt"; +const char kElfDynSymSectionName[] = ".dynsym"; + +const int kX86PLTCodeSize = 0x10; // Size of one x86 PLT function in bytes. +const int kARMPLTCodeSize = 0xc; +const int kAARCH64PLTCodeSize = 0x10; + +const int kX86PLT0Size = 0x10; // Size of the special PLT0 entry. +const int kARMPLT0Size = 0x14; +const int kAARCH64PLT0Size = 0x20; + +// Suffix for PLT functions when it needs to be explicitly identified as such. +const char kPLTFunctionSuffix[] = "@plt"; + +} // namespace + +namespace dwarf2reader { + +template <class ElfArch> class ElfReaderImpl; + +// 32-bit and 64-bit ELF files are processed exactly the same, except +// for various field sizes. Elf32 and Elf64 encompass all of the +// differences between the two formats, and all format-specific code +// in this file is templated on one of them. +class Elf32 { + public: + typedef Elf32_Ehdr Ehdr; + typedef Elf32_Shdr Shdr; + typedef Elf32_Phdr Phdr; + typedef Elf32_Word Word; + typedef Elf32_Sym Sym; + typedef Elf32_Rel Rel; + typedef Elf32_Rela Rela; + + // What should be in the EI_CLASS header. + static const int kElfClass = ELFCLASS32; + + // Given a symbol pointer, return the binding type (eg STB_WEAK). + static char Bind(const Elf32_Sym *sym) { + return ELF32_ST_BIND(sym->st_info); + } + // Given a symbol pointer, return the symbol type (eg STT_FUNC). + static char Type(const Elf32_Sym *sym) { + return ELF32_ST_TYPE(sym->st_info); + } + + // Extract the symbol index from the r_info field of a relocation. + static int r_sym(const Elf32_Word r_info) { + return ELF32_R_SYM(r_info); + } +}; + + +class Elf64 { + public: + typedef Elf64_Ehdr Ehdr; + typedef Elf64_Shdr Shdr; + typedef Elf64_Phdr Phdr; + typedef Elf64_Word Word; + typedef Elf64_Sym Sym; + typedef Elf64_Rel Rel; + typedef Elf64_Rela Rela; + + // What should be in the EI_CLASS header. + static const int kElfClass = ELFCLASS64; + + static char Bind(const Elf64_Sym *sym) { + return ELF64_ST_BIND(sym->st_info); + } + static char Type(const Elf64_Sym *sym) { + return ELF64_ST_TYPE(sym->st_info); + } + static int r_sym(const Elf64_Xword r_info) { + return ELF64_R_SYM(r_info); + } +}; + + +// ElfSectionReader mmaps a section of an ELF file ("section" is ELF +// terminology). The ElfReaderImpl object providing the section header +// must exist for the lifetime of this object. +// +// The motivation for mmaping individual sections of the file is that +// many Google executables are large enough when unstripped that we +// have to worry about running out of virtual address space. +// +// For compressed sections we have no choice but to allocate memory. +template<class ElfArch> +class ElfSectionReader { + public: + ElfSectionReader(const char *name, const string &path, int fd, + const typename ElfArch::Shdr §ion_header) + : contents_aligned_(NULL), + contents_(NULL), + header_(section_header) { + // Back up to the beginning of the page we're interested in. + const size_t additional = header_.sh_offset % getpagesize(); + const size_t offset_aligned = header_.sh_offset - additional; + section_size_ = header_.sh_size; + size_aligned_ = section_size_ + additional; + // If the section has been stripped or is empty, do not attempt + // to process its contents. + if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0) + return; + contents_aligned_ = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED, + fd, offset_aligned); + // Set where the offset really should begin. + contents_ = reinterpret_cast<char *>(contents_aligned_) + + (header_.sh_offset - offset_aligned); + + // Check for and handle any compressed contents. + //if (strncmp(name, ".zdebug_", strlen(".zdebug_")) == 0) + // DecompressZlibContents(); + // TODO(saugustine): Add support for proposed elf-section flag + // "SHF_COMPRESS". + } + + ~ElfSectionReader() { + if (contents_aligned_ != NULL) + munmap(contents_aligned_, size_aligned_); + else + delete[] contents_; + } + + // Return the section header for this section. + typename ElfArch::Shdr const &header() const { return header_; } + + // Return memory at the given offset within this section. + const char *GetOffset(typename ElfArch::Word bytes) const { + return contents_ + bytes; + } + + const char *contents() const { return contents_; } + size_t section_size() const { return section_size_; } + + private: + // page-aligned file contents + void *contents_aligned_; + // contents as usable by the client. For non-compressed sections, + // pointer within contents_aligned_ to where the section data + // begins; for compressed sections, pointer to the decompressed + // data. + char *contents_; + // size of contents_aligned_ + size_t size_aligned_; + // size of contents. + size_t section_size_; + const typename ElfArch::Shdr header_; +}; + +// An iterator over symbols in a given section. It handles walking +// through the entries in the specified section and mapping symbol +// entries to their names in the appropriate string table (in +// another section). +template<class ElfArch> +class SymbolIterator { + public: + SymbolIterator(ElfReaderImpl<ElfArch> *reader, + typename ElfArch::Word section_type) + : symbol_section_(reader->GetSectionByType(section_type)), + string_section_(NULL), + num_symbols_in_section_(0), + symbol_within_section_(0) { + + // If this section type doesn't exist, leave + // num_symbols_in_section_ as zero, so this iterator is already + // done(). + if (symbol_section_ != NULL) { + num_symbols_in_section_ = symbol_section_->header().sh_size / + symbol_section_->header().sh_entsize; + + // Symbol sections have sh_link set to the section number of + // the string section containing the symbol names. + string_section_ = reader->GetSection(symbol_section_->header().sh_link); + } + } + + // Return true iff we have passed all symbols in this section. + bool done() const { + return symbol_within_section_ >= num_symbols_in_section_; + } + + // Advance to the next symbol in this section. + // REQUIRES: !done() + void Next() { ++symbol_within_section_; } + + // Return a pointer to the current symbol. + // REQUIRES: !done() + const typename ElfArch::Sym *GetSymbol() const { + return reinterpret_cast<const typename ElfArch::Sym*>( + symbol_section_->GetOffset(symbol_within_section_ * + symbol_section_->header().sh_entsize)); + } + + // Return the name of the current symbol, NULL if it has none. + // REQUIRES: !done() + const char *GetSymbolName() const { + int name_offset = GetSymbol()->st_name; + if (name_offset == 0) + return NULL; + return string_section_->GetOffset(name_offset); + } + + int GetCurrentSymbolIndex() const { + return symbol_within_section_; + } + + private: + const ElfSectionReader<ElfArch> *const symbol_section_; + const ElfSectionReader<ElfArch> *string_section_; + int num_symbols_in_section_; + int symbol_within_section_; +}; + + +// Copied from strings/strutil.h. Per chatham, +// this library should not depend on strings. + +static inline bool MyHasSuffixString(const string& str, const string& suffix) { + int len = str.length(); + int suflen = suffix.length(); + return (suflen <= len) && (str.compare(len-suflen, suflen, suffix) == 0); +} + + +// ElfReader loads an ELF binary and can provide information about its +// contents. It is most useful for matching addresses to function +// names. It does not understand debugging formats (eg dwarf2), so it +// can't print line numbers. It takes a path to an elf file and a +// readable file descriptor for that file, which it does not assume +// ownership of. +template<class ElfArch> +class ElfReaderImpl { + public: + explicit ElfReaderImpl(const string &path, int fd) + : path_(path), + fd_(fd), + section_headers_(NULL), + program_headers_(NULL), + opd_section_(NULL), + base_for_text_(0), + plts_supported_(false), + plt_code_size_(0), + plt0_size_(0), + visited_relocation_entries_(false) { + string error; + is_dwp_ = MyHasSuffixString(path, ".dwp"); + ParseHeaders(fd, path); + // Currently we need some extra information for PowerPC64 binaries + // including a way to read the .opd section for function descriptors and a + // way to find the linked base for function symbols. + if (header_.e_machine == EM_PPC64) { + // "opd_section_" must always be checked for NULL before use. + opd_section_ = GetSectionInfoByName(".opd", &opd_info_); + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + const char *name = GetSectionName(section_headers_[k].sh_name); + if (strncmp(name, ".text", strlen(".text")) == 0) { + base_for_text_ = + section_headers_[k].sh_addr - section_headers_[k].sh_offset; + break; + } + } + } + // Turn on PLTs. + if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) { + plt_code_size_ = kX86PLTCodeSize; + plt0_size_ = kX86PLT0Size; + plts_supported_ = true; + } else if (header_.e_machine == EM_ARM) { + plt_code_size_ = kARMPLTCodeSize; + plt0_size_ = kARMPLT0Size; + plts_supported_ = true; + } else if (header_.e_machine == EM_AARCH64) { + plt_code_size_ = kAARCH64PLTCodeSize; + plt0_size_ = kAARCH64PLT0Size; + plts_supported_ = true; + } + } + + ~ElfReaderImpl() { + for (unsigned int i = 0u; i < sections_.size(); ++i) + delete sections_[i]; + delete [] section_headers_; + delete [] program_headers_; + } + + // Examine the headers of the file and return whether the file looks + // like an ELF file for this architecture. Takes an already-open + // file descriptor for the candidate file, reading in the prologue + // to see if the ELF file appears to match the current + // architecture. If error is non-NULL, it will be set with a reason + // in case of failure. + static bool IsArchElfFile(int fd, string *error) { + unsigned char header[EI_NIDENT]; + if (pread(fd, header, sizeof(header), 0) != sizeof(header)) { + if (error != NULL) *error = "Could not read header"; + return false; + } + + if (memcmp(header, ELFMAG, SELFMAG) != 0) { + if (error != NULL) *error = "Missing ELF magic"; + return false; + } + + if (header[EI_CLASS] != ElfArch::kElfClass) { + if (error != NULL) *error = "Different word size"; + return false; + } + + int endian = 0; + if (header[EI_DATA] == ELFDATA2LSB) + endian = __LITTLE_ENDIAN; + else if (header[EI_DATA] == ELFDATA2MSB) + endian = __BIG_ENDIAN; + if (endian != __BYTE_ORDER) { + if (error != NULL) *error = "Different byte order"; + return false; + } + + return true; + } + + // Return true if we can use this symbol in Address-to-Symbol map. + bool CanUseSymbol(const char *name, const typename ElfArch::Sym *sym) { + // For now we only save FUNC and NOTYPE symbols. For now we just + // care about functions, but some functions written in assembler + // don't have a proper ELF type attached to them, so we store + // NOTYPE symbols as well. The remaining significant type is + // OBJECT (eg global variables), which represent about 25% of + // the symbols in a typical google3 binary. + if (ElfArch::Type(sym) != STT_FUNC && + ElfArch::Type(sym) != STT_NOTYPE) { + return false; + } + + // Target specific filtering. + switch (header_.e_machine) { + case EM_AARCH64: + case EM_ARM: + // Filter out '$x' special local symbols used by tools + return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL; + case EM_X86_64: + // Filter out read-only constants like .LC123. + return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL; + default: + return true; + } + } + + // Iterate over the symbols in a section, either SHT_DYNSYM or + // SHT_SYMTAB. Add all symbols to the given SymbolMap. + /* + void GetSymbolPositions(SymbolMap *symbols, + typename ElfArch::Word section_type, + uint64 mem_offset, + uint64 file_offset) { + // This map is used to filter out "nested" functions. + // See comment below. + AddrToSymMap addr_to_sym_map; + for (SymbolIterator<ElfArch> it(this, section_type); + !it.done(); it.Next()) { + const char *name = it.GetSymbolName(); + if (name == NULL) + continue; + const typename ElfArch::Sym *sym = it.GetSymbol(); + if (CanUseSymbol(name, sym)) { + const int sec = sym->st_shndx; + + // We don't support special section indices. The most common + // is SHN_ABS, for absolute symbols used deep in the bowels of + // glibc. Also ignore any undefined symbols. + if (sec == SHN_UNDEF || + (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) { + continue; + } + + const typename ElfArch::Shdr& hdr = section_headers_[sec]; + + // Adjust for difference between where we expected to mmap + // this section, and where it was actually mmapped. + const int64 expected_base = hdr.sh_addr - hdr.sh_offset; + const int64 real_base = mem_offset - file_offset; + const int64 adjust = real_base - expected_base; + + uint64 start = sym->st_value + adjust; + + // Adjust function symbols for PowerPC64 by dereferencing and adjusting + // the function descriptor to get the function address. + if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) { + const uint64 opd_addr = + AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); + // Only adjust the returned value if the function address was found. + if (opd_addr != sym->st_value) { + const int64 adjust_function_symbols = + real_base - base_for_text_; + start = opd_addr + adjust_function_symbols; + } + } + + addr_to_sym_map.push_back(std::make_pair(start, sym)); + } + } + std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter); + addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(), + addr_to_sym_map.end(), &AddrToSymEquals), + addr_to_sym_map.end()); + + // Squeeze out any "nested functions". + // Nested functions are not allowed in C, but libc plays tricks. + // + // For example, here is disassembly of /lib64/tls/libc-2.3.5.so: + // 0x00000000000aa380 <read+0>: cmpl $0x0,0x2781b9(%rip) + // 0x00000000000aa387 <read+7>: jne 0xaa39b <read+27> + // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax + // 0x00000000000aa390 <__read_nocancel+7>: syscall + // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax + // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef <read+111> + // 0x00000000000aa39a <__read_nocancel+17>: retq + // 0x00000000000aa39b <read+27>: sub $0x28,%rsp + // 0x00000000000aa39f <read+31>: mov %rdi,0x8(%rsp) + // ... + // Without removing __read_nocancel, symbolizer will return NULL + // given e.g. 0xaa39f (because the lower bound is __read_nocancel, + // but 0xaa39f is beyond its end. + if (addr_to_sym_map.empty()) { + return; + } + const ElfSectionReader<ElfArch> *const symbol_section = + this->GetSectionByType(section_type); + const ElfSectionReader<ElfArch> *const string_section = + this->GetSection(symbol_section->header().sh_link); + + typename AddrToSymMap::iterator curr = addr_to_sym_map.begin(); + // Always insert the first symbol. + symbols->AddSymbol(string_section->GetOffset(curr->second->st_name), + curr->first, curr->second->st_size); + typename AddrToSymMap::iterator prev = curr++; + for (; curr != addr_to_sym_map.end(); ++curr) { + const uint64 prev_addr = prev->first; + const uint64 curr_addr = curr->first; + const typename ElfArch::Sym *const prev_sym = prev->second; + const typename ElfArch::Sym *const curr_sym = curr->second; + if (prev_addr + prev_sym->st_size <= curr_addr || + // The next condition is true if two symbols overlap like this: + // + // Previous symbol |----------------------------| + // Current symbol |-------------------------------| + // + // These symbols are not found in google3 codebase, but in + // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so. + // + // 0619e040 00000046 t CardTableModRefBS::write_region_work() + // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work() + // + // We allow overlapped symbols rather than ignore these. + // Due to the way SymbolMap::GetSymbolAtPosition() works, + // lookup for any address in [curr_addr, curr_addr + its size) + // (e.g. 0619e071) will produce the current symbol, + // which is the desired outcome. + prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) { + const char *name = string_section->GetOffset(curr_sym->st_name); + symbols->AddSymbol(name, curr_addr, curr_sym->st_size); + prev = curr; + } else { + // Current symbol is "nested" inside previous one like this: + // + // Previous symbol |----------------------------| + // Current symbol |---------------------| + // + // This happens within glibc, e.g. __read_nocancel is nested + // "inside" __read. Ignore "inner" symbol. + //DCHECK_LE(curr_addr + curr_sym->st_size, + // prev_addr + prev_sym->st_size); + ; + } + } + } +*/ + + void VisitSymbols(typename ElfArch::Word section_type, + ElfReader::SymbolSink *sink) { + VisitSymbols(section_type, sink, -1, -1, false); + } + + void VisitSymbols(typename ElfArch::Word section_type, + ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type, + bool get_raw_symbol_values) { + for (SymbolIterator<ElfArch> it(this, section_type); + !it.done(); it.Next()) { + const char *name = it.GetSymbolName(); + if (!name) continue; + const typename ElfArch::Sym *sym = it.GetSymbol(); + if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) && + (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) { + typename ElfArch::Sym symbol = *sym; + // Add a PLT symbol in addition to the main undefined symbol. + // Only do this for SHT_DYNSYM, because PLT symbols are dynamic. + int symbol_index = it.GetCurrentSymbolIndex(); + // TODO(dthomson): Can be removed once all Java code is using the + // Google3 launcher. + if (section_type == SHT_DYNSYM && + static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size() && + symbols_plt_offsets_[symbol_index] != 0) { + string plt_name = string(name) + kPLTFunctionSuffix; + if (plt_function_names_[symbol_index].empty()) { + plt_function_names_[symbol_index] = plt_name; + } else if (plt_function_names_[symbol_index] != plt_name) { + ; + } + sink->AddSymbol(plt_function_names_[symbol_index].c_str(), + symbols_plt_offsets_[it.GetCurrentSymbolIndex()], + plt_code_size_); + } + if (!get_raw_symbol_values) + AdjustSymbolValue(&symbol); + sink->AddSymbol(name, symbol.st_value, symbol.st_size); + } + } + } + + void VisitRelocationEntries() { + if (visited_relocation_entries_) { + return; + } + visited_relocation_entries_ = true; + + if (!plts_supported_) { + return; + } + // First determine if PLTs exist. If not, then there is nothing to do. + ElfReader::SectionInfo plt_section_info; + const char* plt_section = + GetSectionInfoByName(kElfPLTSectionName, &plt_section_info); + if (!plt_section) { + return; + } + if (plt_section_info.size == 0) { + return; + } + + // The PLTs could be referenced by either a Rel or Rela (Rel with Addend) + // section. + ElfReader::SectionInfo rel_section_info; + ElfReader::SectionInfo rela_section_info; + const char* rel_section = + GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info); + const char* rela_section = + GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info); + + const typename ElfArch::Rel* rel = + reinterpret_cast<const typename ElfArch::Rel*>(rel_section); + const typename ElfArch::Rela* rela = + reinterpret_cast<const typename ElfArch::Rela*>(rela_section); + + if (!rel_section && !rela_section) { + return; + } + + // Use either Rel or Rela section, depending on which one exists. + size_t section_size = rel_section ? rel_section_info.size + : rela_section_info.size; + size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel) + : sizeof(typename ElfArch::Rela); + + // Determine the number of entries in the dynamic symbol table. + ElfReader::SectionInfo dynsym_section_info; + const char* dynsym_section = + GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info); + // The dynsym section might not exist, or it might be empty. In either case + // there is nothing to be done so return. + if (!dynsym_section || dynsym_section_info.size == 0) { + return; + } + size_t num_dynamic_symbols = + dynsym_section_info.size / dynsym_section_info.entsize; + symbols_plt_offsets_.resize(num_dynamic_symbols, 0); + + // TODO(dthomson): Can be removed once all Java code is using the + // Google3 launcher. + // Make storage room for PLT function name strings. + plt_function_names_.resize(num_dynamic_symbols); + + for (size_t i = 0; i < section_size / entry_size; ++i) { + // Determine symbol index from the |r_info| field. + int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info + : rela[i].r_info); + if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) { + continue; + } + symbols_plt_offsets_[sym_index] = + plt_section_info.addr + plt0_size_ + i * plt_code_size_; + } + } + + // Return an ElfSectionReader for the first section of the given + // type by iterating through all section headers. Returns NULL if + // the section type is not found. + const ElfSectionReader<ElfArch> *GetSectionByType( + typename ElfArch::Word section_type) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + if (section_headers_[k].sh_type == section_type) { + return GetSection(k); + } + } + return NULL; + } + + // Return the name of section "shndx". Returns NULL if the section + // is not found. + const char *GetSectionNameByIndex(int shndx) { + return GetSectionName(section_headers_[shndx].sh_name); + } + + // Return a pointer to section "shndx", and store the size in + // "size". Returns NULL if the section is not found. + const char *GetSectionContentsByIndex(int shndx, size_t *size) { + const ElfSectionReader<ElfArch> *section = GetSection(shndx); + if (section != NULL) { + *size = section->section_size(); + return section->contents(); + } + return NULL; + } + + // Return a pointer to the first section of the given name by + // iterating through all section headers, and store the size in + // "size". Returns NULL if the section name is not found. + const char *GetSectionContentsByName(const string §ion_name, + size_t *size) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + // When searching for sections in a .dwp file, the sections + // we're looking for will always be at the end of the section + // table, so reverse the direction of iteration. + int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; + const char *name = GetSectionName(section_headers_[shndx].sh_name); + if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { + const ElfSectionReader<ElfArch> *section = GetSection(shndx); + if (section == NULL) { + return NULL; + } else { + *size = section->section_size(); + return section->contents(); + } + } + } + return NULL; + } + + // This is like GetSectionContentsByName() but it returns a lot of extra + // information about the section. + const char *GetSectionInfoByName(const string §ion_name, + ElfReader::SectionInfo *info) { + for (unsigned int k = 0u; k < GetNumSections(); ++k) { + // When searching for sections in a .dwp file, the sections + // we're looking for will always be at the end of the section + // table, so reverse the direction of iteration. + int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; + const char *name = GetSectionName(section_headers_[shndx].sh_name); + if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { + const ElfSectionReader<ElfArch> *section = GetSection(shndx); + if (section == NULL) { + return NULL; + } else { + info->type = section->header().sh_type; + info->flags = section->header().sh_flags; + info->addr = section->header().sh_addr; + info->offset = section->header().sh_offset; + info->size = section->header().sh_size; + info->link = section->header().sh_link; + info->info = section->header().sh_info; + info->addralign = section->header().sh_addralign; + info->entsize = section->header().sh_entsize; + return section->contents(); + } + } + } + return NULL; + } + + // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD + // segments are present. This is the address an ELF image was linked + // (by static linker) to be loaded at. Usually (but not always) 0 for + // shared libraries and position-independent executables. + uint64 VaddrOfFirstLoadSegment() const { + // Relocatable objects (of type ET_REL) do not have LOAD segments. + if (header_.e_type == ET_REL) { + return 0; + } + for (int i = 0; i < GetNumProgramHeaders(); ++i) { + if (program_headers_[i].p_type == PT_LOAD) { + return program_headers_[i].p_vaddr; + } + } + return 0; + } + + // According to the LSB ("ELF special sections"), sections with debug + // info are prefixed by ".debug". The names are not specified, but they + // look like ".debug_line", ".debug_info", etc. + bool HasDebugSections() { + // Debug sections are likely to be near the end, so reverse the + // direction of iteration. + for (int k = GetNumSections() - 1; k >= 0; --k) { + const char *name = GetSectionName(section_headers_[k].sh_name); + if (strncmp(name, ".debug", strlen(".debug")) == 0) return true; + if (strncmp(name, ".zdebug", strlen(".zdebug")) == 0) return true; + } + return false; + } + + bool IsDynamicSharedObject() const { + return header_.e_type == ET_DYN; + } + + // Return the number of sections. + uint64_t GetNumSections() const { + if (HasManySections()) + return first_section_header_.sh_size; + return header_.e_shnum; + } + + private: + typedef vector<pair<uint64, const typename ElfArch::Sym *> > AddrToSymMap; + + static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs, + const typename AddrToSymMap::value_type& rhs) { + return lhs.first < rhs.first; + } + + static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs, + const typename AddrToSymMap::value_type& rhs) { + return lhs.first == rhs.first; + } + + // Does this ELF file have too many sections to fit in the program header? + bool HasManySections() const { + return header_.e_shnum == SHN_UNDEF; + } + + // Return the number of program headers. + int GetNumProgramHeaders() const { + if (HasManySections() && header_.e_phnum == 0xffff && + first_section_header_.sh_info != 0) + return first_section_header_.sh_info; + return header_.e_phnum; + } + + // Return the index of the string table. + int GetStringTableIndex() const { + if (HasManySections()) { + if (header_.e_shstrndx == 0xffff) + return first_section_header_.sh_link; + else if (header_.e_shstrndx >= GetNumSections()) + return 0; + } + return header_.e_shstrndx; + } + + // Given an offset into the section header string table, return the + // section name. + const char *GetSectionName(typename ElfArch::Word sh_name) { + const ElfSectionReader<ElfArch> *shstrtab = + GetSection(GetStringTableIndex()); + if (shstrtab != NULL) { + return shstrtab->GetOffset(sh_name); + } + return NULL; + } + + // Return an ElfSectionReader for the given section. The reader will + // be freed when this object is destroyed. + const ElfSectionReader<ElfArch> *GetSection(int num) { + const char *name; + // Hard-coding the name for the section-name string table prevents + // infinite recursion. + if (num == GetStringTableIndex()) + name = ".shstrtab"; + else + name = GetSectionNameByIndex(num); + ElfSectionReader<ElfArch> *& reader = sections_[num]; + if (reader == NULL) + reader = new ElfSectionReader<ElfArch>(name, path_, fd_, + section_headers_[num]); + return reader; + } + + // Parse out the overall header information from the file and assert + // that it looks sane. This contains information like the magic + // number and target architecture. + bool ParseHeaders(int fd, const string &path) { + // Read in the global ELF header. + if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) { + return false; + } + + // Must be an executable, dynamic shared object or relocatable object + if (header_.e_type != ET_EXEC && + header_.e_type != ET_DYN && + header_.e_type != ET_REL) { + return false; + } + // Need a section header. + if (header_.e_shoff == 0) { + return false; + } + + if (header_.e_shnum == SHN_UNDEF) { + // The number of sections in the program header is only a 16-bit value. In + // the event of overflow (greater than SHN_LORESERVE sections), e_shnum + // will read SHN_UNDEF and the true number of section header table entries + // is found in the sh_size field of the first section header. + // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html + if (pread(fd, &first_section_header_, sizeof(first_section_header_), + header_.e_shoff) != sizeof(first_section_header_)) { + return false; + } + } + + // Dynamically allocate enough space to store the section headers + // and read them out of the file. + const int section_headers_size = + GetNumSections() * sizeof(*section_headers_); + section_headers_ = new typename ElfArch::Shdr[section_headers_size]; + if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) != + section_headers_size) { + return false; + } + + // Dynamically allocate enough space to store the program headers + // and read them out of the file. + //const int program_headers_size = + // GetNumProgramHeaders() * sizeof(*program_headers_); + program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()]; + + // Presize the sections array for efficiency. + sections_.resize(GetNumSections(), NULL); + return true; + } + + // Given the "value" of a function descriptor return the address of the + // function (i.e. the dereferenced value). Otherwise return "value". + uint64 AdjustPPC64FunctionDescriptorSymbolValue(uint64 value) { + if (opd_section_ != NULL && + opd_info_.addr <= value && + value < opd_info_.addr + opd_info_.size) { + uint64 offset = value - opd_info_.addr; + return (*reinterpret_cast<const uint64*>(opd_section_ + offset)); + } + return value; + } + + void AdjustSymbolValue(typename ElfArch::Sym* sym) { + switch (header_.e_machine) { + case EM_ARM: + // For ARM architecture, if the LSB of the function symbol offset is set, + // it indicates a Thumb function. This bit should not be taken literally. + // Clear it. + if (ElfArch::Type(sym) == STT_FUNC) + sym->st_value = AdjustARMThumbSymbolValue(sym->st_value); + break; + case EM_386: + // No adjustment needed for Intel x86 architecture. However, explicitly + // define this case as we use it quite often. + break; + case EM_PPC64: + // PowerPC64 currently has function descriptors as part of the ABI. + // Function symbols need to be adjusted accordingly. + if (ElfArch::Type(sym) == STT_FUNC) + sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); + break; + default: + break; + } + } + + friend class SymbolIterator<ElfArch>; + + // The file we're reading. + const string path_; + // Open file descriptor for path_. Not owned by this object. + const int fd_; + + // The global header of the ELF file. + typename ElfArch::Ehdr header_; + + // The header of the first section. This may be used to supplement the ELF + // file header. + typename ElfArch::Shdr first_section_header_; + + // Array of GetNumSections() section headers, allocated when we read + // in the global header. + typename ElfArch::Shdr *section_headers_; + + // Array of GetNumProgramHeaders() program headers, allocated when we read + // in the global header. + typename ElfArch::Phdr *program_headers_; + + // An array of pointers to ElfSectionReaders. Sections are + // mmaped as they're needed and not released until this object is + // destroyed. + vector<ElfSectionReader<ElfArch>*> sections_; + + // For PowerPC64 we need to keep track of function descriptors when looking up + // values for funtion symbols values. Function descriptors are kept in the + // .opd section and are dereferenced to find the function address. + ElfReader::SectionInfo opd_info_; + const char *opd_section_; // Must be checked for NULL before use. + int64 base_for_text_; + + // Read PLT-related sections for the current architecture. + bool plts_supported_; + // Code size of each PLT function for the current architecture. + size_t plt_code_size_; + // Size of the special first entry in the .plt section that calls the runtime + // loader resolution routine, and that all other entries jump to when doing + // lazy symbol binding. + size_t plt0_size_; + + // Maps a dynamic symbol index to a PLT offset. + // The vector entry index is the dynamic symbol index. + std::vector<uint64> symbols_plt_offsets_; + + // Container for PLT function name strings. These strings are passed by + // reference to SymbolSink::AddSymbol() so they need to be stored somewhere. + std::vector<string> plt_function_names_; + + bool visited_relocation_entries_; + + // True if this is a .dwp file. + bool is_dwp_; +}; + +ElfReader::ElfReader(const string &path) + : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) { + // linux 2.6.XX kernel can show deleted files like this: + // /var/run/nscd/dbYLJYaE (deleted) + // and the kernel-supplied vdso and vsyscall mappings like this: + // [vdso] + // [vsyscall] + if (MyHasSuffixString(path, " (deleted)")) + return; + if (path == "[vdso]") + return; + if (path == "[vsyscall]") + return; + + fd_ = open(path.c_str(), O_RDONLY); +} + +ElfReader::~ElfReader() { + if (fd_ != -1) + close(fd_); + if (impl32_ != NULL) + delete impl32_; + if (impl64_ != NULL) + delete impl64_; +} + + +// The only word-size specific part of this file is IsNativeElfFile(). +#if __WORDSIZE == 32 +#define NATIVE_ELF_ARCH Elf32 +#elif __WORDSIZE == 64 +#define NATIVE_ELF_ARCH Elf64 +#else +#error "Invalid word size" +#endif + +template <typename ElfArch> +static bool IsElfFile(const int fd, const string &path) { + if (fd < 0) + return false; + if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) { + // No error message here. IsElfFile gets called many times. + return false; + } + return true; +} + +bool ElfReader::IsNativeElfFile() const { + return IsElfFile<NATIVE_ELF_ARCH>(fd_, path_); +} + +bool ElfReader::IsElf32File() const { + return IsElfFile<Elf32>(fd_, path_); +} + +bool ElfReader::IsElf64File() const { + return IsElfFile<Elf64>(fd_, path_); +} + +/* +void ElfReader::AddSymbols(SymbolMap *symbols, + uint64 mem_offset, uint64 file_offset, + uint64 length) { + if (fd_ < 0) + return; + // TODO(chatham): Actually use the information about file offset and + // the length of the mapped section. On some machines the data + // section gets mapped as executable, and we'll end up reading the + // file twice and getting some of the offsets wrong. + if (IsElf32File()) { + GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB, + mem_offset, file_offset); + GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM, + mem_offset, file_offset); + } else if (IsElf64File()) { + GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB, + mem_offset, file_offset); + GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM, + mem_offset, file_offset); + } +} +*/ + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink) { + VisitSymbols(sink, -1, -1); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type) { + VisitSymbols(sink, symbol_binding, symbol_type, false); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type, + bool get_raw_symbol_values) { + if (IsElf32File()) { + GetImpl32()->VisitRelocationEntries(); + GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + } else if (IsElf64File()) { + GetImpl64()->VisitRelocationEntries(); + GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + } +} + +uint64 ElfReader::VaddrOfFirstLoadSegment() { + if (IsElf32File()) { + return GetImpl32()->VaddrOfFirstLoadSegment(); + } else if (IsElf64File()) { + return GetImpl64()->VaddrOfFirstLoadSegment(); + } else { + return 0; + } +} + +const char *ElfReader::GetSectionName(int shndx) { + if (shndx < 0 || static_cast<unsigned int>(shndx) >= GetNumSections()) return NULL; + if (IsElf32File()) { + return GetImpl32()->GetSectionNameByIndex(shndx); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionNameByIndex(shndx); + } else { + return NULL; + } +} + +uint64 ElfReader::GetNumSections() { + if (IsElf32File()) { + return GetImpl32()->GetNumSections(); + } else if (IsElf64File()) { + return GetImpl64()->GetNumSections(); + } else { + return 0; + } +} + +const char *ElfReader::GetSectionByIndex(int shndx, size_t *size) { + if (IsElf32File()) { + return GetImpl32()->GetSectionContentsByIndex(shndx, size); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionContentsByIndex(shndx, size); + } else { + return NULL; + } +} + +const char *ElfReader::GetSectionByName(const string §ion_name, + size_t *size) { + if (IsElf32File()) { + return GetImpl32()->GetSectionContentsByName(section_name, size); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionContentsByName(section_name, size); + } else { + return NULL; + } +} + +const char *ElfReader::GetSectionInfoByName(const string §ion_name, + SectionInfo *info) { + if (IsElf32File()) { + return GetImpl32()->GetSectionInfoByName(section_name, info); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionInfoByName(section_name, info); + } else { + return NULL; + } +} + +bool ElfReader::SectionNamesMatch(const string &name, const string &sh_name) { + if ((name.find(".debug_", 0) == 0) && (sh_name.find(".zdebug_", 0) == 0)) { + const string name_suffix(name, strlen(".debug_")); + const string sh_name_suffix(sh_name, strlen(".zdebug_")); + return name_suffix == sh_name_suffix; + } + return name == sh_name; +} + +bool ElfReader::IsDynamicSharedObject() { + if (IsElf32File()) { + return GetImpl32()->IsDynamicSharedObject(); + } else if (IsElf64File()) { + return GetImpl64()->IsDynamicSharedObject(); + } else { + return false; + } +} + +ElfReaderImpl<Elf32> *ElfReader::GetImpl32() { + if (impl32_ == NULL) { + impl32_ = new ElfReaderImpl<Elf32>(path_, fd_); + } + return impl32_; +} + +ElfReaderImpl<Elf64> *ElfReader::GetImpl64() { + if (impl64_ == NULL) { + impl64_ = new ElfReaderImpl<Elf64>(path_, fd_); + } + return impl64_; +} + +// Return true if file is an ELF binary of ElfArch, with unstripped +// debug info (debug_only=true) or symbol table (debug_only=false). +// Otherwise, return false. +template <typename ElfArch> +static bool IsNonStrippedELFBinaryImpl(const string &path, const int fd, + bool debug_only) { + if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) return false; + ElfReaderImpl<ElfArch> elf_reader(path, fd); + return debug_only ? + elf_reader.HasDebugSections() + : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL); +} + +// Helper for the IsNon[Debug]StrippedELFBinary functions. +static bool IsNonStrippedELFBinaryHelper(const string &path, + bool debug_only) { + const int fd = open(path.c_str(), O_RDONLY); + if (fd == -1) { + return false; + } + + if (IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) || + IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only)) { + close(fd); + return true; + } + close(fd); + return false; +} + +bool ElfReader::IsNonStrippedELFBinary(const string &path) { + return IsNonStrippedELFBinaryHelper(path, false); +} + +bool ElfReader::IsNonDebugStrippedELFBinary(const string &path) { + return IsNonStrippedELFBinaryHelper(path, true); +} +} // namespace dwarf2reader diff --git a/src/common/dwarf/elf_reader.h b/src/common/dwarf/elf_reader.h new file mode 100644 index 00000000..07477341 --- /dev/null +++ b/src/common/dwarf/elf_reader.h @@ -0,0 +1,166 @@ +// Copyright 2005 Google Inc. All Rights Reserved. +// Author: chatham@google.com (Andrew Chatham) +// Author: satorux@google.com (Satoru Takabayashi) +// +// ElfReader handles reading in ELF. It can extract symbols from the +// current process, which may be used to symbolize stack traces +// without having to make a potentially dangerous call to fork(). +// +// ElfReader dynamically allocates memory, so it is not appropriate to +// use once the address space might be corrupted, such as during +// process death. +// +// ElfReader supports both 32-bit and 64-bit ELF binaries. + +#ifndef COMMON_DWARF_ELF_READER_H__ +#define COMMON_DWARF_ELF_READER_H__ + +#include <string> +#include <vector> + +#include "common/dwarf/types.h" + +using std::string; +using std::vector; +using std::pair; + +namespace dwarf2reader { + +class SymbolMap; +class Elf32; +class Elf64; +template<typename ElfArch> +class ElfReaderImpl; + +class ElfReader { + public: + explicit ElfReader(const string &path); + ~ElfReader(); + + // Parse the ELF prologue of this file and return whether it was + // successfully parsed and matches the word size and byte order of + // the current process. + bool IsNativeElfFile() const; + + // Similar to IsNativeElfFile but checks if it's a 32-bit ELF file. + bool IsElf32File() const; + + // Similar to IsNativeElfFile but checks if it's a 64-bit ELF file. + bool IsElf64File() const; + + // Checks if it's an ELF file of type ET_DYN (shared object file). + bool IsDynamicSharedObject(); + + // Add symbols in the given ELF file into the provided SymbolMap, + // assuming that the file has been loaded into the specified + // offset. + // + // The remaining arguments are typically taken from a + // ProcMapsIterator (base/sysinfo.h) and describe which portions of + // the ELF file are mapped into which parts of memory: + // + // mem_offset - position at which the segment is mapped into memory + // file_offset - offset in the file where the mapping begins + // length - length of the mapped segment + void AddSymbols(SymbolMap *symbols, + uint64 mem_offset, uint64 file_offset, + uint64 length); + + class SymbolSink { + public: + virtual ~SymbolSink() {} + virtual void AddSymbol(const char *name, uint64 address, uint64 size) = 0; + }; + + // Like AddSymbols above, but with no address correction. + // Processes any SHT_SYMTAB section, followed by any SHT_DYNSYM section. + void VisitSymbols(SymbolSink *sink); + + // Like VisitSymbols above, but for a specific symbol binding/type. + // A negative value for the binding and type parameters means any + // binding or type. + void VisitSymbols(SymbolSink *sink, int symbol_binding, int symbol_type); + + // Like VisitSymbols above but can optionally export raw symbol values instead + // of adjusted ones. + void VisitSymbols(SymbolSink *sink, int symbol_binding, int symbol_type, + bool get_raw_symbol_values); + + // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD + // segments are present. This is the address an ELF image was linked + // (by static linker) to be loaded at. Usually (but not always) 0 for + // shared libraries and position-independent executables. + uint64 VaddrOfFirstLoadSegment(); + + // Return the name of section "shndx". Returns NULL if the section + // is not found. + const char *GetSectionName(int shndx); + + // Return the number of sections in the given ELF file. + uint64 GetNumSections(); + + // Get section "shndx" from the given ELF file. On success, return + // the pointer to the section and store the size in "size". + // On error, return NULL. The returned section data is only valid + // until the ElfReader gets destroyed. + const char *GetSectionByIndex(int shndx, size_t *size); + + // Get section with "section_name" (ex. ".text", ".symtab") in the + // given ELF file. On success, return the pointer to the section + // and store the size in "size". On error, return NULL. The + // returned section data is only valid until the ElfReader gets + // destroyed. + const char *GetSectionByName(const string §ion_name, size_t *size); + + // This is like GetSectionByName() but it returns a lot of extra information + // about the section. The SectionInfo structure is almost identical to + // the typedef struct Elf64_Shdr defined in <elf.h>, but is redefined + // here so that the many short macro names in <elf.h> don't have to be + // added to our already cluttered namespace. + struct SectionInfo { + uint32 type; // Section type (SHT_xxx constant from elf.h). + uint64 flags; // Section flags (SHF_xxx constants from elf.h). + uint64 addr; // Section virtual address at execution. + uint64 offset; // Section file offset. + uint64 size; // Section size in bytes. + uint32 link; // Link to another section. + uint32 info; // Additional section information. + uint64 addralign; // Section alignment. + uint64 entsize; // Entry size if section holds a table. + }; + const char *GetSectionInfoByName(const string §ion_name, + SectionInfo *info); + + // Check if "path" is an ELF binary that has not been stripped of symbol + // tables. This function supports both 32-bit and 64-bit ELF binaries. + static bool IsNonStrippedELFBinary(const string &path); + + // Check if "path" is an ELF binary that has not been stripped of debug + // info. Unlike IsNonStrippedELFBinary, this function will return + // false for binaries passed through "strip -S". + static bool IsNonDebugStrippedELFBinary(const string &path); + + // Match a requested section name with the section name as it + // appears in the elf-file, adjusting for compressed debug section + // names. For example, returns true if name == ".debug_abbrev" and + // sh_name == ".zdebug_abbrev" + static bool SectionNamesMatch(const string &name, const string &sh_name); + + private: + // Lazily initialize impl32_ and return it. + ElfReaderImpl<Elf32> *GetImpl32(); + // Ditto for impl64_. + ElfReaderImpl<Elf64> *GetImpl64(); + + // Path of the file we're reading. + const string path_; + // Read-only file descriptor for the file. May be -1 if there was an + // error during open. + int fd_; + ElfReaderImpl<Elf32> *impl32_; + ElfReaderImpl<Elf64> *impl64_; +}; + +} // namespace dwarf2reader + +#endif // COMMON_DWARF_ELF_READER_H__ diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 0000add6..6b27120a 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -288,7 +288,8 @@ bool LoadDwarf(const string& dwarf_filename, // Make a Dwarf2Handler that drives the DIEHandler. dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); // Make a DWARF parser for the compilation unit at OFFSET. - dwarf2reader::CompilationUnit reader(file_context.section_map(), + dwarf2reader::CompilationUnit reader(dwarf_filename, + file_context.section_map(), offset, &byte_reader, &die_dispatcher); diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc index 97c3e06e..b20a0558 100644 --- a/src/common/mac/dump_syms.cc +++ b/src/common/mac/dump_syms.cc @@ -437,7 +437,8 @@ bool DumpSymbols::ReadDwarf(google_breakpad::Module *module, // Make a Dwarf2Handler that drives our DIEHandler. dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); // Make a DWARF parser for the compilation unit at OFFSET. - dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map(), + dwarf2reader::CompilationUnit dwarf_reader(selected_object_name_, + file_context.section_map(), offset, &byte_reader, &die_dispatcher); |