aboutsummaryrefslogtreecommitdiff
path: root/src/common/dwarf
diff options
context:
space:
mode:
authorYunlian Jiang <yunlian@google.com>2016-05-04 11:09:44 -0700
committerYunlian Jiang <yunlian@google.com>2016-05-04 11:09:44 -0700
commit764c21f7529df70a19f1e1cb33bb9ece28e0bf8f (patch)
treed71aaba9a7d47419c05ab7af184e117b5a115592 /src/common/dwarf
parentmacho: fix printf type mismatches (diff)
downloadbreakpad-764c21f7529df70a19f1e1cb33bb9ece28e0bf8f.tar.xz
Add debug fission support.
This adds debug fission support. It tries to find the .dwp file in the debug dir /usr/lib/debug/*/debug and reads symbols from it. Most of this patch comes from https://critique.corp.google.com/#review/52048295, plus some fixes made after that. The elf_reader.cc file comes from TOT Google code; I just removed some Google dependencies. Current problems with this patch: 1: Some type mismatches: from uint8_t * to char *. 2: A hack to find the .dwp file (replace .debug with .dwp). BUG=chromium:604440 R=dehao@google.com, ivanpe@chromium.org Review URL: https://codereview.chromium.org/1884283002 .
Diffstat (limited to 'src/common/dwarf')
-rw-r--r--src/common/dwarf/bytereader.cc4
-rw-r--r--src/common/dwarf/bytereader.h1
-rw-r--r--src/common/dwarf/dwarf2enums.h29
-rw-r--r--src/common/dwarf/dwarf2reader.cc436
-rw-r--r--src/common/dwarf/dwarf2reader.h386
-rw-r--r--src/common/dwarf/elf_reader.cc1258
-rw-r--r--src/common/dwarf/elf_reader.h166
7 files changed, 2177 insertions, 103 deletions
diff --git a/src/common/dwarf/bytereader.cc b/src/common/dwarf/bytereader.cc
index 3ccbcad6..14b43adb 100644
--- a/src/common/dwarf/bytereader.cc
+++ b/src/common/dwarf/bytereader.cc
@@ -243,4 +243,8 @@ uint64 ByteReader::ReadEncodedPointer(const uint8_t *buffer,
return pointer;
}
+Endianness ByteReader::GetEndianness() const {
+ return endian_;
+}
+
} // namespace dwarf2reader
diff --git a/src/common/dwarf/bytereader.h b/src/common/dwarf/bytereader.h
index cf583094..59d43034 100644
--- a/src/common/dwarf/bytereader.h
+++ b/src/common/dwarf/bytereader.h
@@ -280,6 +280,7 @@ class ByteReader {
DwarfPointerEncoding encoding,
size_t *len) const;
+ Endianness GetEndianness() const;
private:
// Function pointer type for our address and offset readers.
diff --git a/src/common/dwarf/dwarf2enums.h b/src/common/dwarf/dwarf2enums.h
index 5565d66e..6b8a7245 100644
--- a/src/common/dwarf/dwarf2enums.h
+++ b/src/common/dwarf/dwarf2enums.h
@@ -149,7 +149,10 @@ enum DwarfForm {
DW_FORM_sec_offset = 0x17,
DW_FORM_exprloc = 0x18,
DW_FORM_flag_present = 0x19,
- DW_FORM_ref_sig8 = 0x20
+ DW_FORM_ref_sig8 = 0x20,
+ // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+ DW_FORM_GNU_addr_index = 0x1f01,
+ DW_FORM_GNU_str_index = 0x1f02
};
// Attribute names and codes
@@ -264,6 +267,13 @@ enum DwarfAttribute {
DW_AT_body_begin = 0x2105,
DW_AT_body_end = 0x2106,
DW_AT_GNU_vector = 0x2107,
+ // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+ DW_AT_GNU_dwo_name = 0x2130,
+ DW_AT_GNU_dwo_id = 0x2131,
+ DW_AT_GNU_ranges_base = 0x2132,
+ DW_AT_GNU_addr_base = 0x2133,
+ DW_AT_GNU_pubnames = 0x2134,
+ DW_AT_GNU_pubtypes = 0x2135,
// VMS extensions.
DW_AT_VMS_rtnbeg_pd_address = 0x2201,
// UPC extension.
@@ -491,7 +501,22 @@ enum DwarfOpcode {
DW_OP_lo_user =0xe0,
DW_OP_hi_user =0xff,
// GNU extensions
- DW_OP_GNU_push_tls_address =0xe0
+ DW_OP_GNU_push_tls_address =0xe0,
+ // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+ DW_OP_GNU_addr_index =0xfb,
+ DW_OP_GNU_const_index =0xfc
+};
+
+// Section identifiers for DWP files
+enum DwarfSectionId {
+ DW_SECT_INFO = 1,
+ DW_SECT_TYPES = 2,
+ DW_SECT_ABBREV = 3,
+ DW_SECT_LINE = 4,
+ DW_SECT_LOC = 5,
+ DW_SECT_STR_OFFSETS = 6,
+ DW_SECT_MACINFO = 7,
+ DW_SECT_MACRO = 8
};
// Source languages. These are values for DW_AT_language.
diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc
index fcd29b8c..003ed7b1 100644
--- a/src/common/dwarf/dwarf2reader.cc
+++ b/src/common/dwarf/dwarf2reader.cc
@@ -44,6 +44,8 @@
#include <string>
#include <utility>
+#include <sys/stat.h>
+
#include "common/dwarf/bytereader-inl.h"
#include "common/dwarf/bytereader.h"
#include "common/dwarf/line_state_machine.h"
@@ -51,11 +53,38 @@
namespace dwarf2reader {
-CompilationUnit::CompilationUnit(const SectionMap& sections, uint64 offset,
+CompilationUnit::CompilationUnit(const string& path,
+ const SectionMap& sections, uint64 offset,
ByteReader* reader, Dwarf2Handler* handler)
- : offset_from_section_start_(offset), reader_(reader),
- sections_(sections), handler_(handler), abbrevs_(NULL),
- string_buffer_(NULL), string_buffer_length_(0) {}
+ : path_(path), offset_from_section_start_(offset), reader_(reader),
+ sections_(sections), handler_(handler), abbrevs_(),
+ string_buffer_(NULL), string_buffer_length_(0),
+ str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
+ addr_buffer_(NULL), addr_buffer_length_(0),
+ is_split_dwarf_(false), dwo_id_(0), dwo_name_(),
+ skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
+ have_checked_for_dwp_(false), dwp_path_(),
+ dwp_byte_reader_(), dwp_reader_() {}
+
+// Initialize a compilation unit from a .dwo or .dwp file.
+// In this case, we need the .debug_addr section from the
+// executable file that contains the corresponding skeleton
+// compilation unit. We also inherit the Dwarf2Handler from
+// the executable file, and call it as if we were still
+// processing the original compilation unit.
+
+void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer,
+ uint64 addr_buffer_length,
+ uint64 addr_base,
+ uint64 ranges_base,
+ uint64 dwo_id) {
+ is_split_dwarf_ = true;
+ addr_buffer_ = addr_buffer;
+ addr_buffer_length_ = addr_buffer_length;
+ addr_base_ = addr_base;
+ ranges_base_ = ranges_base;
+ skeleton_dwo_id_ = dwo_id;
+}
// Read a DWARF2/3 abbreviation section.
// Each abbrev consists of a abbreviation number, a tag, a byte
@@ -174,6 +203,8 @@ const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start,
return start + strlen(reinterpret_cast<const char *>(start)) + 1;
case DW_FORM_udata:
case DW_FORM_ref_udata:
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_addr_index:
reader_->ReadUnsignedLEB128(start, &len);
return start + len;
@@ -296,9 +327,31 @@ uint64 CompilationUnit::Start() {
string_buffer_length_ = iter->second.second;
}
+ // Set the string offsets section if we have one.
+ iter = sections_.find(".debug_str_offsets");
+ if (iter != sections_.end()) {
+ str_offsets_buffer_ = iter->second.first;
+ str_offsets_buffer_length_ = iter->second.second;
+ }
+
+ // Set the address section if we have one.
+ iter = sections_.find(".debug_addr");
+ if (iter != sections_.end()) {
+ addr_buffer_ = iter->second.first;
+ addr_buffer_length_ = iter->second.second;
+ }
+
// Now that we have our abbreviations, start processing DIE's.
ProcessDIEs();
+ // If this is a skeleton compilation unit generated with split DWARF,
+ // and the client needs the full debug info, we need to find the full
+ // compilation unit in a .dwo or .dwp file.
+ if (!is_split_dwarf_
+ && dwo_name_ != NULL
+ && handler_->NeedSplitDebugInfo())
+ ProcessSplitDwarf();
+
return ourlength;
}
@@ -320,48 +373,46 @@ const uint8_t *CompilationUnit::ProcessAttribute(
return ProcessAttribute(dieoffset, start, attr, form);
case DW_FORM_flag_present:
- handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 1);
+ ProcessAttributeUnsigned(dieoffset, attr, form, 1);
return start;
case DW_FORM_data1:
case DW_FORM_flag:
- handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
- reader_->ReadOneByte(start));
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadOneByte(start));
return start + 1;
case DW_FORM_data2:
- handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
- reader_->ReadTwoBytes(start));
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadTwoBytes(start));
return start + 2;
case DW_FORM_data4:
- handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
- reader_->ReadFourBytes(start));
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadFourBytes(start));
return start + 4;
case DW_FORM_data8:
- handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
- reader_->ReadEightBytes(start));
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadEightBytes(start));
return start + 8;
case DW_FORM_string: {
const char *str = reinterpret_cast<const char *>(start);
- handler_->ProcessAttributeString(dieoffset, attr, form,
- str);
+ ProcessAttributeString(dieoffset, attr, form, str);
return start + strlen(str) + 1;
}
case DW_FORM_udata:
- handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
- reader_->ReadUnsignedLEB128(start,
- &len));
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadUnsignedLEB128(start, &len));
return start + len;
case DW_FORM_sdata:
- handler_->ProcessAttributeSigned(dieoffset, attr, form,
- reader_->ReadSignedLEB128(start, &len));
+ ProcessAttributeSigned(dieoffset, attr, form,
+ reader_->ReadSignedLEB128(start, &len));
return start + len;
case DW_FORM_addr:
- handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
- reader_->ReadAddress(start));
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadAddress(start));
return start + reader_->AddressSize();
case DW_FORM_sec_offset:
- handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
- reader_->ReadOffset(start));
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadOffset(start));
return start + reader_->OffsetSize();
case DW_FORM_ref1:
@@ -441,10 +492,32 @@ const uint8_t *CompilationUnit::ProcessAttribute(
assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
const char *str = reinterpret_cast<const char *>(string_buffer_ + offset);
- handler_->ProcessAttributeString(dieoffset, attr, form,
- str);
+ ProcessAttributeString(dieoffset, attr, form, str);
return start + reader_->OffsetSize();
}
+
+ case DW_FORM_GNU_str_index: {
+ uint64 str_index = reader_->ReadUnsignedLEB128(start, &len);
+ const uint8_t* offset_ptr =
+ str_offsets_buffer_ + str_index * reader_->OffsetSize();
+ const uint64 offset = reader_->ReadOffset(offset_ptr);
+ if (offset >= string_buffer_length_) {
+ return NULL;
+ }
+
+ const char* str = reinterpret_cast<const char *>(string_buffer_) + offset;
+ ProcessAttributeString(dieoffset, attr, form, str);
+ return start + len;
+ break;
+ }
+ case DW_FORM_GNU_addr_index: {
+ uint64 addr_index = reader_->ReadUnsignedLEB128(start, &len);
+ const uint8_t* addr_ptr =
+ addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize();
+ ProcessAttributeUnsigned(dieoffset, attr, form,
+ reader_->ReadAddress(addr_ptr));
+ return start + len;
+ }
}
fprintf(stderr, "Unhandled form type\n");
return NULL;
@@ -458,6 +531,16 @@ const uint8_t *CompilationUnit::ProcessDIE(uint64 dieoffset,
i++) {
start = ProcessAttribute(dieoffset, start, i->first, i->second);
}
+
+ // If this is a compilation unit in a split DWARF object, verify that
+ // the dwo_id matches. If it does not match, we will ignore this
+ // compilation unit.
+ if (abbrev.tag == DW_TAG_compile_unit
+ && is_split_dwarf_
+ && dwo_id_ != skeleton_dwo_id_) {
+ return NULL;
+ }
+
return start;
}
@@ -515,6 +598,307 @@ void CompilationUnit::ProcessDIEs() {
}
}
+// Check for a valid ELF file and return the Address size.
+// Returns 0 if not a valid ELF file.
+inline int GetElfWidth(const ElfReader& elf) {
+ if (elf.IsElf32File())
+ return 4;
+ if (elf.IsElf64File())
+ return 8;
+ return 0;
+}
+
+void CompilationUnit::ProcessSplitDwarf() {
+ struct stat statbuf;
+ if (!have_checked_for_dwp_) {
+ // Look for a .dwp file in the same directory as the executable.
+ have_checked_for_dwp_ = true;
+ string dwp_suffix(".dwp");
+ dwp_path_ = path_ + dwp_suffix;
+ if (stat(dwp_path_.c_str(), &statbuf) != 0) {
+ // Fall back to a split .debug file in the same directory.
+ string debug_suffix(".debug");
+ dwp_path_ = path_;
+ size_t found = path_.rfind(debug_suffix);
+ if (found + debug_suffix.length() == path_.length())
+ dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix);
+ }
+ if (stat(dwp_path_.c_str(), &statbuf) == 0) {
+ ElfReader* elf = new ElfReader(dwp_path_);
+ int width = GetElfWidth(*elf);
+ if (width != 0) {
+ dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness()));
+ dwp_byte_reader_->SetAddressSize(width);
+ dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf));
+ dwp_reader_->Initialize();
+ } else {
+ delete elf;
+ }
+ }
+ }
+ bool found_in_dwp = false;
+ if (dwp_reader_ != NULL) {
+ // If we have a .dwp file, read the debug sections for the requested CU.
+ SectionMap sections;
+ dwp_reader_->ReadDebugSectionsForCU(dwo_id_, &sections);
+ if (!sections.empty()) {
+ found_in_dwp = true;
+ CompilationUnit dwp_comp_unit(dwp_path_, sections, 0,
+ dwp_byte_reader_.get(), handler_);
+ dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_,
+ ranges_base_, dwo_id_);
+ dwp_comp_unit.Start();
+ }
+ }
+ if (!found_in_dwp) {
+ // If no .dwp file, try to open the .dwo file.
+ if (stat(dwo_name_, &statbuf) == 0) {
+ ElfReader elf(dwo_name_);
+ int width = GetElfWidth(elf);
+ if (width != 0) {
+ ByteReader reader(ENDIANNESS_LITTLE);
+ reader.SetAddressSize(width);
+ SectionMap sections;
+ ReadDebugSectionsFromDwo(&elf, &sections);
+ CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader,
+ handler_);
+ dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_,
+ addr_base_, ranges_base_, dwo_id_);
+ dwo_comp_unit.Start();
+ }
+ }
+ }
+}
+
+void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
+ SectionMap* sections) {
+ static const char* const section_names[] = {
+ ".debug_abbrev",
+ ".debug_info",
+ ".debug_str_offsets",
+ ".debug_str"
+ };
+ for (unsigned int i = 0u;
+ i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
+ string base_name = section_names[i];
+ string dwo_name = base_name + ".dwo";
+ size_t section_size;
+ const char* section_data = elf_reader->GetSectionByName(dwo_name,
+ &section_size);
+ if (section_data != NULL)
+ sections->insert(std::make_pair(
+ base_name, std::make_pair(
+ reinterpret_cast<const uint8_t *>(section_data),
+ section_size)));
+ }
+}
+
+DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
+ : elf_reader_(elf_reader), byte_reader_(byte_reader),
+ cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL),
+ string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0),
+ nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
+ offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
+ abbrev_size_(0), info_data_(NULL), info_size_(0),
+ str_offsets_data_(NULL), str_offsets_size_(0) {}
+
+DwpReader::~DwpReader() {
+ if (elf_reader_) delete elf_reader_;
+}
+
+void DwpReader::Initialize() {
+ cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
+ &cu_index_size_);
+ if (cu_index_ == NULL) {
+ return;
+ }
+ // The .debug_str.dwo section is shared by all CUs in the file.
+ string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
+ &string_buffer_size_);
+
+ version_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(cu_index_));
+
+ if (version_ == 1) {
+ nslots_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(cu_index_)
+ + 3 * sizeof(uint32));
+ phash_ = cu_index_ + 4 * sizeof(uint32);
+ pindex_ = phash_ + nslots_ * sizeof(uint64);
+ shndx_pool_ = pindex_ + nslots_ * sizeof(uint32);
+ if (shndx_pool_ >= cu_index_ + cu_index_size_) {
+ version_ = 0;
+ }
+ } else if (version_ == 2) {
+ ncolumns_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(cu_index_) + sizeof(uint32));
+ nunits_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(cu_index_) + 2 * sizeof(uint32));
+ nslots_ = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(cu_index_) + 3 * sizeof(uint32));
+ phash_ = cu_index_ + 4 * sizeof(uint32);
+ pindex_ = phash_ + nslots_ * sizeof(uint64);
+ offset_table_ = pindex_ + nslots_ * sizeof(uint32);
+ size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32);
+ abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
+ &abbrev_size_);
+ info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
+ str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
+ &str_offsets_size_);
+ if (size_table_ >= cu_index_ + cu_index_size_) {
+ version_ = 0;
+ }
+ }
+}
+
+void DwpReader::ReadDebugSectionsForCU(uint64 dwo_id,
+ SectionMap* sections) {
+ if (version_ == 1) {
+ int slot = LookupCU(dwo_id);
+ if (slot == -1) {
+ return;
+ }
+
+ // The index table points to the section index pool, where we
+ // can read a list of section indexes for the debug sections
+ // for the CU whose dwo_id we are looking for.
+ int index = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(pindex_)
+ + slot * sizeof(uint32));
+ const char* shndx_list = shndx_pool_ + index * sizeof(uint32);
+ for (;;) {
+ if (shndx_list >= cu_index_ + cu_index_size_) {
+ version_ = 0;
+ return;
+ }
+ unsigned int shndx = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(shndx_list));
+ shndx_list += sizeof(uint32);
+ if (shndx == 0)
+ break;
+ const char* section_name = elf_reader_->GetSectionName(shndx);
+ size_t section_size;
+ const char* section_data;
+ // We're only interested in these four debug sections.
+ // The section names in the .dwo file end with ".dwo", but we
+ // add them to the sections table with their normal names.
+ if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
+ section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+ sections->insert(std::make_pair(
+ ".debug_abbrev",
+ std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
+ section_size)));
+ } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
+ section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+ sections->insert(std::make_pair(
+ ".debug_info",
+ std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
+ section_size)));
+ } else if (!strncmp(section_name, ".debug_str_offsets",
+ strlen(".debug_str_offsets"))) {
+ section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+ sections->insert(std::make_pair(
+ ".debug_str_offsets",
+ std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
+ section_size)));
+ }
+ }
+ sections->insert(std::make_pair(
+ ".debug_str",
+ std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
+ string_buffer_size_)));
+ } else if (version_ == 2) {
+ uint32 index = LookupCUv2(dwo_id);
+ if (index == 0) {
+ return;
+ }
+
+ // The index points to a row in each of the section offsets table
+ // and the section size table, where we can read the offsets and sizes
+ // of the contributions to each debug section from the CU whose dwo_id
+ // we are looking for. Row 0 of the section offsets table has the
+ // section ids for each column of the table. The size table begins
+ // with row 1.
+ const char* id_row = offset_table_;
+ const char* offset_row = offset_table_
+ + index * ncolumns_ * sizeof(uint32);
+ const char* size_row =
+ size_table_ + (index - 1) * ncolumns_ * sizeof(uint32);
+ if (size_row + ncolumns_ * sizeof(uint32) > cu_index_ + cu_index_size_) {
+ version_ = 0;
+ return;
+ }
+ for (unsigned int col = 0u; col < ncolumns_; ++col) {
+ uint32 section_id =
+ byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t *>(id_row)
+ + col * sizeof(uint32));
+ uint32 offset = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(offset_row)
+ + col * sizeof(uint32));
+ uint32 size = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(size_row) + col * sizeof(uint32));
+ if (section_id == DW_SECT_ABBREV) {
+ sections->insert(std::make_pair(
+ ".debug_abbrev",
+ std::make_pair(reinterpret_cast<const uint8_t *> (abbrev_data_)
+ + offset, size)));
+ } else if (section_id == DW_SECT_INFO) {
+ sections->insert(std::make_pair(
+ ".debug_info",
+ std::make_pair(reinterpret_cast<const uint8_t *> (info_data_)
+ + offset, size)));
+ } else if (section_id == DW_SECT_STR_OFFSETS) {
+ sections->insert(std::make_pair(
+ ".debug_str_offsets",
+ std::make_pair(reinterpret_cast<const uint8_t *> (str_offsets_data_)
+ + offset, size)));
+ }
+ }
+ sections->insert(std::make_pair(
+ ".debug_str",
+ std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
+ string_buffer_size_)));
+ }
+}
+
+int DwpReader::LookupCU(uint64 dwo_id) {
+ uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1);
+ uint64 probe = byte_reader_.ReadEightBytes(
+ reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
+ if (probe != 0 && probe != dwo_id) {
+ uint32 secondary_hash =
+ (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1;
+ do {
+ slot = (slot + secondary_hash) & (nslots_ - 1);
+ probe = byte_reader_.ReadEightBytes(
+ reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
+ } while (probe != 0 && probe != dwo_id);
+ }
+ if (probe == 0)
+ return -1;
+ return slot;
+}
+
+uint32 DwpReader::LookupCUv2(uint64 dwo_id) {
+ uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1);
+ uint64 probe = byte_reader_.ReadEightBytes(
+ reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
+ uint32 index = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32));
+ if (index != 0 && probe != dwo_id) {
+ uint32 secondary_hash =
+ (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1;
+ do {
+ slot = (slot + secondary_hash) & (nslots_ - 1);
+ probe = byte_reader_.ReadEightBytes(
+ reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
+ index = byte_reader_.ReadFourBytes(
+ reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32));
+ } while (index != 0 && probe != dwo_id);
+ }
+ return index;
+}
+
LineInfo::LineInfo(const uint8_t *buffer, uint64 buffer_length,
ByteReader* reader, LineInfoHandler* handler):
handler_(handler), reader_(reader), buffer_(buffer) {
diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h
index 1f5b37a0..064c42bc 100644
--- a/src/common/dwarf/dwarf2reader.h
+++ b/src/common/dwarf/dwarf2reader.h
@@ -47,16 +47,19 @@
#include <string>
#include <utility>
#include <vector>
+#include <memory>
#include "common/dwarf/bytereader.h"
#include "common/dwarf/dwarf2enums.h"
#include "common/dwarf/types.h"
#include "common/using_std_string.h"
+#include "common/dwarf/elf_reader.h"
namespace dwarf2reader {
struct LineStateMachine;
class Dwarf2Handler;
class LineInfoHandler;
+class DwpReader;
// This maps from a string naming a section to a pair containing a
// the data for the section, and the size of the section.
@@ -184,6 +187,106 @@ class LineInfoHandler {
uint32 file_num, uint32 line_num, uint32 column_num) { }
};
+// This class is the main interface between the reader and the
+// client. The virtual functions inside this get called for
+// interesting events that happen during DWARF2 reading.
+// The default implementation skips everything.
+class Dwarf2Handler {
+ public:
+ Dwarf2Handler() { }
+
+ virtual ~Dwarf2Handler() { }
+
+ // Start to process a compilation unit at OFFSET from the beginning of the
+ // .debug_info section. Return false if you would like to skip this
+ // compilation unit.
+ virtual bool StartCompilationUnit(uint64 offset, uint8 address_size,
+ uint8 offset_size, uint64 cu_length,
+ uint8 dwarf_version) { return false; }
+
+ // When processing a skeleton compilation unit, resulting from a split
+ // DWARF compilation, once the skeleton debug info has been read,
+ // the reader will call this function to ask the client if it needs
+ // the full debug info from the .dwo or .dwp file. Return true if
+ // you need it, or false to skip processing the split debug info.
+ virtual bool NeedSplitDebugInfo() { return true; }
+
+ // Start to process a split compilation unit at OFFSET from the beginning of
+ // the debug_info section in the .dwp/.dwo file. Return false if you would
+ // like to skip this compilation unit.
+ virtual bool StartSplitCompilationUnit(uint64 offset,
+ uint64 cu_length) { return false; }
+
+ // Start to process a DIE at OFFSET from the beginning of the .debug_info
+ // section. Return false if you would like to skip this DIE.
+ virtual bool StartDIE(uint64 offset, enum DwarfTag tag) { return false; }
+
+ // Called when we have an attribute with unsigned data to give to our
+ // handler. The attribute is for the DIE at OFFSET from the beginning of the
+ // .debug_info section. Its name is ATTR, its form is FORM, and its value is
+ // DATA.
+ virtual void ProcessAttributeUnsigned(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64 data) { }
+
+ // Called when we have an attribute with signed data to give to our handler.
+ // The attribute is for the DIE at OFFSET from the beginning of the
+ // .debug_info section. Its name is ATTR, its form is FORM, and its value is
+ // DATA.
+ virtual void ProcessAttributeSigned(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ int64 data) { }
+
+ // Called when we have an attribute whose value is a reference to
+ // another DIE. The attribute belongs to the DIE at OFFSET from the
+ // beginning of the .debug_info section. Its name is ATTR, its form
+ // is FORM, and the offset of the DIE being referred to from the
+ // beginning of the .debug_info section is DATA.
+ virtual void ProcessAttributeReference(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64 data) { }
+
+ // Called when we have an attribute with a buffer of data to give to our
+ // handler. The attribute is for the DIE at OFFSET from the beginning of the
+ // .debug_info section. Its name is ATTR, its form is FORM, DATA points to
+ // the buffer's contents, and its length in bytes is LENGTH. The buffer is
+ // owned by the caller, not the callee, and may not persist for very long.
+ // If you want the data to be available later, it needs to be copied.
+ virtual void ProcessAttributeBuffer(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const uint8_t *data,
+ uint64 len) { }
+
+ // Called when we have an attribute with string data to give to our handler.
+ // The attribute is for the DIE at OFFSET from the beginning of the
+ // .debug_info section. Its name is ATTR, its form is FORM, and its value is
+ // DATA.
+ virtual void ProcessAttributeString(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const string& data) { }
+
+ // Called when we have an attribute whose value is the 64-bit signature
+ // of a type unit in the .debug_types section. OFFSET is the offset of
+ // the DIE whose attribute we're reporting. ATTR and FORM are the
+ // attribute's name and form. SIGNATURE is the type unit's signature.
+ virtual void ProcessAttributeSignature(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64 signature) { }
+
+ // Called when finished processing the DIE at OFFSET.
+ // Because DWARF2/3 specifies a tree of DIEs, you may get starts
+ // before ends of the previous DIE, as we process children before
+ // ending the parent.
+ virtual void EndDIE(uint64 offset) { }
+
+};
+
// The base of DWARF2/3 debug info is a DIE (Debugging Information
// Entry.
// DWARF groups DIE's into a tree and calls the root of this tree a
@@ -225,12 +328,21 @@ class CompilationUnit {
// Initialize a compilation unit. This requires a map of sections,
// the offset of this compilation unit in the .debug_info section, a
// ByteReader, and a Dwarf2Handler class to call callbacks in.
- CompilationUnit(const SectionMap& sections, uint64 offset,
+ CompilationUnit(const string& path, const SectionMap& sections, uint64 offset,
ByteReader* reader, Dwarf2Handler* handler);
virtual ~CompilationUnit() {
if (abbrevs_) delete abbrevs_;
}
+ // Initialize a compilation unit from a .dwo or .dwp file.
+ // In this case, we need the .debug_addr section from the
+ // executable file that contains the corresponding skeleton
+ // compilation unit. We also inherit the Dwarf2Handler from
+ // the executable file, and call it as if we were still
+ // processing the original compilation unit.
+ void SetSplitDwarf(const uint8_t* addr_buffer, uint64 addr_buffer_length,
+ uint64 addr_base, uint64 ranges_base, uint64 dwo_id);
+
// Begin reading a Dwarf2 compilation unit, and calling the
// callbacks in the Dwarf2Handler
@@ -281,6 +393,73 @@ class CompilationUnit {
enum DwarfAttribute attr,
enum DwarfForm form);
+ // Called when we have an attribute with unsigned data to give to
+ // our handler. The attribute is for the DIE at OFFSET from the
+ // beginning of compilation unit, has a name of ATTR, a form of
+ // FORM, and the actual data of the attribute is in DATA.
+ // If we see a DW_AT_GNU_dwo_id attribute, save the value so that
+ // we can find the debug info in a .dwo or .dwp file.
+ void ProcessAttributeUnsigned(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ uint64 data) {
+ if (attr == DW_AT_GNU_dwo_id) {
+ dwo_id_ = data;
+ }
+ else if (attr == DW_AT_GNU_addr_base) {
+ addr_base_ = data;
+ }
+ else if (attr == DW_AT_GNU_ranges_base) {
+ ranges_base_ = data;
+ }
+ // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5,
+ // that base will apply to DW_AT_ranges attributes in the
+ // skeleton CU as well as in the .dwo/.dwp files.
+ else if (attr == DW_AT_ranges && is_split_dwarf_) {
+ data += ranges_base_;
+ }
+ handler_->ProcessAttributeUnsigned(offset, attr, form, data);
+ }
+
+ // Called when we have an attribute with signed data to give to
+ // our handler. The attribute is for the DIE at OFFSET from the
+ // beginning of compilation unit, has a name of ATTR, a form of
+ // FORM, and the actual data of the attribute is in DATA.
+ void ProcessAttributeSigned(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ int64 data) {
+ handler_->ProcessAttributeSigned(offset, attr, form, data);
+ }
+
+ // Called when we have an attribute with a buffer of data to give to
+ // our handler. The attribute is for the DIE at OFFSET from the
+ // beginning of compilation unit, has a name of ATTR, a form of
+ // FORM, and the actual data of the attribute is in DATA, and the
+ // length of the buffer is LENGTH.
+ void ProcessAttributeBuffer(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const uint8_t* data,
+ uint64 len) {
+ handler_->ProcessAttributeBuffer(offset, attr, form, data, len);
+ }
+
+ // Called when we have an attribute with string data to give to
+ // our handler. The attribute is for the DIE at OFFSET from the
+ // beginning of compilation unit, has a name of ATTR, a form of
+ // FORM, and the actual data of the attribute is in DATA.
+ // If we see a DW_AT_GNU_dwo_name attribute, save the value so
+ // that we can find the debug info in a .dwo or .dwp file.
+ void ProcessAttributeString(uint64 offset,
+ enum DwarfAttribute attr,
+ enum DwarfForm form,
+ const char* data) {
+ if (attr == DW_AT_GNU_dwo_name)
+ dwo_name_ = data;
+ handler_->ProcessAttributeString(offset, attr, form, data);
+ }
+
// Processes all DIEs for this compilation unit
void ProcessDIEs();
@@ -292,6 +471,16 @@ class CompilationUnit {
// new place to position the stream to.
const uint8_t *SkipAttribute(const uint8_t *start, enum DwarfForm form);
+ // Process the actual debug information in a split DWARF file.
+ void ProcessSplitDwarf();
+
+ // Read the debug sections from a .dwo file.
+ void ReadDebugSectionsFromDwo(ElfReader* elf_reader,
+ SectionMap* sections);
+
+ // Path of the file containing the debug information.
+ const string path_;
+
// Offset from section start is the offset of this compilation unit
// from the beginning of the .debug_info section.
uint64 offset_from_section_start_;
@@ -322,94 +511,141 @@ class CompilationUnit {
// ProcessAttribute, which is in the hot path for DWARF2 reading.
const uint8_t *string_buffer_;
uint64 string_buffer_length_;
-};
-// This class is the main interface between the reader and the
-// client. The virtual functions inside this get called for
-// interesting events that happen during DWARF2 reading.
-// The default implementation skips everything.
+ // String offsets section buffer and length, if we have a string offsets
+ // section (.debug_str_offsets or .debug_str_offsets.dwo).
+ const uint8_t* str_offsets_buffer_;
+ uint64 str_offsets_buffer_length_;
-class Dwarf2Handler {
+ // Address section buffer and length, if we have an address section
+ // (.debug_addr).
+ const uint8_t* addr_buffer_;
+ uint64 addr_buffer_length_;
+
+ // Flag indicating whether this compilation unit is part of a .dwo
+ // or .dwp file. If true, we are reading this unit because a
+ // skeleton compilation unit in an executable file had a
+ // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute.
+ // In a .dwo file, we expect the string offsets section to
+ // have a ".dwo" suffix, and we will use the ".debug_addr" section
+ // associated with the skeleton compilation unit.
+ bool is_split_dwarf_;
+
+ // The value of the DW_AT_GNU_dwo_id attribute, if any.
+ uint64 dwo_id_;
+
+ // The value of the DW_AT_GNU_dwo_name attribute, if any.
+ const char* dwo_name_;
+
+ // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute
+ // from the skeleton CU.
+ uint64 skeleton_dwo_id_;
+
+ // The value of the DW_AT_GNU_ranges_base attribute, if any.
+ uint64 ranges_base_;
+
+ // The value of the DW_AT_GNU_addr_base attribute, if any.
+ uint64 addr_base_;
+
+ // True if we have already looked for a .dwp file.
+ bool have_checked_for_dwp_;
+
+ // Path to the .dwp file.
+ string dwp_path_;
+
+ // ByteReader for the DWP file.
+ std::unique_ptr<ByteReader> dwp_byte_reader_;
+
+ // DWP reader.
+ std::unique_ptr<DwpReader> dwp_reader_;
+};
+
+// A Reader for a .dwp file. Supports the fetching of DWARF debug
+// info for a given dwo_id.
+//
+// There are two versions of .dwp files. In both versions, the
+// .dwp file is an ELF file containing only debug sections.
+// In Version 1, the file contains many copies of each debug
+// section, one for each .dwo file that is packaged in the .dwp
+// file, and the .debug_cu_index section maps from the dwo_id
+// to a set of section indexes. In Version 2, the file contains
+// one of each debug section, and the .debug_cu_index section
+// maps from the dwo_id to a set of offsets and lengths that
+// identify each .dwo file's contribution to the larger sections.
+
+class DwpReader {
public:
- Dwarf2Handler() { }
+ // NOTE(review): byte_reader_ below is a reference member and
+ // elf_reader_ a raw pointer, so presumably both arguments must
+ // outlive this object -- confirm against the constructor definition.
+ DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader);
- virtual ~Dwarf2Handler() { }
+ ~DwpReader();
- // Start to process a compilation unit at OFFSET from the beginning of the
- // .debug_info section. Return false if you would like to skip this
- // compilation unit.
- virtual bool StartCompilationUnit(uint64 offset, uint8 address_size,
- uint8 offset_size, uint64 cu_length,
- uint8 dwarf_version) { return false; }
+ // Read the CU index and initialize data members.
+ void Initialize();
- // Start to process a DIE at OFFSET from the beginning of the .debug_info
- // section. Return false if you would like to skip this DIE.
- virtual bool StartDIE(uint64 offset, enum DwarfTag tag) { return false; }
+ // Read the debug sections for the given dwo_id.
+ void ReadDebugSectionsForCU(uint64 dwo_id, SectionMap* sections);
- // Called when we have an attribute with unsigned data to give to our
- // handler. The attribute is for the DIE at OFFSET from the beginning of the
- // .debug_info section. Its name is ATTR, its form is FORM, and its value is
- // DATA.
- virtual void ProcessAttributeUnsigned(uint64 offset,
- enum DwarfAttribute attr,
- enum DwarfForm form,
- uint64 data) { }
+ private:
+ // Search a v1 hash table for "dwo_id". Returns the slot index
+ // where the dwo_id was found, or -1 if it was not found.
+ int LookupCU(uint64 dwo_id);
- // Called when we have an attribute with signed data to give to our handler.
- // The attribute is for the DIE at OFFSET from the beginning of the
- // .debug_info section. Its name is ATTR, its form is FORM, and its value is
- // DATA.
- virtual void ProcessAttributeSigned(uint64 offset,
- enum DwarfAttribute attr,
- enum DwarfForm form,
- int64 data) { }
+ // Search a v2 hash table for "dwo_id". Returns the row index
+ // in the offsets and sizes tables, or 0 if it was not found.
+ // Note that the not-found value differs from LookupCU() above,
+ // which uses -1.
+ uint32 LookupCUv2(uint64 dwo_id);
- // Called when we have an attribute whose value is a reference to
- // another DIE. The attribute belongs to the DIE at OFFSET from the
- // beginning of the .debug_info section. Its name is ATTR, its form
- // is FORM, and the offset of the DIE being referred to from the
- // beginning of the .debug_info section is DATA.
- virtual void ProcessAttributeReference(uint64 offset,
- enum DwarfAttribute attr,
- enum DwarfForm form,
- uint64 data) { }
+ // The ELF reader for the .dwp file.
+ ElfReader* elf_reader_;
- // Called when we have an attribute with a buffer of data to give to our
- // handler. The attribute is for the DIE at OFFSET from the beginning of the
- // .debug_info section. Its name is ATTR, its form is FORM, DATA points to
- // the buffer's contents, and its length in bytes is LENGTH. The buffer is
- // owned by the caller, not the callee, and may not persist for very long.
- // If you want the data to be available later, it needs to be copied.
- virtual void ProcessAttributeBuffer(uint64 offset,
- enum DwarfAttribute attr,
- enum DwarfForm form,
- const uint8_t *data,
- uint64 len) { }
+ // The ByteReader for the .dwp file.
+ const ByteReader& byte_reader_;
- // Called when we have an attribute with string data to give to our handler.
- // The attribute is for the DIE at OFFSET from the beginning of the
- // .debug_info section. Its name is ATTR, its form is FORM, and its value is
- // DATA.
- virtual void ProcessAttributeString(uint64 offset,
- enum DwarfAttribute attr,
- enum DwarfForm form,
- const string& data) { }
+ // Pointer to the .debug_cu_index section.
+ const char* cu_index_;
- // Called when we have an attribute whose value is the 64-bit signature
- // of a type unit in the .debug_types section. OFFSET is the offset of
- // the DIE whose attribute we're reporting. ATTR and FORM are the
- // attribute's name and form. SIGNATURE is the type unit's signature.
- virtual void ProcessAttributeSignature(uint64 offset,
- enum DwarfAttribute attr,
- enum DwarfForm form,
- uint64 signature) { }
+ // Size of the .debug_cu_index section.
+ size_t cu_index_size_;
- // Called when finished processing the DIE at OFFSET.
- // Because DWARF2/3 specifies a tree of DIEs, you may get starts
- // before ends of the previous DIE, as we process children before
- // ending the parent.
- virtual void EndDIE(uint64 offset) { }
+ // Pointer to the .debug_str.dwo section.
+ const char* string_buffer_;
+
+ // Size of the .debug_str.dwo section.
+ size_t string_buffer_size_;
+
+ // Version of the .dwp file. We support versions 1 and 2 currently.
+ int version_;
+
+ // Number of columns in the section tables (version 2).
+ unsigned int ncolumns_;
+
+ // Number of units in the section tables (version 2).
+ unsigned int nunits_;
+
+ // Number of slots in the hash table.
+ unsigned int nslots_;
+
+ // Pointer to the beginning of the hash table.
+ const char* phash_;
+
+ // Pointer to the beginning of the index table.
+ const char* pindex_;
+
+ // Pointer to the beginning of the section index pool (version 1).
+ const char* shndx_pool_;
+
+ // Pointer to the beginning of the section offset table (version 2).
+ const char* offset_table_;
+
+ // Pointer to the beginning of the section size table (version 2).
+ const char* size_table_;
+ // Contents of the sections of interest (version 2).
+ const char* abbrev_data_;
+ size_t abbrev_size_;
+ const char* info_data_;
+ size_t info_size_;
+ const char* str_offsets_data_;
+ size_t str_offsets_size_;
};
// This class is a reader for DWARF's Call Frame Information. CFI
diff --git a/src/common/dwarf/elf_reader.cc b/src/common/dwarf/elf_reader.cc
new file mode 100644
index 00000000..81683141
--- /dev/null
+++ b/src/common/dwarf/elf_reader.cc
@@ -0,0 +1,1258 @@
+// Copyright 2005 Google Inc. All Rights Reserved.
+// Author: chatham@google.com (Andrew Chatham)
+// Author: satorux@google.com (Satoru Takabayashi)
+//
+// Code for reading in ELF files.
+//
+// For information on the ELF format, see
+// http://www.x86.org/ftp/manuals/tools/elf.pdf
+//
+// I also liked:
+// http://www.caldera.com/developers/gabi/1998-04-29/contents.html
+//
+// A note about types: When dealing with the file format, we use types
+// like Elf32_Word, but in the public interfaces we treat all
+// addresses as uint64. As a result, we should be able to symbolize
+// 64-bit binaries from a 32-bit process (which we don't do,
+// anyway). size_t should therefore be avoided, except where required
+// by things like mmap().
+//
+// Although most of this code can deal with arbitrary ELF files of
+// either word size, the public ElfReader interface only examines
+// files loaded into the current address space, which must all match
+// __WORDSIZE. This code cannot handle ELF files with a non-native
+// byte ordering.
+//
+// TODO(chatham): It would be nice if we could accomplish this task
+// without using malloc(), so we could use it as the process is dying.
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE // needed for pread()
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <elf.h>
+#include <string.h>
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+#include "zlib.h"
+
+#include "elf_reader.h"
+//#include "using_std_string.h"
+// EM_AARCH64 is not defined by elf.h of GRTE v3 on x86.
+// TODO(dougkwan): Remove this when v17 is retired.
+#if !defined(EM_AARCH64)
+#define EM_AARCH64 183 /* ARM AARCH64 */
+#endif
+
+// TODO(dthomson): Can be removed once all Java code is using the Google3
+// launcher. We need to avoid processing PLT functions as it causes memory
+// fragmentation in malloc, which is fixed in tcmalloc - and if the Google3
+// launcher is used the JVM will then use tcmalloc. b/13735638
+//DEFINE_bool(elfreader_process_dynsyms, true,
+// "Activate PLT function processing");
+
+using std::string;
+using std::vector;
+
+namespace {
+
+// Position, within an ARM symbol value, of the bit that marks a Thumb
+// address (the least significant bit).
+const int kARMThumbBitOffset = 0;
+
+// Returns SYMBOL_TABLE_VALUE with the Thumb marker bit cleared, i.e. the
+// aligned address the symbol actually refers to.
+template <typename T>
+T AdjustARMThumbSymbolValue(const T& symbol_table_value) {
+  const T thumb_flag = static_cast<T>(1) << kARMThumbBitOffset;
+  return symbol_table_value & ~thumb_flag;
+}
+
+// Section names involved in PLT processing.
+const char kElfPLTRelSectionName[] = ".rel.plt";      // Entries are Rel.
+const char kElfPLTRelaSectionName[] = ".rela.plt";    // Entries are Rela.
+const char kElfPLTSectionName[] = ".plt";
+const char kElfDynSymSectionName[] = ".dynsym";
+
+// Size in bytes of a single PLT function, per architecture.
+const int kX86PLTCodeSize = 0x10;
+const int kARMPLTCodeSize = 0xc;
+const int kAARCH64PLTCodeSize = 0x10;
+
+// Size in bytes of the special PLT0 entry, per architecture.
+const int kX86PLT0Size = 0x10;
+const int kARMPLT0Size = 0x14;
+const int kAARCH64PLT0Size = 0x20;
+
+// Appended to a symbol name when the symbol must be explicitly identified
+// as a PLT function.
+const char kPLTFunctionSuffix[] = "@plt";
+
+}  // namespace
+
+namespace dwarf2reader {
+
+template <class ElfArch> class ElfReaderImpl;
+
+// 32-bit and 64-bit ELF files are processed exactly the same, except
+// for various field sizes. Elf32 and Elf64 encompass all of the
+// differences between the two formats, and all format-specific code
+// in this file is templated on one of them.
+class Elf32 {
+ public:
+  // The 32-bit flavours of the ELF structures used by the templated code.
+  using Ehdr = Elf32_Ehdr;
+  using Shdr = Elf32_Shdr;
+  using Phdr = Elf32_Phdr;
+  using Word = Elf32_Word;
+  using Sym = Elf32_Sym;
+  using Rel = Elf32_Rel;
+  using Rela = Elf32_Rela;
+
+  // Value expected in the EI_CLASS byte of the ELF identification.
+  static const int kElfClass = ELFCLASS32;
+
+  // Binding (eg STB_WEAK) encoded in the st_info field of SYM.
+  static char Bind(const Elf32_Sym *sym) {
+    const unsigned char info = sym->st_info;
+    return ELF32_ST_BIND(info);
+  }
+
+  // Symbol type (eg STT_FUNC) encoded in the st_info field of SYM.
+  static char Type(const Elf32_Sym *sym) {
+    const unsigned char info = sym->st_info;
+    return ELF32_ST_TYPE(info);
+  }
+
+  // Symbol index encoded in the r_info field of a relocation entry.
+  static int r_sym(const Elf32_Word r_info) {
+    return ELF32_R_SYM(r_info);
+  }
+};
+
+
+class Elf64 {
+ public:
+  // The 64-bit flavours of the ELF structures used by the templated code.
+  using Ehdr = Elf64_Ehdr;
+  using Shdr = Elf64_Shdr;
+  using Phdr = Elf64_Phdr;
+  using Word = Elf64_Word;
+  using Sym = Elf64_Sym;
+  using Rel = Elf64_Rel;
+  using Rela = Elf64_Rela;
+
+  // Value expected in the EI_CLASS byte of the ELF identification.
+  static const int kElfClass = ELFCLASS64;
+
+  // Binding (eg STB_WEAK) encoded in the st_info field of SYM.
+  static char Bind(const Elf64_Sym *sym) {
+    const unsigned char info = sym->st_info;
+    return ELF64_ST_BIND(info);
+  }
+
+  // Symbol type (eg STT_FUNC) encoded in the st_info field of SYM.
+  static char Type(const Elf64_Sym *sym) {
+    const unsigned char info = sym->st_info;
+    return ELF64_ST_TYPE(info);
+  }
+
+  // Symbol index encoded in the r_info field of a relocation entry.
+  static int r_sym(const Elf64_Xword r_info) {
+    return ELF64_R_SYM(r_info);
+  }
+};
+
+
+// ElfSectionReader mmaps a section of an ELF file ("section" is ELF
+// terminology). The ElfReaderImpl object providing the section header
+// must exist for the lifetime of this object.
+//
+// The motivation for mmaping individual sections of the file is that
+// many Google executables are large enough when unstripped that we
+// have to worry about running out of virtual address space.
+//
+// For compressed sections we have no choice but to allocate memory.
+template<class ElfArch>
+class ElfSectionReader {
+ public:
+  // Maps the contents of the section described by SECTION_HEADER from
+  // the file open on FD. NAME and PATH are not used at present (NAME
+  // is only needed by the disabled .zdebug_ decompression below).
+  // contents() is left NULL when the section occupies no file space
+  // (SHT_NOBITS or zero size) or when the mapping cannot be created.
+  ElfSectionReader(const char *name, const string &path, int fd,
+                   const typename ElfArch::Shdr &section_header)
+      : contents_aligned_(NULL),
+        contents_(NULL),
+        header_(section_header) {
+    // mmap() needs a page-aligned file offset, so back up to the
+    // beginning of the page that holds the first byte of the section.
+    const size_t additional = header_.sh_offset % getpagesize();
+    const size_t offset_aligned = header_.sh_offset - additional;
+    section_size_ = header_.sh_size;
+    size_aligned_ = section_size_ + additional;
+    // If the section has been stripped or is empty, do not attempt
+    // to process its contents.
+    if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0)
+      return;
+    void *mapping = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED,
+                         fd, offset_aligned);
+    // mmap() signals failure with MAP_FAILED rather than NULL. Leave
+    // both pointers NULL in that case, so that the destructor does not
+    // munmap() an invalid address and clients see a NULL contents()
+    // instead of a pointer derived from MAP_FAILED.
+    if (mapping == MAP_FAILED)
+      return;
+    contents_aligned_ = mapping;
+    // Set where the offset really should begin.
+    contents_ = reinterpret_cast<char *>(contents_aligned_) +
+                (header_.sh_offset - offset_aligned);
+
+    // Check for and handle any compressed contents.
+    //if (strncmp(name, ".zdebug_", strlen(".zdebug_")) == 0)
+    //  DecompressZlibContents();
+    // TODO(saugustine): Add support for proposed elf-section flag
+    // "SHF_COMPRESS".
+  }
+
+  // Releases the mapping if there is one; otherwise frees contents_,
+  // which is NULL unless it was allocated separately (the decompression
+  // path that would do so is currently disabled).
+  ~ElfSectionReader() {
+    if (contents_aligned_ != NULL)
+      munmap(contents_aligned_, size_aligned_);
+    else
+      delete[] contents_;
+  }
+
+  // Return the section header for this section.
+  typename ElfArch::Shdr const &header() const { return header_; }
+
+  // Return memory at the given offset within this section. No bounds
+  // check is performed, and the result is meaningless when contents()
+  // is NULL.
+  const char *GetOffset(typename ElfArch::Word bytes) const {
+    return contents_ + bytes;
+  }
+
+  // Start of the section data, or NULL if nothing was mapped.
+  const char *contents() const { return contents_; }
+  // Size of the section as recorded in its header (sh_size).
+  size_t section_size() const { return section_size_; }
+
+ private:
+  // page-aligned file contents; NULL if no mapping exists.
+  void *contents_aligned_;
+  // contents as usable by the client. For non-compressed sections,
+  // pointer within contents_aligned_ to where the section data
+  // begins; for compressed sections, pointer to the decompressed
+  // data.
+  char *contents_;
+  // size of contents_aligned_
+  size_t size_aligned_;
+  // size of contents.
+  size_t section_size_;
+  const typename ElfArch::Shdr header_;
+};
+
+// An iterator over symbols in a given section. It handles walking
+// through the entries in the specified section and mapping symbol
+// entries to their names in the appropriate string table (in
+// another section).
+template<class ElfArch>
+class SymbolIterator {
+ public:
+  // Positions the iterator on the first symbol of the first section of
+  // type SECTION_TYPE found by READER. The iterator starts out done()
+  // when there is no such section, when the section has no readable
+  // contents, or when its header declares a zero entry size.
+  SymbolIterator(ElfReaderImpl<ElfArch> *reader,
+                 typename ElfArch::Word section_type)
+      : symbol_section_(reader->GetSectionByType(section_type)),
+        string_section_(NULL),
+        num_symbols_in_section_(0),
+        symbol_within_section_(0) {
+
+    // If this section type doesn't exist, leave
+    // num_symbols_in_section_ as zero, so this iterator is already
+    // done().
+    if (symbol_section_ != NULL) {
+      const typename ElfArch::Shdr &symtab_header = symbol_section_->header();
+
+      // A malformed file may declare a zero entry size, and a section
+      // without contents (e.g. SHT_NOBITS) has nothing to walk. Treat
+      // both as an empty table rather than dividing by zero or handing
+      // out pointers based on NULL.
+      if (symtab_header.sh_entsize != 0 &&
+          symbol_section_->contents() != NULL) {
+        num_symbols_in_section_ =
+            symtab_header.sh_size / symtab_header.sh_entsize;
+      }
+
+      // Symbol sections have sh_link set to the section number of
+      // the string section containing the symbol names.
+      string_section_ = reader->GetSection(symtab_header.sh_link);
+    }
+  }
+
+  // Return true iff we have passed all symbols in this section.
+  bool done() const {
+    return symbol_within_section_ >= num_symbols_in_section_;
+  }
+
+  // Advance to the next symbol in this section.
+  // REQUIRES: !done()
+  void Next() { ++symbol_within_section_; }
+
+  // Return a pointer to the current symbol.
+  // REQUIRES: !done()
+  const typename ElfArch::Sym *GetSymbol() const {
+    return reinterpret_cast<const typename ElfArch::Sym*>(
+        symbol_section_->GetOffset(symbol_within_section_ *
+                                   symbol_section_->header().sh_entsize));
+  }
+
+  // Return the name of the current symbol, NULL if it has none or if
+  // the string section holding the names is unavailable.
+  // REQUIRES: !done()
+  const char *GetSymbolName() const {
+    const typename ElfArch::Word name_offset = GetSymbol()->st_name;
+    if (name_offset == 0)
+      return NULL;
+    // sh_link may not resolve to a usable string section; report "no
+    // name" instead of dereferencing NULL.
+    if (string_section_ == NULL || string_section_->contents() == NULL)
+      return NULL;
+    return string_section_->GetOffset(name_offset);
+  }
+
+  // Index of the current symbol within its section.
+  int GetCurrentSymbolIndex() const {
+    return symbol_within_section_;
+  }
+
+ private:
+  const ElfSectionReader<ElfArch> *const symbol_section_;
+  const ElfSectionReader<ElfArch> *string_section_;
+  int num_symbols_in_section_;
+  int symbol_within_section_;
+};
+
+
+// Copied from strings/strutil.h. Per chatham,
+// this library should not depend on strings.
+
+// Returns true iff STR ends with SUFFIX. An empty SUFFIX matches every
+// string. Lengths are kept as string::size_type so that they are never
+// narrowed to int.
+static inline bool MyHasSuffixString(const string& str, const string& suffix) {
+  const string::size_type len = str.length();
+  const string::size_type suflen = suffix.length();
+  // Check the lengths first: len - suflen must not wrap around.
+  return (suflen <= len) && (str.compare(len - suflen, suflen, suffix) == 0);
+}
+
+
+// ElfReader loads an ELF binary and can provide information about its
+// contents. It is most useful for matching addresses to function
+// names. It does not understand debugging formats (eg dwarf2), so it
+// can't print line numbers. It takes a path to an elf file and a
+// readable file descriptor for that file, which it does not assume
+// ownership of.
+template<class ElfArch>
+class ElfReaderImpl {
+ public:
+ // Parses the ELF headers of the file PATH through the descriptor FD,
+ // then records the architecture-specific settings (PowerPC64 .opd
+ // section and text base, PLT entry sizes) that later lookups use.
+ explicit ElfReaderImpl(const string &path, int fd)
+     : path_(path),
+       fd_(fd),
+       section_headers_(NULL),
+       program_headers_(NULL),
+       opd_section_(NULL),
+       base_for_text_(0),
+       plts_supported_(false),
+       plt_code_size_(0),
+       plt0_size_(0),
+       visited_relocation_entries_(false) {
+   // The ".dwp" suffix makes the by-name section lookups scan the
+   // section table backwards.
+   is_dwp_ = MyHasSuffixString(path, ".dwp");
+   ParseHeaders(fd, path);
+   // Currently we need some extra information for PowerPC64 binaries
+   // including a way to read the .opd section for function descriptors and a
+   // way to find the linked base for function symbols.
+   if (header_.e_machine == EM_PPC64) {
+     // "opd_section_" must always be checked for NULL before use.
+     opd_section_ = GetSectionInfoByName(".opd", &opd_info_);
+     for (unsigned int k = 0u; k < GetNumSections(); ++k) {
+       const char *name = GetSectionName(section_headers_[k].sh_name);
+       // The by-name lookups in this class guard against a NULL section
+       // name before using it; do the same here instead of passing NULL
+       // to strncmp().
+       if (name != NULL && strncmp(name, ".text", strlen(".text")) == 0) {
+         base_for_text_ =
+             section_headers_[k].sh_addr - section_headers_[k].sh_offset;
+         break;
+       }
+     }
+   }
+   // Turn on PLTs for the architectures whose PLT layout we know.
+   if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) {
+     plt_code_size_ = kX86PLTCodeSize;
+     plt0_size_ = kX86PLT0Size;
+     plts_supported_ = true;
+   } else if (header_.e_machine == EM_ARM) {
+     plt_code_size_ = kARMPLTCodeSize;
+     plt0_size_ = kARMPLT0Size;
+     plts_supported_ = true;
+   } else if (header_.e_machine == EM_AARCH64) {
+     plt_code_size_ = kAARCH64PLTCodeSize;
+     plt0_size_ = kAARCH64PLT0Size;
+     plts_supported_ = true;
+   }
+ }
+
+ // Deletes every element of sections_, then the two header arrays.
+ ~ElfReaderImpl() {
+   for (unsigned int idx = 0u; idx != sections_.size(); ++idx) {
+     delete sections_[idx];
+   }
+   delete [] section_headers_;
+   delete [] program_headers_;
+ }
+
+ // Reports whether the file open on FD starts with an ELF identification
+ // block that this ElfArch can handle: the ELF magic must be present, the
+ // EI_CLASS byte must equal ElfArch::kElfClass, and the EI_DATA byte must
+ // describe the byte order given by __BYTE_ORDER. On failure, a short
+ // reason is stored in *ERROR when ERROR is non-NULL.
+ static bool IsArchElfFile(int fd, string *error) {
+   // Records REASON for the caller (if requested) and yields the failure
+   // result, so that each check below can bail out in one statement.
+   const auto reject = [error](const char *reason) {
+     if (error != NULL) *error = reason;
+     return false;
+   };
+
+   unsigned char ident[EI_NIDENT];
+   if (pread(fd, ident, sizeof(ident), 0) != sizeof(ident))
+     return reject("Could not read header");
+
+   if (memcmp(ident, ELFMAG, SELFMAG) != 0)
+     return reject("Missing ELF magic");
+
+   if (ident[EI_CLASS] != ElfArch::kElfClass)
+     return reject("Different word size");
+
+   // Translate the EI_DATA byte into the matching __LITTLE_ENDIAN or
+   // __BIG_ENDIAN constant; any other value leaves this at 0.
+   int file_byte_order = 0;
+   switch (ident[EI_DATA]) {
+     case ELFDATA2LSB:
+       file_byte_order = __LITTLE_ENDIAN;
+       break;
+     case ELFDATA2MSB:
+       file_byte_order = __BIG_ENDIAN;
+       break;
+     default:
+       break;
+   }
+   if (file_byte_order != __BYTE_ORDER)
+     return reject("Different byte order");
+
+   return true;
+ }
+
+ // Decides whether the symbol SYM, whose name is NAME, may be entered in
+ // the Address-to-Symbol map.
+ bool CanUseSymbol(const char *name, const typename ElfArch::Sym *sym) {
+   // Only FUNC and NOTYPE symbols are kept. Functions are what we care
+   // about, but some functions written in assembler don't have a proper
+   // ELF type attached to them, hence NOTYPE. The remaining significant
+   // type is OBJECT (eg global variables), which represent about 25% of
+   // the symbols in a typical google3 binary.
+   const char kind = ElfArch::Type(sym);
+   const bool is_code_like = (kind == STT_FUNC || kind == STT_NOTYPE);
+   if (!is_code_like)
+     return false;
+
+   // Target specific filtering: pick the leading character that marks
+   // local symbols we do not want for this machine.
+   char local_marker;
+   switch (header_.e_machine) {
+     case EM_AARCH64:
+     case EM_ARM:
+       // '$x' special local symbols used by tools.
+       local_marker = '$';
+       break;
+     case EM_X86_64:
+       // Read-only constants like .LC123.
+       local_marker = '.';
+       break;
+     default:
+       return true;
+   }
+   const bool is_marked_local =
+       name[0] == local_marker && ElfArch::Bind(sym) == STB_LOCAL;
+   return !is_marked_local;
+ }
+
+ // Iterate over the symbols in a section, either SHT_DYNSYM or
+ // SHT_SYMTAB. Add all symbols to the given SymbolMap.
+ /*
+ void GetSymbolPositions(SymbolMap *symbols,
+ typename ElfArch::Word section_type,
+ uint64 mem_offset,
+ uint64 file_offset) {
+ // This map is used to filter out "nested" functions.
+ // See comment below.
+ AddrToSymMap addr_to_sym_map;
+ for (SymbolIterator<ElfArch> it(this, section_type);
+ !it.done(); it.Next()) {
+ const char *name = it.GetSymbolName();
+ if (name == NULL)
+ continue;
+ const typename ElfArch::Sym *sym = it.GetSymbol();
+ if (CanUseSymbol(name, sym)) {
+ const int sec = sym->st_shndx;
+
+ // We don't support special section indices. The most common
+ // is SHN_ABS, for absolute symbols used deep in the bowels of
+ // glibc. Also ignore any undefined symbols.
+ if (sec == SHN_UNDEF ||
+ (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) {
+ continue;
+ }
+
+ const typename ElfArch::Shdr& hdr = section_headers_[sec];
+
+ // Adjust for difference between where we expected to mmap
+ // this section, and where it was actually mmapped.
+ const int64 expected_base = hdr.sh_addr - hdr.sh_offset;
+ const int64 real_base = mem_offset - file_offset;
+ const int64 adjust = real_base - expected_base;
+
+ uint64 start = sym->st_value + adjust;
+
+ // Adjust function symbols for PowerPC64 by dereferencing and adjusting
+ // the function descriptor to get the function address.
+ if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) {
+ const uint64 opd_addr =
+ AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);
+ // Only adjust the returned value if the function address was found.
+ if (opd_addr != sym->st_value) {
+ const int64 adjust_function_symbols =
+ real_base - base_for_text_;
+ start = opd_addr + adjust_function_symbols;
+ }
+ }
+
+ addr_to_sym_map.push_back(std::make_pair(start, sym));
+ }
+ }
+ std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter);
+ addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(),
+ addr_to_sym_map.end(), &AddrToSymEquals),
+ addr_to_sym_map.end());
+
+ // Squeeze out any "nested functions".
+ // Nested functions are not allowed in C, but libc plays tricks.
+ //
+ // For example, here is disassembly of /lib64/tls/libc-2.3.5.so:
+ // 0x00000000000aa380 <read+0>: cmpl $0x0,0x2781b9(%rip)
+ // 0x00000000000aa387 <read+7>: jne 0xaa39b <read+27>
+ // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax
+ // 0x00000000000aa390 <__read_nocancel+7>: syscall
+ // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax
+ // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef <read+111>
+ // 0x00000000000aa39a <__read_nocancel+17>: retq
+ // 0x00000000000aa39b <read+27>: sub $0x28,%rsp
+ // 0x00000000000aa39f <read+31>: mov %rdi,0x8(%rsp)
+ // ...
+ // Without removing __read_nocancel, the symbolizer will return NULL
+ // given e.g. 0xaa39f (because the lower bound is __read_nocancel,
+ // but 0xaa39f is beyond its end).
+ if (addr_to_sym_map.empty()) {
+ return;
+ }
+ const ElfSectionReader<ElfArch> *const symbol_section =
+ this->GetSectionByType(section_type);
+ const ElfSectionReader<ElfArch> *const string_section =
+ this->GetSection(symbol_section->header().sh_link);
+
+ typename AddrToSymMap::iterator curr = addr_to_sym_map.begin();
+ // Always insert the first symbol.
+ symbols->AddSymbol(string_section->GetOffset(curr->second->st_name),
+ curr->first, curr->second->st_size);
+ typename AddrToSymMap::iterator prev = curr++;
+ for (; curr != addr_to_sym_map.end(); ++curr) {
+ const uint64 prev_addr = prev->first;
+ const uint64 curr_addr = curr->first;
+ const typename ElfArch::Sym *const prev_sym = prev->second;
+ const typename ElfArch::Sym *const curr_sym = curr->second;
+ if (prev_addr + prev_sym->st_size <= curr_addr ||
+ // The next condition is true if two symbols overlap like this:
+ //
+ // Previous symbol |----------------------------|
+ // Current symbol |-------------------------------|
+ //
+ // These symbols are not found in google3 codebase, but in
+ // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so.
+ //
+ // 0619e040 00000046 t CardTableModRefBS::write_region_work()
+ // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work()
+ //
+ // We allow overlapped symbols rather than ignore these.
+ // Due to the way SymbolMap::GetSymbolAtPosition() works,
+ // lookup for any address in [curr_addr, curr_addr + its size)
+ // (e.g. 0619e071) will produce the current symbol,
+ // which is the desired outcome.
+ prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) {
+ const char *name = string_section->GetOffset(curr_sym->st_name);
+ symbols->AddSymbol(name, curr_addr, curr_sym->st_size);
+ prev = curr;
+ } else {
+ // Current symbol is "nested" inside previous one like this:
+ //
+ // Previous symbol |----------------------------|
+ // Current symbol |---------------------|
+ //
+ // This happens within glibc, e.g. __read_nocancel is nested
+ // "inside" __read. Ignore "inner" symbol.
+ //DCHECK_LE(curr_addr + curr_sym->st_size,
+ // prev_addr + prev_sym->st_size);
+ ;
+ }
+ }
+ }
+*/
+
+ // Convenience overload: visits the symbols of SECTION_TYPE with no
+ // binding filter and no type filter (both passed as -1, which the
+ // five-argument overload treats as "match anything"), and with symbol
+ // values adjusted rather than raw (get_raw_symbol_values == false).
+ void VisitSymbols(typename ElfArch::Word section_type,
+ ElfReader::SymbolSink *sink) {
+ VisitSymbols(section_type, sink, -1, -1, false);
+ }
+
+ // Reports to SINK every named symbol found by a SymbolIterator over
+ // SECTION_TYPE. A negative SYMBOL_BINDING or SYMBOL_TYPE disables the
+ // corresponding filter; otherwise only symbols whose binding / type
+ // equals the given value are reported. For SHT_DYNSYM symbols that have
+ // a PLT offset recorded in symbols_plt_offsets_, an additional symbol
+ // named "<name>@plt" of size plt_code_size_ is reported first. Unless
+ // GET_RAW_SYMBOL_VALUES is set, a copy of the symbol is passed through
+ // AdjustSymbolValue() before its value and size are reported.
+ void VisitSymbols(typename ElfArch::Word section_type,
+                   ElfReader::SymbolSink *sink,
+                   int symbol_binding,
+                   int symbol_type,
+                   bool get_raw_symbol_values) {
+   for (SymbolIterator<ElfArch> it(this, section_type);
+        !it.done(); it.Next()) {
+     const char *name = it.GetSymbolName();
+     if (!name) continue;
+     const typename ElfArch::Sym *sym = it.GetSymbol();
+     if ((symbol_binding >= 0 && ElfArch::Bind(sym) != symbol_binding) ||
+         (symbol_type >= 0 && ElfArch::Type(sym) != symbol_type)) {
+       continue;
+     }
+
+     // Add a PLT symbol in addition to the main undefined symbol.
+     // Only do this for SHT_DYNSYM, because PLT symbols are dynamic.
+     // TODO(dthomson): Can be removed once all Java code is using the
+     // Google3 launcher.
+     const int symbol_index = it.GetCurrentSymbolIndex();
+     if (section_type == SHT_DYNSYM &&
+         static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size() &&
+         symbols_plt_offsets_[symbol_index] != 0) {
+       // Build the "@plt" name only the first time this symbol is seen;
+       // an already stored name is reused as is.
+       if (plt_function_names_[symbol_index].empty()) {
+         plt_function_names_[symbol_index] =
+             string(name) + kPLTFunctionSuffix;
+       }
+       sink->AddSymbol(plt_function_names_[symbol_index].c_str(),
+                       symbols_plt_offsets_[symbol_index],
+                       plt_code_size_);
+     }
+
+     // Work on a copy so that the mapped symbol table is never modified.
+     typename ElfArch::Sym symbol = *sym;
+     if (!get_raw_symbol_values)
+       AdjustSymbolValue(&symbol);
+     sink->AddSymbol(name, symbol.st_value, symbol.st_size);
+   }
+ }
+
+ // Computes, once per reader, the PLT address of every dynamic symbol
+ // referenced by the PLT relocation section (.rel.plt, or .rela.plt if
+ // the former is absent) and stores it in symbols_plt_offsets_, indexed
+ // by dynamic symbol number. Does nothing when PLTs are unsupported for
+ // this machine or when any of the required sections is missing, empty
+ // or has a zero entry size.
+ void VisitRelocationEntries() {
+   if (visited_relocation_entries_) {
+     return;
+   }
+   visited_relocation_entries_ = true;
+
+   if (!plts_supported_) {
+     return;
+   }
+   // First determine if PLTs exist. If not, then there is nothing to do.
+   ElfReader::SectionInfo plt_section_info;
+   const char* plt_section =
+       GetSectionInfoByName(kElfPLTSectionName, &plt_section_info);
+   if (!plt_section) {
+     return;
+   }
+   if (plt_section_info.size == 0) {
+     return;
+   }
+
+   // The PLTs could be referenced by either a Rel or Rela (Rel with Addend)
+   // section.
+   ElfReader::SectionInfo rel_section_info;
+   ElfReader::SectionInfo rela_section_info;
+   const char* rel_section =
+       GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info);
+   const char* rela_section =
+       GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info);
+
+   if (!rel_section && !rela_section) {
+     return;
+   }
+
+   const typename ElfArch::Rel* rel =
+       reinterpret_cast<const typename ElfArch::Rel*>(rel_section);
+   const typename ElfArch::Rela* rela =
+       reinterpret_cast<const typename ElfArch::Rela*>(rela_section);
+
+   // Use either Rel or Rela section, depending on which one exists.
+   // Rel takes precedence when both are present.
+   size_t section_size = rel_section ? rel_section_info.size
+                                     : rela_section_info.size;
+   size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel)
+                                   : sizeof(typename ElfArch::Rela);
+
+   // Determine the number of entries in the dynamic symbol table.
+   ElfReader::SectionInfo dynsym_section_info;
+   const char* dynsym_section =
+       GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info);
+   // The dynsym section might not exist, or it might be empty. In either case
+   // there is nothing to be done so return. A zero entry size (malformed
+   // header) is treated the same way, because it would otherwise be used
+   // as a divisor below.
+   if (!dynsym_section || dynsym_section_info.size == 0 ||
+       dynsym_section_info.entsize == 0) {
+     return;
+   }
+   size_t num_dynamic_symbols =
+       dynsym_section_info.size / dynsym_section_info.entsize;
+   symbols_plt_offsets_.resize(num_dynamic_symbols, 0);
+
+   // TODO(dthomson): Can be removed once all Java code is using the
+   // Google3 launcher.
+   // Make storage room for PLT function name strings.
+   plt_function_names_.resize(num_dynamic_symbols);
+
+   for (size_t i = 0; i < section_size / entry_size; ++i) {
+     // Determine symbol index from the |r_info| field.
+     int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info
+                                                : rela[i].r_info);
+     // Ignore relocations that name a symbol outside the dynamic table.
+     if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) {
+       continue;
+     }
+     // The i-th relocation corresponds to the i-th PLT stub, which
+     // follows the special PLT0 entry.
+     symbols_plt_offsets_[sym_index] =
+         plt_section_info.addr + plt0_size_ + i * plt_code_size_;
+   }
+ }
+
+ // Scans the section headers in order and returns the ElfSectionReader
+ // for the first one whose sh_type equals SECTION_TYPE, or NULL when no
+ // section has that type.
+ const ElfSectionReader<ElfArch> *GetSectionByType(
+     typename ElfArch::Word section_type) {
+   for (unsigned int shndx = 0u; shndx < GetNumSections(); ++shndx) {
+     if (section_headers_[shndx].sh_type != section_type)
+       continue;
+     return GetSection(shndx);
+   }
+   return NULL;
+ }
+
+ // Return the name of section "shndx". Returns NULL if the section
+ // is not found, i.e. if there are no section headers or "shndx" is
+ // negative or not smaller than the number of sections.
+ const char *GetSectionNameByIndex(int shndx) {
+   // Reject out-of-range indexes before touching section_headers_, so
+   // that the documented NULL result is what callers actually get.
+   if (section_headers_ == NULL || shndx < 0 ||
+       static_cast<unsigned int>(shndx) >= GetNumSections()) {
+     return NULL;
+   }
+   return GetSectionName(section_headers_[shndx].sh_name);
+ }
+
+  // Return a pointer to the contents of section "shndx", and store the
+  // section size in "size". Returns NULL (leaving "size" untouched) if
+  // "shndx" lies outside the section header table or no reader is
+  // available for the section.
+  const char *GetSectionContentsByIndex(int shndx, size_t *size) {
+    // GetSection() indexes per-section arrays, so validate the index here.
+    if (shndx < 0 || static_cast<uint64_t>(shndx) >= GetNumSections()) {
+      return NULL;
+    }
+    const ElfSectionReader<ElfArch> *section = GetSection(shndx);
+    if (section == NULL) {
+      return NULL;
+    }
+    *size = section->section_size();
+    return section->contents();
+  }
+
+  // Looks up a section by name and returns a pointer to its contents,
+  // writing the section size to "size". The first header whose name
+  // matches (per ElfReader::SectionNamesMatch) wins; NULL is returned
+  // when nothing matches or no reader is available for the match.
+  const char *GetSectionContentsByName(const string &section_name,
+                                       size_t *size) {
+    for (unsigned int k = 0u; k < GetNumSections(); ++k) {
+      // In a .dwp file the sections of interest sit at the end of the
+      // section table, so the table is walked back to front there.
+      int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;
+      const char *candidate = GetSectionName(section_headers_[shndx].sh_name);
+      if (candidate == NULL ||
+          !ElfReader::SectionNamesMatch(section_name, candidate)) {
+        continue;
+      }
+      const ElfSectionReader<ElfArch> *section = GetSection(shndx);
+      if (section != NULL) {
+        *size = section->section_size();
+        return section->contents();
+      }
+      return NULL;
+    }
+    return NULL;
+  }
+
+  // Name-based lookup like GetSectionContentsByName(), except that every
+  // field of the matching section header is copied into "info" instead
+  // of only the size. Returns the section contents, or NULL when no
+  // section matches or no reader is available for the match.
+  const char *GetSectionInfoByName(const string &section_name,
+                                   ElfReader::SectionInfo *info) {
+    for (unsigned int k = 0u; k < GetNumSections(); ++k) {
+      // In a .dwp file the sections of interest sit at the end of the
+      // section table, so the table is walked back to front there.
+      int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;
+      const char *candidate = GetSectionName(section_headers_[shndx].sh_name);
+      if (candidate == NULL ||
+          !ElfReader::SectionNamesMatch(section_name, candidate)) {
+        continue;
+      }
+      const ElfSectionReader<ElfArch> *section = GetSection(shndx);
+      if (section == NULL) {
+        return NULL;
+      }
+      const typename ElfArch::Shdr &shdr = section->header();
+      info->type = shdr.sh_type;
+      info->flags = shdr.sh_flags;
+      info->addr = shdr.sh_addr;
+      info->offset = shdr.sh_offset;
+      info->size = shdr.sh_size;
+      info->link = shdr.sh_link;
+      info->info = shdr.sh_info;
+      info->addralign = shdr.sh_addralign;
+      info->entsize = shdr.sh_entsize;
+      return section->contents();
+    }
+    return NULL;
+  }
+
+  // Reports the p_vaddr of the first program header of type PT_LOAD.
+  // The result is 0 when the file is a relocatable object (ET_REL, which
+  // has no LOAD segments) or when no PT_LOAD header is present. This is
+  // the address the static linker laid the image out at; it is usually
+  // (but not always) 0 for shared libraries and position-independent
+  // executables.
+  uint64 VaddrOfFirstLoadSegment() const {
+    if (header_.e_type != ET_REL) {
+      const int segment_count = GetNumProgramHeaders();
+      for (int seg = 0; seg < segment_count; ++seg) {
+        const typename ElfArch::Phdr &phdr = program_headers_[seg];
+        if (phdr.p_type == PT_LOAD)
+          return phdr.p_vaddr;
+      }
+    }
+    return 0;
+  }
+
+  // According to the LSB ("ELF special sections"), sections with debug
+  // info are prefixed by ".debug". The names are not specified, but they
+  // look like ".debug_line", ".debug_info", etc. Sections prefixed by
+  // ".zdebug" are accepted as well. Returns true as soon as one section
+  // name carries either prefix.
+  bool HasDebugSections() {
+    // Debug sections are likely to be near the end, so reverse the
+    // direction of iteration.
+    for (int k = GetNumSections() - 1; k >= 0; --k) {
+      const char *name = GetSectionName(section_headers_[k].sh_name);
+      // GetSectionName() may yield NULL; strncmp() must not see that.
+      if (name == NULL) continue;
+      if (strncmp(name, ".debug", strlen(".debug")) == 0) return true;
+      if (strncmp(name, ".zdebug", strlen(".zdebug")) == 0) return true;
+    }
+    return false;
+  }
+
+  // True when the ELF header declares the file type ET_DYN.
+  bool IsDynamicSharedObject() const {
+    const bool is_et_dyn = (ET_DYN == header_.e_type);
+    return is_et_dyn;
+  }
+
+  // Number of entries in the section header table. Taken from the
+  // sh_size field of the first section header when HasManySections()
+  // holds, and from e_shnum in the ELF header otherwise.
+  uint64_t GetNumSections() const {
+    return HasManySections()
+               ? static_cast<uint64_t>(first_section_header_.sh_size)
+               : static_cast<uint64_t>(header_.e_shnum);
+  }
+
+ private:
+  // Sequence of (address, symbol) pairs.
+  typedef vector<pair<uint64, const typename ElfArch::Sym *> > AddrToSymMap;
+
+  // Strict-weak ordering on the address component only; the symbol
+  // pointer does not take part in the comparison.
+  static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs,
+                              const typename AddrToSymMap::value_type& rhs) {
+    const uint64 left_addr = lhs.first;
+    const uint64 right_addr = rhs.first;
+    return left_addr < right_addr;
+  }
+
+  // Equality on the address component only; two entries with different
+  // symbol pointers compare equal when their addresses agree.
+  static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs,
+                              const typename AddrToSymMap::value_type& rhs) {
+    const uint64 left_addr = lhs.first;
+    const uint64 right_addr = rhs.first;
+    return left_addr == right_addr;
+  }
+
+  // True when e_shnum in the ELF header is SHN_UNDEF, i.e. the section
+  // count did not fit in that field and has to be taken from the first
+  // section header instead.
+  bool HasManySections() const {
+    return SHN_UNDEF == header_.e_shnum;
+  }
+
+  // Number of program headers. Normally e_phnum from the ELF header; the
+  // sh_info field of the first section header is used instead when the
+  // file has many sections, e_phnum holds 0xffff and sh_info is non-zero.
+  int GetNumProgramHeaders() const {
+    if (!HasManySections() || header_.e_phnum != 0xffff ||
+        first_section_header_.sh_info == 0) {
+      return header_.e_phnum;
+    }
+    return first_section_header_.sh_info;
+  }
+
+  // Return the index of the section-name string table.
+  //
+  // The index normally comes from e_shstrndx. When the file has many
+  // sections and e_shstrndx holds 0xffff, the real index is stored in
+  // the sh_link field of the first section header. Whatever its origin,
+  // an index that does not fall inside the section header table is
+  // replaced by 0 so callers never index past the per-section arrays.
+  int GetStringTableIndex() const {
+    uint64_t index = header_.e_shstrndx;
+    if (HasManySections() && header_.e_shstrndx == 0xffff) {
+      index = first_section_header_.sh_link;
+    }
+    if (index >= GetNumSections()) {
+      return 0;
+    }
+    return static_cast<int>(index);
+  }
+
+  // Translates |sh_name|, an offset into the section header string
+  // table, into the section name stored there. Yields NULL when no
+  // reader for the string table is available.
+  const char *GetSectionName(typename ElfArch::Word sh_name) {
+    const ElfSectionReader<ElfArch> *const name_table =
+        GetSection(GetStringTableIndex());
+    if (name_table == NULL) {
+      return NULL;
+    }
+    return name_table->GetOffset(sh_name);
+  }
+
+  // Return an ElfSectionReader for section "num", creating it on first
+  // use and caching it in sections_. The reader will be freed when this
+  // object is destroyed. Returns NULL if "num" does not address an entry
+  // of sections_.
+  const ElfSectionReader<ElfArch> *GetSection(int num) {
+    // sections_ and section_headers_ are indexed below; refuse anything
+    // outside the presized table instead of reading out of bounds.
+    if (num < 0 || static_cast<size_t>(num) >= sections_.size()) {
+      return NULL;
+    }
+    ElfSectionReader<ElfArch> *& reader = sections_[num];
+    if (reader == NULL) {
+      // The name is only needed to construct a reader, so it is resolved
+      // here rather than on every lookup of an already cached section.
+      const char *name;
+      // Hard-coding the name for the section-name string table prevents
+      // infinite recursion.
+      if (num == GetStringTableIndex())
+        name = ".shstrtab";
+      else
+        name = GetSectionNameByIndex(num);
+      reader = new ElfSectionReader<ElfArch>(name, path_, fd_,
+                                             section_headers_[num]);
+    }
+    return reader;
+  }
+
+  // Parse out the overall header information from the file and check
+  // that it looks sane. This contains information like the magic
+  // number and target architecture.
+  //
+  // Reads, in order: the ELF header, the first section header (only when
+  // e_shnum is SHN_UNDEF), the section header table and the program
+  // header table, all from |fd|. |path| is not used here.
+  //
+  // Returns false if a read comes up short, if the file is not of type
+  // ET_EXEC, ET_DYN or ET_REL, or if it has no section header table.
+  bool ParseHeaders(int fd, const string &path) {
+    // Read in the global ELF header.
+    if (pread(fd, &header_, sizeof(header_), 0) !=
+        static_cast<ssize_t>(sizeof(header_))) {
+      return false;
+    }
+
+    // Must be an executable, dynamic shared object or relocatable object
+    if (header_.e_type != ET_EXEC &&
+        header_.e_type != ET_DYN &&
+        header_.e_type != ET_REL) {
+      return false;
+    }
+    // Need a section header.
+    if (header_.e_shoff == 0) {
+      return false;
+    }
+
+    if (header_.e_shnum == SHN_UNDEF) {
+      // The number of sections in the ELF header is only a 16-bit value. In
+      // the event of overflow (greater than SHN_LORESERVE sections), e_shnum
+      // will read SHN_UNDEF and the true number of section header table entries
+      // is found in the sh_size field of the first section header.
+      // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html
+      if (pread(fd, &first_section_header_, sizeof(first_section_header_),
+                header_.e_shoff) !=
+          static_cast<ssize_t>(sizeof(first_section_header_))) {
+        return false;
+      }
+    }
+
+    // Dynamically allocate enough space to store the section headers
+    // and read them out of the file. The array is sized by entry count;
+    // the byte count is only used for the read itself.
+    const size_t num_sections = GetNumSections();
+    const size_t section_headers_size =
+        num_sections * sizeof(*section_headers_);
+    section_headers_ = new typename ElfArch::Shdr[num_sections];
+    if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) !=
+        static_cast<ssize_t>(section_headers_size)) {
+      return false;
+    }
+
+    // Presize the sections array for efficiency. This happens before the
+    // program headers are read so that section lookups stay usable even
+    // if that read fails.
+    sections_.resize(num_sections, NULL);
+
+    // Dynamically allocate enough space to store the program headers
+    // and read them out of the file. The array is zero-initialized so a
+    // failed read never leaves indeterminate entries behind.
+    const int num_program_headers = GetNumProgramHeaders();
+    program_headers_ = new typename ElfArch::Phdr[num_program_headers]();
+    if (num_program_headers > 0) {
+      const size_t program_headers_size =
+          num_program_headers * sizeof(*program_headers_);
+      if (pread(fd, program_headers_, program_headers_size, header_.e_phoff) !=
+          static_cast<ssize_t>(program_headers_size)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Given the "value" of a function descriptor return the address of the
+  // function (i.e. the dereferenced value). Otherwise return "value".
+  //
+  // "value" is treated as a descriptor only when a .opd section is
+  // available and a complete 8-byte entry starting at "value" lies
+  // inside it; anything else is handed back unchanged.
+  uint64 AdjustPPC64FunctionDescriptorSymbolValue(uint64 value) {
+    if (opd_section_ == NULL || value < opd_info_.addr) {
+      return value;
+    }
+    const uint64 offset = value - opd_info_.addr;
+    // Written as a subtraction so that neither addr + size nor
+    // offset + 8 can wrap around.
+    if (opd_info_.size < sizeof(uint64) ||
+        offset > opd_info_.size - sizeof(uint64)) {
+      return value;
+    }
+    // Copy the bytes out instead of dereferencing a cast pointer: the
+    // entry is not guaranteed to be suitably aligned in the mapped data.
+    uint64 function_address;
+    memcpy(&function_address, opd_section_ + offset,
+           sizeof(function_address));
+    return function_address;
+  }
+
+  // Rewrites sym->st_value in place according to the machine type in the
+  // ELF header. Only STT_FUNC symbols on EM_ARM and EM_PPC64 are touched;
+  // every other machine (including EM_386, the common case) and every
+  // other symbol type is left as is.
+  void AdjustSymbolValue(typename ElfArch::Sym* sym) {
+    if (header_.e_machine == EM_ARM) {
+      // On ARM a set low bit in a function symbol's offset marks a Thumb
+      // function and is not part of the address, so it gets cleared.
+      if (ElfArch::Type(sym) == STT_FUNC) {
+        sym->st_value = AdjustARMThumbSymbolValue(sym->st_value);
+      }
+    } else if (header_.e_machine == EM_PPC64) {
+      // PowerPC64 function symbols name function descriptors; replace
+      // the descriptor value with the function address it refers to.
+      if (ElfArch::Type(sym) == STT_FUNC) {
+        sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);
+      }
+    }
+  }
+
+ friend class SymbolIterator<ElfArch>;
+
+ // The file we're reading.
+ const string path_;
+ // Open file descriptor for path_. Not owned by this object.
+ const int fd_;
+
+ // The global header of the ELF file.
+ typename ElfArch::Ehdr header_;
+
+ // The header of the first section. This may be used to supplement the ELF
+ // file header.
+ typename ElfArch::Shdr first_section_header_;
+
+ // Array of GetNumSections() section headers, allocated when we read
+ // in the global header.
+ typename ElfArch::Shdr *section_headers_;
+
+ // Array of GetNumProgramHeaders() program headers, allocated when we read
+ // in the global header.
+ typename ElfArch::Phdr *program_headers_;
+
+ // An array of pointers to ElfSectionReaders. Sections are
+ // mmaped as they're needed and not released until this object is
+ // destroyed.
+ vector<ElfSectionReader<ElfArch>*> sections_;
+
+ // For PowerPC64 we need to keep track of function descriptors when looking up
+ // the values of function symbols. Function descriptors are kept in the
+ // .opd section and are dereferenced to find the function address.
+ ElfReader::SectionInfo opd_info_;
+ const char *opd_section_; // Must be checked for NULL before use.
+ int64 base_for_text_;
+
+ // Read PLT-related sections for the current architecture.
+ bool plts_supported_;
+ // Code size of each PLT function for the current architecture.
+ size_t plt_code_size_;
+ // Size of the special first entry in the .plt section that calls the runtime
+ // loader resolution routine, and that all other entries jump to when doing
+ // lazy symbol binding.
+ size_t plt0_size_;
+
+ // Maps a dynamic symbol index to a PLT offset.
+ // The vector entry index is the dynamic symbol index.
+ std::vector<uint64> symbols_plt_offsets_;
+
+ // Container for PLT function name strings. These strings are passed by
+ // reference to SymbolSink::AddSymbol() so they need to be stored somewhere.
+ std::vector<string> plt_function_names_;
+
+ bool visited_relocation_entries_;
+
+ // True if this is a .dwp file.
+ bool is_dwp_;
+};
+
+// Opens |path| read-only unless it names something that cannot be opened
+// as a file. The linux 2.6.XX kernel can show deleted files like this:
+//   /var/run/nscd/dbYLJYaE (deleted)
+// and the kernel-supplied vdso and vsyscall mappings like this:
+//   [vdso]
+//   [vsyscall]
+// For those, fd_ stays at -1.
+ElfReader::ElfReader(const string &path)
+    : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) {
+  const bool not_a_real_file = MyHasSuffixString(path, " (deleted)") ||
+                               path == "[vdso]" ||
+                               path == "[vsyscall]";
+  if (!not_a_real_file) {
+    fd_ = open(path.c_str(), O_RDONLY);
+  }
+}
+
+// Closes the file descriptor if one was opened, then releases whichever
+// of the two lazily created implementations exist.
+ElfReader::~ElfReader() {
+  if (fd_ != -1) {
+    close(fd_);
+  }
+  // Deleting a null pointer is a no-op, so no explicit checks are needed.
+  delete impl32_;
+  delete impl64_;
+}
+
+
+// The only word-size specific part of this file is IsNativeElfFile().
+#if __WORDSIZE == 32
+#define NATIVE_ELF_ARCH Elf32
+#elif __WORDSIZE == 64
+#define NATIVE_ELF_ARCH Elf64
+#else
+#error "Invalid word size"
+#endif
+
+// Reports whether |fd| is a valid descriptor for a file that
+// ElfReaderImpl<ElfArch>::IsArchElfFile accepts. |path| is unused.
+// Deliberately silent on failure because this gets called many times.
+template <typename ElfArch>
+static bool IsElfFile(const int fd, const string &path) {
+  return fd >= 0 && ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL);
+}
+
+// Checks the file against the ELF class selected by NATIVE_ELF_ARCH.
+bool ElfReader::IsNativeElfFile() const {
+  const bool is_native = IsElfFile<NATIVE_ELF_ARCH>(fd_, path_);
+  return is_native;
+}
+
+// Checks the file against the Elf32 architecture description.
+bool ElfReader::IsElf32File() const {
+  const bool is_elf32 = IsElfFile<Elf32>(fd_, path_);
+  return is_elf32;
+}
+
+// Checks the file against the Elf64 architecture description.
+bool ElfReader::IsElf64File() const {
+  const bool is_elf64 = IsElfFile<Elf64>(fd_, path_);
+  return is_elf64;
+}
+
+/*
+void ElfReader::AddSymbols(SymbolMap *symbols,
+ uint64 mem_offset, uint64 file_offset,
+ uint64 length) {
+ if (fd_ < 0)
+ return;
+ // TODO(chatham): Actually use the information about file offset and
+ // the length of the mapped section. On some machines the data
+ // section gets mapped as executable, and we'll end up reading the
+ // file twice and getting some of the offsets wrong.
+ if (IsElf32File()) {
+ GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB,
+ mem_offset, file_offset);
+ GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM,
+ mem_offset, file_offset);
+ } else if (IsElf64File()) {
+ GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB,
+ mem_offset, file_offset);
+ GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM,
+ mem_offset, file_offset);
+ }
+}
+*/
+
+// Visits every symbol: -1 for both filters means any binding and any type.
+void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink) {
+  const int any_binding = -1;
+  const int any_type = -1;
+  VisitSymbols(sink, any_binding, any_type);
+}
+
+// Filtered visit that reports adjusted (not raw) symbol values.
+void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink,
+                             int symbol_binding,
+                             int symbol_type) {
+  const bool get_raw_symbol_values = false;
+  VisitSymbols(sink, symbol_binding, symbol_type, get_raw_symbol_values);
+}
+
+// Dispatches to the 32- or 64-bit implementation. For the matching one,
+// relocation entries are visited first, followed by the symbols of the
+// SHT_SYMTAB table and then those of the SHT_DYNSYM table. Does nothing
+// when the file is neither an Elf32 nor an Elf64 file.
+void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink,
+                             int symbol_binding,
+                             int symbol_type,
+                             bool get_raw_symbol_values) {
+  if (IsElf32File()) {
+    ElfReaderImpl<Elf32> *const impl = GetImpl32();
+    impl->VisitRelocationEntries();
+    impl->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,
+                       get_raw_symbol_values);
+    impl->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,
+                       get_raw_symbol_values);
+    return;
+  }
+  if (IsElf64File()) {
+    ElfReaderImpl<Elf64> *const impl = GetImpl64();
+    impl->VisitRelocationEntries();
+    impl->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,
+                       get_raw_symbol_values);
+    impl->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,
+                       get_raw_symbol_values);
+  }
+}
+
+// Forwards to the implementation matching the file's ELF class; 0 when
+// the file is neither an Elf32 nor an Elf64 file.
+uint64 ElfReader::VaddrOfFirstLoadSegment() {
+  if (IsElf32File())
+    return GetImpl32()->VaddrOfFirstLoadSegment();
+  if (IsElf64File())
+    return GetImpl64()->VaddrOfFirstLoadSegment();
+  return 0;
+}
+
+// Name of section |shndx|, or NULL when the index is negative, not below
+// GetNumSections(), or the file is neither an Elf32 nor an Elf64 file.
+const char *ElfReader::GetSectionName(int shndx) {
+  if (shndx < 0)
+    return NULL;
+  if (static_cast<unsigned int>(shndx) >= GetNumSections())
+    return NULL;
+  if (IsElf32File())
+    return GetImpl32()->GetSectionNameByIndex(shndx);
+  if (IsElf64File())
+    return GetImpl64()->GetSectionNameByIndex(shndx);
+  return NULL;
+}
+
+// Section count as reported by the implementation matching the file's
+// ELF class; 0 when the file is neither an Elf32 nor an Elf64 file.
+uint64 ElfReader::GetNumSections() {
+  if (IsElf32File())
+    return GetImpl32()->GetNumSections();
+  if (IsElf64File())
+    return GetImpl64()->GetNumSections();
+  return 0;
+}
+
+// Returns a pointer to the contents of section |shndx| and stores the
+// section size in |size|. Returns NULL, leaving |size| untouched, when
+// |shndx| is negative or not below GetNumSections(), when the file is
+// neither an Elf32 nor an Elf64 file, or when the implementation cannot
+// provide the section.
+const char *ElfReader::GetSectionByIndex(int shndx, size_t *size) {
+  // The implementations index per-section arrays with |shndx|, so an
+  // out-of-range value must be rejected before it gets there.
+  if (shndx < 0 || static_cast<uint64>(shndx) >= GetNumSections()) {
+    return NULL;
+  }
+  if (IsElf32File()) {
+    return GetImpl32()->GetSectionContentsByIndex(shndx, size);
+  } else if (IsElf64File()) {
+    return GetImpl64()->GetSectionContentsByIndex(shndx, size);
+  } else {
+    return NULL;
+  }
+}
+
+// Name-based contents lookup, forwarded to the implementation matching
+// the file's ELF class; NULL when the file is neither Elf32 nor Elf64.
+const char *ElfReader::GetSectionByName(const string &section_name,
+                                        size_t *size) {
+  if (IsElf32File())
+    return GetImpl32()->GetSectionContentsByName(section_name, size);
+  if (IsElf64File())
+    return GetImpl64()->GetSectionContentsByName(section_name, size);
+  return NULL;
+}
+
+// Name-based lookup that also fills |info|, forwarded to the
+// implementation matching the file's ELF class; NULL when the file is
+// neither Elf32 nor Elf64.
+const char *ElfReader::GetSectionInfoByName(const string &section_name,
+                                            SectionInfo *info) {
+  if (IsElf32File())
+    return GetImpl32()->GetSectionInfoByName(section_name, info);
+  if (IsElf64File())
+    return GetImpl64()->GetSectionInfoByName(section_name, info);
+  return NULL;
+}
+
+// Compares a requested section name with a name found in the file.
+// When |name| starts with ".debug_" and |sh_name| starts with ".zdebug_",
+// only the parts after those prefixes are compared; in every other case
+// the two names must be identical.
+bool ElfReader::SectionNamesMatch(const string &name, const string &sh_name) {
+  const size_t plain_len = strlen(".debug_");
+  const size_t compressed_len = strlen(".zdebug_");
+  const bool wants_debug = name.compare(0, plain_len, ".debug_") == 0;
+  const bool found_compressed =
+      sh_name.compare(0, compressed_len, ".zdebug_") == 0;
+  if (!wants_debug || !found_compressed) {
+    return name == sh_name;
+  }
+  return name.compare(plain_len, string::npos,
+                      sh_name, compressed_len, string::npos) == 0;
+}
+
+// Asks the implementation matching the file's ELF class whether the file
+// is of type ET_DYN; false when the file is neither Elf32 nor Elf64.
+bool ElfReader::IsDynamicSharedObject() {
+  if (IsElf32File())
+    return GetImpl32()->IsDynamicSharedObject();
+  if (IsElf64File())
+    return GetImpl64()->IsDynamicSharedObject();
+  return false;
+}
+
+// Creates the 32-bit implementation on first use and returns it.
+ElfReaderImpl<Elf32> *ElfReader::GetImpl32() {
+  if (impl32_ != NULL)
+    return impl32_;
+  impl32_ = new ElfReaderImpl<Elf32>(path_, fd_);
+  return impl32_;
+}
+
+// Creates the 64-bit implementation on first use and returns it.
+ElfReaderImpl<Elf64> *ElfReader::GetImpl64() {
+  if (impl64_ != NULL)
+    return impl64_;
+  impl64_ = new ElfReaderImpl<Elf64>(path_, fd_);
+  return impl64_;
+}
+
+// Decides whether |fd| refers to an ELF binary of ElfArch that has not
+// been stripped. With debug_only=true the test is whether debug sections
+// are present; with debug_only=false it is whether a section of type
+// SHT_SYMTAB exists. Anything that is not an ElfArch ELF file yields
+// false.
+template <typename ElfArch>
+static bool IsNonStrippedELFBinaryImpl(const string &path, const int fd,
+                                       bool debug_only) {
+  if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) {
+    return false;
+  }
+  ElfReaderImpl<ElfArch> elf_reader(path, fd);
+  if (debug_only) {
+    return elf_reader.HasDebugSections();
+  }
+  return elf_reader.GetSectionByType(SHT_SYMTAB) != NULL;
+}
+
+// Shared body of the IsNon[Debug]StrippedELFBinary functions: opens
+// |path| read-only, tries the Elf32 check and then the Elf64 check, and
+// closes the descriptor again before returning. A file that cannot be
+// opened counts as stripped.
+static bool IsNonStrippedELFBinaryHelper(const string &path,
+                                         bool debug_only) {
+  const int fd = open(path.c_str(), O_RDONLY);
+  if (fd == -1) {
+    return false;
+  }
+
+  const bool non_stripped =
+      IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) ||
+      IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only);
+  close(fd);
+  return non_stripped;
+}
+
+// Symbol-table variant of the stripped check (debug_only is false).
+bool ElfReader::IsNonStrippedELFBinary(const string &path) {
+  const bool debug_only = false;
+  return IsNonStrippedELFBinaryHelper(path, debug_only);
+}
+
+// Debug-section variant of the stripped check (debug_only is true).
+bool ElfReader::IsNonDebugStrippedELFBinary(const string &path) {
+  const bool debug_only = true;
+  return IsNonStrippedELFBinaryHelper(path, debug_only);
+}
+} // namespace dwarf2reader
diff --git a/src/common/dwarf/elf_reader.h b/src/common/dwarf/elf_reader.h
new file mode 100644
index 00000000..07477341
--- /dev/null
+++ b/src/common/dwarf/elf_reader.h
@@ -0,0 +1,166 @@
+// Copyright 2005 Google Inc. All Rights Reserved.
+// Author: chatham@google.com (Andrew Chatham)
+// Author: satorux@google.com (Satoru Takabayashi)
+//
+// ElfReader handles reading in ELF. It can extract symbols from the
+// current process, which may be used to symbolize stack traces
+// without having to make a potentially dangerous call to fork().
+//
+// ElfReader dynamically allocates memory, so it is not appropriate to
+// use once the address space might be corrupted, such as during
+// process death.
+//
+// ElfReader supports both 32-bit and 64-bit ELF binaries.
+
+#ifndef COMMON_DWARF_ELF_READER_H__
+#define COMMON_DWARF_ELF_READER_H__
+
+#include <string>
+#include <vector>
+
+#include "common/dwarf/types.h"
+
+using std::string;
+using std::vector;
+using std::pair;
+
+namespace dwarf2reader {
+
+class SymbolMap;
+class Elf32;
+class Elf64;
+template<typename ElfArch>
+class ElfReaderImpl;
+
+// Front end for reading one ELF file. Each query checks whether the file
+// is a 32-bit or a 64-bit ELF file and forwards to a lazily created
+// ElfReaderImpl<Elf32> or ElfReaderImpl<Elf64>.
+//
+// NOTE(review): the destructor closes fd_ and deletes impl32_/impl64_,
+// but no copy constructor or assignment operator is declared, so a copy
+// would release the same resources twice. Treat instances as
+// non-copyable.
+class ElfReader {
+ public:
+ // Opens "path" read-only. Paths ending in " (deleted)" and the
+ // pseudo-paths "[vdso]" and "[vsyscall]" are not opened; for those, and
+ // when the open fails, every query below reports "not an ELF file".
+ explicit ElfReader(const string &path);
+ // Closes the file if it was opened and frees the implementations.
+ ~ElfReader();
+
+ // Parse the ELF prologue of this file and return whether it was
+ // successfully parsed and matches the word size and byte order of
+ // the current process.
+ bool IsNativeElfFile() const;
+
+ // Similar to IsNativeElfFile but checks if it's a 32-bit ELF file.
+ bool IsElf32File() const;
+
+ // Similar to IsNativeElfFile but checks if it's a 64-bit ELF file.
+ bool IsElf64File() const;
+
+ // Checks if it's an ELF file of type ET_DYN (shared object file).
+ bool IsDynamicSharedObject();
+
+ // Add symbols in the given ELF file into the provided SymbolMap,
+ // assuming that the file has been loaded into the specified
+ // offset.
+ //
+ // The remaining arguments are typically taken from a
+ // ProcMapsIterator (base/sysinfo.h) and describe which portions of
+ // the ELF file are mapped into which parts of memory:
+ //
+ // mem_offset - position at which the segment is mapped into memory
+ // file_offset - offset in the file where the mapping begins
+ // length - length of the mapped segment
+ //
+ // NOTE(review): the definition of this method in elf_reader.cc is
+ // commented out, so a call to it will not link.
+ void AddSymbols(SymbolMap *symbols,
+ uint64 mem_offset, uint64 file_offset,
+ uint64 length);
+
+ // Receiver interface for VisitSymbols(): AddSymbol() is the callback
+ // through which each visited symbol is reported.
+ class SymbolSink {
+ public:
+ virtual ~SymbolSink() {}
+ virtual void AddSymbol(const char *name, uint64 address, uint64 size) = 0;
+ };
+
+ // Like AddSymbols above, but with no address correction.
+ // Processes any SHT_SYMTAB section, followed by any SHT_DYNSYM section.
+ void VisitSymbols(SymbolSink *sink);
+
+ // Like VisitSymbols above, but for a specific symbol binding/type.
+ // A negative value for the binding and type parameters means any
+ // binding or type.
+ void VisitSymbols(SymbolSink *sink, int symbol_binding, int symbol_type);
+
+ // Like VisitSymbols above but can optionally export raw symbol values instead
+ // of adjusted ones.
+ void VisitSymbols(SymbolSink *sink, int symbol_binding, int symbol_type,
+ bool get_raw_symbol_values);
+
+ // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD
+ // segments are present. This is the address an ELF image was linked
+ // (by static linker) to be loaded at. Usually (but not always) 0 for
+ // shared libraries and position-independent executables.
+ uint64 VaddrOfFirstLoadSegment();
+
+ // Return the name of section "shndx". Returns NULL if the section
+ // is not found.
+ const char *GetSectionName(int shndx);
+
+ // Return the number of sections in the given ELF file.
+ uint64 GetNumSections();
+
+ // Get section "shndx" from the given ELF file. On success, return
+ // the pointer to the section and store the size in "size".
+ // On error, return NULL. The returned section data is only valid
+ // until the ElfReader gets destroyed.
+ const char *GetSectionByIndex(int shndx, size_t *size);
+
+ // Get section with "section_name" (ex. ".text", ".symtab") in the
+ // given ELF file. On success, return the pointer to the section
+ // and store the size in "size". On error, return NULL. The
+ // returned section data is only valid until the ElfReader gets
+ // destroyed.
+ const char *GetSectionByName(const string &section_name, size_t *size);
+
+ // This is like GetSectionByName() but it returns a lot of extra information
+ // about the section. The SectionInfo structure is almost identical to
+ // the typedef struct Elf64_Shdr defined in <elf.h>, but is redefined
+ // here so that the many short macro names in <elf.h> don't have to be
+ // added to our already cluttered namespace.
+ struct SectionInfo {
+ uint32 type; // Section type (SHT_xxx constant from elf.h).
+ uint64 flags; // Section flags (SHF_xxx constants from elf.h).
+ uint64 addr; // Section virtual address at execution.
+ uint64 offset; // Section file offset.
+ uint64 size; // Section size in bytes.
+ uint32 link; // Link to another section.
+ uint32 info; // Additional section information.
+ uint64 addralign; // Section alignment.
+ uint64 entsize; // Entry size if section holds a table.
+ };
+ const char *GetSectionInfoByName(const string &section_name,
+ SectionInfo *info);
+
+ // Check if "path" is an ELF binary that has not been stripped of symbol
+ // tables. This function supports both 32-bit and 64-bit ELF binaries.
+ static bool IsNonStrippedELFBinary(const string &path);
+
+ // Check if "path" is an ELF binary that has not been stripped of debug
+ // info. Unlike IsNonStrippedELFBinary, this function will return
+ // false for binaries passed through "strip -S".
+ static bool IsNonDebugStrippedELFBinary(const string &path);
+
+ // Match a requested section name with the section name as it
+ // appears in the elf-file, adjusting for compressed debug section
+ // names. For example, returns true if name == ".debug_abbrev" and
+ // sh_name == ".zdebug_abbrev"
+ static bool SectionNamesMatch(const string &name, const string &sh_name);
+
+ private:
+ // Lazily initialize impl32_ and return it.
+ ElfReaderImpl<Elf32> *GetImpl32();
+ // Ditto for impl64_.
+ ElfReaderImpl<Elf64> *GetImpl64();
+
+ // Path of the file we're reading.
+ const string path_;
+ // Read-only file descriptor for the file. May be -1 if there was an
+ // error during open.
+ int fd_;
+ // Implementations for 32-bit and 64-bit files. NULL until first needed;
+ // deleted by the destructor.
+ ElfReaderImpl<Elf32> *impl32_;
+ ElfReaderImpl<Elf64> *impl64_;
+};
+
+} // namespace dwarf2reader
+
+#endif // COMMON_DWARF_ELF_READER_H__