diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/common/dwarf/bytereader.cc | 39 | ||||
-rw-r--r-- | src/common/dwarf/bytereader_unittest.cc | 14 | ||||
-rw-r--r-- | src/common/dwarf/cfi_assembler.cc | 107 | ||||
-rw-r--r-- | src/common/dwarf/cfi_assembler.h | 115 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2enums.h | 14 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2reader.cc | 359 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2reader.h | 242 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2reader_cfi_unittest.cc | 529 | ||||
-rw-r--r-- | src/common/linux/dump_symbols.cc | 36 | ||||
-rw-r--r-- | src/common/linux/dwarf_cfi_to_module.cc | 10 | ||||
-rw-r--r-- | src/common/linux/dwarf_cfi_to_module_unittest.cc | 14 |
11 files changed, 1336 insertions, 143 deletions
diff --git a/src/common/dwarf/bytereader.cc b/src/common/dwarf/bytereader.cc index 7d784659..a9b0020f 100644 --- a/src/common/dwarf/bytereader.cc +++ b/src/common/dwarf/bytereader.cc @@ -82,13 +82,15 @@ uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) { bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const { if (encoding == DW_EH_PE_omit) return true; if (encoding == DW_EH_PE_aligned) return true; - if (DwarfPointerEncoding(encoding & 0x7) > DW_EH_PE_udata8) return false; - if (DwarfPointerEncoding(encoding & 0x70) > DW_EH_PE_funcrel) return false; + if ((encoding & 0x7) > DW_EH_PE_udata8) + return false; + if ((encoding & 0x70) > DW_EH_PE_funcrel) + return false; return true; } bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const { - switch (DwarfPointerEncoding(encoding & 0x70)) { + switch (encoding & 0x70) { case DW_EH_PE_absptr: return true; case DW_EH_PE_pcrel: return have_section_base_; case DW_EH_PE_textrel: return have_text_base_; @@ -101,13 +103,14 @@ bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const { uint64 ByteReader::ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, size_t *len) const { - // This is what the GCC unwinder does. - if (encoding == DW_EH_PE_omit) { - *len = 0; - return 0; - } - - // Aligned pointers are always absolute machine-sized and -signed pointers. + // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't + // see it here. + assert(encoding != DW_EH_PE_omit); + + // The Linux Standards Base 4.0 does not make this clear, but the + // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c) + // agree that aligned pointers are always absolute, machine-sized, + // machine-signed pointers. if (encoding == DW_EH_PE_aligned) { assert(have_section_base_); @@ -135,11 +138,17 @@ uint64 ByteReader::ReadEncodedPointer(const char *buffer, // Extract the value first, ignoring whether it's a pointer or an // offset relative to some base. 
uint64 offset; - switch (DwarfPointerEncoding(encoding & 0x0f)) { + switch (encoding & 0x0f) { case DW_EH_PE_absptr: - // As the low nybble value, DW_EH_PE_absptr simply means a - // machine-sized and -signed address; it doesn't mean it's absolute. - // So it is correct for us to relocate after this. + // DW_EH_PE_absptr is weird, as it is used as a meaningful value for + // both the high and low nybble of encoding bytes. When it appears in + // the high nybble, it means that the pointer is absolute, not an + // offset from some base address. When it appears in the low nybble, + // as here, it means that the pointer is stored as a normal + // machine-sized and machine-signed address. A low nybble of + // DW_EH_PE_absptr does not imply that the pointer is absolute; it is + // correct for us to treat the value as an offset from a base address + // if the upper nybble is not DW_EH_PE_absptr. offset = ReadAddress(buffer); *len = AddressSize(); break; @@ -193,7 +202,7 @@ uint64 ByteReader::ReadEncodedPointer(const char *buffer, // Find the appropriate base address. 
uint64 base; - switch (DwarfPointerEncoding(encoding & 0x70)) { + switch (encoding & 0x70) { case DW_EH_PE_absptr: base = 0; break; diff --git a/src/common/dwarf/bytereader_unittest.cc b/src/common/dwarf/bytereader_unittest.cc index 5e854c6e..729c54a8 100644 --- a/src/common/dwarf/bytereader_unittest.cc +++ b/src/common/dwarf/bytereader_unittest.cc @@ -56,6 +56,7 @@ struct ReaderFixture { }; class Reader: public ReaderFixture, public Test { }; +class ReaderDeathTest: public ReaderFixture, public Test { }; TEST_F(Reader, SimpleConstructor) { ByteReader reader(ENDIANNESS_BIG); @@ -373,13 +374,13 @@ TEST_F(Reader, ValidEncodings) { EXPECT_FALSE(reader.ValidEncoding(DwarfPointerEncoding(0xd0))); } -TEST_F(Reader, DW_EH_PE_omit) { +TEST_F(ReaderDeathTest, DW_EH_PE_omit) { static const char data[1] = { 42 }; ByteReader reader(ENDIANNESS_BIG); reader.SetAddressSize(4); - EXPECT_EQ(0U, reader.ReadEncodedPointer(data, dwarf2reader::DW_EH_PE_omit, - &pointer_size)); - EXPECT_EQ(0U, pointer_size); + EXPECT_DEATH(reader.ReadEncodedPointer(data, dwarf2reader::DW_EH_PE_omit, + &pointer_size), + "encoding != DW_EH_PE_omit"); } TEST_F(Reader, DW_EH_PE_absptr4) { @@ -561,6 +562,7 @@ TEST(UsableBase, CFI) { EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_textrel)); EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_datarel)); EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_funcrel)); + EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_omit)); EXPECT_FALSE(reader.UsableEncoding(DwarfPointerEncoding(0x60))); } @@ -572,6 +574,7 @@ TEST(UsableBase, Text) { EXPECT_TRUE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_textrel)); EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_datarel)); EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_funcrel)); + EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_omit)); EXPECT_FALSE(reader.UsableEncoding(DwarfPointerEncoding(0x60))); } @@ -583,6 +586,7 @@ TEST(UsableBase, Data) { 
EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_textrel)); EXPECT_TRUE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_datarel)); EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_funcrel)); + EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_omit)); EXPECT_FALSE(reader.UsableEncoding(DwarfPointerEncoding(0x60))); } @@ -594,6 +598,7 @@ TEST(UsableBase, Function) { EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_textrel)); EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_datarel)); EXPECT_TRUE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_funcrel)); + EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_omit)); EXPECT_FALSE(reader.UsableEncoding(DwarfPointerEncoding(0x60))); } @@ -606,6 +611,7 @@ TEST(UsableBase, ClearFunction) { EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_textrel)); EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_datarel)); EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_funcrel)); + EXPECT_FALSE(reader.UsableEncoding(dwarf2reader::DW_EH_PE_omit)); EXPECT_FALSE(reader.UsableEncoding(DwarfPointerEncoding(0x60))); } diff --git a/src/common/dwarf/cfi_assembler.cc b/src/common/dwarf/cfi_assembler.cc index 07995458..110aa2e7 100644 --- a/src/common/dwarf/cfi_assembler.cc +++ b/src/common/dwarf/cfi_assembler.cc @@ -33,11 +33,13 @@ // See cfi_assembler.h for details. 
#include <cassert> +#include <stdlib.h> #include "common/dwarf/cfi_assembler.h" -#include "common/dwarf/dwarf2enums.h" namespace google_breakpad { + +using dwarf2reader::DwarfPointerEncoding; CFISection &CFISection::CIEHeader(u_int64_t code_alignment_factor, int data_alignment_factor, @@ -47,16 +49,21 @@ CFISection &CFISection::CIEHeader(u_int64_t code_alignment_factor, bool dwarf64) { assert(!entry_length_); entry_length_ = new PendingLength(); + in_fde_ = false; if (dwarf64) { D32(0xffffffff); D64(entry_length_->length); entry_length_->start = Here(); - D64(0xffffffffffffffffULL); // CIE distinguished value + // Write the CIE distinguished value. In .debug_frame sections, it's + // ~0; in .eh_frame sections, it's zero. + D64(eh_frame_ ? 0 : ~(u_int64_t)0); } else { D32(entry_length_->length); entry_length_->start = Here(); - D32(0xffffffff); // CIE distinguished value + // Write the CIE distinguished value. In .debug_frame sections, it's + // ~0; in .eh_frame sections, it's zero. + D32(eh_frame_ ? 
0 : ~(u_int32_t)0); } D8(version); AppendCString(augmentation); @@ -75,19 +82,32 @@ CFISection &CFISection::FDEHeader(Label cie_pointer, bool dwarf64) { assert(!entry_length_); entry_length_ = new PendingLength(); + in_fde_ = true; + fde_start_address_ = initial_location; if (dwarf64) { D32(0xffffffff); D64(entry_length_->length); entry_length_->start = Here(); - D64(cie_pointer); + if (eh_frame_) + D64(Here() - cie_pointer); + else + D64(cie_pointer); } else { D32(entry_length_->length); entry_length_->start = Here(); - D32(cie_pointer); + if (eh_frame_) + D32(Here() - cie_pointer); + else + D32(cie_pointer); } - Append(endianness(), address_size_, initial_location); - Append(endianness(), address_size_, address_range); + EncodedPointer(initial_location); + // The FDE length in an .eh_frame section uses the same encoding as the + // initial location, but ignores the base address (selected by the upper + // nybble of the encoding), as it's a length, not an address that can be + // made relative. + EncodedPointer(address_range, + DwarfPointerEncoding(pointer_encoding_ & 0x0f)); return *this; } @@ -97,7 +117,80 @@ CFISection &CFISection::FinishEntry() { entry_length_->length = Here() - entry_length_->start; delete entry_length_; entry_length_ = NULL; + in_fde_ = false; return *this; } +CFISection &CFISection::EncodedPointer(u_int64_t address, + DwarfPointerEncoding encoding, + const EncodedPointerBases &bases) { + // Omitted data is extremely easy to emit. + if (encoding == dwarf2reader::DW_EH_PE_omit) + return *this; + + // If (encoding & dwarf2reader::DW_EH_PE_indirect) != 0, then we assume + // that ADDRESS is the address at which the pointer is stored --- in + // other words, that bit has no effect on how we write the pointer. + encoding = DwarfPointerEncoding(encoding & ~dwarf2reader::DW_EH_PE_indirect); + + // Find the base address to which this pointer is relative. The upper + // nybble of the encoding specifies this. 
+ u_int64_t base; + switch (encoding & 0xf0) { + case dwarf2reader::DW_EH_PE_absptr: base = 0; break; + case dwarf2reader::DW_EH_PE_pcrel: base = bases.cfi + Size(); break; + case dwarf2reader::DW_EH_PE_textrel: base = bases.text; break; + case dwarf2reader::DW_EH_PE_datarel: base = bases.data; break; + case dwarf2reader::DW_EH_PE_funcrel: base = fde_start_address_; break; + case dwarf2reader::DW_EH_PE_aligned: base = 0; break; + default: abort(); + }; + + // Make ADDRESS relative. Yes, this is appropriate even for "absptr" + // values; see gcc/unwind-pe.h. + address -= base; + + // Align the pointer, if required. + if ((encoding & 0xf0) == dwarf2reader::DW_EH_PE_aligned) + Align(AddressSize()); + + // Append ADDRESS to this section in the appropriate form. For the + // fixed-width forms, we don't need to differentiate between signed and + // unsigned encodings, because ADDRESS has already been extended to 64 + // bits before it was passed to us. + switch (encoding & 0x0f) { + case dwarf2reader::DW_EH_PE_absptr: + Address(address); + break; + + case dwarf2reader::DW_EH_PE_uleb128: + ULEB128(address); + break; + + case dwarf2reader::DW_EH_PE_sleb128: + LEB128(address); + break; + + case dwarf2reader::DW_EH_PE_udata2: + case dwarf2reader::DW_EH_PE_sdata2: + D16(address); + break; + + case dwarf2reader::DW_EH_PE_udata4: + case dwarf2reader::DW_EH_PE_sdata4: + D32(address); + break; + + case dwarf2reader::DW_EH_PE_udata8: + case dwarf2reader::DW_EH_PE_sdata8: + D64(address); + break; + + default: + abort(); + } + + return *this; }; + +} // namespace google_breakpad diff --git a/src/common/dwarf/cfi_assembler.h b/src/common/dwarf/cfi_assembler.h index 449a8078..f5bf9710 100644 --- a/src/common/dwarf/cfi_assembler.h +++ b/src/common/dwarf/cfi_assembler.h @@ -39,11 +39,13 @@ #include <string> +#include "common/dwarf/dwarf2enums.h" #include "google_breakpad/common/breakpad_types.h" #include "processor/test_assembler.h" namespace google_breakpad { +using 
dwarf2reader::DwarfPointerEncoding; using google_breakpad::TestAssembler::Endianness; using google_breakpad::TestAssembler::Label; using google_breakpad::TestAssembler::Section; @@ -51,11 +53,53 @@ using std::string; class CFISection: public Section { public: + + // CFI augmentation strings beginning with 'z', defined by the + // Linux/IA-64 C++ ABI, can specify interesting encodings for + // addresses appearing in FDE headers and call frame instructions (and + // for additional fields whose presence the augmentation string + // specifies). In particular, pointers can be specified to be relative + // to various base addresses: the start of the .text section, the + // location holding the address itself, and so on. These allow the + // frame data to be position-independent even when they live in + // write-protected pages. These variants are specified at the + // following two URLs: + // + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // CFISection leaves the production of well-formed 'z'-augmented CIEs and + // FDEs to the user, but does provide EncodedPointer, to emit + // properly-encoded addresses for a given pointer encoding. + // EncodedPointer uses an instance of this structure to find the base + // addresses it should use; you can establish a default for all encoded + // pointers appended to this section with SetEncodedPointerBases. + struct EncodedPointerBases { + EncodedPointerBases() : cfi(), text(), data() { } + + // The starting address of this CFI section in memory, for + // DW_EH_PE_pcrel. DW_EH_PE_pcrel pointers may only be used in data + // that is loaded into the program's address space. + u_int64_t cfi; + + // The starting address of this file's .text section, for DW_EH_PE_textrel. 
+ u_int64_t text; + + // The starting address of this file's .got or .eh_frame_hdr section, + // for DW_EH_PE_datarel. + u_int64_t data; + }; + // Create a CFISection whose endianness is ENDIANNESS, and where - // machine addresses are ADDRESS_SIZE bytes long. - CFISection(Endianness endianness, size_t address_size) - : Section(endianness), address_size_(address_size), - entry_length_(NULL) { + // machine addresses are ADDRESS_SIZE bytes long. If EH_FRAME is + // true, use the .eh_frame format, as described by the Linux + // Standards Base Core Specification, instead of the DWARF CFI + // format. + CFISection(Endianness endianness, size_t address_size, + bool eh_frame = false) + : Section(endianness), address_size_(address_size), eh_frame_(eh_frame), + pointer_encoding_(dwarf2reader::DW_EH_PE_absptr), + encoded_pointer_bases_(), entry_length_(NULL), in_fde_(false) { // The 'start', 'Here', and 'Mark' members of a CFISection all refer // to section offsets. start() = 0; @@ -64,6 +108,22 @@ class CFISection: public Section { // Return this CFISection's address size. size_t AddressSize() const { return address_size_; } + // Return true if this CFISection uses the .eh_frame format, or + // false if it contains ordinary DWARF CFI data. + bool ContainsEHFrame() const { return eh_frame_; } + + // Use ENCODING for pointers in calls to FDEHeader and EncodedPointer. + void SetPointerEncoding(DwarfPointerEncoding encoding) { + pointer_encoding_ = encoding; + } + + // Use the addresses in BASES as the base addresses for encoded + // pointers in subsequent calls to FDEHeader or EncodedPointer. + // This function makes a copy of BASES. + void SetEncodedPointerBases(const EncodedPointerBases &bases) { + encoded_pointer_bases_ = bases; + } + // Append a Common Information Entry header to this section with the // given values. If dwarf64 is true, use the 64-bit DWARF initial // length format for the CIE's initial length. 
Return a reference to @@ -109,6 +169,35 @@ class CFISection: public Section { return *this; } + // Append ADDRESS to this section, in the appropriate size and + // endianness. Return a reference to this section. + CFISection &Address(u_int64_t address) { + Section::Append(endianness(), address_size_, address); + return *this; + } + CFISection &Address(Label address) { + Section::Append(endianness(), address_size_, address); + return *this; + } + + // Append ADDRESS to this section, using ENCODING and BASES. ENCODING + // defaults to this section's default encoding, established by + // SetPointerEncoding. BASES defaults to this section's bases, set by + // SetEncodedPointerBases. If the DW_EH_PE_indirect bit is set in the + // encoding, assume that ADDRESS is where the true address is stored. + // Return a reference to this section. + // + // (C++ doesn't let me use default arguments here, because I want to + // refer to members of *this in the default argument expression.) + CFISection &EncodedPointer(u_int64_t address) { + return EncodedPointer(address, pointer_encoding_, encoded_pointer_bases_); + } + CFISection &EncodedPointer(u_int64_t address, DwarfPointerEncoding encoding) { + return EncodedPointer(address, encoding, encoded_pointer_bases_); + } + CFISection &EncodedPointer(u_int64_t address, DwarfPointerEncoding encoding, + const EncodedPointerBases &bases); + // Restate some member functions, to keep chaining working nicely. CFISection &Mark(Label *label) { Section::Mark(label); return *this; } CFISection &D8(u_int8_t v) { Section::D8(v); return *this; } @@ -133,6 +222,16 @@ class CFISection: public Section { // The size of a machine address for the data in this section. size_t address_size_; + // If true, we are generating a Linux .eh_frame section, instead of + // a standard DWARF .debug_frame section. + bool eh_frame_; + + // The encoding to use for FDE pointers. 
+ DwarfPointerEncoding pointer_encoding_; + + // The base addresses to use when emitting encoded pointers. + EncodedPointerBases encoded_pointer_bases_; + // The length value for the current entry. // // Oddly, this must be dynamically allocated. Labels never get new @@ -142,6 +241,14 @@ class CFISection: public Section { // headers and track their positions. The alternative is explicit // destructor invocation and a placement new. Ick. PendingLength *entry_length_; + + // True if we are currently emitting an FDE --- that is, we have + // called FDEHeader but have not yet called FinishEntry. + bool in_fde_; + + // If in_fde_ is true, this is its starting address. We use this for + // emitting DW_EH_PE_funcrel pointers. + u_int64_t fde_start_address_; }; } // namespace google_breakpad diff --git a/src/common/dwarf/dwarf2enums.h b/src/common/dwarf/dwarf2enums.h index 8444821a..8ac28eb2 100644 --- a/src/common/dwarf/dwarf2enums.h +++ b/src/common/dwarf/dwarf2enums.h @@ -1,3 +1,5 @@ +// -*- mode: c++ -*- + // Copyright (c) 2010 Google Inc. All Rights Reserved. // // Redistribution and use in source and binary forms, with or without @@ -588,12 +590,22 @@ enum DwarfPointerEncoding DW_EH_PE_sdata2 = 0x0A, DW_EH_PE_sdata4 = 0x0B, DW_EH_PE_sdata8 = 0x0C, - DW_EH_PE_signed = 0x08, DW_EH_PE_pcrel = 0x10, DW_EH_PE_textrel = 0x20, DW_EH_PE_datarel = 0x30, DW_EH_PE_funcrel = 0x40, DW_EH_PE_aligned = 0x50, + + // The GNU toolchain sources define this enum value as well, + // simply to help classify the lower nybble values into signed and + // unsigned groups. + DW_EH_PE_signed = 0x08, + + // This is not documented in LSB 4.0, but it is used in both the + // Linux and OS X toolchains. It can be added to any other + // encoding (except DW_EH_PE_aligned), and indicates that the + // encoded value represents the address at which the true address + // is stored, not the true address itself. 
DW_EH_PE_indirect = 0x80 }; diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc index fb6d7c4d..92ceb4a1 100644 --- a/src/common/dwarf/dwarf2reader.cc +++ b/src/common/dwarf/dwarf2reader.cc @@ -1245,6 +1245,8 @@ class CallFrameInfo::State { // 'o' unsigned LEB128 offset (OPERANDS->offset) // 's' signed LEB128 offset (OPERANDS->signed_offset) // 'a' machine-size address (OPERANDS->offset) + // (If the CIE has a 'z' augmentation string, 'a' uses the + // encoding specified by the 'R' argument.) // '1' a one-byte offset (OPERANDS->offset) // '2' a two-byte offset (OPERANDS->offset) // '4' a four-byte offset (OPERANDS->offset) @@ -1381,9 +1383,11 @@ bool CallFrameInfo::State::ParseOperands(const char *format, break; case 'a': - if (reader_->AddressSize() > bytes_left) return ReportIncomplete(); - operands->offset = reader_->ReadAddress(cursor_); - cursor_ += reader_->AddressSize(); + operands->offset = + reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding, + &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; break; case '1': @@ -1773,15 +1777,24 @@ bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) { entry->kind = kUnknown; entry->end = NULL; - // Read the initial length. This sets reader_'s offset size. The length - // could be something like (uint64)-1, so we have to do two comparisons - // here. + // Read the initial length. This sets reader_'s offset size. size_t length_size; uint64 length = reader_->ReadInitialLength(cursor, &length_size); - if (length_size > size_t(buffer_end - cursor) || - length > size_t(buffer_end - (cursor + length_size))) + if (length_size > size_t(buffer_end - cursor)) return ReportIncomplete(entry); cursor += length_size; + + // In a .eh_frame section, a length of zero marks the end of the series + // of entries. + if (length == 0 && eh_frame_) { + entry->kind = kTerminator; + entry->end = cursor; + return true; + } + + // Validate the length. 
+ if (length > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); // The length is the number of bytes after the initial length field; // we have that position handy at this point, so compute the end @@ -1794,16 +1807,37 @@ bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) { size_t offset_size = reader_->OffsetSize(); if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); entry->id = reader_->ReadOffset(cursor); - cursor += offset_size; + + // Don't advance cursor past id field yet; in .eh_frame data we need + // the id's position to compute the section offset of an FDE's CIE. // Now we can decide what kind of entry this is. - if (offset_size == 4) - entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; - else { - assert(offset_size == 8); - entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + if (eh_frame_) { + // In .eh_frame data, an ID of zero marks the entry as a CIE, and + // anything else is an offset from the id field of the FDE to the start + // of the CIE. + if (entry->id == 0) { + entry->kind = kCIE; + } else { + entry->kind = kFDE; + // Turn the offset from the id into an offset from the buffer's start. + entry->id = (cursor - buffer_) - entry->id; + } + } else { + // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the + // offset size for the entry) marks the entry as a CIE, and anything + // else is the offset of the CIE from the beginning of the section. + if (offset_size == 4) + entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; + else { + assert(offset_size == 8); + entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + } } + // Now advance cursor past the id. + cursor += offset_size; + // The fields specific to this kind of entry start here. 
entry->fields = cursor; @@ -1824,6 +1858,8 @@ bool CallFrameInfo::ReadCIEFields(CIE *cie) { cie->code_alignment_factor = 0; cie->data_alignment_factor = 0; cie->return_address_register = 0; + cie->has_z_augmentation = false; + cie->pointer_encoding = DW_EH_PE_absptr; cie->instructions = 0; // Parse the version number. @@ -1833,10 +1869,19 @@ bool CallFrameInfo::ReadCIEFields(CIE *cie) { cursor++; // If we don't recognize the version, we can't parse any more fields - // of the CIE. - if (cie->version < 1 || 3 < cie->version) { - reporter_->UnrecognizedVersion(cie->offset, cie->version); - return false; + // of the CIE. For DWARF CFI, we handle versions 1 through 3 (there + // was never a version 2 of CFI data). For .eh_frame, we handle only + // version 1. + if (eh_frame_) { + if (cie->version != 1) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } + } else { + if (cie->version < 1 || 3 < cie->version) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } } const char *augmentation_start = cursor; @@ -1848,11 +1893,16 @@ bool CallFrameInfo::ReadCIEFields(CIE *cie) { // Skip the terminating '\0'. cursor++; - // If we don't recognize this augmentation, we can't parse any more - // fields of the CIE. - if (!cie->augmentation.empty()) { - // Augmentations can have arbitrary effects on the form of rest of - // the content, so we have to give up. + // Is this an augmentation we recognize? + if (cie->augmentation.empty()) { + ; // Stock DWARF CFI. + } else if (cie->augmentation[0] == 'z') { + // Linux C++ ABI 'z' augmentation, used for exception handling data. + cie->has_z_augmentation = true; + } else { + // Not an augmentation we recognize. Augmentations can have + // arbitrary effects on the form of rest of the content, so we + // have to give up. 
reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); return false; } @@ -1878,6 +1928,100 @@ bool CallFrameInfo::ReadCIEFields(CIE *cie) { cursor += len; } + // If we have a 'z' augmentation string, find the augmentation data and + // use the augmentation string to parse it. + if (cie->has_z_augmentation) { + size_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len + data_size) + return ReportIncomplete(cie); + cursor += len; + const char *data = cursor; + cursor += data_size; + const char *data_end = cursor; + + cie->has_z_lsda = false; + cie->has_z_personality = false; + cie->has_z_signal_frame = false; + + // Walk the augmentation string, and extract values from the + // augmentation data as the string directs. + for (size_t i = 1; i < cie->augmentation.size(); i++) { + switch (cie->augmentation[i]) { + case 'L': + // The CIE's augmentation data holds the language-specific data + // area pointer's encoding, and the FDE's augmentation data holds + // the pointer itself. + cie->has_z_lsda = true; + // Fetch the LSDA encoding from the augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->lsda_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->lsda_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); + return false; + } + // Don't check if the encoding is usable here --- we haven't + // read the FDE's fields yet, so we're not prepared for + // DW_EH_PE_funcrel, although that's a fine encoding for the + // LSDA to use, since it appears in the FDE. + break; + + case 'P': + // The CIE's augmentation data holds the personality routine + // pointer's encoding, followed by the pointer itself. + cie->has_z_personality = true; + // Fetch the personality routine pointer's encoding from the + // augmentation data. 
+ if (data >= data_end) return ReportIncomplete(cie); + cie->personality_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->personality_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->personality_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + // Fetch the personality routine's pointer itself from the data. + cie->personality_address = + reader_->ReadEncodedPointer(data, cie->personality_encoding, + &len); + if (len > size_t(data_end - data)) + return ReportIncomplete(cie); + data += len; + break; + + case 'R': + // The CIE's augmentation data holds the pointer encoding to use + // for addresses in the FDE. + if (data >= data_end) return ReportIncomplete(cie); + cie->pointer_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->pointer_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->pointer_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + break; + + case 'S': + // Frames using this CIE are signal delivery frames. + cie->has_z_signal_frame = true; + break; + + default: + // An augmentation we don't recognize. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + } + // The CIE's instructions start here. cie->instructions = cursor; @@ -1886,19 +2030,66 @@ bool CallFrameInfo::ReadCIEFields(CIE *cie) { bool CallFrameInfo::ReadFDEFields(FDE *fde) { const char *cursor = fde->fields; - size_t address_size = reader_->AddressSize(); + size_t size; - // Since both fields are of known size, we can do all bounds - // checking here. 
- if (size_t(fde->end - cursor) < 2 * address_size) + fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, + &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + reader_->SetFunctionBase(fde->address); + + // For the length, we strip off the upper nybble of the encoding used for + // the starting address. + DwarfPointerEncoding length_encoding = + DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); + fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); + if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde); + cursor += size; + + // If the CIE has a 'z' augmentation string, then augmentation data + // appears here. + if (fde->cie->has_z_augmentation) { + size_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); + if (size_t(fde->end - cursor) < size + data_size) + return ReportIncomplete(fde); + cursor += size; + + // In the abstract, we should walk the augmentation string, and extract + // items from the FDE's augmentation data as we encounter augmentation + // string characters that specify their presence: the ordering of items + // in the augmentation string determines the arrangement of values in + // the augmentation data. + // + // In practice, there's only ever one value in FDE augmentation data + // that we support --- the LSDA pointer --- and we have to bail if we + // see any unrecognized augmentation string characters. So if there is + // anything here at all, we know what it is, and where it starts. + if (fde->cie->has_z_lsda) { + // Check whether the LSDA's pointer encoding is usable now: only once + // we've parsed the FDE's starting address do we call reader_-> + // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes + // usable. 
+ if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { + reporter_->UnusablePointerEncoding(fde->cie->offset, + fde->cie->lsda_encoding); + return false; + } - // Parse the start address and size. - fde->address = reader_->ReadAddress(cursor); - fde->size = reader_->ReadAddress(cursor + address_size); + fde->lsda_address = + reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); + if (size > data_size) + return ReportIncomplete(fde); + // Ideally, we would also complain here if there were unconsumed + // augmentation data. + } + + cursor += data_size; + } // The FDE's instructions start after those. - fde->instructions = cursor + 2 * address_size; + fde->instructions = cursor; return true; } @@ -1916,18 +2107,35 @@ bool CallFrameInfo::Start() { cursor = entry_end, all_ok = all_ok && ok) { FDE fde; - // Read the entry's prologue. - if (!ReadEntryPrologue(cursor, &fde)) - // We can't continue processing the section, because we may not - // have gotten the length. - return false; - // Make it easy to skip this entry with 'continue': assume that // things are not okay until we've checked all the data, and // prepare the address of the next entry. ok = false; + + // Read the entry's prologue. + if (!ReadEntryPrologue(cursor, &fde)) { + if (!fde.end) { + // If we couldn't even figure out this entry's extent, then we + // must stop processing entries altogether. + all_ok = false; + break; + } + entry_end = fde.end; + continue; + } + + // The next iteration picks up after this entry. entry_end = fde.end; + // Did we see an .eh_frame terminating mark? + if (fde.kind == kTerminator) { + // If there appears to be more data left in the section after the + // terminating mark, warn the user. But this is just a warning; + // we leave all_ok true. + if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); + break; + } + // In this loop, we skip CIEs. We only parse them fully when we // parse an FDE that refers to them. 
This limits our memory // consumption (beyond the buffer itself) to that needed to @@ -1973,10 +2181,38 @@ bool CallFrameInfo::Start() { continue; } + if (cie.has_z_augmentation) { + // Report the personality routine address, if we have one. + if (cie.has_z_personality) { + if (!handler_ + ->PersonalityRoutine(cie.personality_address, + IsIndirectEncoding(cie.personality_encoding))) + continue; + } + + // Report the language-specific data area address, if we have one. + if (cie.has_z_lsda) { + if (!handler_ + ->LanguageSpecificDataArea(fde.lsda_address, + IsIndirectEncoding(cie.lsda_encoding))) + continue; + } + + // If this is a signal-handling frame, report that. + if (cie.has_z_signal_frame) { + if (!handler_->SignalHandler()) + continue; + } + } + // Interpret the CIE's instructions, and then the FDE's instructions. State state(reader_, handler_, reporter_, fde.address); ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); + // Tell the ByteReader that the function start address from the + // FDE header is no longer valid. + reader_->ClearFunctionBase(); + // Report the end of the entry. 
handler_->End(); } @@ -1989,9 +2225,11 @@ const char *CallFrameInfo::KindName(EntryKind kind) { return "entry"; else if (kind == CallFrameInfo::kCIE) return "common information entry"; - else { - assert(kind == CallFrameInfo::kFDE); + else if (kind == CallFrameInfo::kFDE) return "frame description entry"; + else { + assert (kind == CallFrameInfo::kTerminator); + return ".eh_frame sequence terminator"; } } @@ -2001,15 +2239,22 @@ bool CallFrameInfo::ReportIncomplete(Entry *entry) { } void CallFrameInfo::Reporter::Incomplete(uint64 offset, - CallFrameInfo::EntryKind kind) { + CallFrameInfo::EntryKind kind) { fprintf(stderr, "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n", filename_.c_str(), CallFrameInfo::KindName(kind), offset, section_.c_str()); } +void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) { + fprintf(stderr, + "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker" + " before end of section contents\n", + filename_.c_str(), offset, section_.c_str()); +} + void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset, - uint64 cie_offset) { + uint64 cie_offset) { fprintf(stderr, "%s: CFI frame description entry at offset 0x%llx in '%s':" " CIE pointer is out of range: 0x%llx\n", @@ -2038,6 +2283,22 @@ void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset, filename_.c_str(), offset, section_.c_str(), aug.c_str()); } +void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset, + uint8 encoding) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); +} + +void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset, + uint8 encoding) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies a pointer encoding for which we have no base address: 0x%02x\n", + filename_.c_str(), offset, 
section_.c_str(), encoding); +} + void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { fprintf(stderr, "%s: CFI common information entry at offset 0x%llx in '%s':" @@ -2047,8 +2308,8 @@ void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { } void CallFrameInfo::Reporter::BadInstruction(uint64 offset, - CallFrameInfo::EntryKind kind, - uint64 insn_offset) { + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { fprintf(stderr, "%s: CFI %s at offset 0x%llx in section '%s':" " the instruction at offset 0x%llx is unrecognized\n", @@ -2057,8 +2318,8 @@ void CallFrameInfo::Reporter::BadInstruction(uint64 offset, } void CallFrameInfo::Reporter::NoCFARule(uint64 offset, - CallFrameInfo::EntryKind kind, - uint64 insn_offset) { + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { fprintf(stderr, "%s: CFI %s at offset 0x%llx in section '%s':" " the instruction at offset 0x%llx assumes that a CFA rule has" @@ -2068,8 +2329,8 @@ void CallFrameInfo::Reporter::NoCFARule(uint64 offset, } void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, - CallFrameInfo::EntryKind kind, - uint64 insn_offset) { + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { fprintf(stderr, "%s: CFI %s at offset 0x%llx in section '%s':" " the DW_CFA_restore_state instruction at offset 0x%llx" @@ -2079,8 +2340,8 @@ void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, } void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset, - CallFrameInfo::EntryKind kind, - uint64 insn_offset) { + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { fprintf(stderr, "%s: CFI %s at offset 0x%llx in section '%s':" " the DW_CFA_restore_state instruction at offset 0x%llx" diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h index 136b8932..a7a13afb 100644 --- a/src/common/dwarf/dwarf2reader.h +++ b/src/common/dwarf/dwarf2reader.h @@ -46,6 +46,7 @@ #include <utility> #include <vector> +#include 
"common/dwarf/bytereader.h" #include "common/dwarf/dwarf2enums.h" #include "common/dwarf/types.h" @@ -53,7 +54,6 @@ using namespace std; namespace dwarf2reader { struct LineStateMachine; -class ByteReader; class Dwarf2Handler; class LineInfoHandler; @@ -556,7 +556,7 @@ class CallFrameInfo { public: // The different kinds of entries one finds in CFI. Used internally, // and for error reporting. - enum EntryKind { kUnknown, kCIE, kFDE }; + enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; // The handler class to which the parser hands the parsed call frame // information. Defined below. @@ -567,19 +567,75 @@ class CallFrameInfo { class Reporter; // Create a DWARF CFI parser. BUFFER points to the contents of the - // .debug_frame section to parse; BUFFER_LENGTH is its length in - // bytes. REPORTER is an error reporter the parser should use to - // report problems. READER is a ByteReader instance that has the - // endianness and address size set properly. Report the data we find - // to HANDLER. + // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. + // REPORTER is an error reporter the parser should use to report + // problems. READER is a ByteReader instance that has the endianness and + // address size set properly. Report the data we find to HANDLER. + // + // This class can also parse Linux C++ exception handling data, as found + // in '.eh_frame' sections. This data is a variant of DWARF CFI that is + // placed in loadable segments so that it is present in the program's + // address space, and is interpreted by the C++ runtime to search the + // call stack for a handler interested in the exception being thrown, + // actually pop the frames, and find cleanup code to run. 
+ // + // There are two differences between the call frame information described + // in the DWARF standard and the exception handling data Linux places in + // the .eh_frame section: + // + // - Exception handling data uses a different format for call frame + // information entry headers. The distinguished CIE id, the way FDEs + // refer to their CIEs, and the way the end of the series of entries is + // determined are all slightly different. + // + // If the constructor's EH_FRAME argument is true, then the + // CallFrameInfo parses the entry headers as Linux C++ exception + // handling data. If EH_FRAME is false or omitted, the CallFrameInfo + // parses standard DWARF call frame information. + // + // - Linux C++ exception handling data uses CIE augmentation strings + // beginning with 'z' to specify the presence of additional data after + // the CIE and FDE headers and special encodings used for addresses in + // frame description entries. + // + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or + // exception handling data if you have supplied READER with the base + // addresses needed to interpret the pointer encodings that 'z' + // augmentations can specify. See the ByteReader interface for details + // about the base addresses. See the CallFrameInfo::Handler interface + // for details about the additional information one might find in + // 'z'-augmented data. + // + // Thus: + // + // - If you are parsing standard DWARF CFI, as found in a .debug_frame + // section, you should pass false for the EH_FRAME argument, or omit + // it, and you need not worry about providing READER with the + // additional base addresses. + // + // - If you want to parse Linux C++ exception handling data from a + // .eh_frame section, you should pass EH_FRAME as true, and call + // READER's Set*Base member functions before calling our Start method.
+ // + // - If you want to parse DWARF CFI that uses the 'z' augmentations + // (although I don't think any toolchain ever emits such data), you + // could pass false for EH_FRAME, but call READER's Set*Base members. + // + // The extensions the Linux C++ ABI makes to DWARF for exception + // handling are described here, rather poorly: + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // The mechanics of C++ exception handling, personality routines, + // and language-specific data areas are described here, rather nicely: + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html CallFrameInfo(const char *buffer, size_t buffer_length, - ByteReader *reader, Handler *handler, - Reporter *reporter) - : buffer_(buffer), - buffer_length_(buffer_length), - reader_(reader), - handler_(handler), - reporter_(reporter) { } + ByteReader *reader, Handler *handler, Reporter *reporter, + bool eh_frame = false) + : buffer_(buffer), buffer_length_(buffer_length), + reader_(reader), handler_(handler), reporter_(reporter), + eh_frame_(eh_frame) { } + ~CallFrameInfo() { } // Parse the entries in BUFFER, reporting what we find to HANDLER. @@ -603,6 +659,13 @@ class CallFrameInfo { // The start of this entry in the buffer. const char *start; + // Which kind of entry this is. + // + // We want to be able to use this for error reporting even while we're + // in the midst of parsing. Error reporting code may assume that kind, + // offset, and start fields are valid, although kind may be kUnknown. + EntryKind kind; + // The end of this entry's common prologue (initial length and id), and // the start of this entry's kind-specific fields. const char *fields; @@ -616,16 +679,10 @@ class CallFrameInfo { // simply buffer_ + offset + length.) const char *end; - // The CIE pointer or CIE id field. 
+ // For both DWARF CFI and .eh_frame sections, this is the CIE id in a + // CIE, and the offset of the associated CIE in an FDE. uint64 id; - // The kind of entry we're parsing. - // - // This may be kUnknown at times, since we want to be able to - // count on it for error reporting even before we've finished - // parsing enough to tell what kind of entry we're looking at. - EntryKind kind; - // The CIE that applies to this entry, if we've parsed it. If this is a // CIE, then this field points to this structure. CIE *cie; @@ -638,12 +695,45 @@ class CallFrameInfo { uint64 code_alignment_factor; // scale for code address adjustments int data_alignment_factor; // scale for stack pointer adjustments unsigned return_address_register; // which register holds the return addr + + // True if this CIE includes Linux C++ ABI 'z' augmentation data. + bool has_z_augmentation; + + // Parsed 'z' augmentation data. These are meaningful only if + // has_z_augmentation is true. + bool has_z_lsda; // The 'z' augmentation included 'L'. + bool has_z_personality; // The 'z' augmentation included 'P'. + bool has_z_signal_frame; // The 'z' augmentation included 'S'. + + // If has_z_lsda is true, this is the encoding to be used for language- + // specific data area pointers in FDEs. + DwarfPointerEncoding lsda_encoding; + + // If has_z_personality is true, this is the encoding used for the + // personality routine pointer in the augmentation data. + DwarfPointerEncoding personality_encoding; + + // If has_z_personality is true, this is the address of the personality + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the + // address where the personality routine's address is stored. + uint64 personality_address; + + // This is the encoding used for addresses in the FDE header and + // in DW_CFA_set_loc instructions. This is always valid, whether + // or not we saw a 'z' augmentation string; its default value is + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. 
+ DwarfPointerEncoding pointer_encoding; }; // A frame description entry (FDE). struct FDE: public Entry { uint64 address; // start address of described code uint64 size; // size of described code, in bytes + + // If cie->has_z_lsda is true, then this is the language-specific data + // area's address --- or its address's address, if cie->lsda_encoding + // has the DW_EH_PE_indirect bit set. + uint64 lsda_address; }; // Internal use. @@ -658,24 +748,27 @@ class CallFrameInfo { class RuleMap; class State; - // Parse the initial length and id of a CFI entry, either a CIE or an - // FDE. CURSOR points to the beginning of the data to parse. - // On success, populate ENTRY as appropriate, and return true. - // On failure, report the problem, and return false. + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, + // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the + // data to parse. On success, populate ENTRY as appropriate, and return + // true. On failure, report the problem, and return false. Even if we + // return false, set ENTRY->end to the first byte after the entry if we + // were able to figure that out, or NULL if we weren't. bool ReadEntryPrologue(const char *cursor, Entry *entry); - // Parse the fields of a CIE after the entry prologue. Assume that the - // 'Entry' fields of CIE are populated; use CIE->fields and CIE->end as - // the start and limit for parsing. On success, populate the rest of - // *CIE, and return true; on failure, report the problem and return - // false. + // Parse the fields of a CIE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of CIE are + // populated; use CIE->fields and CIE->end as the start and limit for + // parsing. On success, populate the rest of *CIE, and return true; on + // failure, report the problem and return false. bool ReadCIEFields(CIE *cie); - // Parse the fields of an FDE after the entry prologue. 
Assume that the - // 'Entry' fields of *FDE are initialized; use FDE->fields and FDE->end - // as the start and limit for parsing. Assume that FDE->cie is fully - // initialized. On success, populate the rest of *FDE, and return true; - // on failure, report the problem and return false. + // Parse the fields of an FDE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of *FDE are + // initialized; use FDE->fields and FDE->end as the start and limit for + // parsing. Assume that FDE->cie is fully initialized. On success, + // populate the rest of *FDE, and return true; on failure, report the + // problem and return false. bool ReadFDEFields(FDE *fde); // Report that ENTRY is incomplete, and return false. This is just a @@ -683,6 +776,11 @@ class CallFrameInfo { // little brevity. bool ReportIncomplete(Entry *entry); + // Return true if ENCODING has the DW_EH_PE_indirect bit set. + static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { + return encoding & DW_EH_PE_indirect; + } + // The contents of the DWARF .debug_frame (or .eh_frame) section we're parsing. const char *buffer_; size_t buffer_length_; @@ -695,6 +793,9 @@ class CallFrameInfo { // For reporting problems in the info we're parsing. Reporter *reporter_; + + // True if we are processing .eh_frame-format data. + bool eh_frame_; }; // The handler class for CallFrameInfo. The CFI parser calls the @@ -786,6 +887,61 @@ class CallFrameInfo::Handler { // everything is okay, or false if an error has occurred and parsing // should stop. virtual bool End() = 0; + + // Handler functions for Linux C++ exception handling data. These are + // only called if the data includes 'z' augmentation strings. + + // The Linux C++ ABI uses an extension of the DWARF CFI format to + // walk the stack to propagate exceptions from the throw to the + // appropriate catch, and do the appropriate cleanups along the way.
+ // CFI entries used for exception handling have two additional data + // associated with them: + // + // - The "language-specific data area" describes which exception + // types the function has 'catch' clauses for, and indicates how + // to go about re-entering the function at the appropriate catch + // clause. If the exception is not caught, it describes the + // destructors that must run before the frame is popped. + // + // - The "personality routine" is responsible for interpreting the + // language-specific data area's contents, and deciding whether + // the exception should continue to propagate down the stack, + // perhaps after doing some cleanup for this frame, or whether the + // exception will be caught here. + // + // In principle, the language-specific data area is opaque to + // everybody but the personality routine. In practice, these values + // may be useful or interesting to readers with extra context, and + // we have to at least skip them anyway, so we might as well report + // them to the handler. + + // This entry's exception handling personality routine's address is + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the routine's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool PersonalityRoutine(uint64 address, bool indirect) { + return true; + } + + // This entry's language-specific data area (LSDA) is located at + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the area's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { + return true; + } + + // This entry describes a signal trampoline --- this frame is the + // caller of a signal handler. 
The default definition for this + // handler function simply returns true, allowing parsing of the + // entry to continue. + // + // The best description of the rationale for and meaning of signal + // trampoline CFI entries seems to be in the GCC bug database: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 + virtual bool SignalHandler() { return true; } }; // The CallFrameInfo class makes calls on an instance of this class to @@ -811,6 +967,12 @@ class CallFrameInfo::Reporter { // haven't parsed enough of the entry to tell yet. virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); + // The .eh_frame data has a four-byte zero at OFFSET where the next + // entry's length would be; this is a terminator. However, the buffer + // length as given to the CallFrameInfo constructor says there should be + // more data. + virtual void EarlyEHTerminator(uint64 offset); + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the // section is not that large. virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); @@ -830,6 +992,14 @@ class CallFrameInfo::Reporter { virtual void UnrecognizedAugmentation(uint64 offset, const string &augmentation); + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not + // a valid encoding. + virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends + // on a base address which has not been supplied. + virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); + // The CIE at OFFSET contains a DW_CFA_restore instruction at // INSN_OFFSET, which may not appear in a CIE. 
virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); diff --git a/src/common/dwarf/dwarf2reader_cfi_unittest.cc b/src/common/dwarf/dwarf2reader_cfi_unittest.cc index 6083df71..4762cfbf 100644 --- a/src/common/dwarf/dwarf2reader_cfi_unittest.cc +++ b/src/common/dwarf/dwarf2reader_cfi_unittest.cc @@ -31,10 +31,33 @@ // dwarf2reader_cfi_unittest.cc: Unit tests for dwarf2reader::CallFrameInfo +#include <cstdlib> #include <vector> +// The '.eh_frame' format, used by the Linux C++ ABI for exception +// handling, is poorly specified. To help test our support for .eh_frame, +// if you #define WRITE_ELF while compiling this file, and add the +// 'include' directory from the binutils, gcc, or gdb source tree to the +// #include path, then each test that calls the +// PERHAPS_WRITE_DEBUG_FRAME_FILE or PERHAPS_WRITE_EH_FRAME_FILE macro will write an +// ELF file containing a .debug_frame or .eh_frame section; you can then +// use tools like readelf to examine the test data, and check the tools' +// interpretation against the test's intentions. Each ELF file is named +// "cfitest-TEST", where TEST identifies the particular test. +#ifdef WRITE_ELF +#include <cstdio> +#include <cerrno> +#include <cstring> +extern "C" { +// To compile with WRITE_ELF, you should add the 'include' directory +// of the binutils, gcc, or gdb source tree to your #include path; +// that directory contains this header.
+#include "elf/common.h" +} +#endif + #include "breakpad_googletest_includes.h" -#include "common/dwarf/bytereader.h" +#include "common/dwarf/bytereader-inl.h" #include "common/dwarf/cfi_assembler.h" #include "common/dwarf/dwarf2reader.h" #include "google_breakpad/common/breakpad_types.h" @@ -45,6 +68,7 @@ using google_breakpad::TestAssembler::kBigEndian; using google_breakpad::TestAssembler::kLittleEndian; using google_breakpad::TestAssembler::Section; +using dwarf2reader::DwarfPointerEncoding; using dwarf2reader::ENDIANNESS_BIG; using dwarf2reader::ENDIANNESS_LITTLE; using dwarf2reader::ByteReader; @@ -57,6 +81,18 @@ using testing::Sequence; using testing::Test; using testing::_; +#ifdef WRITE_ELF +void WriteELFFrameSection(const char *filename, const char *section_name, + const CFISection &section); +#define PERHAPS_WRITE_DEBUG_FRAME_FILE(name, section) \ + WriteELFFrameSection("cfitest-" name, ".debug_frame", section); +#define PERHAPS_WRITE_EH_FRAME_FILE(name, section) \ + WriteELFFrameSection("cfitest-" name, ".eh_frame", section); +#else +#define PERHAPS_WRITE_DEBUG_FRAME_FILE(name, section) +#define PERHAPS_WRITE_EH_FRAME_FILE(name, section) +#endif + class MockCallFrameInfoHandler: public CallFrameInfo::Handler { public: MOCK_METHOD6(Entry, bool(size_t offset, uint64 address, uint64 length, @@ -74,16 +110,22 @@ class MockCallFrameInfoHandler: public CallFrameInfo::Handler { MOCK_METHOD3(ValExpressionRule, bool(uint64 address, int reg, const string &expression)); MOCK_METHOD0(End, bool()); + MOCK_METHOD2(PersonalityRoutine, bool(uint64 address, bool indirect)); + MOCK_METHOD2(LanguageSpecificDataArea, bool(uint64 address, bool indirect)); + MOCK_METHOD0(SignalHandler, bool()); }; class MockCallFrameErrorReporter: public CallFrameInfo::Reporter { public: MockCallFrameErrorReporter() : Reporter("mock filename", "mock section") { } MOCK_METHOD2(Incomplete, void(uint64, CallFrameInfo::EntryKind)); + MOCK_METHOD1(EarlyEHTerminator, void(uint64));
MOCK_METHOD2(CIEPointerOutOfRange, void(uint64, uint64)); MOCK_METHOD2(BadCIEId, void(uint64, uint64)); MOCK_METHOD2(UnrecognizedVersion, void(uint64, int version)); MOCK_METHOD2(UnrecognizedAugmentation, void(uint64, const string &)); + MOCK_METHOD2(InvalidPointerEncoding, void(uint64, uint8)); + MOCK_METHOD2(UnusablePointerEncoding, void(uint64, uint8)); MOCK_METHOD2(RestoreInCIE, void(uint64, uint64)); MOCK_METHOD3(BadInstruction, void(uint64, CallFrameInfo::EntryKind, uint64)); MOCK_METHOD3(NoCFARule, void(uint64, CallFrameInfo::EntryKind, uint64)); @@ -113,13 +155,19 @@ struct CFIFixture { EXPECT_CALL(handler, RegisterRule(_, _, _)).Times(0); EXPECT_CALL(handler, ExpressionRule(_, _, _)).Times(0); EXPECT_CALL(handler, ValExpressionRule(_, _, _)).Times(0); + EXPECT_CALL(handler, PersonalityRoutine(_, _)).Times(0); + EXPECT_CALL(handler, LanguageSpecificDataArea(_, _)).Times(0); + EXPECT_CALL(handler, SignalHandler()).Times(0); // Default expectations for the error/warning reporter. EXPECT_CALL(reporter, Incomplete(_, _)).Times(0); + EXPECT_CALL(reporter, EarlyEHTerminator(_)).Times(0); EXPECT_CALL(reporter, CIEPointerOutOfRange(_, _)).Times(0); EXPECT_CALL(reporter, BadCIEId(_, _)).Times(0); EXPECT_CALL(reporter, UnrecognizedVersion(_, _)).Times(0); EXPECT_CALL(reporter, UnrecognizedAugmentation(_, _)).Times(0); + EXPECT_CALL(reporter, InvalidPointerEncoding(_, _)).Times(0); + EXPECT_CALL(reporter, UnusablePointerEncoding(_, _)).Times(0); EXPECT_CALL(reporter, RestoreInCIE(_, _)).Times(0); EXPECT_CALL(reporter, BadInstruction(_, _, _)).Times(0); EXPECT_CALL(reporter, NoCFARule(_, _, _)).Times(0); @@ -249,6 +297,8 @@ TEST_F(CFI, SingleCIE) { section.Append(10, dwarf2reader::DW_CFA_nop); section.FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("SingleCIE", section); + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); EXPECT_CALL(handler, End()).Times(0); @@ -272,6 +322,8 @@ TEST_F(CFI, OneFDE) { .FDEHeader(cie, 0x7714740d, 0x3d5a10cd) .FinishEntry(); +
PERHAPS_WRITE_DEBUG_FRAME_FILE("OneFDE", section); + { InSequence s; EXPECT_CALL(handler, @@ -294,7 +346,8 @@ TEST_F(CFI, TwoFDEsOneCIE) { CFISection section(kBigEndian, 4); Label cie; section - // First FDE. + // First FDE. readelf complains about this one because it makes + // a forward reference to its CIE. .FDEHeader(cie, 0xa42744df, 0xa3b42121) .FinishEntry() // CIE. @@ -305,6 +358,8 @@ TEST_F(CFI, TwoFDEsOneCIE) { .FDEHeader(cie, 0x6057d391, 0x700f608d) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsOneCIE", section); + { InSequence s; EXPECT_CALL(handler, @@ -338,7 +393,8 @@ TEST_F(CFI, TwoFDEsTwoCIEs) { .Mark(&cie1) .CIEHeader(0x694d5d45, 0x4233221b, 0xbf45e65a, 3, "") .FinishEntry() - // First FDE which cites second CIE. + // First FDE which cites second CIE. readelf complains about + // this one because it makes a forward reference to its CIE. .FDEHeader(cie2, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL) .FinishEntry() // Second FDE, which cites first CIE. @@ -349,6 +405,8 @@ TEST_F(CFI, TwoFDEsTwoCIEs) { .CIEHeader(0xfba3fad7, 0x6287e1fd, 0x61d2c581, 2, "") .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsTwoCIEs", section); + { InSequence s; EXPECT_CALL(handler, @@ -394,6 +452,8 @@ TEST_F(CFI, BadVersion) { .FDEHeader(cie2, 0x2094735a, 0x6e875501) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("BadVersion", section); + EXPECT_CALL(reporter, UnrecognizedVersion(_, 0x52)) .WillOnce(Return()); @@ -436,6 +496,8 @@ TEST_F(CFI, BadAugmentation) { .FDEHeader(cie2, 0x7bf0fda0, 0xcbcd28d8) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("BadAugmentation", section); + EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "spaniels!")) .WillOnce(Return()); @@ -475,6 +537,8 @@ TEST_F(CFI, CIEVersion1ReturnColumn) { .FDEHeader(cie, 0xb8d347b5, 0x825e55dc) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion1ReturnColumn", section); + { InSequence s; EXPECT_CALL(handler, Entry(_, 0xb8d347b5, 0x825e55dc, 1, "", 0x9f)) @@ -507,6 +571,8 @@ 
TEST_F(CFI, CIEVersion3ReturnColumn) { .FDEHeader(cie, 0x86763f2b, 0x2a66dc23) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion3ReturnColumn", section); + { InSequence s; EXPECT_CALL(handler, Entry(_, 0x86763f2b, 0x2a66dc23, 3, "", 0x89)) @@ -630,6 +696,8 @@ TEST_F(CFIInsn, DW_CFA_set_loc) { .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(0x4defb431).ULEB128(0x6d17b0ee) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_set_loc", section); + EXPECT_CALL(handler, ValOffsetRule(0xb1ee3e7a, kCFARegister, 0x4defb431, 0x6d17b0ee)) .InSequence(s) @@ -649,6 +717,8 @@ TEST_F(CFIInsn, DW_CFA_advance_loc) { .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(0x5bbb3715).ULEB128(0x0186c7bf) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc", section); + EXPECT_CALL(handler, ValOffsetRule(fde_start + 0x2a * code_factor, kCFARegister, 0x5bbb3715, 0x0186c7bf)) @@ -667,6 +737,8 @@ TEST_F(CFIInsn, DW_CFA_advance_loc1) { .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(0x69d5696a).ULEB128(0x1eb7fc93) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc1", section); + EXPECT_CALL(handler, ValOffsetRule((fde_start + 0xd8 * code_factor), kCFARegister, 0x69d5696a, 0x1eb7fc93)) @@ -685,6 +757,8 @@ TEST_F(CFIInsn, DW_CFA_advance_loc2) { .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(0x3a368bed).ULEB128(0x3194ee37) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc2", section); + EXPECT_CALL(handler, ValOffsetRule((fde_start + 0x3adb * code_factor), kCFARegister, 0x3a368bed, 0x3194ee37)) @@ -703,6 +777,8 @@ TEST_F(CFIInsn, DW_CFA_advance_loc4) { .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(0x135270c5).ULEB128(0x24bad7cb) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc4", section); + EXPECT_CALL(handler, ValOffsetRule((fde_start + 0x15813c88ULL * code_factor), kCFARegister, 0x135270c5, 0x24bad7cb)) @@ -722,6 +798,8 @@ TEST_F(CFIInsn, DW_CFA_MIPS_advance_loc8) { 
.D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(0xe17ed602).ULEB128(0x3d162e7f) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc8", section); + EXPECT_CALL(handler, ValOffsetRule((fde_start + 0x3c4f3945b92c14ULL * code_factor), kCFARegister, 0xe17ed602, 0x3d162e7f)) @@ -739,6 +817,8 @@ TEST_F(CFIInsn, DW_CFA_def_cfa) { .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(0x4e363a85).ULEB128(0x815f9aa7) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_def_cfa", section); + EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, 0x4e363a85, 0x815f9aa7)) .InSequence(s).WillOnce(Return(true)); @@ -1835,6 +1915,8 @@ TEST_F(CFIRestore, RestoreValExpressionRuleChanged) { .D8(dwarf2reader::DW_CFA_restore_state) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChanged", section); + EXPECT_CALL(handler, ValExpressionRule(fde_start, 0xb5ca5c46, "revolting")) .InSequence(s).WillOnce(Return(true)); EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46)) @@ -1861,6 +1943,9 @@ TEST_F(CFIRestore, RestoreValExpressionRuleChangedValExpression) { .D8(dwarf2reader::DW_CFA_restore_state) .FinishEntry(); + PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChangedValExpression", + section); + EXPECT_CALL(handler, ValExpressionRule(fde_start, 0x500f5739, "repulsive")) .InSequence(s).WillOnce(Return(true)); EXPECT_CALL(handler, ValExpressionRule(fde_start + code_factor, 0x500f5739, @@ -1875,6 +1960,294 @@ TEST_F(CFIRestore, RestoreValExpressionRuleChangedValExpression) { ParseSection(&section); } +struct EHFrameFixture: public CFIInsnFixture { + EHFrameFixture() + : CFIInsnFixture(), section(kBigEndian, 4, true) { + encoded_pointer_bases.cfi = 0x7f496cb2; + encoded_pointer_bases.text = 0x540f67b6; + encoded_pointer_bases.data = 0xe3eab768; + section.SetEncodedPointerBases(encoded_pointer_bases); + } + CFISection section; + CFISection::EncodedPointerBases encoded_pointer_bases; + + // Like CFIInsnFixture::ParseSection, but parse
the section as + // .eh_frame data, supplying stock base addresses. + void ParseEHFrameSection(CFISection *section, bool succeeds = true) { + EXPECT_TRUE(section->ContainsEHFrame()); + string contents; + EXPECT_TRUE(section->GetContents(&contents)); + dwarf2reader::Endianness endianness; + if (section->endianness() == kBigEndian) + endianness = ENDIANNESS_BIG; + else { + assert(section->endianness() == kLittleEndian); + endianness = ENDIANNESS_LITTLE; + } + ByteReader byte_reader(endianness); + byte_reader.SetAddressSize(section->AddressSize()); + byte_reader.SetCFIDataBase(encoded_pointer_bases.cfi, contents.data()); + byte_reader.SetTextBase(encoded_pointer_bases.text); + byte_reader.SetDataBase(encoded_pointer_bases.data); + CallFrameInfo parser(contents.data(), contents.size(), + &byte_reader, &handler, &reporter, true); + if (succeeds) + EXPECT_TRUE(parser.Start()); + else + EXPECT_FALSE(parser.Start()); + } + +}; + +class EHFrame: public EHFrameFixture, public Test { }; + +// A simple CIE, an FDE, and a terminator. +TEST_F(EHFrame, Terminator) { + Label cie; + section + .Mark(&cie) + .CIEHeader(9968, 2466, 67, 1, "") + .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(3772).ULEB128(1372) + .FinishEntry() + .FDEHeader(cie, 0x848037a1, 0x7b30475e) + .D8(dwarf2reader::DW_CFA_set_loc).D32(0x17713850) + .D8(dwarf2reader::DW_CFA_undefined).ULEB128(5721) + .FinishEntry() + .D32(0) // Terminate the sequence. + // This FDE should be ignored. 
+ .FDEHeader(cie, 0xf19629fe, 0x439fb09b) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.Terminator", section); + + EXPECT_CALL(handler, Entry(_, 0x848037a1, 0x7b30475e, 1, "", 67)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0x848037a1, kCFARegister, 3772, 1372)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0x17713850, 5721)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(reporter, EarlyEHTerminator(_)) + .InSequence(s).WillOnce(Return()); + + ParseEHFrameSection(&section); +} + +// The parser should recognize the Linux Standards Base 'z' augmentations. +TEST_F(EHFrame, SimpleFDE) { + DwarfPointerEncoding lsda_encoding = + DwarfPointerEncoding(dwarf2reader::DW_EH_PE_indirect + | dwarf2reader::DW_EH_PE_datarel + | dwarf2reader::DW_EH_PE_sdata2); + DwarfPointerEncoding fde_encoding = + DwarfPointerEncoding(dwarf2reader::DW_EH_PE_textrel + | dwarf2reader::DW_EH_PE_udata2); + + section.SetPointerEncoding(fde_encoding); + section.SetEncodedPointerBases(encoded_pointer_bases); + Label cie; + section + .Mark(&cie) + .CIEHeader(4873, 7012, 100, 1, "zSLPR") + .ULEB128(7) // Augmentation data length + .D8(lsda_encoding) // LSDA pointer format + .D8(dwarf2reader::DW_EH_PE_pcrel) // personality pointer format + .EncodedPointer(0x97baa00, dwarf2reader::DW_EH_PE_pcrel) // and value + .D8(fde_encoding) // FDE pointer format + .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(6706).ULEB128(31) + .FinishEntry() + .FDEHeader(cie, 0x540f6b56, 0xf686) + .ULEB128(2) // Augmentation data length + .EncodedPointer(0xe3eab475, lsda_encoding) // LSDA pointer, signed + .D8(dwarf2reader::DW_CFA_set_loc) + .EncodedPointer(0x540fa4ce, fde_encoding) + .D8(dwarf2reader::DW_CFA_undefined).ULEB128(0x675e) + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.SimpleFDE", section); + + EXPECT_CALL(handler, Entry(_, 0x540f6b56, 
0xf686, 1, "zSLPR", 100)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, PersonalityRoutine(0x97baa00, false)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, LanguageSpecificDataArea(0xe3eab475, true)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, SignalHandler()) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0x540f6b56, kCFARegister, 6706, 31)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0x540fa4ce, 0x675e)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(&section); +} + +// Check that we can handle an empty 'z' augmentation. +TEST_F(EHFrame, EmptyZ) { + Label cie; + section + .Mark(&cie) + .CIEHeader(5955, 5805, 228, 1, "z") + .ULEB128(0) // Augmentation data length + .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(3629).ULEB128(247) + .FinishEntry() + .FDEHeader(cie, 0xda007738, 0xfb55c641) + .ULEB128(0) // Augmentation data length + .D8(dwarf2reader::DW_CFA_advance_loc1).D8(11) + .D8(dwarf2reader::DW_CFA_undefined).ULEB128(3769) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.EmptyZ", section); + + EXPECT_CALL(handler, Entry(_, 0xda007738, 0xfb55c641, 1, "z", 228)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0xda007738, kCFARegister, 3629, 247)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0xda007738 + 11 * 5955, 3769)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(&section); +} + +// Check that we recognize bad 'z' augmentation characters. 
+TEST_F(EHFrame, BadZ) { + Label cie; + section + .Mark(&cie) + .CIEHeader(6937, 1045, 142, 1, "zQ") + .ULEB128(0) // Augmentation data length + .D8(dwarf2reader::DW_CFA_def_cfa).ULEB128(9006).ULEB128(7725) + .FinishEntry() + .FDEHeader(cie, 0x1293efa8, 0x236f53f2) + .ULEB128(0) // Augmentation data length + .D8(dwarf2reader::DW_CFA_advance_loc | 12) + .D8(dwarf2reader::DW_CFA_register).ULEB128(5667).ULEB128(3462) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.BadZ", section); + + EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "zQ")) + .WillOnce(Return()); + + ParseEHFrameSection(&section, false); +} + +TEST_F(EHFrame, zL) { + Label cie; + DwarfPointerEncoding lsda_encoding = + DwarfPointerEncoding(dwarf2reader::DW_EH_PE_funcrel + | dwarf2reader::DW_EH_PE_udata2); + section + .Mark(&cie) + .CIEHeader(9285, 9959, 54, 1, "zL") + .ULEB128(1) // Augmentation data length + .D8(lsda_encoding) // encoding for LSDA pointer in FDE + + .FinishEntry() + .FDEHeader(cie, 0xd40091aa, 0x9aa6e746) + .ULEB128(2) // Augmentation data length + .EncodedPointer(0xd40099cd, lsda_encoding) // LSDA pointer + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zL", section); + + EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zL", 54)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, LanguageSpecificDataArea(0xd40099cd, false)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(&section); +} + +TEST_F(EHFrame, zP) { + Label cie; + DwarfPointerEncoding personality_encoding = + DwarfPointerEncoding(dwarf2reader::DW_EH_PE_datarel + | dwarf2reader::DW_EH_PE_udata2); + section + .Mark(&cie) + .CIEHeader(1097, 6313, 17, 1, "zP") + .ULEB128(3) // Augmentation data length + .D8(personality_encoding) // encoding for personality routine + .EncodedPointer(0xe3eaccac, personality_encoding) // value + .FinishEntry() + .FDEHeader(cie, 0x0c8350c9, 0xbef11087) + 
.ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zP", section); + + EXPECT_CALL(handler, Entry(_, 0x0c8350c9, 0xbef11087, 1, "zP", 17)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, PersonalityRoutine(0xe3eaccac, false)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(&section); +} + +TEST_F(EHFrame, zR) { + Label cie; + DwarfPointerEncoding pointer_encoding = + DwarfPointerEncoding(dwarf2reader::DW_EH_PE_textrel + | dwarf2reader::DW_EH_PE_sdata2); + section.SetPointerEncoding(pointer_encoding); + section + .Mark(&cie) + .CIEHeader(8011, 5496, 75, 1, "zR") + .ULEB128(1) // Augmentation data length + .D8(pointer_encoding) // encoding for FDE addresses + .FinishEntry() + .FDEHeader(cie, 0x540f9431, 0xbd0) + .ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zR", section); + + EXPECT_CALL(handler, Entry(_, 0x540f9431, 0xbd0, 1, "zR", 75)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(&section); +} + +TEST_F(EHFrame, zS) { + Label cie; + section + .Mark(&cie) + .CIEHeader(9217, 7694, 57, 1, "zS") + .ULEB128(0) // Augmentation data length + .FinishEntry() + .FDEHeader(cie, 0xd40091aa, 0x9aa6e746) + .ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zS", section); + + EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zS", 57)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, SignalHandler()) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(&section); +} + // These tests require manual inspection of the test output. 
struct CFIReporterFixture { CFIReporterFixture() : reporter("test file name", "test section name") { } @@ -1887,6 +2260,10 @@ TEST_F(CFIReporter, Incomplete) { reporter.Incomplete(0x0102030405060708ULL, CallFrameInfo::kUnknown); } +TEST_F(CFIReporter, EarlyEHTerminator) { + reporter.EarlyEHTerminator(0x0102030405060708ULL); +} + TEST_F(CFIReporter, CIEPointerOutOfRange) { reporter.CIEPointerOutOfRange(0x0123456789abcdefULL, 0xfedcba9876543210ULL); } @@ -1903,6 +2280,14 @@ TEST_F(CFIReporter, UnrecognizedAugmentation) { reporter.UnrecognizedAugmentation(0x0123456789abcdefULL, "poodles"); } +TEST_F(CFIReporter, InvalidPointerEncoding) { + reporter.InvalidPointerEncoding(0x0123456789abcdefULL, 0x42); +} + +TEST_F(CFIReporter, UnusablePointerEncoding) { + reporter.UnusablePointerEncoding(0x0123456789abcdefULL, 0x42); +} + TEST_F(CFIReporter, RestoreInCIE) { reporter.RestoreInCIE(0x0123456789abcdefULL, 0xfedcba9876543210ULL); } @@ -1918,7 +2303,7 @@ TEST_F(CFIReporter, NoCFARule) { } TEST_F(CFIReporter, EmptyStateStack) { - reporter.EmptyStateStack(0x0123456789abcdefULL, CallFrameInfo::kFDE, + reporter.EmptyStateStack(0x0123456789abcdefULL, CallFrameInfo::kTerminator, 0xfedcba9876543210ULL); } @@ -1926,3 +2311,139 @@ TEST_F(CFIReporter, ClearingCFARule) { reporter.ClearingCFARule(0x0123456789abcdefULL, CallFrameInfo::kFDE, 0xfedcba9876543210ULL); } + +#ifdef WRITE_ELF +// See comments at the top of the file mentioning WRITE_ELF for details. 
+ +using google_breakpad::TestAssembler::Section; + +struct ELFSectionHeader { + ELFSectionHeader(unsigned int set_type) + : type(set_type), flags(0), address(0), link(0), info(0), + alignment(1), entry_size(0) { } + Label name; + unsigned int type; + u_int64_t flags; + u_int64_t address; + Label file_offset; + Label file_size; + unsigned int link; + unsigned int info; + u_int64_t alignment; + u_int64_t entry_size; +}; + +void AppendSectionHeader(CFISection *table, const ELFSectionHeader &header) { + (*table) + .D32(header.name) // name, index in string tbl + .D32(header.type) // type + .Address(header.flags) // flags + .Address(header.address) // address in memory + .Address(header.file_offset) // offset in ELF file + .Address(header.file_size) // length in bytes + .D32(header.link) // link to related section + .D32(header.info) // miscellaneous + .Address(header.alignment) // alignment + .Address(header.entry_size); // entry size +} + +void WriteELFFrameSection(const char *filename, const char *cfi_name, + const CFISection &cfi) { + int elf_class = cfi.AddressSize() == 4 ? ELFCLASS32 : ELFCLASS64; + int elf_data = (cfi.endianness() == kBigEndian + ? ELFDATA2MSB : ELFDATA2LSB); + CFISection elf(cfi.endianness(), cfi.AddressSize()); + Label elf_header_size, section_table_offset; + elf + .Append("\x7f" "ELF") + .D8(elf_class) // 32-bit or 64-bit ELF + .D8(elf_data) // endianness + .D8(1) // ELF version + .D8(ELFOSABI_LINUX) // Operating System/ABI indication + .D8(0) // ABI version + .Append(7, 0xda) // padding + .D16(ET_EXEC) // file type: executable file + .D16(EM_386) // architecture: Intel IA-32 + .D32(EV_CURRENT); // ELF version + elf + .Address(0x0123456789abcdefULL) // program entry point + .Address(0) // program header offset + .Address(section_table_offset) // section header offset + .D32(0) // processor-specific flags + .D16(elf_header_size) // ELF header size in bytes + .D16(elf_class == ELFCLASS32 ? 
32 : 56) // program header entry size + .D16(0) // program header table entry count + .D16(elf_class == ELFCLASS32 ? 40 : 64) // section header entry size + .D16(3) // section count + .D16(1) // section name string table + .Mark(&elf_header_size); + + // The null section. Every ELF file has one, as the first entry in + // the section header table. + ELFSectionHeader null_header(SHT_NULL); + null_header.file_offset = 0; + null_header.file_size = 0; + + // The CFI section. The whole reason for writing out this ELF file + // is to put this in it so that we can run other dumping programs on + // it to check its contents. + ELFSectionHeader cfi_header(SHT_PROGBITS); + cfi_header.file_size = cfi.Size(); + + // The section holding the names of the sections. This is the + // section whose index appears in the e_shstrndx member of the ELF + // header. + ELFSectionHeader section_names_header(SHT_STRTAB); + CFISection section_names(cfi.endianness(), cfi.AddressSize()); + section_names + .Mark(&null_header.name) + .AppendCString("") + .Mark(&section_names_header.name) + .AppendCString(".shstrtab") + .Mark(&cfi_header.name) + .AppendCString(cfi_name) + .Mark(&section_names_header.file_size); + + // Create the section table. The ELF header's e_shoff member refers + // to this, and the e_shnum member gives the number of entries it + // contains. + CFISection section_table(cfi.endianness(), cfi.AddressSize()); + AppendSectionHeader(&section_table, null_header); + AppendSectionHeader(&section_table, section_names_header); + AppendSectionHeader(&section_table, cfi_header); + + // Append the section table and the section contents to the ELF file. 
+ elf + .Mark(&section_table_offset) + .Append(section_table) + .Mark(&section_names_header.file_offset) + .Append(section_names) + .Mark(&cfi_header.file_offset) + .Append(cfi); + + string contents; + if (!elf.GetContents(&contents)) { + fprintf(stderr, "failed to get ELF file contents\n"); + exit(1); + } + + FILE *out = fopen(filename, "w"); + if (!out) { + fprintf(stderr, "error opening ELF file '%s': %s\n", + filename, strerror(errno)); + exit(1); + } + + if (fwrite(contents.data(), 1, contents.size(), out) != contents.size()) { + fprintf(stderr, "error writing ELF data to '%s': %s\n", + filename, strerror(errno)); + exit(1); + } + + if (fclose(out) == EOF) { + fprintf(stderr, "error closing ELF file '%s': %s\n", + filename, strerror(errno)); + exit(1); + } +} +#endif diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 79d7ad36..3df31372 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -46,6 +46,7 @@ #include <cstring> #include <string> +#include "common/dwarf/bytereader-inl.h" #include "common/dwarf/dwarf2diehandler.h" #include "common/linux/dump_stabs.h" #include "common/linux/dump_symbols.h" @@ -278,6 +279,9 @@ static bool LoadDwarfCFI(const string &dwarf_filename, const ElfW(Ehdr) *elf_header, const char *section_name, const ElfW(Shdr) *section, + bool eh_frame, + const ElfW(Shdr) *got_section, + const ElfW(Shdr) *text_section, Module *module) { // Find the appropriate set of register names for this file's // architecture. @@ -321,11 +325,19 @@ static bool LoadDwarfCFI(const string &dwarf_filename, dwarf_filename.c_str(), elf_header->e_ident[EI_CLASS]); return false; } + // Provide the base addresses for .eh_frame encoded pointers, if + // possible. 
+ byte_reader.SetCFIDataBase(section->sh_addr, cfi); + if (got_section) + byte_reader.SetDataBase(got_section->sh_addr); + if (text_section) + byte_reader.SetTextBase(text_section->sh_addr); dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename, section_name); - dwarf2reader::CallFrameInfo parser(cfi, cfi_size, &byte_reader, - &handler, &dwarf_reporter); + dwarf2reader::CallFrameInfo parser(cfi, cfi_size, + &byte_reader, &handler, &dwarf_reporter, + eh_frame); parser.Start(); return true; } @@ -379,7 +391,25 @@ static bool LoadSymbols(const std::string &obj_file, ElfW(Ehdr) *elf_header, // information, the other debugging information could be perfectly // useful. LoadDwarfCFI(obj_file, elf_header, ".debug_frame", - dwarf_cfi_section, module); + dwarf_cfi_section, false, 0, 0, module); + } + + // Linux C++ exception handling information can also provide + // unwinding data. + const ElfW(Shdr) *eh_frame_section = + FindSectionByName(".eh_frame", sections, section_names, + elf_header->e_shnum); + if (eh_frame_section) { + // Pointers in .eh_frame data may be relative to the base addresses of + // certain sections. Provide those sections if present. + const ElfW(Shdr) *got_section = + FindSectionByName(".got", sections, section_names, elf_header->e_shnum); + const ElfW(Shdr) *text_section = + FindSectionByName(".text", sections, section_names, + elf_header->e_shnum); + // As above, ignore the return value of this function. 
+ LoadDwarfCFI(obj_file, elf_header, ".eh_frame", + eh_frame_section, true, got_section, text_section, module); } if (!found_debug_info_section) { diff --git a/src/common/linux/dwarf_cfi_to_module.cc b/src/common/linux/dwarf_cfi_to_module.cc index d7946a0e..603acc0e 100644 --- a/src/common/linux/dwarf_cfi_to_module.cc +++ b/src/common/linux/dwarf_cfi_to_module.cc @@ -46,12 +46,10 @@ bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length, uint8 version, const string &augmentation, unsigned return_address) { assert(!entry_); - // The latest CFI format version we understand is version 3. - if (version > 3) - return false; - // We only handle non-augmented DWARF unwinding data at the moment. - if (!augmentation.empty()) - return false; + + // If dwarf2reader::CallFrameInfo can handle this version and + // augmentation, then we should be okay with that, so there's no + // need to check them here. // Get ready to collect entries. entry_ = new Module::StackFrameEntry; diff --git a/src/common/linux/dwarf_cfi_to_module_unittest.cc b/src/common/linux/dwarf_cfi_to_module_unittest.cc index de769393..d7b08aef 100644 --- a/src/common/linux/dwarf_cfi_to_module_unittest.cc +++ b/src/common/linux/dwarf_cfi_to_module_unittest.cc @@ -78,20 +78,6 @@ struct DwarfCFIToModuleFixture { class Entry: public DwarfCFIToModuleFixture, public Test { }; -TEST_F(Entry, IgnoreVersion) { - ASSERT_FALSE(handler.Entry(0xf120e638, 0x2851bc1f7a181d6dULL, - 0x40589a48d66e5a88ULL, 4, "", 0x1ad80491)); - module.GetStackFrameEntries(&entries); - EXPECT_EQ(0U, entries.size()); -} - -TEST_F(Entry, IgnoreAugmentation) { - ASSERT_FALSE(handler.Entry(0x3f9d228a, 0xcf9a94bb805cf5a4ULL, - 0xe6c41bf958d4c171ULL, 3, "snazzy", 0x444a14f3)); - module.GetStackFrameEntries(&entries); - EXPECT_EQ(0U, entries.size()); -} - TEST_F(Entry, Accept) { ASSERT_TRUE(handler.Entry(0x3b8961b8, 0xa21069698096fc98ULL, 0xb440ce248169c8d6ULL, 3, "", 0xea93c106)); |