aboutsummaryrefslogtreecommitdiff
path: root/src/common/dwarf/dwarf2reader.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/dwarf/dwarf2reader.h')
-rw-r--r--src/common/dwarf/dwarf2reader.h242
1 files changed, 206 insertions, 36 deletions
diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h
index 136b8932..a7a13afb 100644
--- a/src/common/dwarf/dwarf2reader.h
+++ b/src/common/dwarf/dwarf2reader.h
@@ -46,6 +46,7 @@
#include <utility>
#include <vector>
+#include "common/dwarf/bytereader.h"
#include "common/dwarf/dwarf2enums.h"
#include "common/dwarf/types.h"
@@ -53,7 +54,6 @@ using namespace std;
namespace dwarf2reader {
struct LineStateMachine;
-class ByteReader;
class Dwarf2Handler;
class LineInfoHandler;
@@ -556,7 +556,7 @@ class CallFrameInfo {
public:
// The different kinds of entries one finds in CFI. Used internally,
// and for error reporting.
- enum EntryKind { kUnknown, kCIE, kFDE };
+ enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
// The handler class to which the parser hands the parsed call frame
// information. Defined below.
@@ -567,19 +567,75 @@ class CallFrameInfo {
class Reporter;
// Create a DWARF CFI parser. BUFFER points to the contents of the
- // .debug_frame section to parse; BUFFER_LENGTH is its length in
- // bytes. REPORTER is an error reporter the parser should use to
- // report problems. READER is a ByteReader instance that has the
- // endianness and address size set properly. Report the data we find
- // to HANDLER.
+ // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
+ // REPORTER is an error reporter the parser should use to report
+ // problems. READER is a ByteReader instance that has the endianness and
+ // address size set properly. Report the data we find to HANDLER.
+ //
+ // This class can also parse Linux C++ exception handling data, as found
+ // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
+ // placed in loadable segments so that it is present in the program's
+ // address space, and is interpreted by the C++ runtime to search the
+ // call stack for a handler interested in the exception being thrown,
+ // actually pop the frames, and find cleanup code to run.
+ //
+ // There are two differences between the call frame information described
+ // in the DWARF standard and the exception handling data Linux places in
+ // the .eh_frame section:
+ //
+ // - Exception handling data uses uses a different format for call frame
+ // information entry headers. The distinguished CIE id, the way FDEs
+ // refer to their CIEs, and the way the end of the series of entries is
+ // determined are all slightly different.
+ //
+ // If the constructor's EH_FRAME argument is true, then the
+ // CallFrameInfo parses the entry headers as Linux C++ exception
+ // handling data. If EH_FRAME is false or omitted, the CallFrameInfo
+ // parses standard DWARF call frame information.
+ //
+ // - Linux C++ exception handling data uses CIE augmentation strings
+ // beginning with 'z' to specify the presence of additional data after
+ // the CIE and FDE headers and special encodings used for addresses in
+ // frame description entries.
+ //
+ // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
+ // exception handling data if you have supplied READER with the base
+ // addresses needed to interpret the pointer encodings that 'z'
+ // augmentations can specify. See the ByteReader interface for details
+ // about the base addresses. See the CallFrameInfo::Handler interface
+ // for details about the additional information one might find in
+ // 'z'-augmented data.
+ //
+ // Thus:
+ //
+ // - If you are parsing standard DWARF CFI, as found in a .debug_frame
+ // section, you should pass false for the EH_FRAME argument, or omit
+ // it, and you need not worry about providing READER with the
+ // additional base addresses.
+ //
+ // - If you want to parse Linux C++ exception handling data from a
+ // .eh_frame section, you should pass EH_FRAME as true, and call
+ // READER's Set*Base member functions before calling our Start method.
+ //
+ // - If you want to parse DWARF CFI that uses the 'z' augmentations
+ // (although I don't think any toolchain ever emits such data), you
+ // could pass false for EH_FRAME, but call READER's Set*Base members.
+ //
+ // The extensions the Linux C++ ABI makes to DWARF for exception
+ // handling are described here, rather poorly:
+ // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+ // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+ //
+ // The mechanics of C++ exception handling, personality routines,
+ // and language-specific data areas are described here, rather nicely:
+ // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
CallFrameInfo(const char *buffer, size_t buffer_length,
- ByteReader *reader, Handler *handler,
- Reporter *reporter)
- : buffer_(buffer),
- buffer_length_(buffer_length),
- reader_(reader),
- handler_(handler),
- reporter_(reporter) { }
+ ByteReader *reader, Handler *handler, Reporter *reporter,
+ bool eh_frame = false)
+ : buffer_(buffer), buffer_length_(buffer_length),
+ reader_(reader), handler_(handler), reporter_(reporter),
+ eh_frame_(eh_frame) { }
+
~CallFrameInfo() { }
// Parse the entries in BUFFER, reporting what we find to HANDLER.
@@ -603,6 +659,13 @@ class CallFrameInfo {
// The start of this entry in the buffer.
const char *start;
+ // Which kind of entry this is.
+ //
+ // We want to be able to use this for error reporting even while we're
+ // in the midst of parsing. Error reporting code may assume that kind,
+ // offset, and start fields are valid, although kind may be kUnknown.
+ EntryKind kind;
+
// The end of this entry's common prologue (initial length and id), and
// the start of this entry's kind-specific fields.
const char *fields;
@@ -616,16 +679,10 @@ class CallFrameInfo {
// simply buffer_ + offset + length.)
const char *end;
- // The CIE pointer or CIE id field.
+ // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
+ // CIE, and the offset of the associated CIE in an FDE.
uint64 id;
- // The kind of entry we're parsing.
- //
- // This may be kUnknown at times, since we want to be able to
- // count on it for error reporting even before we've finished
- // parsing enough to tell what kind of entry we're looking at.
- EntryKind kind;
-
// The CIE that applies to this entry, if we've parsed it. If this is a
// CIE, then this field points to this structure.
CIE *cie;
@@ -638,12 +695,45 @@ class CallFrameInfo {
uint64 code_alignment_factor; // scale for code address adjustments
int data_alignment_factor; // scale for stack pointer adjustments
unsigned return_address_register; // which register holds the return addr
+
+ // True if this CIE includes Linux C++ ABI 'z' augmentation data.
+ bool has_z_augmentation;
+
+ // Parsed 'z' augmentation data. These are meaningful only if
+ // has_z_augmentation is true.
+ bool has_z_lsda; // The 'z' augmentation included 'L'.
+ bool has_z_personality; // The 'z' augmentation included 'P'.
+ bool has_z_signal_frame; // The 'z' augmentation included 'S'.
+
+ // If has_z_lsda is true, this is the encoding to be used for language-
+ // specific data area pointers in FDEs.
+ DwarfPointerEncoding lsda_encoding;
+
+ // If has_z_personality is true, this is the encoding used for the
+ // personality routine pointer in the augmentation data.
+ DwarfPointerEncoding personality_encoding;
+
+ // If has_z_personality is true, this is the address of the personality
+ // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
+ // address where the personality routine's address is stored.
+ uint64 personality_address;
+
+ // This is the encoding used for addresses in the FDE header and
+ // in DW_CFA_set_loc instructions. This is always valid, whether
+ // or not we saw a 'z' augmentation string; its default value is
+ // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
+ DwarfPointerEncoding pointer_encoding;
};
// A frame description entry (FDE).
struct FDE: public Entry {
uint64 address; // start address of described code
uint64 size; // size of described code, in bytes
+
+ // If cie->has_z_lsda is true, then this is the language-specific data
+ // area's address --- or its address's address, if cie->lsda_encoding
+ // has the DW_EH_PE_indirect bit set.
+ uint64 lsda_address;
};
// Internal use.
@@ -658,24 +748,27 @@ class CallFrameInfo {
class RuleMap;
class State;
- // Parse the initial length and id of a CFI entry, either a CIE or an
- // FDE. CURSOR points to the beginning of the data to parse.
- // On success, populate ENTRY as appropriate, and return true.
- // On failure, report the problem, and return false.
+ // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
+ // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
+ // data to parse. On success, populate ENTRY as appropriate, and return
+ // true. On failure, report the problem, and return false. Even if we
+ // return false, set ENTRY->end to the first byte after the entry if we
+ // were able to figure that out, or NULL if we weren't.
bool ReadEntryPrologue(const char *cursor, Entry *entry);
- // Parse the fields of a CIE after the entry prologue. Assume that the
- // 'Entry' fields of CIE are populated; use CIE->fields and CIE->end as
- // the start and limit for parsing. On success, populate the rest of
- // *CIE, and return true; on failure, report the problem and return
- // false.
+ // Parse the fields of a CIE after the entry prologue, including any 'z'
+ // augmentation data. Assume that the 'Entry' fields of CIE are
+ // populated; use CIE->fields and CIE->end as the start and limit for
+ // parsing. On success, populate the rest of *CIE, and return true; on
+ // failure, report the problem and return false.
bool ReadCIEFields(CIE *cie);
- // Parse the fields of an FDE after the entry prologue. Assume that the
- // 'Entry' fields of *FDE are initialized; use FDE->fields and FDE->end
- // as the start and limit for parsing. Assume that FDE->cie is fully
- // initialized. On success, populate the rest of *FDE, and return true;
- // on failure, report the problem and return false.
+ // Parse the fields of an FDE after the entry prologue, including any 'z'
+ // augmentation data. Assume that the 'Entry' fields of *FDE are
+ // initialized; use FDE->fields and FDE->end as the start and limit for
+ // parsing. Assume that FDE->cie is fully initialized. On success,
+ // populate the rest of *FDE, and return true; on failure, report the
+ // problem and return false.
bool ReadFDEFields(FDE *fde);
// Report that ENTRY is incomplete, and return false. This is just a
@@ -683,6 +776,11 @@ class CallFrameInfo {
// little brevity.
bool ReportIncomplete(Entry *entry);
+ // Return true if ENCODING has the DW_EH_PE_indirect bit set.
+ static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
+ return encoding & DW_EH_PE_indirect;
+ }
+
// The contents of the DWARF .debug_info section we're parsing.
const char *buffer_;
size_t buffer_length_;
@@ -695,6 +793,9 @@ class CallFrameInfo {
// For reporting problems in the info we're parsing.
Reporter *reporter_;
+
+ // True if we are processing .eh_frame-format data.
+ bool eh_frame_;
};
// The handler class for CallFrameInfo. The a CFI parser calls the
@@ -786,6 +887,61 @@ class CallFrameInfo::Handler {
// everything is okay, or false if an error has occurred and parsing
// should stop.
virtual bool End() = 0;
+
+ // Handler functions for Linux C++ exception handling data. These are
+ // only called if the data includes 'z' augmentation strings.
+
+ // The Linux C++ ABI uses an extension of the DWARF CFI format to
+ // walk the stack to propagate exceptions from the throw to the
+ // appropriate catch, and do the appropriate cleanups along the way.
+ // CFI entries used for exception handling have two additional data
+ // associated with them:
+ //
+ // - The "language-specific data area" describes which exception
+ // types the function has 'catch' clauses for, and indicates how
+ // to go about re-entering the function at the appropriate catch
+ // clause. If the exception is not caught, it describes the
+ // destructors that must run before the frame is popped.
+ //
+ // - The "personality routine" is responsible for interpreting the
+ // language-specific data area's contents, and deciding whether
+ // the exception should continue to propagate down the stack,
+ // perhaps after doing some cleanup for this frame, or whether the
+ // exception will be caught here.
+ //
+ // In principle, the language-specific data area is opaque to
+ // everybody but the personality routine. In practice, these values
+ // may be useful or interesting to readers with extra context, and
+ // we have to at least skip them anyway, so we might as well report
+ // them to the handler.
+
+ // This entry's exception handling personality routine's address is
+ // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+ // which the routine's address is stored. The default definition for
+ // this handler function simply returns true, allowing parsing of
+ // the entry to continue.
+ virtual bool PersonalityRoutine(uint64 address, bool indirect) {
+ return true;
+ }
+
+ // This entry's language-specific data area (LSDA) is located at
+ // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+ // which the area's address is stored. The default definition for
+ // this handler function simply returns true, allowing parsing of
+ // the entry to continue.
+ virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) {
+ return true;
+ }
+
+ // This entry describes a signal trampoline --- this frame is the
+ // caller of a signal handler. The default definition for this
+ // handler function simply returns true, allowing parsing of the
+ // entry to continue.
+ //
+ // The best description of the rationale for and meaning of signal
+ // trampoline CFI entries seems to be in the GCC bug database:
+ // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
+ virtual bool SignalHandler() { return true; }
};
// The CallFrameInfo class makes calls on an instance of this class to
@@ -811,6 +967,12 @@ class CallFrameInfo::Reporter {
// haven't parsed enough of the entry to tell yet.
virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind);
+ // The .eh_frame data has a four-byte zero at OFFSET where the next
+ // entry's length would be; this is a terminator. However, the buffer
+ // length as given to the CallFrameInfo constructor says there should be
+ // more data.
+ virtual void EarlyEHTerminator(uint64 offset);
+
// The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
// section is not that large.
virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset);
@@ -830,6 +992,14 @@ class CallFrameInfo::Reporter {
virtual void UnrecognizedAugmentation(uint64 offset,
const string &augmentation);
+ // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
+ // a valid encoding.
+ virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding);
+
+ // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
+ // on a base address which has not been supplied.
+ virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding);
+
// The CIE at OFFSET contains a DW_CFA_restore instruction at
// INSN_OFFSET, which may not appear in a CIE.
virtual void RestoreInCIE(uint64 offset, uint64 insn_offset);