diff options
Diffstat (limited to 'src/common/dwarf/dwarf2reader.h')
-rw-r--r-- | src/common/dwarf/dwarf2reader.h | 242 |
1 files changed, 206 insertions, 36 deletions
diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h index 136b8932..a7a13afb 100644 --- a/src/common/dwarf/dwarf2reader.h +++ b/src/common/dwarf/dwarf2reader.h @@ -46,6 +46,7 @@ #include <utility> #include <vector> +#include "common/dwarf/bytereader.h" #include "common/dwarf/dwarf2enums.h" #include "common/dwarf/types.h" @@ -53,7 +54,6 @@ using namespace std; namespace dwarf2reader { struct LineStateMachine; -class ByteReader; class Dwarf2Handler; class LineInfoHandler; @@ -556,7 +556,7 @@ class CallFrameInfo { public: // The different kinds of entries one finds in CFI. Used internally, // and for error reporting. - enum EntryKind { kUnknown, kCIE, kFDE }; + enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; // The handler class to which the parser hands the parsed call frame // information. Defined below. @@ -567,19 +567,75 @@ class CallFrameInfo { class Reporter; // Create a DWARF CFI parser. BUFFER points to the contents of the - // .debug_frame section to parse; BUFFER_LENGTH is its length in - // bytes. REPORTER is an error reporter the parser should use to - // report problems. READER is a ByteReader instance that has the - // endianness and address size set properly. Report the data we find - // to HANDLER. + // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. + // REPORTER is an error reporter the parser should use to report + // problems. READER is a ByteReader instance that has the endianness and + // address size set properly. Report the data we find to HANDLER. + // + // This class can also parse Linux C++ exception handling data, as found + // in '.eh_frame' sections. This data is a variant of DWARF CFI that is + // placed in loadable segments so that it is present in the program's + // address space, and is interpreted by the C++ runtime to search the + // call stack for a handler interested in the exception being thrown, + // actually pop the frames, and find cleanup code to run. 
+ // + // There are two differences between the call frame information described + // in the DWARF standard and the exception handling data Linux places in + // the .eh_frame section: + // + // - Exception handling data uses a different format for call frame + // information entry headers. The distinguished CIE id, the way FDEs + // refer to their CIEs, and the way the end of the series of entries is + // determined are all slightly different. + // + // If the constructor's EH_FRAME argument is true, then the + // CallFrameInfo parses the entry headers as Linux C++ exception + // handling data. If EH_FRAME is false or omitted, the CallFrameInfo + // parses standard DWARF call frame information. + // + // - Linux C++ exception handling data uses CIE augmentation strings + // beginning with 'z' to specify the presence of additional data after + // the CIE and FDE headers and special encodings used for addresses in + // frame description entries. + // + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or + // exception handling data if you have supplied READER with the base + // addresses needed to interpret the pointer encodings that 'z' + // augmentations can specify. See the ByteReader interface for details + // about the base addresses. See the CallFrameInfo::Handler interface + // for details about the additional information one might find in + // 'z'-augmented data. + // + // Thus: + // + // - If you are parsing standard DWARF CFI, as found in a .debug_frame + // section, you should pass false for the EH_FRAME argument, or omit + // it, and you need not worry about providing READER with the + // additional base addresses. + // + // - If you want to parse Linux C++ exception handling data from a + // .eh_frame section, you should pass EH_FRAME as true, and call + // READER's Set*Base member functions before calling our Start method. 
+ // + // - If you want to parse DWARF CFI that uses the 'z' augmentations + // (although I don't think any toolchain ever emits such data), you + // could pass false for EH_FRAME, but call READER's Set*Base members. + // + // The extensions the Linux C++ ABI makes to DWARF for exception + // handling are described here, rather poorly: + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // The mechanics of C++ exception handling, personality routines, + // and language-specific data areas are described here, rather nicely: + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html CallFrameInfo(const char *buffer, size_t buffer_length, - ByteReader *reader, Handler *handler, - Reporter *reporter) - : buffer_(buffer), - buffer_length_(buffer_length), - reader_(reader), - handler_(handler), - reporter_(reporter) { } + ByteReader *reader, Handler *handler, Reporter *reporter, + bool eh_frame = false) + : buffer_(buffer), buffer_length_(buffer_length), + reader_(reader), handler_(handler), reporter_(reporter), + eh_frame_(eh_frame) { } + ~CallFrameInfo() { } // Parse the entries in BUFFER, reporting what we find to HANDLER. @@ -603,6 +659,13 @@ class CallFrameInfo { // The start of this entry in the buffer. const char *start; + // Which kind of entry this is. + // + // We want to be able to use this for error reporting even while we're + // in the midst of parsing. Error reporting code may assume that kind, + // offset, and start fields are valid, although kind may be kUnknown. + EntryKind kind; + // The end of this entry's common prologue (initial length and id), and // the start of this entry's kind-specific fields. const char *fields; @@ -616,16 +679,10 @@ class CallFrameInfo { // simply buffer_ + offset + length.) const char *end; - // The CIE pointer or CIE id field. 
+ // For both DWARF CFI and .eh_frame sections, this is the CIE id in a + // CIE, and the offset of the associated CIE in an FDE. uint64 id; - // The kind of entry we're parsing. - // - // This may be kUnknown at times, since we want to be able to - // count on it for error reporting even before we've finished - // parsing enough to tell what kind of entry we're looking at. - EntryKind kind; - // The CIE that applies to this entry, if we've parsed it. If this is a // CIE, then this field points to this structure. CIE *cie; @@ -638,12 +695,45 @@ class CallFrameInfo { uint64 code_alignment_factor; // scale for code address adjustments int data_alignment_factor; // scale for stack pointer adjustments unsigned return_address_register; // which register holds the return addr + + // True if this CIE includes Linux C++ ABI 'z' augmentation data. + bool has_z_augmentation; + + // Parsed 'z' augmentation data. These are meaningful only if + // has_z_augmentation is true. + bool has_z_lsda; // The 'z' augmentation included 'L'. + bool has_z_personality; // The 'z' augmentation included 'P'. + bool has_z_signal_frame; // The 'z' augmentation included 'S'. + + // If has_z_lsda is true, this is the encoding to be used for language- + // specific data area pointers in FDEs. + DwarfPointerEncoding lsda_encoding; + + // If has_z_personality is true, this is the encoding used for the + // personality routine pointer in the augmentation data. + DwarfPointerEncoding personality_encoding; + + // If has_z_personality is true, this is the address of the personality + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the + // address where the personality routine's address is stored. + uint64 personality_address; + + // This is the encoding used for addresses in the FDE header and + // in DW_CFA_set_loc instructions. This is always valid, whether + // or not we saw a 'z' augmentation string; its default value is + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. 
+ DwarfPointerEncoding pointer_encoding; }; // A frame description entry (FDE). struct FDE: public Entry { uint64 address; // start address of described code uint64 size; // size of described code, in bytes + + // If cie->has_z_lsda is true, then this is the language-specific data + // area's address --- or its address's address, if cie->lsda_encoding + // has the DW_EH_PE_indirect bit set. + uint64 lsda_address; }; // Internal use. @@ -658,24 +748,27 @@ class CallFrameInfo { class RuleMap; class State; - // Parse the initial length and id of a CFI entry, either a CIE or an - // FDE. CURSOR points to the beginning of the data to parse. - // On success, populate ENTRY as appropriate, and return true. - // On failure, report the problem, and return false. + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, + // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the + // data to parse. On success, populate ENTRY as appropriate, and return + // true. On failure, report the problem, and return false. Even if we + // return false, set ENTRY->end to the first byte after the entry if we + // were able to figure that out, or NULL if we weren't. bool ReadEntryPrologue(const char *cursor, Entry *entry); - // Parse the fields of a CIE after the entry prologue. Assume that the - // 'Entry' fields of CIE are populated; use CIE->fields and CIE->end as - // the start and limit for parsing. On success, populate the rest of - // *CIE, and return true; on failure, report the problem and return - // false. + // Parse the fields of a CIE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of CIE are + // populated; use CIE->fields and CIE->end as the start and limit for + // parsing. On success, populate the rest of *CIE, and return true; on + // failure, report the problem and return false. bool ReadCIEFields(CIE *cie); - // Parse the fields of an FDE after the entry prologue. 
Assume that the - // 'Entry' fields of *FDE are initialized; use FDE->fields and FDE->end - // as the start and limit for parsing. Assume that FDE->cie is fully - // initialized. On success, populate the rest of *FDE, and return true; - // on failure, report the problem and return false. + // Parse the fields of an FDE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of *FDE are + // initialized; use FDE->fields and FDE->end as the start and limit for + // parsing. Assume that FDE->cie is fully initialized. On success, + // populate the rest of *FDE, and return true; on failure, report the + // problem and return false. bool ReadFDEFields(FDE *fde); // Report that ENTRY is incomplete, and return false. This is just a @@ -683,6 +776,11 @@ class CallFrameInfo { // little brevity. bool ReportIncomplete(Entry *entry); + // Return true if ENCODING has the DW_EH_PE_indirect bit set. + static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { + return encoding & DW_EH_PE_indirect; + } + // The contents of the DWARF .debug_info section we're parsing. const char *buffer_; size_t buffer_length_; @@ -695,6 +793,9 @@ class CallFrameInfo { // For reporting problems in the info we're parsing. Reporter *reporter_; + + // True if we are processing .eh_frame-format data. + bool eh_frame_; }; // The handler class for CallFrameInfo. The CFI parser calls the @@ -786,6 +887,61 @@ class CallFrameInfo::Handler { // everything is okay, or false if an error has occurred and parsing // should stop. virtual bool End() = 0; + + // Handler functions for Linux C++ exception handling data. These are + // only called if the data includes 'z' augmentation strings. + + // The Linux C++ ABI uses an extension of the DWARF CFI format to + // walk the stack to propagate exceptions from the throw to the + // appropriate catch, and do the appropriate cleanups along the way. 
+ // CFI entries used for exception handling have two additional pieces of + // data associated with them: + // + // - The "language-specific data area" describes which exception + // types the function has 'catch' clauses for, and indicates how + // to go about re-entering the function at the appropriate catch + // clause. If the exception is not caught, it describes the + // destructors that must run before the frame is popped. + // + // - The "personality routine" is responsible for interpreting the + // language-specific data area's contents, and deciding whether + // the exception should continue to propagate down the stack, + // perhaps after doing some cleanup for this frame, or whether the + // exception will be caught here. + // + // In principle, the language-specific data area is opaque to + // everybody but the personality routine. In practice, these values + // may be useful or interesting to readers with extra context, and + // we have to at least skip them anyway, so we might as well report + // them to the handler. + + // This entry's exception handling personality routine's address is + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the routine's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool PersonalityRoutine(uint64 address, bool indirect) { + return true; + } + + // This entry's language-specific data area (LSDA) is located at + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the area's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { + return true; + } + + // This entry describes a signal trampoline --- this frame is the + // caller of a signal handler. 
The default definition for this + // handler function simply returns true, allowing parsing of the + // entry to continue. + // + // The best description of the rationale for and meaning of signal + // trampoline CFI entries seems to be in the GCC bug database: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 + virtual bool SignalHandler() { return true; } }; // The CallFrameInfo class makes calls on an instance of this class to @@ -811,6 +967,12 @@ class CallFrameInfo::Reporter { // haven't parsed enough of the entry to tell yet. virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); + // The .eh_frame data has a four-byte zero at OFFSET where the next + // entry's length would be; this is a terminator. However, the buffer + // length as given to the CallFrameInfo constructor says there should be + // more data. + virtual void EarlyEHTerminator(uint64 offset); + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the // section is not that large. virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); @@ -830,6 +992,14 @@ class CallFrameInfo::Reporter { virtual void UnrecognizedAugmentation(uint64 offset, const string &augmentation); + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not + // a valid encoding. + virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends + // on a base address which has not been supplied. + virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); + // The CIE at OFFSET contains a DW_CFA_restore instruction at // INSN_OFFSET, which may not appear in a CIE. virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); |