// -*- mode: C++ -*- // Copyright (c) 2010 Google Inc. All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef COMMON_DWARF_BYTEREADER_H__ #define COMMON_DWARF_BYTEREADER_H__ #include #include "common/dwarf/types.h" namespace dwarf2reader { // We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN // because it conflicts with a macro enum Endianness { ENDIANNESS_BIG, ENDIANNESS_LITTLE }; // A ByteReader knows how to read single- and multi-byte values of // various endiannesses, sizes, and encodings, as used in DWARF // debugging information. class ByteReader { public: // Construct a ByteReader capable of reading one-, two-, four-, and // eight-byte values according to ENDIANNESS, absolute machine-sized // addresses, DWARF-style "initial length" values, and signed and // unsigned LEB128 numbers. explicit ByteReader(enum Endianness endianness); virtual ~ByteReader(); // Read a single byte from BUFFER and return it as an unsigned 8 bit // number. uint8 ReadOneByte(const char* buffer) const; // Read two bytes from BUFFER and return them as an unsigned 16 bit // number, using this ByteReader's endianness. uint16 ReadTwoBytes(const char* buffer) const; // Read four bytes from BUFFER and return them as an unsigned 32 bit // number, using this ByteReader's endianness. This function returns // a uint64 so that it is compatible with ReadAddress and // ReadOffset. The number it returns will never be outside the range // of an unsigned 32 bit integer. uint64 ReadFourBytes(const char* buffer) const; // Read eight bytes from BUFFER and return them as an unsigned 64 // bit number, using this ByteReader's endianness. uint64 ReadEightBytes(const char* buffer) const; // Read an unsigned LEB128 (Little Endian Base 128) number from // BUFFER and return it as an unsigned 64 bit integer. Set LEN to // the number of bytes read. // // The unsigned LEB128 representation of an integer N is a variable // number of bytes: // // - If N is between 0 and 0x7f, then its unsigned LEB128 // representation is a single byte whose value is N. // // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | // 0x80, followed by the unsigned LEB128 representation of N / // 128, rounded towards negative infinity. // // In other words, we break VALUE into groups of seven bits, put // them in little-endian order, and then write them as eight-bit // bytes with the high bit on all but the last. uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; // Read a signed LEB128 number from BUFFER and return it as an // signed 64 bit integer. Set LEN to the number of bytes read. // // The signed LEB128 representation of an integer N is a variable // number of bytes: // // - If N is between -0x40 and 0x3f, then its signed LEB128 // representation is a single byte whose value is N in two's // complement. // // - Otherwise, its signed LEB128 representation is (N & 0x7f) | // 0x80, followed by the signed LEB128 representation of N / 128, // rounded towards negative infinity. // // In other words, we break VALUE into groups of seven bits, put // them in little-endian order, and then write them as eight-bit // bytes with the high bit on all but the last. int64 ReadSignedLEB128(const char* buffer, size_t* len) const; // Indicate that addresses on this architecture are SIZE bytes long. SIZE // must be either 4 or 8. (DWARF allows addresses to be any number of // bytes in length from 1 to 255, but we only support 32- and 64-bit // addresses at the moment.) You must call this before using the // ReadAddress member function. // // For data in a .debug_info section, or something that .debug_info // refers to like line number or macro data, the compilation unit // header's address_size field indicates the address size to use. Call // frame information doesn't indicate its address size (a shortcoming of // the spec); you must supply the appropriate size based on the // architecture of the target machine. void SetAddressSize(uint8 size); // Return the current address size, in bytes. This is either 4, // indicating 32-bit addresses, or 8, indicating 64-bit addresses. uint8 AddressSize() const { return address_size_; } // Read an address from BUFFER and return it as an unsigned 64 bit // integer, respecting this ByteReader's endianness and address size. You // must call SetAddressSize before calling this function. uint64 ReadAddress(const char* buffer) const; // DWARF actually defines two slightly different formats: 32-bit DWARF // and 64-bit DWARF. This is *not* related to the size of registers or // addresses on the target machine; it refers only to the size of section // offsets and data lengths appearing in the DWARF data. One only needs // 64-bit DWARF when the debugging data itself is larger than 4GiB. // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the // debugging data itself is very large. // // DWARF information identifies itself as 32-bit or 64-bit DWARF: each // compilation unit and call frame information entry begins with an // "initial length" field, which, in addition to giving the length of the // data, also indicates the size of section offsets and lengths appearing // in that data. The ReadInitialLength member function, below, reads an // initial length and sets the ByteReader's offset size as a side effect. // Thus, in the normal process of reading DWARF data, the appropriate // offset size is set automatically. So, you should only need to call // SetOffsetSize if you are using the same ByteReader to jump from the // midst of one block of DWARF data into another. // Read a DWARF "initial length" field from START, and return it as // an unsigned 64 bit integer, respecting this ByteReader's // endianness. Set *LEN to the length of the initial length in // bytes, either four or twelve. As a side effect, set this // ByteReader's offset size to either 4 (if we see a 32-bit DWARF // initial length) or 8 (if we see a 64-bit DWARF initial length). // // A DWARF initial length is either: // // - a byte count stored as an unsigned 32-bit value less than // 0xffffff00, indicating that the data whose length is being // measured uses the 32-bit DWARF format, or // // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, // indicating that the data whose length is being measured uses // the 64-bit DWARF format. uint64 ReadInitialLength(const char* start, size_t* len); // Read an offset from BUFFER and return it as an unsigned 64 bit // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes // long. You must call ReadInitialLength or SetOffsetSize before calling // this function; see the comments above for details. uint64 ReadOffset(const char* buffer) const; // Return the current offset size, in bytes. // A return value of 4 indicates that we are reading 32-bit DWARF. // A return value of 8 indicates that we are reading 64-bit DWARF. uint8 OffsetSize() const { return offset_size_; } // Indicate that section offsets and lengths are SIZE bytes long. SIZE // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). // Usually, you should not call this function yourself; instead, let a // call to ReadInitialLength establish the data's offset size // automatically. void SetOffsetSize(uint8 size); private: // Function pointer type for our address and offset readers. typedef uint64 (ByteReader::*AddressReader)(const char*) const; // Read an offset from BUFFER and return it as an unsigned 64 bit // integer. DWARF2/3 define offsets as either 4 or 8 bytes, // generally depending on the amount of DWARF2/3 info present. // This function pointer gets set by SetOffsetSize. AddressReader offset_reader_; // Read an address from BUFFER and return it as an unsigned 64 bit // integer. DWARF2/3 allow addresses to be any size from 0-255 // bytes currently. Internally we support 4 and 8 byte addresses, // and will CHECK on anything else. // This function pointer gets set by SetAddressSize. AddressReader address_reader_; Endianness endian_; uint8 address_size_; uint8 offset_size_; }; } // namespace dwarf2reader #endif // COMMON_DWARF_BYTEREADER_H__