aboutsummaryrefslogtreecommitdiff
path: root/src/common/dwarf/bytereader.h
blob: bda450d1adba843162734a15a0bac3c8e6ee9073 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
// -*- mode: C++ -*-

// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef COMMON_DWARF_BYTEREADER_H__
#define COMMON_DWARF_BYTEREADER_H__

#include <string>
#include "common/dwarf/types.h"

namespace dwarf2reader {

// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN
// because it conflicts with a macro
enum Endianness {
  ENDIANNESS_BIG,
  ENDIANNESS_LITTLE
};

// A ByteReader knows how to read single- and multi-byte values of
// various endiannesses, sizes, and encodings, as used in DWARF
// debugging information.
class ByteReader {
 public:
  // Construct a ByteReader capable of reading one-, two-, four-, and
  // eight-byte values according to ENDIANNESS, absolute machine-sized
  // addresses, DWARF-style "initial length" values, and signed and
  // unsigned LEB128 numbers.
  explicit ByteReader(enum Endianness endianness);
  virtual ~ByteReader();

  // Read a single byte from BUFFER and return it as an unsigned 8 bit
  // number.
  uint8 ReadOneByte(const char* buffer) const;

  // Read two bytes from BUFFER and return them as an unsigned 16 bit
  // number, using this ByteReader's endianness.
  uint16 ReadTwoBytes(const char* buffer) const;

  // Read four bytes from BUFFER and return them as an unsigned 32 bit
  // number, using this ByteReader's endianness. This function returns
  // a uint64 so that it is compatible with ReadAddress and
  // ReadOffset. The number it returns will never be outside the range
  // of an unsigned 32 bit integer.
  uint64 ReadFourBytes(const char* buffer) const;

  // Read eight bytes from BUFFER and return them as an unsigned 64
  // bit number, using this ByteReader's endianness.
  uint64 ReadEightBytes(const char* buffer) const;

  // Read an unsigned LEB128 (Little Endian Base 128) number from
  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
  // the number of bytes read.
  //
  // The unsigned LEB128 representation of an integer N is a variable
  // number of bytes:
  //
  // - If N is between 0 and 0x7f, then its unsigned LEB128
  //   representation is a single byte whose value is N.
  // 
  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
  //   0x80, followed by the unsigned LEB128 representation of N /
  //   128, rounded towards negative infinity.
  //
  // In other words, we break VALUE into groups of seven bits, put
  // them in little-endian order, and then write them as eight-bit
  // bytes with the high bit on all but the last.
  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;

  // Read a signed LEB128 number from BUFFER and return it as an
  // signed 64 bit integer. Set LEN to the number of bytes read.
  //
  // The signed LEB128 representation of an integer N is a variable
  // number of bytes:
  //
  // - If N is between -0x40 and 0x3f, then its signed LEB128
  //   representation is a single byte whose value is N in two's
  //   complement.
  // 
  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
  //   0x80, followed by the signed LEB128 representation of N / 128,
  //   rounded towards negative infinity.
  //
  // In other words, we break VALUE into groups of seven bits, put
  // them in little-endian order, and then write them as eight-bit
  // bytes with the high bit on all but the last.
  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;

  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
  // must be either 4 or 8. (DWARF allows addresses to be any number of
  // bytes in length from 1 to 255, but we only support 32- and 64-bit
  // addresses at the moment.) You must call this before using the
  // ReadAddress member function.
  //
  // For data in a .debug_info section, or something that .debug_info
  // refers to like line number or macro data, the compilation unit
  // header's address_size field indicates the address size to use. Call
  // frame information doesn't indicate its address size (a shortcoming of
  // the spec); you must supply the appropriate size based on the
  // architecture of the target machine.
  void SetAddressSize(uint8 size);

  // Return the current address size, in bytes. This is either 4,
  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
  uint8 AddressSize() const { return address_size_; }

  // Read an address from BUFFER and return it as an unsigned 64 bit
  // integer, respecting this ByteReader's endianness and address size. You
  // must call SetAddressSize before calling this function.
  uint64 ReadAddress(const char* buffer) const;

  // DWARF actually defines two slightly different formats: 32-bit DWARF
  // and 64-bit DWARF. This is *not* related to the size of registers or
  // addresses on the target machine; it refers only to the size of section
  // offsets and data lengths appearing in the DWARF data. One only needs
  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
  // debugging data itself is very large.
  //
  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
  // compilation unit and call frame information entry begins with an
  // "initial length" field, which, in addition to giving the length of the
  // data, also indicates the size of section offsets and lengths appearing
  // in that data. The ReadInitialLength member function, below, reads an
  // initial length and sets the ByteReader's offset size as a side effect.
  // Thus, in the normal process of reading DWARF data, the appropriate
  // offset size is set automatically. So, you should only need to call
  // SetOffsetSize if you are using the same ByteReader to jump from the
  // midst of one block of DWARF data into another.

  // Read a DWARF "initial length" field from START, and return it as
  // an unsigned 64 bit integer, respecting this ByteReader's
  // endianness. Set *LEN to the length of the initial length in
  // bytes, either four or twelve. As a side effect, set this
  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
  // initial length) or 8 (if we see a 64-bit DWARF initial length).
  //
  // A DWARF initial length is either:
  //
  // - a byte count stored as an unsigned 32-bit value less than
  //   0xffffff00, indicating that the data whose length is being
  //   measured uses the 32-bit DWARF format, or
  //
  // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
  //   indicating that the data whose length is being measured uses
  //   the 64-bit DWARF format.
  uint64 ReadInitialLength(const char* start, size_t* len);

  // Read an offset from BUFFER and return it as an unsigned 64 bit
  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
  // long. You must call ReadInitialLength or SetOffsetSize before calling
  // this function; see the comments above for details.
  uint64 ReadOffset(const char* buffer) const;

  // Return the current offset size, in bytes.
  // A return value of 4 indicates that we are reading 32-bit DWARF.
  // A return value of 8 indicates that we are reading 64-bit DWARF.
  uint8 OffsetSize() const { return offset_size_; }

  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
  // Usually, you should not call this function yourself; instead, let a
  // call to ReadInitialLength establish the data's offset size
  // automatically.
  void SetOffsetSize(uint8 size);

 private:

  // Function pointer type for our address and offset readers.
  typedef uint64 (ByteReader::*AddressReader)(const char*) const;

  // Read an offset from BUFFER and return it as an unsigned 64 bit
  // integer.  DWARF2/3 define offsets as either 4 or 8 bytes,
  // generally depending on the amount of DWARF2/3 info present.
  // This function pointer gets set by SetOffsetSize.
  AddressReader offset_reader_;

  // Read an address from BUFFER and return it as an unsigned 64 bit
  // integer.  DWARF2/3 allow addresses to be any size from 0-255
  // bytes currently.  Internally we support 4 and 8 byte addresses,
  // and will CHECK on anything else.
  // This function pointer gets set by SetAddressSize.
  AddressReader address_reader_;

  Endianness endian_;
  uint8 address_size_;
  uint8 offset_size_;
};

}  // namespace dwarf2reader

#endif  // COMMON_DWARF_BYTEREADER_H__