aboutsummaryrefslogtreecommitdiff
path: root/src/common/mac/macho_reader.h
blob: 30db742db5f1b495110653585e9abdeb9dbcb0a0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
// -*- mode: C++ -*-

// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// macho_reader.h: A class for parsing Mach-O files.

#ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_
#define BREAKPAD_COMMON_MAC_MACHO_READER_H_

#include <mach-o/loader.h>
#include <mach-o/fat.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

#include <map>
#include <string>
#include <vector>

#include "common/byte_cursor.h"
#include "common/mac/super_fat_arch.h"

namespace google_breakpad {
namespace mach_o {

using std::map;
using std::string;
using std::vector;

// The Mac headers don't specify particular types for these groups of
// constants, but defining them here provides some documentation
// value.  We also give them the same width as the fields in which
// they appear, which makes them a bit easier to use with ByteCursors.
typedef uint32_t Magic;
typedef uint32_t FileType;
typedef uint32_t FileFlags;
typedef uint32_t LoadCommandType;
typedef uint32_t SegmentFlags;
typedef uint32_t SectionFlags;

// A parser for fat binary files, used to store universal binaries.
// When applied to a (non-fat) Mach-O file, this behaves as if the
// file were a fat file containing a single object file.
class FatReader {
 public:

  // A class for reporting errors found while parsing fat binary files. The
  // default definitions of these methods print messages to stderr.
  class Reporter {
   public:
    // Create a reporter that attributes problems to |filename|.
    explicit Reporter(const string &filename) : filename_(filename) { }

    virtual ~Reporter() { }

    // The data does not begin with a fat binary or Mach-O magic number.
    // This is a fatal error.
    virtual void BadHeader();

    // The Mach-O fat binary file ends abruptly, without enough space
    // to contain an object file it claims is present.
    virtual void MisplacedObjectFile();

    // The file ends abruptly: either it is not large enough to hold a
    // complete header, or the header implies that contents are present
    // beyond the actual end of the file.
    virtual void TooShort();

   private:
    // The filename to which the reader should attribute problems.
    string filename_;
  };

  // Create a fat binary file reader that uses |reporter| to report problems.
  explicit FatReader(Reporter *reporter) : reporter_(reporter) { }

  // Read the |size| bytes at |buffer| as a fat binary file. On success,
  // return true; on failure, report the problem to reporter_ and return
  // false.
  //
  // If the data is a plain Mach-O file, rather than a fat binary file,
  // then the reader behaves as if it had found a fat binary file whose
  // single object file is the Mach-O file.
  bool Read(const uint8_t *buffer, size_t size);

  // Return an array of 'SuperFatArch' structures describing the
  // object files present in this fat binary file. Set |size| to the
  // number of elements in the array.
  //
  // Assuming Read returned true, the entries are validated: it is safe to
  // assume that the offsets and sizes in each SuperFatArch refer to subranges
  // of the bytes passed to Read.
  //
  // If there are no object files in this fat binary, then this
  // function can return NULL.
  //
  // The array is owned by this FatReader instance; it will be freed when
  // this FatReader is destroyed.
  //
  // This function returns a C-style array instead of a vector to make it
  // possible to use the result with OS X functions like NXFindBestFatArch,
  // so that the symbol dumper will behave consistently with other OS X
  // utilities that work with fat binaries.
  const SuperFatArch* object_files(size_t *count) const {
    *count = object_files_.size();
    if (object_files_.size() > 0)
      return &object_files_[0];
    return NULL;
  }

 private:
  // We use this to report problems parsing the file's contents. (WEAK)
  Reporter *reporter_;

  // The contents of the fat binary or Mach-O file we're parsing. We do not
  // own the storage it refers to.
  ByteBuffer buffer_;

  // The magic number of this binary, in host byte order.
  Magic magic_;

  // The list of object files in this binary.
  // object_files_.size() == fat_header.nfat_arch
  vector<SuperFatArch> object_files_;
};

// A segment in a Mach-O file. All these fields have been byte-swapped as
// appropriate for use by the executing architecture.
struct Segment {
  // The ByteBuffers below point into the bytes passed to the Reader that
  // created this Segment.

  ByteBuffer section_list;    // This segment's section list.
  ByteBuffer contents;        // This segment's contents.

  // This segment's name.
  string name;

  // The address at which this segment should be loaded in memory. If
  // bits_64 is false, only the bottom 32 bits of this value are valid.
  uint64_t vmaddr;

  // The size of this segment when loaded into memory. This may be larger
  // than contents.Size(), in which case the extra area will be
  // initialized with zeros. If bits_64 is false, only the bottom 32 bits
  // of this value are valid.
  uint64_t vmsize;

  // The maximum and initial VM protection of this segment's contents.
  uint32_t maxprot;
  uint32_t initprot;

  // The number of sections in section_list.
  uint32_t nsects;

  // Flags describing this segment, from SegmentFlags.
  uint32_t flags;

  // True if this is a 64-bit section; false if it is a 32-bit section.
  bool bits_64;
};

// A section in a Mach-O file. All these fields have been byte-swapped as
// appropriate for use by the executing architecture.
struct Section {
  // This section's contents. This points into the bytes passed to the
  // Reader that created this Section.
  ByteBuffer contents;

  // This section's name.
  string section_name;  // section[_64].sectname
  // The name of the segment this section belongs to.
  string segment_name;  // section[_64].segname

  // The address at which this section's contents should be loaded in
  // memory. If bits_64 is false, only the bottom 32 bits of this value
  // are valid.
  uint64_t address;

  // The contents of this section should be loaded into memory at an
  // address which is a multiple of (two raised to this power).
  uint32_t align;

  // Flags from SectionFlags describing the section's contents.
  uint32_t flags;

  // We don't support reading relocations yet.

  // True if this is a 64-bit section; false if it is a 32-bit section.
  bool bits_64;
};

// A map from section names to Sections.
typedef map<string, Section> SectionMap;

// A reader for a Mach-O file.
//
// This does not handle fat binaries; see FatReader above. FatReader
// provides a friendly interface for parsing data that could be either a
// fat binary or a Mach-O file.
class Reader {
 public:

  // A class for reporting errors found while parsing Mach-O files. The
  // default definitions of these member functions print messages to
  // stderr.
  class Reporter {
   public:
    // Create a reporter that attributes problems to |filename|.
    explicit Reporter(const string &filename) : filename_(filename) { }
    virtual ~Reporter() { }

    // Reporter functions for fatal errors return void; the reader will
    // definitely return an error to its caller after calling them

    // The data does not begin with a Mach-O magic number, or the magic
    // number does not match the expected value for the cpu architecture.
    // This is a fatal error.
    virtual void BadHeader();

    // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|)
    // does not match the expected CPU architecture
    // (|expected_cpu_type|, |expected_cpu_subtype|).
    virtual void CPUTypeMismatch(cpu_type_t cpu_type,
                                 cpu_subtype_t cpu_subtype,
                                 cpu_type_t expected_cpu_type,
                                 cpu_subtype_t expected_cpu_subtype);

    // The file ends abruptly: either it is not large enough to hold a
    // complete header, or the header implies that contents are present
    // beyond the actual end of the file.
    virtual void HeaderTruncated();

    // The file's load command region, as given in the Mach-O header, is
    // too large for the file.
    virtual void LoadCommandRegionTruncated();

    // The file's Mach-O header claims the file contains |claimed| load
    // commands, but the I'th load command, of type |type|, extends beyond
    // the end of the load command region, as given by the Mach-O header.
    // If |type| is zero, the command's type was unreadable.
    virtual void LoadCommandsOverrun(size_t claimed, size_t i,
                                     LoadCommandType type);

    // The contents of the |i|'th load command, of type |type|, extend beyond
    // the size given in the load command's header.
    virtual void LoadCommandTooShort(size_t i, LoadCommandType type);

    // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named
    // |name| is too short to hold the sections that its header says it does.
    // (This more specific than LoadCommandTooShort.)
    virtual void SectionsMissing(const string &name);

    // The segment named |name| claims that its contents lie beyond the end
    // of the file.
    virtual void MisplacedSegmentData(const string &name);

    // The section named |section| in the segment named |segment| claims that
    // its contents do not lie entirely within the segment.
    virtual void MisplacedSectionData(const string &section,
                                      const string &segment);

    // The LC_SYMTAB command claims that symbol table contents are located
    // beyond the end of the file.
    virtual void MisplacedSymbolTable();

    // An attempt was made to read a Mach-O file of the unsupported
    // CPU architecture |cpu_type|.
    virtual void UnsupportedCPUType(cpu_type_t cpu_type);

   private:
    string filename_;
  };

  // A handler for sections parsed from a segment. The WalkSegmentSections
  // member function accepts an instance of this class, and applies it to
  // each section defined in a given segment.
  class SectionHandler {
   public:
    virtual ~SectionHandler() { }

    // Called to report that the segment's section list contains |section|.
    // This should return true if the iteration should continue, or false
    // if it should stop.
    virtual bool HandleSection(const Section &section) = 0;
  };

  // A handler for the load commands in a Mach-O file.
  class LoadCommandHandler {
   public:
    LoadCommandHandler() { }
    virtual ~LoadCommandHandler() { }

    // When called from WalkLoadCommands, the following handler functions
    // should return true if they wish to continue iterating over the load
    // command list, or false if they wish to stop iterating.
    //
    // When called from LoadCommandIterator::Handle or Reader::Handle,
    // these functions' return values are simply passed through to Handle's
    // caller.
    //
    // The definitions provided by this base class simply return true; the
    // default is to silently ignore sections whose member functions the
    // subclass doesn't override.

    // COMMAND is load command we don't recognize. We provide only the
    // command type and a ByteBuffer enclosing the command's data (If we
    // cannot parse the command type or its size, we call
    // reporter_->IncompleteLoadCommand instead.)
    virtual bool UnknownCommand(LoadCommandType type,
                                const ByteBuffer &contents) {
      return true;
    }

    // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment
    // with the properties given in |segment|.
    virtual bool SegmentCommand(const Segment &segment) {
      return true;
    }

    // The load command is LC_SYMTAB. |entries| holds the array of nlist
    // entries, and |names| holds the strings the entries refer to.
    virtual bool SymtabCommand(const ByteBuffer &entries,
                               const ByteBuffer &names) {
      return true;
    }

    // Add handler functions for more load commands here as needed.
  };

  // Create a Mach-O file reader that reports problems to |reporter|.
  explicit Reader(Reporter *reporter)
      : reporter_(reporter) { }

  // Read the given data as a Mach-O file. The reader retains pointers
  // into the data passed, so the data should live as long as the reader
  // does. On success, return true; on failure, return false.
  //
  // At most one of these functions should be invoked once on each Reader
  // instance.
  bool Read(const uint8_t *buffer,
            size_t size,
            cpu_type_t expected_cpu_type,
            cpu_subtype_t expected_cpu_subtype);
  bool Read(const ByteBuffer &buffer,
            cpu_type_t expected_cpu_type,
            cpu_subtype_t expected_cpu_subtype) {
    return Read(buffer.start,
                buffer.Size(),
                expected_cpu_type,
                expected_cpu_subtype);
  }

  // Return this file's characteristics, as found in the Mach-O header.
  cpu_type_t    cpu_type()    const { return cpu_type_; }
  cpu_subtype_t cpu_subtype() const { return cpu_subtype_; }
  FileType      file_type()   const { return file_type_; }
  FileFlags     flags()       const { return flags_; }

  // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit
  // Mach-O file.
  bool bits_64() const { return bits_64_; }

  // Return true if this is a big-endian Mach-O file, false if it is
  // little-endian.
  bool big_endian() const { return big_endian_; }

  // Apply |handler| to each load command in this Mach-O file, stopping when
  // a handler function returns false. If we encounter a malformed load
  // command, report it via reporter_ and return false. Return true if all
  // load commands were parseable and all handlers returned true.
  bool WalkLoadCommands(LoadCommandHandler *handler) const;

  // Set |segment| to describe the segment named |name|, if present. If
  // found, |segment|'s byte buffers refer to a subregion of the bytes
  // passed to Read. If we find the section, return true; otherwise,
  // return false.
  bool FindSegment(const string &name, Segment *segment) const;

  // Apply |handler| to each section defined in |segment|. If |handler| returns
  // false, stop iterating and return false. If all calls to |handler| return
  // true and we reach the end of the section list, return true.
  bool WalkSegmentSections(const Segment &segment, SectionHandler *handler)
    const;

  // Clear |section_map| and then populate it with a map of the sections
  // in |segment|, from section names to Section structures.
  // Each Section's contents refer to bytes in |segment|'s contents.
  // On success, return true; if a problem occurs, report it and return false.
  bool MapSegmentSections(const Segment &segment, SectionMap *section_map)
    const;

 private:
  // Used internally.
  class SegmentFinder;
  class SectionMapper;

  // We use this to report problems parsing the file's contents. (WEAK)
  Reporter *reporter_;

  // The contents of the Mach-O file we're parsing. We do not own the
  // storage it refers to.
  ByteBuffer buffer_;

  // True if this file is big-endian.
  bool big_endian_;

  // True if this file is a 64-bit Mach-O file.
  bool bits_64_;

  // This file's cpu type and subtype.
  cpu_type_t cpu_type_;        // mach_header[_64].cputype
  cpu_subtype_t cpu_subtype_;  // mach_header[_64].cpusubtype

  // This file's type.
  FileType file_type_;         // mach_header[_64].filetype

  // The region of buffer_ occupied by load commands.
  ByteBuffer load_commands_;

  // The number of load commands in load_commands_.
  uint32_t load_command_count_;  // mach_header[_64].ncmds

  // This file's header flags.
  FileFlags flags_;
};

}  // namespace mach_o
}  // namespace google_breakpad

#endif  // BREAKPAD_COMMON_MAC_MACHO_READER_H_