From 35c41e00ee2cf9280fe0122c75877ba70b41bb46 Mon Sep 17 00:00:00 2001
From: "ted.mielczarek" <ted.mielczarek@4c0a9323-5329-0410-9bdc-e9ce6186880e>
Date: Fri, 25 Jun 2010 16:56:16 +0000
Subject: Breakpad Mac symbol dumper: Add new Mach-O reader class.

This patch adds files defining new classes in the google_breakpad::mach_o
namespace for parsing fat binaries and Mach-O files. These are used in the
new dumper to handle STABS debugging information, DWARF call frame
information, and .eh_frame exception handling stack walking information.

These new classes are independent of endianness and word size, and
therefore can be used on binaries of all the relevant architectures: x86,
x86_64, ppc, and ARM.

The patch adds a complete set of unit tests for the new classes.
A=jimb R=mark (http://breakpad.appspot.com/93001/show, http://breakpad.appspot.com/115001/show)

git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@610 4c0a9323-5329-0410-9bdc-e9ce6186880e
---
 src/common/mac/macho_reader.cc | 524 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 524 insertions(+)
 create mode 100644 src/common/mac/macho_reader.cc

(limited to 'src/common/mac/macho_reader.cc')

diff --git a/src/common/mac/macho_reader.cc b/src/common/mac/macho_reader.cc
new file mode 100644
index 00000000..53da1807
--- /dev/null
+++ b/src/common/mac/macho_reader.cc
@@ -0,0 +1,524 @@
+// Copyright (c) 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and
+// google_breakpad::mach_o::Reader. See macho_reader.h for details.
+
+#include "common/mac/macho_reader.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+namespace google_breakpad {
+namespace mach_o {
+
+// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
+// argument, so you can't place expressions that do necessary work in
+// the argument of an assert. Nor can you assign the result of the
+// expression to a variable and assert on that variable: you'll get
+// unused variable warnings when NDEBUG is #defined.
+//
+// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that the
+// result is true if NDEBUG is not #defined. Both forms have type void.
+#if defined(NDEBUG)
+#define ASSERT_ALWAYS_EVAL(x) static_cast<void>(x)
+#else
+#define ASSERT_ALWAYS_EVAL(x) assert(x)
+#endif
+
+void FatReader::Reporter::BadHeader() {  // Unreadable or unknown magic.
+  fprintf(stderr, "%s: file is neither a fat binary file"
+          " nor a Mach-O object file\n", filename_.c_str());
+}
+
+void FatReader::Reporter::TooShort() {  // A header field could not be read.
+  fprintf(stderr, "%s: file too short for the data it claims to contain\n",
+          filename_.c_str());
+}
+
+void FatReader::Reporter::MisplacedObjectFile() {  // Entry exceeds the file.
+  fprintf(stderr, "%s: file too short for the object files it claims"
+          " to contain\n", filename_.c_str());
+}
+
+// Parse BUFFER (SIZE bytes) as a fat binary, or as a single Mach-O file,
+// filling object_files_. On malformed input, notify reporter_, return false.
+bool FatReader::Read(const uint8_t *buffer, size_t size) {
+  buffer_.start = buffer;
+  buffer_.end = buffer + size;
+  ByteCursor cursor(&buffer_);
+
+  // Fat binaries always use big-endian, so read the magic number in
+  // that endianness. To recognize Mach-O magic numbers, which can use
+  // either endianness, check for both the proper and reversed forms
+  // of the magic numbers.
+  cursor.set_big_endian(true);
+  if (cursor >> magic_) {
+    if (magic_ == FAT_MAGIC) {
+      // How many object files does this fat binary contain? nfat_arch is
+      // untrusted, so also reject counts whose entries could not fit in the
+      // rest of the file, before sizing object_files_ from it.
+      uint32_t object_files_count;
+      if (!(cursor >> object_files_count) ||
+          object_files_count > cursor.Available() / sizeof(struct fat_arch)) {
+        reporter_->TooShort();
+        return false;
+      }
+
+      // Read the list of object files.
+      const size_t fat_size = buffer_.Size();
+      object_files_.resize(object_files_count);
+      for (size_t i = 0; i < object_files_count; i++) {
+        struct fat_arch *objfile = &object_files_[i];
+
+        // Read this object file entry, byte-swapping as appropriate.
+        cursor >> objfile->cputype >> objfile->cpusubtype
+               >> objfile->offset >> objfile->size >> objfile->align;
+        if (!cursor) {
+          reporter_->TooShort();
+          return false;
+        }
+        // Does the file actually have the bytes this entry refers to?
+        if (objfile->offset > fat_size ||
+            objfile->size > fat_size - objfile->offset) {
+          reporter_->MisplacedObjectFile();
+          return false;
+        }
+      }
+
+      return true;
+    } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
+               magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
+      // If this is a little-endian Mach-O file, fix the cursor's endianness.
+      if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
+        cursor.set_big_endian(false);
+      // Record the entire file as a single entry in the object file list.
+      object_files_.resize(1);
+
+      // Get the cpu type and subtype from the Mach-O header.
+      if (!(cursor >> object_files_[0].cputype
+                   >> object_files_[0].cpusubtype)) {
+        reporter_->TooShort();
+        return false;
+      }
+
+      object_files_[0].offset = 0;
+      object_files_[0].size = buffer_.Size();
+      // This alignment is correct for 32 and 64-bit x86 and ppc.
+      // See get_align in the lipo source for other architectures:
+      // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
+      object_files_[0].align = 12;  // 2^12 == 4096
+      return true;
+    }
+  }
+  reporter_->BadHeader();
+  return false;
+}
+
+void Reader::Reporter::BadHeader() {  // Magic is not Mach-O, or not expected.
+  fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
+}
+
+void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
+                                       cpu_subtype_t cpu_subtype,
+                                       cpu_type_t expected_cpu_type,
+                                       cpu_subtype_t expected_cpu_subtype) {
+  fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
+          " type %d, subtype %d\n",
+          filename_.c_str(), cpu_type, cpu_subtype,  // from the file's header
+          expected_cpu_type, expected_cpu_subtype);  // from Read's caller
+}
+
+void Reader::Reporter::HeaderTruncated() {  // File ends inside the header.
+  fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
+          filename_.c_str());
+}
+
+void Reader::Reporter::LoadCommandRegionTruncated() {  // Region exceeds file.
+  fprintf(stderr, "%s: file too short to hold load command region"
+          " given in Mach-O header\n", filename_.c_str());
+}
+
+void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
+                                           LoadCommandType type) {
+  // CLAIMED and I are size_t, so they must be printed with %zu, not %ld.
+  fprintf(stderr, "%s: file's header claims there are %zu load commands,"
+          " but load command #%zu", filename_.c_str(), claimed, i);
+  if (type) fprintf(stderr, ", of type %d,", type);  // 0: type is unknown
+  fprintf(stderr, " extends beyond the end of the load command region\n");
+}
+
+void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
+  fprintf(stderr, "%s: the contents of load command #%zu, of type %d,"
+          " extend beyond the size given in the load command's header\n",
+          filename_.c_str(), i, type);  // I is size_t, hence %zu (not %ld)
+}
+
+void Reader::Reporter::SectionsMissing(const string &name) {
+  fprintf(stderr, "%s: the load command for segment '%s'"
+          " is too short to hold the section headers it claims to have\n",
+          filename_.c_str(), name.c_str());  // NAME is the segment name
+}
+
+void Reader::Reporter::MisplacedSegmentData(const string &name) {
+  fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
+          " the end of the file\n", filename_.c_str(), name.c_str());
+}  // NAME is the name of the offending segment.
+
+void Reader::Reporter::MisplacedSectionData(const string &section,
+                                            const string &segment) {
+  fprintf(stderr, "%s: the section '%s' in segment '%s'"
+          " claims its contents lie outside the segment's contents\n",
+          filename_.c_str(), section.c_str(), segment.c_str());
+}  // SECTION and SEGMENT are the names of the section and its segment.
+
+void Reader::Reporter::MisplacedSymbolTable() {
+  fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
+          " table's contents are located beyond the end of the file\n",
+          filename_.c_str());  // Covers the entries and the strings.
+}
+
+void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
+  fprintf(stderr, "%s: CPU type %d is not supported\n",
+          filename_.c_str(), cpu_type);  // the type Read was asked to expect
+}
+
+// Parse the Mach-O header in BUFFER (SIZE bytes) and find its load commands.
+// EXPECTED_CPU_TYPE may be CPU_TYPE_ANY. On error, report and return false.
+bool Reader::Read(const uint8_t *buffer, size_t size,
+                  cpu_type_t expected_cpu_type,
+                  cpu_subtype_t expected_cpu_subtype) {
+  assert(!buffer_.start);  // no buffer may have been attached yet
+  buffer_.start = buffer;
+  buffer_.end = buffer + size;
+  ByteCursor cursor(&buffer_, true);  // big-endian until the magic is known
+  uint32_t magic;
+  if (!(cursor >> magic)) {
+    reporter_->HeaderTruncated();
+    return false;
+  }
+
+  if (expected_cpu_type != CPU_TYPE_ANY) {
+    uint32_t expected_magic;
+    // Validate that the magic matches the expected CPU type.
+    switch (expected_cpu_type) {
+      case CPU_TYPE_ARM:  // 32-bit ARM is little-endian, like i386
+      case CPU_TYPE_I386:
+        expected_magic = MH_CIGAM;
+        break;
+      case CPU_TYPE_POWERPC:
+        expected_magic = MH_MAGIC;
+        break;
+      case CPU_TYPE_X86_64:
+        expected_magic = MH_CIGAM_64;
+        break;
+      case CPU_TYPE_POWERPC64:
+        expected_magic = MH_MAGIC_64;
+        break;
+      default:
+        reporter_->UnsupportedCPUType(expected_cpu_type);
+        return false;
+    }
+
+    if (expected_magic != magic) {
+      reporter_->BadHeader();
+      return false;
+    }
+  }
+
+  // Since the byte cursor is in big-endian mode, a reversed magic number
+  // always indicates a little-endian file, regardless of our own endianness.
+  switch (magic) {
+    case MH_MAGIC:    big_endian_ = true;  bits_64_ = false; break;
+    case MH_CIGAM:    big_endian_ = false; bits_64_ = false; break;
+    case MH_MAGIC_64: big_endian_ = true;  bits_64_ = true;  break;
+    case MH_CIGAM_64: big_endian_ = false; bits_64_ = true;  break;
+    default:
+      reporter_->BadHeader();
+      return false;
+  }
+  cursor.set_big_endian(big_endian_);
+  uint32_t commands_size, reserved;
+  cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
+         >> commands_size >> flags_;
+  if (bits_64_)
+    cursor >> reserved;  // 64-bit headers end with one extra 32-bit field
+  if (!cursor) {
+    reporter_->HeaderTruncated();
+    return false;
+  }
+
+  if (expected_cpu_type != CPU_TYPE_ANY &&
+      (expected_cpu_type != cpu_type_ ||
+       expected_cpu_subtype != cpu_subtype_)) {
+    reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
+                               expected_cpu_type, expected_cpu_subtype);
+    return false;
+  }
+
+  cursor.PointTo(&load_commands_.start, commands_size)
+      .PointTo(&load_commands_.end, 0);
+  if (!cursor) {
+    reporter_->LoadCommandRegionTruncated();
+    return false;
+  }
+  return true;
+}
+
+// Walk the load commands found by Read, passing each to HANDLER. Return false
+// if a command is malformed (after notifying reporter_) or HANDLER stops us.
+bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
+  ByteCursor list_cursor(&load_commands_, big_endian_);
+
+  for (size_t index = 0; index < load_command_count_; ++index) {
+    // command will refer to this load command alone, so that cursor refuses
+    // to read past its end. But since we haven't read the size yet, let it
+    // initially cover the entire remainder of the load command series.
+    ByteBuffer command(list_cursor.here(), list_cursor.Available());
+    ByteCursor cursor(&command, big_endian_);
+    // Read the command type and size --- fields common to all commands.
+    uint32_t type, size;
+    if (!(cursor >> type)) {
+      reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
+      return false;
+    }
+    if (!(cursor >> size) || size > command.Size()) {
+      reporter_->LoadCommandsOverrun(load_command_count_, index, type);
+      return false;
+    }
+    // The size counts the type and size fields just read. A smaller value
+    // would put command.end behind cursor, and could stall this walk.
+    if (size < 2 * sizeof(uint32_t)) {
+      reporter_->LoadCommandTooShort(index, type);
+      return false;
+    }
+
+    // Now that we know the length, restrict command to this load command.
+    command.end = command.start + size;
+
+    switch (type) {
+      case LC_SEGMENT:
+      case LC_SEGMENT_64: {
+        Segment segment;
+        segment.bits_64 = (type == LC_SEGMENT_64);
+        size_t word_size = segment.bits_64 ? 8 : 4;
+        uint64_t file_offset, file_size;  // must hold 64-bit file offsets
+        cursor.CString(&segment.name, 16)
+            .Read(word_size, false, &segment.vmaddr)
+            .Read(word_size, false, &segment.vmsize)
+            .Read(word_size, false, &file_offset)
+            .Read(word_size, false, &file_size);
+        cursor >> segment.maxprot >> segment.initprot
+               >> segment.nsects >> segment.flags;
+        if (!cursor) {
+          reporter_->LoadCommandTooShort(index, type);
+          return false;
+        }
+        if (file_offset > buffer_.Size() ||
+            file_size > buffer_.Size() - file_offset) {
+          reporter_->MisplacedSegmentData(segment.name);
+          return false;
+        }
+        // Mach-O files in .dSYM bundles have the loaded segments' contents
+        // removed, and their file offsets and file sizes zeroed out. Mark
+        // such segments with NULL starting and ending contents pointers.
+        if (file_offset == 0 && file_size == 0) {
+          segment.contents.start = segment.contents.end = NULL;
+        } else {
+          segment.contents.start = buffer_.start + file_offset;
+          segment.contents.end = segment.contents.start + file_size;
+        }
+        // The section list occupies the remainder of this load command's space.
+        segment.section_list.start = cursor.here();
+        segment.section_list.end = command.end;
+
+        if (!handler->SegmentCommand(segment))
+          return false;
+        break;
+      }
+
+      case LC_SYMTAB: {
+        uint32_t symoff, nsyms, stroff, strsize;
+        cursor >> symoff >> nsyms >> stroff >> strsize;
+        if (!cursor) {
+          reporter_->LoadCommandTooShort(index, type);
+          return false;
+        }
+        // Entry size: sizeof(struct nlist_64) or sizeof(struct nlist), spelled
+        // out to be paranoid about alignment vs. target architecture.
+        const size_t symbol_size = bits_64_ ? 16 : 12;
+        // Bound nsyms by division; the product could wrap a 32-bit size_t.
+        if (symoff > buffer_.Size() ||
+            nsyms > (buffer_.Size() - symoff) / symbol_size ||
+            stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
+          reporter_->MisplacedSymbolTable();
+          return false;
+        }
+        ByteBuffer entries(buffer_.start + symoff, nsyms * symbol_size);
+        ByteBuffer names(buffer_.start + stroff, strsize);
+        if (!handler->SymtabCommand(entries, names))
+          return false;
+        break;
+      }
+
+      default: {
+        if (!handler->UnknownCommand(type, command))
+          return false;
+        break;
+      }
+    }
+
+    list_cursor.set_here(command.end);
+  }
+
+  return true;
+}
+
+// LoadCommandHandler that searches a load command walk for one named segment.
+class Reader::SegmentFinder : public LoadCommandHandler {
+ public:
+  // Search for the segment called NAME; if the walk reaches it, copy its
+  // description into *SEGMENT. NAME is held by reference, not copied.
+  SegmentFinder(const string &name, Segment *segment)
+      : name_(name), segment_(segment), found_(false) { }
+
+  // Has the walk come across the segment yet?
+  bool found() const { return found_; }
+
+  // Record CANDIDATE on a name match and halt the walk; otherwise continue.
+  bool SegmentCommand(const Segment &candidate) {
+    if (candidate.name != name_)
+      return true;
+    *segment_ = candidate;
+    found_ = true;
+    return false;
+  }
+
+ private:
+  // Name of the sought segment; refers to the constructor's argument.
+  const string &name_;
+
+  // Destination for the segment's description, once found. (WEAK)
+  Segment *segment_;
+
+  // Set once a segment named name_ has been seen.
+  bool found_;
+};
+
+bool Reader::FindSegment(const string &name, Segment *segment) const {
+  SegmentFinder search(name, segment);  // fills *segment on a match
+  WalkLoadCommands(&search);  // result unused: it is false on a match, too
+  return search.found();
+}
+
+bool Reader::WalkSegmentSections(const Segment &segment,
+                                 SectionHandler *handler) const {
+  // Addresses and sizes occupy 8 bytes in 64-bit segments, 4 otherwise.
+  const size_t field_width = segment.bits_64 ? 8 : 4;
+  // Segments stripped for a .dSYM bundle have NULL contents limits: their
+  // file offsets and sizes were zeroed out, although their sections still
+  // claim non-zero sizes.
+  const bool segment_stripped =
+      segment.contents.start == NULL && segment.contents.end == NULL;
+  ByteCursor reader(&segment.section_list, big_endian_);
+
+  for (size_t remaining = segment.nsects; remaining > 0; --remaining) {
+    Section section;
+    section.bits_64 = segment.bits_64;
+    // Decode one section header. Only some fields are kept; the rest are
+    // read into IGNORED merely to step over them.
+    uint64_t byte_count;
+    uint32_t file_offset, ignored;
+    reader.CString(&section.section_name, 16)
+        .CString(&section.segment_name, 16)
+        .Read(field_width, false, &section.address)
+        .Read(field_width, false, &byte_count);
+    reader >> file_offset >> section.align >> ignored >> ignored
+           >> section.flags >> ignored >> ignored;
+    if (section.bits_64)
+      reader >> ignored;
+    if (!reader) {
+      reporter_->SectionsMissing(segment.name);
+      return false;
+    }
+
+    const bool zero_fill = (section.flags & SECTION_TYPE) == S_ZEROFILL;
+    if (zero_fill || segment_stripped) {
+      // Zero-fill sections have a size but no contents, and a stripped
+      // segment has no contents to point into. The latter is no reason to
+      // call MisplacedSectionData: the caller may just need the section's
+      // load address. Either way, set the contents' limits to NULL.
+      section.contents.start = section.contents.end = NULL;
+    } else {
+      // Offsets of the segment's contents, relative to the whole file.
+      const size_t lowest = size_t(segment.contents.start - buffer_.start);
+      const size_t highest = size_t(segment.contents.end - buffer_.start);
+      if (file_offset < lowest || file_offset > highest ||
+          byte_count > highest - file_offset) {
+        reporter_->MisplacedSectionData(section.section_name,
+                                        section.segment_name);
+        return false;
+      }
+      section.contents.start = buffer_.start + file_offset;
+      section.contents.end = section.contents.start + byte_count;
+    }
+    if (!handler->HandleSection(section))
+      return false;
+  }
+  return true;
+}
+
+// A SectionHandler that builds a SectionMap for the sections within a
+// given segment.
+class Reader::SectionMapper: public SectionHandler {
+ public:
+  // Create a SectionHandler that populates MAP with an entry for each section
+  // it is given. Explicit, so a SectionMap * never converts to a handler.
+  explicit SectionMapper(SectionMap *map) : map_(map) { }
+  bool HandleSection(const Section &section) {
+    (*map_)[section.section_name] = section;  // keyed by section name
+    return true;  // never cuts the traversal short
+  }
+ private:
+  // The map under construction. (WEAK)
+  SectionMap *map_;
+};
+
+bool Reader::MapSegmentSections(const Segment &segment,
+                                SectionMap *section_map) const {
+  section_map->clear();  // start afresh; stale entries must not survive
+  SectionMapper collector(section_map);
+  return WalkSegmentSections(segment, &collector);  // false if malformed
+}
+
+}  // namespace mach_o
+}  // namespace google_breakpad
-- 
cgit v1.2.1