From 35c41e00ee2cf9280fe0122c75877ba70b41bb46 Mon Sep 17 00:00:00 2001 From: "ted.mielczarek" Date: Fri, 25 Jun 2010 16:56:16 +0000 Subject: Breakpad Mac symbol dumper: Add new Mach-O reader class. This patch adds files defining new classes in the google_breakpad::Mach_O namespace for parsing fat binaries and Mach-O files. These are used in the new dumper to handle STABS debugging information, DWARF call frame information, and .eh_frame exception handling stack walking information. These new classes are independent of endianness and word size, and therefore can be used on binaries of all the relevant architectures: x86, x86_64, ppc, and ARM. The patch adds a complete set of unit tests for the new classes. A=jimb R=mark (http://breakpad.appspot.com/93001/show, http://breakpad.appspot.com/115001/show) git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@610 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/common/mac/macho_reader.cc | 524 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 524 insertions(+) create mode 100644 src/common/mac/macho_reader.cc (limited to 'src/common/mac/macho_reader.cc') diff --git a/src/common/mac/macho_reader.cc b/src/common/mac/macho_reader.cc new file mode 100644 index 00000000..53da1807 --- /dev/null +++ b/src/common/mac/macho_reader.cc @@ -0,0 +1,524 @@ +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy + +// macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and +// google_breakpad::Mach_O::Reader. See macho_reader.h for details. + +#include "common/mac/macho_reader.h" + +#include +#include +#include + +namespace google_breakpad { +namespace mach_o { + +// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its +// arguments, so you can't place expressions that do necessary work in +// the argument of an assert. Nor can you assign the result of the +// expression to a variable and assert that the variable's value is +// true: you'll get unused variable warnings when NDEBUG is #defined. +// +// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that +// the result is true if NDEBUG is not #defined. +#if defined(NDEBUG) +#define ASSERT_ALWAYS_EVAL(x) (x) +#else +#define ASSERT_ALWAYS_EVAL(x) assert(x) +#endif + +void FatReader::Reporter::BadHeader() { + fprintf(stderr, "%s: file is neither a fat binary file" + " nor a Mach-O object file\n", filename_.c_str()); +} + +void FatReader::Reporter::TooShort() { + fprintf(stderr, "%s: file too short for the data it claims to contain\n", + filename_.c_str()); +} + +void FatReader::Reporter::MisplacedObjectFile() { + fprintf(stderr, "%s: file too short for the object files it claims" + " to contain\n", filename_.c_str()); +} + +bool FatReader::Read(const uint8_t *buffer, size_t size) { + buffer_.start = buffer; + buffer_.end = buffer + size; + ByteCursor cursor(&buffer_); + + // Fat binaries always use big-endian, so read the magic number in + // that endianness. To recognize Mach-O magic numbers, which can use + // either endianness, check for both the proper and reversed forms + // of the magic numbers. + cursor.set_big_endian(true); + if (cursor >> magic_) { + if (magic_ == FAT_MAGIC) { + // How many object files does this fat binary contain? + uint32_t object_files_count; + if (!(cursor >> object_files_count)) { // nfat_arch + reporter_->TooShort(); + return false; + } + + // Read the list of object files. + object_files_.resize(object_files_count); + for (size_t i = 0; i < object_files_count; i++) { + struct fat_arch *objfile = &object_files_[i]; + + // Read this object file entry, byte-swapping as appropriate. + cursor >> objfile->cputype + >> objfile->cpusubtype + >> objfile->offset + >> objfile->size + >> objfile->align; + if (!cursor) { + reporter_->TooShort(); + return false; + } + // Does the file actually have the bytes this entry refers to? + size_t fat_size = buffer_.Size(); + if (objfile->offset > fat_size || + objfile->size > fat_size - objfile->offset) { + reporter_->MisplacedObjectFile(); + return false; + } + } + + return true; + } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 || + magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) { + // If this is a little-endian Mach-O file, fix the cursor's endianness. + if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) + cursor.set_big_endian(false); + // Record the entire file as a single entry in the object file list. + object_files_.resize(1); + + // Get the cpu type and subtype from the Mach-O header. + if (!(cursor >> object_files_[0].cputype + >> object_files_[0].cpusubtype)) { + reporter_->TooShort(); + return false; + } + + object_files_[0].offset = 0; + object_files_[0].size = buffer_.Size(); + // This alignment is correct for 32 and 64-bit x86 and ppc. + // See get_align in the lipo source for other architectures: + // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c + object_files_[0].align = 12; // 2^12 == 4096 + + return true; + } + } + + reporter_->BadHeader(); + return false; +} + +void Reader::Reporter::BadHeader() { + fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str()); +} + +void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type, + cpu_subtype_t cpu_subtype, + cpu_type_t expected_cpu_type, + cpu_subtype_t expected_cpu_subtype) { + fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected" + " type %d, subtype %d\n", + filename_.c_str(), cpu_type, cpu_subtype, + expected_cpu_type, expected_cpu_subtype); +} + +void Reader::Reporter::HeaderTruncated() { + fprintf(stderr, "%s: file does not contain a complete Mach-O header\n", + filename_.c_str()); +} + +void Reader::Reporter::LoadCommandRegionTruncated() { + fprintf(stderr, "%s: file too short to hold load command region" + " given in Mach-O header\n", filename_.c_str()); +} + +void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i, + LoadCommandType type) { + fprintf(stderr, "%s: file's header claims there are %ld" + " load commands, but load command #%ld", + filename_.c_str(), claimed, i); + if (type) fprintf(stderr, ", of type %d,", type); + fprintf(stderr, " extends beyond the end of the load command region\n"); +} + +void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) { + fprintf(stderr, "%s: the contents of load command #%ld, of type %d," + " extend beyond the size given in the load command's header\n", + filename_.c_str(), i, type); +} + +void Reader::Reporter::SectionsMissing(const string &name) { + fprintf(stderr, "%s: the load command for segment '%s'" + " is too short to hold the section headers it claims to have\n", + filename_.c_str(), name.c_str()); +} + +void Reader::Reporter::MisplacedSegmentData(const string &name) { + fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond" + " the end of the file\n", filename_.c_str(), name.c_str()); +} + +void Reader::Reporter::MisplacedSectionData(const string §ion, + const string &segment) { + fprintf(stderr, "%s: the section '%s' in segment '%s'" + " claims its contents lie outside the segment's contents\n", + filename_.c_str(), section.c_str(), segment.c_str()); +} + +void Reader::Reporter::MisplacedSymbolTable() { + fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol" + " table's contents are located beyond the end of the file\n", + filename_.c_str()); +} + +void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) { + fprintf(stderr, "%s: CPU type %d is not supported\n", + filename_.c_str(), cpu_type); +} + +bool Reader::Read(const uint8_t *buffer, + size_t size, + cpu_type_t expected_cpu_type, + cpu_subtype_t expected_cpu_subtype) { + assert(!buffer_.start); + buffer_.start = buffer; + buffer_.end = buffer + size; + ByteCursor cursor(&buffer_, true); + uint32_t magic; + if (!(cursor >> magic)) { + reporter_->HeaderTruncated(); + return false; + } + + if (expected_cpu_type != CPU_TYPE_ANY) { + uint32_t expected_magic; + // validate that magic matches the expected cpu type + switch (expected_cpu_type) { + case CPU_TYPE_I386: + expected_magic = MH_CIGAM; + break; + case CPU_TYPE_POWERPC: + expected_magic = MH_MAGIC; + break; + case CPU_TYPE_X86_64: + expected_magic = MH_CIGAM_64; + break; + case CPU_TYPE_POWERPC64: + expected_magic = MH_MAGIC_64; + break; + default: + reporter_->UnsupportedCPUType(expected_cpu_type); + return false; + } + + if (expected_magic != magic) { + reporter_->BadHeader(); + return false; + } + } + + // Since the byte cursor is in big-endian mode, a reversed magic number + // always indicates a little-endian file, regardless of our own endianness. + switch (magic) { + case MH_MAGIC: big_endian_ = true; bits_64_ = false; break; + case MH_CIGAM: big_endian_ = false; bits_64_ = false; break; + case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break; + case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break; + default: + reporter_->BadHeader(); + return false; + } + cursor.set_big_endian(big_endian_); + uint32_t commands_size, reserved; + cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_ + >> commands_size >> flags_; + if (bits_64_) + cursor >> reserved; + if (!cursor) { + reporter_->HeaderTruncated(); + return false; + } + + if (expected_cpu_type != CPU_TYPE_ANY && + (expected_cpu_type != cpu_type_ || + expected_cpu_subtype != cpu_subtype_)) { + reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_, + expected_cpu_type, expected_cpu_subtype); + return false; + } + + cursor + .PointTo(&load_commands_.start, commands_size) + .PointTo(&load_commands_.end, 0); + if (!cursor) { + reporter_->LoadCommandRegionTruncated(); + return false; + } + + return true; +} + +bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const { + ByteCursor list_cursor(&load_commands_, big_endian_); + + for (size_t index = 0; index < load_command_count_; ++index) { + // command refers to this load command alone, so that cursor will + // refuse to read past the load command's end. But since we haven't + // read the size yet, let command initially refer to the entire + // remainder of the load command series. + ByteBuffer command(list_cursor.here(), list_cursor.Available()); + ByteCursor cursor(&command, big_endian_); + + // Read the command type and size --- fields common to all commands. + uint32_t type, size; + if (!(cursor >> type)) { + reporter_->LoadCommandsOverrun(load_command_count_, index, 0); + return false; + } + if (!(cursor >> size) || size > command.Size()) { + reporter_->LoadCommandsOverrun(load_command_count_, index, type); + return false; + } + + // Now that we've read the length, restrict command's range to this + // load command only. + command.end = command.start + size; + + switch (type) { + case LC_SEGMENT: + case LC_SEGMENT_64: { + Segment segment; + segment.bits_64 = (type == LC_SEGMENT_64); + size_t word_size = segment.bits_64 ? 8 : 4; + cursor.CString(&segment.name, 16); + size_t file_offset, file_size; + cursor + .Read(word_size, false, &segment.vmaddr) + .Read(word_size, false, &segment.vmsize) + .Read(word_size, false, &file_offset) + .Read(word_size, false, &file_size); + cursor >> segment.maxprot + >> segment.initprot + >> segment.nsects + >> segment.flags; + if (!cursor) { + reporter_->LoadCommandTooShort(index, type); + return false; + } + if (file_offset > buffer_.Size() || + file_size > buffer_.Size() - file_offset) { + reporter_->MisplacedSegmentData(segment.name); + return false; + } + // Mach-O files in .dSYM bundles have the contents of the loaded + // segments removed, and their file offsets and file sizes zeroed + // out. To help us handle this special case properly, give such + // segments' contents NULL starting and ending pointers. + if (file_offset == 0 && file_size == 0) { + segment.contents.start = segment.contents.end = NULL; + } else { + segment.contents.start = buffer_.start + file_offset; + segment.contents.end = segment.contents.start + file_size; + } + // The section list occupies the remainder of this load command's space. + segment.section_list.start = cursor.here(); + segment.section_list.end = command.end; + + if (!handler->SegmentCommand(segment)) + return false; + break; + } + + case LC_SYMTAB: { + uint32_t symoff, nsyms, stroff, strsize; + cursor >> symoff >> nsyms >> stroff >> strsize; + if (!cursor) { + reporter_->LoadCommandTooShort(index, type); + return false; + } + // How big are the entries in the symbol table? + // sizeof(struct nlist_64) : sizeof(struct nlist), + // but be paranoid about alignment vs. target architecture. + size_t symbol_size = bits_64_ ? 16 : 12; + // How big is the entire symbol array? + size_t symbols_size = nsyms * symbol_size; + if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff || + stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) { + reporter_->MisplacedSymbolTable(); + return false; + } + ByteBuffer entries(buffer_.start + symoff, symbols_size); + ByteBuffer names(buffer_.start + stroff, strsize); + if (!handler->SymtabCommand(entries, names)) + return false; + break; + } + + default: { + if (!handler->UnknownCommand(type, command)) + return false; + break; + } + } + + list_cursor.set_here(command.end); + } + + return true; +} + +// A load command handler that looks for a segment of a given name. +class Reader::SegmentFinder : public LoadCommandHandler { + public: + // Create a load command handler that looks for a segment named NAME, + // and sets SEGMENT to describe it if found. + SegmentFinder(const string &name, Segment *segment) + : name_(name), segment_(segment), found_() { } + + // Return true if the traversal found the segment, false otherwise. + bool found() const { return found_; } + + bool SegmentCommand(const Segment &segment) { + if (segment.name == name_) { + *segment_ = segment; + found_ = true; + return false; + } + return true; + } + + private: + // The name of the segment our creator is looking for. + const string &name_; + + // Where we should store the segment if found. (WEAK) + Segment *segment_; + + // True if we found the segment. + bool found_; +}; + +bool Reader::FindSegment(const string &name, Segment *segment) const { + SegmentFinder finder(name, segment); + WalkLoadCommands(&finder); + return finder.found(); +} + +bool Reader::WalkSegmentSections(const Segment &segment, + SectionHandler *handler) const { + size_t word_size = segment.bits_64 ? 8 : 4; + ByteCursor cursor(&segment.section_list, big_endian_); + + for (size_t i = 0; i < segment.nsects; i++) { + Section section; + section.bits_64 = segment.bits_64; + uint64_t size; + uint32_t offset, dummy32; + cursor + .CString(§ion.section_name, 16) + .CString(§ion.segment_name, 16) + .Read(word_size, false, §ion.address) + .Read(word_size, false, &size) + >> offset + >> section.align + >> dummy32 + >> dummy32 + >> section.flags + >> dummy32 + >> dummy32; + if (section.bits_64) + cursor >> dummy32; + if (!cursor) { + reporter_->SectionsMissing(segment.name); + return false; + } + if ((section.flags & SECTION_TYPE) == S_ZEROFILL) { + // Zero-fill sections have a size, but no contents. + section.contents.start = section.contents.end = NULL; + } else if (segment.contents.start == NULL && + segment.contents.end == NULL) { + // Mach-O files in .dSYM bundles have the contents of the loaded + // segments removed, and their file offsets and file sizes zeroed + // out. However, the sections within those segments still have + // non-zero sizes. There's no reason to call MisplacedSectionData in + // this case; the caller may just need the section's load + // address. But do set the contents' limits to NULL, for safety. + section.contents.start = section.contents.end = NULL; + } else { + if (offset < size_t(segment.contents.start - buffer_.start) || + offset > size_t(segment.contents.end - buffer_.start) || + size > size_t(segment.contents.end - buffer_.start - offset)) { + reporter_->MisplacedSectionData(section.section_name, + section.segment_name); + return false; + } + section.contents.start = buffer_.start + offset; + section.contents.end = section.contents.start + size; + } + if (!handler->HandleSection(section)) + return false; + } + return true; +} + +// A SectionHandler that builds a SectionMap for the sections within a +// given segment. +class Reader::SectionMapper: public SectionHandler { + public: + // Create a SectionHandler that populates MAP with an entry for + // each section it is given. + SectionMapper(SectionMap *map) : map_(map) { } + bool HandleSection(const Section §ion) { + (*map_)[section.section_name] = section; + return true; + } + private: + // The map under construction. (WEAK) + SectionMap *map_; +}; + +bool Reader::MapSegmentSections(const Segment &segment, + SectionMap *section_map) const { + section_map->clear(); + SectionMapper mapper(section_map); + return WalkSegmentSections(segment, &mapper); +} + +} // namespace mach_o +} // namespace google_breakpad -- cgit v1.2.1