diff options
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/mac/dump_syms.h | 180 | ||||
-rw-r--r-- | src/common/mac/dump_syms.mm | 1548 | ||||
-rw-r--r-- | src/common/mac/macho_walker.cc | 17 |
3 files changed, 536 insertions, 1209 deletions
diff --git a/src/common/mac/dump_syms.h b/src/common/mac/dump_syms.h index 1acaf44a..f2bee657 100644 --- a/src/common/mac/dump_syms.h +++ b/src/common/mac/dump_syms.h @@ -1,4 +1,6 @@ -// Copyright (c) 2006, Google Inc. +// -*- mode: c++ -*- + +// Copyright (c) 2010, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -27,53 +29,133 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// dump_syms.h: Interface for DumpSymbols. This class will take a mach-o file -// and extract the symbol information and write it to a file using the -// breakpad symbol file format. +// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dump_syms.h: Declaration of google_breakpad::DumpSymbols, a class for +// reading debugging information from Mach-O files and writing it out as a +// Breakpad symbol file. -#import <Foundation/Foundation.h> +#include <Foundation/Foundation.h> #include <mach-o/loader.h> -#include "common/dwarf/dwarf2reader.h" - -// This will map from an architecture string to a SectionMap, which -// will contain the offsets for all the sections in the dictionary -typedef map<string, dwarf2reader::SectionMap *> ArchSectionMap; - -@interface DumpSymbols : NSObject { - @protected - NSString *sourcePath_; // Source of symbols (STRONG) - NSString *architecture_; // Architecture to extract (STRONG) - NSMutableDictionary *addresses_; // Addresses and symbols (STRONG) - NSMutableSet *functionAddresses_; // Function addresses (STRONG) - NSMutableDictionary *sources_; // Address and Source file paths (STRONG) - NSMutableDictionary *headers_; // Mach-o header information (STRONG) - NSMutableDictionary *sectionData_; // Keyed by seg/sect name (STRONG) - uint32_t lastStartAddress_; - ArchSectionMap *sectionsForArch_; -} - -- (id)initWithContentsOfFile:(NSString *)machoFile; - -- (NSArray *)availableArchitectures; - 
-// One of ppc, x86, i386, ppc64, x86_64 -// If the architecture is not available, it will return NO -// If not set, the native architecture will be used -- (BOOL)setArchitecture:(NSString *)architecture; -- (NSString *)architecture; - -// Write the symbols to |symbolFilePath|. Return YES if successful. -- (BOOL)writeSymbolFile:(NSString *)symbolFilePath; - -@end - -@interface MachSection : NSObject { - @protected - struct section *sect_; - uint32_t sectionNumber_; -} -- (id)initWithMachSection:(struct section *)sect andNumber:(uint32_t)sectionNumber; -- (struct section*)sectionPointer; -- (uint32_t)sectionNumber; - -@end +#include <stdio.h> +#include <stdlib.h> + +#include <string> +#include <vector> + +#include "common/byte_cursor.h" +#include "common/mac/macho_reader.h" +#include "common/module.h" + +namespace google_breakpad { + +class DumpSymbols { + public: + DumpSymbols() + : input_pathname_(), + object_filename_(), + contents_(), + selected_object_file_(), + selected_object_name_() { } + ~DumpSymbols() { + [input_pathname_ release]; + [object_filename_ release]; + [contents_ release]; + } + + // Prepare to read debugging information from |filename|. |filename| may be + // the name of a universal binary, a Mach-O file, or a dSYM bundle + // containing either of the above. On success, return true; if there is a + // problem reading |filename|, report it and return false. + // + // (This class uses NSString for filenames and related values, + // because the Mac Foundation framework seems to support + // filename-related operations more fully on NSString values.) + bool Read(NSString *filename); + + // If this dumper's file includes an object file for |cpu_type| and + // |cpu_subtype|, then select that object file for dumping, and return + // true. Otherwise, return false, and leave this dumper's selected + // architecture unchanged. 
+ // + // By default, if this dumper's file contains only one object file, then + // the dumper will dump those symbols; and if it contains more than one + // object file, then the dumper will dump the object file whose + // architecture matches that of this dumper program. + bool SetArchitecture(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype); + + // Return a pointer to an array of 'struct fat_arch' structures, + // describing the object files contained in this dumper's file. Set + // *|count| to the number of elements in the array. The returned array is + // owned by this DumpSymbols instance. + // + // If there are no available architectures, this function + // may return NULL. + const struct fat_arch *AvailableArchitectures(size_t *count) { + *count = object_files_.size(); + if (object_files_.size() > 0) + return &object_files_[0]; + return NULL; + } + + // Read the selected object file's debugging information, and write it + // out to |stream|. Return true on success; if an error occurs, report it + // and return false. + bool WriteSymbolFile(FILE *stream); + + private: + // Used internally. + class DumperLineToModule; + class LoadCommandDumper; + + // Return an identifier string for the file this DumpSymbols is dumping. + std::string Identifier(); + + // Read debugging information from |dwarf_sections|, which was taken from + // |macho_reader|, and add it to |module|. On success, return true; + // on failure, report the problem and return false. + bool ReadDwarf(google_breakpad::Module *module, + const mach_o::Reader &macho_reader, + const mach_o::SectionMap &dwarf_sections) const; + + // Read DWARF CFI or .eh_frame data from |section|, belonging to + // |macho_reader|, and record it in |module|. If |eh_frame| is true, + // then the data is .eh_frame-format data; otherwise, it is standard DWARF + // .debug_frame data. On success, return true; on failure, report + // the problem and return false. 
+ bool ReadCFI(google_breakpad::Module *module, + const mach_o::Reader &macho_reader, + const mach_o::Section &section, + bool eh_frame) const; + + // The name of the file or bundle whose symbols this will dump. + // This is the path given to Read, for use in error messages. + NSString *input_pathname_; + + // The name of the file this DumpSymbols will actually read debugging + // information from. Normally, this is the same as input_pathname_, but if + // filename refers to a dSYM bundle, then this is the resource file + // within that bundle. + NSString *object_filename_; + + // The complete contents of object_filename_, mapped into memory. + NSData *contents_; + + // A vector of fat_arch structures describing the object files + // object_filename_ contains. If object_filename_ refers to a fat binary, + // this may have more than one element; if it refers to a Mach-O file, this + // has exactly one element. + vector<struct fat_arch> object_files_; + + // The object file in object_files_ selected to dump, or NULL if + // SetArchitecture hasn't been called yet. + const struct fat_arch *selected_object_file_; + + // A string that identifies the selected object file, for use in error + // messages. This is usually object_filename_, but if that refers to a + // fat binary, it includes an indication of the particular architecture + // within that binary. + string selected_object_name_; +}; + +} // namespace google_breakpad diff --git a/src/common/mac/dump_syms.mm b/src/common/mac/dump_syms.mm index cf70b1c6..ab2f2b9e 100644 --- a/src/common/mac/dump_syms.mm +++ b/src/common/mac/dump_syms.mm @@ -1,4 +1,6 @@ -// Copyright (c) 2006, Google Inc. +// -*- mode: c++ -*- + +// Copyright (c) 2010, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -27,1196 +29,450 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + // dump_syms.mm: Create a symbol file for use with minidumps -#include <unistd.h> -#include <signal.h> -#include <cxxabi.h> -#include <stdlib.h> +#include "common/mac/dump_syms.h" -#include <mach/machine.h> +#include <Foundation/Foundation.h> #include <mach-o/arch.h> #include <mach-o/fat.h> -#include <mach-o/loader.h> -#include <mach-o/nlist.h> -#include <mach-o/stab.h> -#include <fcntl.h> - -#import <Foundation/Foundation.h> - -#import "dump_syms.h" -#import "common/mac/file_id.h" -#import "common/mac/macho_utilities.h" -#import "common/dwarf/dwarf2reader.h" -#import "common/dwarf/functioninfo.h" -#import "common/dwarf/bytereader.h" - +#include <stdio.h> + +#include <string> +#include <vector> + +#include "common/dwarf/bytereader-inl.h" +#include "common/dwarf/dwarf2reader.h" +#include "common/dwarf_cfi_to_module.h" +#include "common/dwarf_cu_to_module.h" +#include "common/dwarf_line_to_module.h" +#include "common/mac/file_id.h" +#include "common/mac/macho_reader.h" +#include "common/module.h" +#include "common/stabs_reader.h" +#include "common/stabs_to_module.h" + +using dwarf2reader::ByteReader; +using google_breakpad::DwarfCUToModule; +using google_breakpad::DwarfLineToModule; using google_breakpad::FileID; - -static NSString *kAddressSymbolKey = @"symbol"; -static NSString *kAddressConvertedSymbolKey = @"converted_symbol"; -static NSString *kAddressSourceLineKey = @"line"; -static NSString *kFunctionSizeKey = @"size"; -static NSString *kFunctionFileKey = @"source_file"; -static NSString *kHeaderBaseAddressKey = @"baseAddr"; -static NSString *kHeaderSizeKey = @"size"; -static NSString *kHeaderOffsetKey = @"offset"; // Offset to the header -static NSString *kHeaderIs64BitKey = @"is64"; -static NSString *kHeaderCPUTypeKey = @"cpuType"; - -// The section for __TEXT, __text seems to be always 1. This is useful -// for pruning out extraneous non-function symbols. 
-static const int kTextSection = 1; - -// Dump FunctionMap to stdout. Print address, function name, file -// name, line number, lowpc, and highpc if available. -void DumpFunctionMap(const dwarf2reader::FunctionMap function_map) { - for (dwarf2reader::FunctionMap::const_iterator iter = function_map.begin(); - iter != function_map.end(); ++iter) { - if (iter->second->name.empty()) { - continue; - } - printf("%08llx: %s", iter->first, - iter->second->name.data()); - if (!iter->second->file.empty()) { - printf(" - %s", iter->second->file.data()); - if (iter->second->line != 0) { - printf(":%u", iter->second->line); - } - } - if (iter->second->lowpc != 0 && iter->second->highpc != 0) { - printf(" (%08llx - %08llx)\n", - iter->second->lowpc, - iter->second->highpc); - } - } -} - - -@interface DumpSymbols(PrivateMethods) -- (NSString *)convertCPlusPlusSymbol:(NSString *)symbol; -- (void)addFunction:(NSString *)name line:(int)line address:(uint64_t)address section:(int)section; -- (BOOL)processSymbolItem:(struct nlist_64 *)list stringTable:(char *)table; -- (BOOL)loadSymbolInfo:(void *)base offset:(uint32_t)offset; -- (BOOL)loadSymbolInfo64:(void *)base offset:(uint32_t)offset; -- (BOOL)loadSymbolInfoForArchitecture; -- (BOOL)loadDWARFSymbolInfo:(void *)base offset:(uint32_t)offset; -- (BOOL)loadSTABSSymbolInfo:(void *)base offset:(uint32_t)offset; -- (void)generateSectionDictionary:(struct mach_header*)header; -- (BOOL)loadHeader:(void *)base offset:(uint32_t)offset; -- (BOOL)loadHeader64:(void *)base offset:(uint32_t)offset; -- (BOOL)loadModuleInfo; -- (void)processDWARFLineNumberInfo:(dwarf2reader::LineMap*)line_map; -- (void)processDWARFFunctionInfo:(dwarf2reader::FunctionMap*)address_to_funcinfo; -- (void)processDWARFSourceFileInfo:(vector<dwarf2reader::SourceFileInfo>*) files; -- (BOOL)loadSymbolInfo:(void *)base offset:(uint32_t)offset; -- (dwarf2reader::SectionMap*)getSectionMapForArchitecture:(NSString*)architecture; -@end - -@implementation DumpSymbols 
-//============================================================================= -- (NSString *)convertCPlusPlusSymbol:(NSString *)symbol { - // __cxa_demangle will realloc this if needed - char *buffer = (char *)malloc(1024); - size_t buffer_size = 1024; - int result; - - const char *sym = [symbol UTF8String]; - NSString *demangled = nil; - buffer = abi::__cxa_demangle(sym, buffer, &buffer_size, &result); - if (result == 0) { - demangled = [NSString stringWithUTF8String:buffer]; - } - free(buffer); - return demangled; -} - -//============================================================================= -- (void)addFunction:(NSString *)name line:(int)line address:(uint64_t)address section:(int)section { - NSNumber *addressNum = [NSNumber numberWithUnsignedLongLong:address]; - - if (!address) - return; - - // If the function starts with "_Z" or "__Z" then demangle it. - BOOL isCPP = NO; - - if ([name hasPrefix:@"__Z"]) { - // Remove the leading underscore - name = [name substringFromIndex:1]; - isCPP = YES; - } else if ([name hasPrefix:@"_Z"]) { - isCPP = YES; - } - - // Filter out non-functions - if ([name hasSuffix:@".eh"]) - return; - - if ([name hasSuffix:@"__func__"]) - return; - - if ([name hasSuffix:@"GCC_except_table"]) - return; - - if (isCPP) { - // OBJCPP_MANGLING_HACK - // There are cases where ObjC++ mangles up an ObjC name using quasi-C++ - // mangling: - // @implementation Foozles + (void)barzles { - // static int Baz = 0; - // } @end - // gives you _ZZ18+[Foozles barzles]E3Baz - // c++filt won't parse this properly, and will crash in certain cases. - // Logged as radar: - // 5129938: c++filt does not deal with ObjC++ symbols - // If 5129938 ever gets fixed, we can remove this, but for now this prevents - // c++filt from attempting to demangle names it doesn't know how to handle. 
- // This is with c++filt 2.16 - NSCharacterSet *objcppCharSet = [NSCharacterSet characterSetWithCharactersInString:@"-+[]: "]; - NSRange emptyRange = { NSNotFound, 0 }; - NSRange objcppRange = [name rangeOfCharacterFromSet:objcppCharSet]; - isCPP = NSEqualRanges(objcppRange, emptyRange); - } else if ([name characterAtIndex:0] == '_') { - // Remove the leading underscore - name = [name substringFromIndex:1]; - } - - // If there's already an entry for this address, check and see if we can add - // either the symbol, or a missing line # - NSMutableDictionary *dict = [addresses_ objectForKey:addressNum]; - - if (!dict) { - dict = [[NSMutableDictionary alloc] init]; - [addresses_ setObject:dict forKey:addressNum]; - [dict release]; - } - - if (name && ![dict objectForKey:kAddressSymbolKey]) { - [dict setObject:name forKey:kAddressSymbolKey]; - - // only functions, not line number addresses - [functionAddresses_ addObject:addressNum]; - } - - if (isCPP) { - // try demangling - NSString *demangled = [self convertCPlusPlusSymbol:name]; - if (demangled != nil) - [dict setObject:demangled forKey:kAddressConvertedSymbolKey]; - } - - if (line && ![dict objectForKey:kAddressSourceLineKey]) - [dict setObject:[NSNumber numberWithUnsignedInt:line] - forKey:kAddressSourceLineKey]; - -} - -//============================================================================= -- (BOOL)processSymbolItem:(struct nlist_64 *)list stringTable:(char *)table { - uint32_t n_strx = list->n_un.n_strx; - BOOL result = NO; - - // We don't care about non-section specific information except function length - if (list->n_sect == 0 && list->n_type != N_FUN ) - return NO; - - if (list->n_type == N_FUN) { - if (list->n_sect != 0) { - // we get the function address from the first N_FUN - lastStartAddress_ = list->n_value; - } - else { - // an N_FUN from section 0 may follow the initial N_FUN - // giving us function length information - NSMutableDictionary *dict = [addresses_ objectForKey: - [NSNumber 
numberWithUnsignedLong:lastStartAddress_]]; - - assert(dict); - - // only set the function size the first time - // (sometimes multiple section 0 N_FUN entries appear!) - if (![dict objectForKey:kFunctionSizeKey]) { - [dict setObject:[NSNumber numberWithUnsignedLongLong:list->n_value] - forKey:kFunctionSizeKey]; - } - } - } - - int line = list->n_desc; - - // __TEXT __text section - NSMutableDictionary *archSections = [sectionData_ objectForKey:architecture_]; - - uint32_t mainSection = [[archSections objectForKey:@"__TEXT__text" ] sectionNumber]; - - // Extract debugging information: - // Doc: http://developer.apple.com/documentation/DeveloperTools/gdb/stabs/stabs_toc.html - // Header: /usr/include/mach-o/stab.h: - if (list->n_type == N_SO) { - NSString *src = [NSString stringWithUTF8String:&table[n_strx]]; - NSString *ext = [src pathExtension]; - NSNumber *address = [NSNumber numberWithUnsignedLongLong:list->n_value]; - - // Leopard puts .c files with no code as an offset of 0, but a - // crash can't happen here and it throws off our code that matches - // symbols to line numbers so we ignore them.. - // Return YES because this isn't an error, just something we don't - // care to handle. 
- if ([address unsignedLongValue] == 0) { - return YES; - } - // TODO(waylonis):Ensure that we get the full path for the source file - // from the first N_SO record - // If there is an extension, we'll consider it source code - if ([ext length]) { - if (!sources_) - sources_ = [[NSMutableDictionary alloc] init]; - // Save the source associated with an address - [sources_ setObject:src forKey:address]; - result = YES; - } - } else if (list->n_type == N_FUN) { - NSString *fn = [NSString stringWithUTF8String:&table[n_strx]]; - NSRange range = [fn rangeOfString:@":" options:NSBackwardsSearch]; - - if (![fn length]) - return NO; - - if (range.length > 0) { - // The function has a ":" followed by some stuff, so strip it off - fn = [fn substringToIndex:range.location]; - } +using google_breakpad::mach_o::FatReader; +using google_breakpad::mach_o::Section; +using google_breakpad::mach_o::Segment; +using google_breakpad::Module; +using google_breakpad::StabsReader; +using google_breakpad::StabsToModule; +using std::make_pair; +using std::pair; +using std::string; +using std::vector; + +namespace google_breakpad { + +bool DumpSymbols::Read(NSString *filename) { + if (![[NSFileManager defaultManager] fileExistsAtPath:filename]) { + fprintf(stderr, "Object file does not exist: %s\n", + [filename fileSystemRepresentation]); + return false; + } + + input_pathname_ = [filename retain]; + + // Does this filename refer to a dSYM bundle? + NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_]; + + if (bundle) { + // Filenames referring to bundles usually have names of the form + // "<basename>.dSYM"; however, if the user has specified a wrapper + // suffix (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build settings), + // then the name may have the form "<basename>.<extension>.dSYM". In + // either case, the resource name for the file containing the DWARF + // info within the bundle is <basename>. 
+ // + // Since there's no way to tell how much to strip off, remove one + // extension at a time, and use the first one that + // pathForResource:ofType:inDirectory likes. + NSString *base_name = [input_pathname_ lastPathComponent]; + NSString *dwarf_resource; - [self addFunction:fn line:line address:list->n_value section:list->n_sect ]; - - result = YES; - } else if (list->n_type == N_SLINE && list->n_sect == mainSection) { - [self addFunction:nil line:line address:list->n_value section:list->n_sect ]; - result = YES; - } else if (((list->n_type & N_TYPE) == N_SECT) && !(list->n_type & N_STAB)) { - // Regular symbols or ones that are external - NSString *fn = [NSString stringWithUTF8String:&table[n_strx]]; - - [self addFunction:fn line:0 address:list->n_value section:list->n_sect ]; - result = YES; - } - - return result; -} - -#define SwapLongLongIfNeeded(a) (swap ? NXSwapLongLong(a) : (a)) -#define SwapLongIfNeeded(a) (swap ? NXSwapLong(a) : (a)) -#define SwapIntIfNeeded(a) (swap ? NXSwapInt(a) : (a)) -#define SwapShortIfNeeded(a) (swap ? 
NXSwapShort(a) : (a)) - -//============================================================================= -- (BOOL)loadSymbolInfo:(void *)base offset:(uint32_t)offset { - BOOL loadedStabs = [self loadSTABSSymbolInfo:base offset:offset]; - - NSMutableDictionary *archSections = [sectionData_ objectForKey:architecture_]; - BOOL loadedDWARF = NO; - if ([archSections objectForKey:@"__DWARF__debug_info"]) { - // Treat this this as debug information - loadedDWARF = [self loadDWARFSymbolInfo:base offset:offset]; - } - - return loadedDWARF || loadedStabs; -} - -//============================================================================= -- (BOOL)loadDWARFSymbolInfo:(void *)base offset:(uint32_t)offset { - - struct mach_header *header = (struct mach_header *) - ((uint32_t)base + offset); - BOOL swap = (header->magic == MH_CIGAM); - - NSMutableDictionary *archSections = [sectionData_ objectForKey:architecture_]; - assert (archSections != nil); - section *dbgInfoSection = [[archSections objectForKey:@"__DWARF__debug_info"] sectionPointer]; - uint32_t debugInfoSize = SwapLongIfNeeded(dbgInfoSection->size); - -#if __BIG_ENDIAN__ - dwarf2reader::ByteReader byte_reader(swap ? - dwarf2reader::ENDIANNESS_LITTLE : - dwarf2reader::ENDIANNESS_BIG); -#elif __LITTLE_ENDIAN__ - dwarf2reader::ByteReader byte_reader(swap ? - dwarf2reader::ENDIANNESS_BIG : - dwarf2reader::ENDIANNESS_LITTLE); -#endif - uint64_t dbgOffset = 0; - - dwarf2reader::SectionMap* oneArchitectureSectionMap = [self getSectionMapForArchitecture:architecture_]; - - while (dbgOffset < debugInfoSize) { - // Prepare necessary objects. 
- dwarf2reader::FunctionMap off_to_funcinfo; - dwarf2reader::FunctionMap address_to_funcinfo; - dwarf2reader::LineMap line_map; - vector<dwarf2reader::SourceFileInfo> files; - vector<string> dirs; - - dwarf2reader::CULineInfoHandler line_info_handler(&files, &dirs, - &line_map); - - dwarf2reader::CUFunctionInfoHandler function_info_handler(&files, &dirs, - &line_map, - &off_to_funcinfo, - &address_to_funcinfo, - &line_info_handler, - *oneArchitectureSectionMap, - &byte_reader); - - dwarf2reader::CompilationUnit compilation_unit(*oneArchitectureSectionMap, - dbgOffset, - &byte_reader, - &function_info_handler); - - dbgOffset += compilation_unit.Start(); - - // The next 3 functions take the info that the dwarf reader - // gives and massages them into the data structures that - // dump_syms uses - [self processDWARFSourceFileInfo:&files]; - [self processDWARFFunctionInfo:&address_to_funcinfo]; - [self processDWARFLineNumberInfo:&line_map]; - } - - return YES; -} - -- (void)processDWARFSourceFileInfo:(vector<dwarf2reader::SourceFileInfo>*) files { - if (!sources_) - sources_ = [[NSMutableDictionary alloc] init]; - // Save the source associated with an address - vector<dwarf2reader::SourceFileInfo>::const_iterator iter = files->begin(); - for (; iter != files->end(); iter++) { - NSString *sourceFile = [NSString stringWithUTF8String:(*iter).name.c_str()]; - if ((*iter).lowpc != ULLONG_MAX) { - NSNumber *address = [NSNumber numberWithUnsignedLongLong:(*iter).lowpc]; - if ([address unsignedLongLongValue] == 0) { - continue; - } - [sources_ setObject:sourceFile forKey:address]; - } - } -} - -- (void)processDWARFFunctionInfo:(dwarf2reader::FunctionMap*)address_to_funcinfo { - for (dwarf2reader::FunctionMap::const_iterator iter = address_to_funcinfo->begin(); - iter != address_to_funcinfo->end(); ++iter) { - if (iter->second->name.empty()) { - continue; - } - - if (!addresses_) - addresses_ = [[NSMutableDictionary alloc] init]; - - NSNumber *addressNum = [NSNumber 
numberWithUnsignedLongLong:(*iter).second->lowpc]; - - [functionAddresses_ addObject:addressNum]; - - NSMutableDictionary *dict = [addresses_ objectForKey:addressNum]; - - if (!dict) { - dict = [[NSMutableDictionary alloc] init]; - [addresses_ setObject:dict forKey:addressNum]; - [dict release]; - } - - // set name of function if it isn't already set - if (![dict objectForKey:kAddressSymbolKey]) { - NSString *symbolName = [NSString stringWithUTF8String:iter->second->name.c_str()]; - [dict setObject:symbolName forKey:kAddressSymbolKey]; - } - - // try demangling function name if we have a mangled name - if (![dict objectForKey:kAddressConvertedSymbolKey] && - !iter->second->mangled_name.empty()) { - NSString *mangled = [NSString stringWithUTF8String:iter->second->mangled_name.c_str()]; - NSString *demangled = [self convertCPlusPlusSymbol:mangled]; - if (demangled != nil) - [dict setObject:demangled forKey:kAddressConvertedSymbolKey]; - } - - // set line number for beginning of function - if (iter->second->line && ![dict objectForKey:kAddressSourceLineKey]) - [dict setObject:[NSNumber numberWithUnsignedInt:iter->second->line] - forKey:kAddressSourceLineKey]; - - // set function size by subtracting low PC from high PC - if (![dict objectForKey:kFunctionSizeKey]) { - [dict setObject:[NSNumber numberWithUnsignedLongLong:iter->second->highpc - iter->second->lowpc] - forKey:kFunctionSizeKey]; - } - - // Set the file that the function is in - if (![dict objectForKey:kFunctionFileKey]) { - [dict setObject:[NSString stringWithUTF8String:iter->second->file.c_str()] - forKey:kFunctionFileKey]; - } - } -} - -- (void)processDWARFLineNumberInfo:(dwarf2reader::LineMap*)line_map { - for (dwarf2reader::LineMap::const_iterator iter = line_map->begin(); - iter != line_map->end(); - ++iter) { - - NSNumber *addressNum = [NSNumber numberWithUnsignedLongLong:iter->first]; - NSMutableDictionary *dict = [addresses_ objectForKey:addressNum]; - - if (!dict) { - dict = [[NSMutableDictionary 
alloc] init]; - [addresses_ setObject:dict forKey:addressNum]; - [dict release]; - } - - if (iter->second.second && ![dict objectForKey:kAddressSourceLineKey]) { - [dict setObject:[NSNumber numberWithUnsignedInt:iter->second.second] - forKey:kAddressSourceLineKey]; - } - - // Set the file that the function's address is in - if (![dict objectForKey:kFunctionFileKey]) { - [dict setObject:[NSString stringWithUTF8String:iter->second.first.c_str()] - forKey:kFunctionFileKey]; - } - } -} - -//============================================================================= -- (BOOL)loadSTABSSymbolInfo:(void *)base offset:(uint32_t)offset { - struct mach_header *header = (struct mach_header *)((uint32_t)base + offset); - BOOL swap = (header->magic == MH_CIGAM); - uint32_t count = SwapLongIfNeeded(header->ncmds); - struct load_command *cmd = - (struct load_command *)((uint32_t)header + sizeof(struct mach_header)); - uint32_t symbolTableCommand = SwapLongIfNeeded(LC_SYMTAB); - BOOL result = NO; - - if (!addresses_) - addresses_ = [[NSMutableDictionary alloc] init]; - - for (uint32_t i = 0; cmd && (i < count); ++i) { - if (cmd->cmd == symbolTableCommand) { - struct symtab_command *symtab = (struct symtab_command *)cmd; - uint32_t ncmds = SwapLongIfNeeded(symtab->nsyms); - uint32_t symoff = SwapLongIfNeeded(symtab->symoff); - uint32_t stroff = SwapLongIfNeeded(symtab->stroff); - struct nlist *list = (struct nlist *)((uint32_t)base + symoff + offset); - char *strtab = ((char *)header + stroff); - - // Process each command, looking for debugging stuff - for (uint32_t j = 0; j < ncmds; ++j, ++list) { - // Fill in an nlist_64 structure and process with that - struct nlist_64 nlist64; - nlist64.n_un.n_strx = SwapLongIfNeeded(list->n_un.n_strx); - nlist64.n_type = list->n_type; - nlist64.n_sect = list->n_sect; - nlist64.n_desc = SwapShortIfNeeded(list->n_desc); - nlist64.n_value = (uint64_t)SwapLongIfNeeded(list->n_value); - - // TODO(nealsid): is this broken? 
we get NO if one symbol fails - // but then we lose that information if another suceeeds - if ([self processSymbolItem:&nlist64 stringTable:strtab]) - result = YES; + do { + NSString *new_base_name = [base_name stringByDeletingPathExtension]; + + // If stringByDeletingPathExtension returned the name unchanged, then + // there's nothing more for us to strip off --- lose. + if ([new_base_name isEqualToString:base_name]) { + fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n", + [input_pathname_ fileSystemRepresentation]); + return false; } - } - - uint32_t cmdSize = SwapLongIfNeeded(cmd->cmdsize); - cmd = (struct load_command *)((uint32_t)cmd + cmdSize); - } - - return result; -} - -//============================================================================= -- (BOOL)loadSymbolInfo64:(void *)base offset:(uint32_t)offset { - struct mach_header_64 *header = (struct mach_header_64 *) - ((uint32_t)base + offset); - BOOL swap = (header->magic == MH_CIGAM_64); - uint32_t count = SwapLongIfNeeded(header->ncmds); - struct load_command *cmd = - (struct load_command *)((uint32_t)header + sizeof(struct mach_header)); - uint32_t symbolTableCommand = SwapLongIfNeeded(LC_SYMTAB); - BOOL result = NO; - for (uint32_t i = 0; cmd && (i < count); i++) { - if (cmd->cmd == symbolTableCommand) { - struct symtab_command *symtab = (struct symtab_command *)cmd; - uint32_t ncmds = SwapLongIfNeeded(symtab->nsyms); - uint32_t symoff = SwapLongIfNeeded(symtab->symoff); - uint32_t stroff = SwapLongIfNeeded(symtab->stroff); - struct nlist_64 *list = (struct nlist_64 *)((uint32_t)base + symoff); - char *strtab = ((char *)header + stroff); + // Take the shortened result as our new base_name. + base_name = new_base_name; - // Process each command, looking for debugging stuff - for (uint32_t j = 0; j < ncmds; ++j, ++list) { - if (!(list->n_type & (N_STAB | N_TYPE))) - continue; + // Try to find a DWARF resource in the bundle under the new base_name. 
+ dwarf_resource = [bundle pathForResource:base_name + ofType:nil inDirectory:@"DWARF"]; + } while (!dwarf_resource); - // Fill in an nlist_64 structure and process with that - struct nlist_64 nlist64; - nlist64.n_un.n_strx = SwapLongIfNeeded(list->n_un.n_strx); - nlist64.n_type = list->n_type; - nlist64.n_sect = list->n_sect; - nlist64.n_desc = SwapShortIfNeeded(list->n_desc); - nlist64.n_value = SwapLongLongIfNeeded(list->n_value); - - if ([self processSymbolItem:&nlist64 stringTable:strtab]) - result = YES; - } - } - - uint32_t cmdSize = SwapLongIfNeeded(cmd->cmdsize); - cmd = (struct load_command *)((uint32_t)cmd + cmdSize); - } - - return result; -} - -//============================================================================= -- (BOOL)loadSymbolInfoForArchitecture { - NSMutableData *data = [[NSMutableData alloc] - initWithContentsOfMappedFile:sourcePath_]; - - NSDictionary *headerInfo = [headers_ objectForKey:architecture_]; - void *base = [data mutableBytes]; - uint32_t offset = - [[headerInfo objectForKey:kHeaderOffsetKey] unsignedLongValue]; - BOOL is64 = [[headerInfo objectForKey:kHeaderIs64BitKey] boolValue]; - BOOL result = is64 ? [self loadSymbolInfo64:base offset:offset] : - [self loadSymbolInfo:base offset:offset]; - - [data release]; - return result; -} - -- (dwarf2reader::SectionMap*)getSectionMapForArchitecture:(NSString*)architecture { - - string currentArch([architecture UTF8String]); - dwarf2reader::SectionMap *oneArchitectureSectionMap; - - ArchSectionMap::const_iterator iter = sectionsForArch_->find(currentArch); - - if (iter == sectionsForArch_->end()) { - oneArchitectureSectionMap = new dwarf2reader::SectionMap(); - sectionsForArch_->insert(make_pair(currentArch, oneArchitectureSectionMap)); + object_filename_ = [dwarf_resource retain]; } else { - oneArchitectureSectionMap = iter->second; - } - - return oneArchitectureSectionMap; + object_filename_ = [input_pathname_ retain]; + } + + // Read the file's contents into memory. 
+ // + // The documentation for dataWithContentsOfMappedFile says: + // + // Because of file mapping restrictions, this method should only be + // used if the file is guaranteed to exist for the duration of the + // data object’s existence. It is generally safer to use the + // dataWithContentsOfFile: method. + // + // I gather this means that OS X doesn't have (or at least, that method + // doesn't use) a form of mapping like Linux's MAP_PRIVATE, where the + // process appears to get its own copy of the data, and changes to the + // file don't affect memory and vice versa). + NSError *error; + contents_ = [NSData dataWithContentsOfFile:object_filename_ + options:0 + error:&error]; + if (!contents_) { + fprintf(stderr, "Error reading object file: %s: %s\n", + [object_filename_ fileSystemRepresentation], + [[error localizedDescription] UTF8String]); + return false; + } + [contents_ retain]; + + // Get the list of object files present in the file. + FatReader::Reporter fat_reporter([object_filename_ + fileSystemRepresentation]); + FatReader fat_reader(&fat_reporter); + if (!fat_reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]), + [contents_ length])) { + return false; + } + + // Get our own copy of fat_reader's object file list. 
+ size_t object_files_count; + const struct fat_arch *object_files = + fat_reader.object_files(&object_files_count); + if (object_files_count == 0) { + fprintf(stderr, "Fat binary file contains *no* architectures: %s\n", + [object_filename_ fileSystemRepresentation]); + return false; + } + object_files_.resize(object_files_count); + memcpy(&object_files_[0], object_files, + sizeof(struct fat_arch) * object_files_count); + + return true; } -//============================================================================= -// build a dictionary of section numbers keyed off a string -// which is the concatenation of the segment name and the section name -- (void)generateSectionDictionary:(struct mach_header*)header { - - BOOL swap = (header->magic == MH_CIGAM); - uint32_t count = SwapLongIfNeeded(header->ncmds); - struct load_command *cmd = - (struct load_command *)((uint32_t)header + sizeof(struct mach_header)); - uint32_t segmentCommand = SwapLongIfNeeded(LC_SEGMENT); - uint32_t sectionNumber = 1; // section numbers are counted from 1 - - cpu_type_t cpu = SwapIntIfNeeded(header->cputype); - - NSString *arch; - - if (cpu & CPU_ARCH_ABI64) - arch = ((cpu & ~CPU_ARCH_ABI64) == CPU_TYPE_X86) ? - @"x86_64" : @"ppc64"; - else - arch = (cpu == CPU_TYPE_X86) ? 
@"x86" : @"ppc"; - - NSMutableDictionary *archSections; - - if (!sectionData_) { - sectionData_ = [[NSMutableDictionary alloc] init]; - } - - if (![sectionData_ objectForKey:architecture_]) { - [sectionData_ setObject:[[NSMutableDictionary alloc] init] forKey:arch]; - } - - archSections = [sectionData_ objectForKey:arch]; - - dwarf2reader::SectionMap* oneArchitectureSectionMap = [self getSectionMapForArchitecture:arch]; - - // loop through every segment command, then through every section - // contained inside each of them - for (uint32_t i = 0; cmd && (i < count); ++i) { - if (cmd->cmd == segmentCommand) { - struct segment_command *seg = (struct segment_command *)cmd; - section *sect = (section *)((uint32_t)cmd + sizeof(segment_command)); - uint32_t nsects = SwapLongIfNeeded(seg->nsects); - - for (uint32_t j = 0; j < nsects; ++j) { - NSString *segSectName = [NSString stringWithFormat:@"%s%s", - seg->segname, sect->sectname]; - - [archSections setObject:[[MachSection alloc] initWithMachSection:sect andNumber:sectionNumber] - forKey:segSectName]; - - // filter out sections with size 0, offset 0 - if (sect->offset != 0 && sect->size != 0) { - // fill sectionmap for dwarf reader - oneArchitectureSectionMap->insert(make_pair(sect->sectname,make_pair(((const char*)header) + SwapLongIfNeeded(sect->offset), (size_t)SwapLongIfNeeded(sect->size)))); - } - - ++sect; - ++sectionNumber; - } - } - - uint32_t cmdSize = SwapLongIfNeeded(cmd->cmdsize); - cmd = (struct load_command *)((uint32_t)cmd + cmdSize); - } +bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type, + cpu_subtype_t cpu_subtype) { + // Find the best match for the architecture the user requested. + const struct fat_arch *best_match + = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0], + object_files_.size()); + if (!best_match) return false; + + // Record the selected object file. 
+ selected_object_file_ = best_match; + return true; } -//============================================================================= -- (BOOL)loadHeader:(void *)base offset:(uint32_t)offset { - struct mach_header *header = (struct mach_header *)((uint32_t)base + offset); - BOOL swap = (header->magic == MH_CIGAM); - uint32_t count = SwapLongIfNeeded(header->ncmds); - struct load_command *cmd = - (struct load_command *)((uint32_t)header + sizeof(struct mach_header)); - uint32_t segmentCommand = SwapLongIfNeeded(LC_SEGMENT); - - [self generateSectionDictionary:header]; - - for (uint32_t i = 0; cmd && (i < count); ++i) { - if (cmd->cmd == segmentCommand) { - struct segment_command *seg = (struct segment_command *)cmd; - - if (!strcmp(seg->segname, "__TEXT")) { - uint32_t addr = SwapLongIfNeeded(seg->vmaddr); - uint32_t size = SwapLongIfNeeded(seg->vmsize); - cpu_type_t cpu = SwapIntIfNeeded(header->cputype); - NSString *cpuStr = (cpu == CPU_TYPE_I386) ? @"x86" : @"ppc"; - - [headers_ setObject:[NSDictionary dictionaryWithObjectsAndKeys: - [NSNumber numberWithUnsignedLongLong:(uint64_t)addr], - kHeaderBaseAddressKey, - [NSNumber numberWithUnsignedLongLong:(uint64_t)size], kHeaderSizeKey, - [NSNumber numberWithUnsignedLong:offset], kHeaderOffsetKey, - [NSNumber numberWithBool:NO], kHeaderIs64BitKey, - [NSNumber numberWithUnsignedLong:cpu], kHeaderCPUTypeKey, - nil] forKey:cpuStr]; - - return YES; - } - } - - uint32_t cmdSize = SwapLongIfNeeded(cmd->cmdsize); - cmd = (struct load_command *)((uint32_t)cmd + cmdSize); +string DumpSymbols::Identifier() { + FileID file_id([object_filename_ fileSystemRepresentation]); + unsigned char identifier_bytes[16]; + cpu_type_t cpu_type = selected_object_file_->cputype; + if (!file_id.MachoIdentifier(cpu_type, identifier_bytes)) { + fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n", + [object_filename_ fileSystemRepresentation]); + return ""; } - return NO; -} - 
-//============================================================================= -- (BOOL)loadHeader64:(void *)base offset:(uint32_t)offset { - struct mach_header_64 *header = - (struct mach_header_64 *)((uint32_t)base + offset); - BOOL swap = (header->magic == MH_CIGAM_64); - uint32_t count = SwapLongIfNeeded(header->ncmds); - struct load_command *cmd = - (struct load_command *)((uint32_t)header + sizeof(struct mach_header_64)); - - for (uint32_t i = 0; cmd && (i < count); ++i) { - uint32_t segmentCommand = SwapLongIfNeeded(LC_SEGMENT_64); - if (cmd->cmd == segmentCommand) { - struct segment_command_64 *seg = (struct segment_command_64 *)cmd; - if (!strcmp(seg->segname, "__TEXT")) { - uint64_t addr = SwapLongLongIfNeeded(seg->vmaddr); - uint64_t size = SwapLongLongIfNeeded(seg->vmsize); - cpu_type_t cpu = SwapIntIfNeeded(header->cputype); - cpu &= (~CPU_ARCH_ABI64); - NSString *cpuStr = (cpu == CPU_TYPE_I386) ? @"x86_64" : @"ppc64"; - - [headers_ setObject:[NSDictionary dictionaryWithObjectsAndKeys: - [NSNumber numberWithUnsignedLongLong:addr], kHeaderBaseAddressKey, - [NSNumber numberWithUnsignedLongLong:size], kHeaderSizeKey, - [NSNumber numberWithUnsignedLong:offset], kHeaderOffsetKey, - [NSNumber numberWithBool:YES], kHeaderIs64BitKey, - [NSNumber numberWithUnsignedLong:cpu], kHeaderCPUTypeKey, - nil] forKey:cpuStr]; - return YES; - } - } + char identifier_string[40]; + FileID::ConvertIdentifierToString(identifier_bytes, identifier_string, + sizeof(identifier_string)); - uint32_t cmdSize = SwapLongIfNeeded(cmd->cmdsize); - cmd = (struct load_command *)((uint32_t)cmd + cmdSize); - } + string compacted(identifier_string); + for(size_t i = compacted.find('-'); i != string::npos; + i = compacted.find('-', i)) + compacted.erase(i, 1); - return NO; + return compacted; } -//============================================================================= -- (BOOL)loadModuleInfo { - uint64_t result = 0; - NSMutableData *data = [[NSMutableData alloc] - 
initWithContentsOfMappedFile:sourcePath_]; - void *bytes = [data mutableBytes]; - struct fat_header *fat = (struct fat_header *)bytes; - - if (!fat) { - [data release]; - return 0; - } - - // Gather some information based on the header - BOOL isFat = fat->magic == FAT_MAGIC || fat->magic == FAT_CIGAM; - BOOL is64 = fat->magic == MH_MAGIC_64 || fat->magic == MH_CIGAM_64; - BOOL is32 = fat->magic == MH_MAGIC || fat->magic == MH_CIGAM; - BOOL swap = fat->magic == FAT_CIGAM || fat->magic == MH_CIGAM_64 || - fat->magic == MH_CIGAM; - - if (!is64 && !is32 && !isFat) { - [data release]; - return 0; - } - - // Load any available architectures and save the information - headers_ = [[NSMutableDictionary alloc] init]; - - if (isFat) { - struct fat_arch *archs = - (struct fat_arch *)((uint32_t)fat + sizeof(struct fat_header)); - uint32_t count = SwapLongIfNeeded(fat->nfat_arch); - - for (uint32_t i = 0; i < count; ++i) { - archs[i].cputype = SwapIntIfNeeded(archs[i].cputype); - archs[i].cpusubtype = SwapIntIfNeeded(archs[i].cpusubtype); - archs[i].offset = SwapLongIfNeeded(archs[i].offset); - archs[i].size = SwapLongIfNeeded(archs[i].size); - archs[i].align = SwapLongIfNeeded(archs[i].align); +// A line-to-module loader that accepts line number info parsed by +// dwarf2reader::LineInfo and populates a Module and a line vector +// with the results. +class DumpSymbols::DumperLineToModule: + public DwarfCUToModule::LineToModuleFunctor { + public: + // Create a line-to-module converter using BYTE_READER. 
+ DumperLineToModule(dwarf2reader::ByteReader *byte_reader) + : byte_reader_(byte_reader) { } + void operator()(const char *program, uint64 length, + Module *module, vector<Module::Line> *lines) { + DwarfLineToModule handler(module, lines); + dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); + parser.Start(); + } + private: + dwarf2reader::ByteReader *byte_reader_; // WEAK +}; + +bool DumpSymbols::ReadDwarf(google_breakpad::Module *module, + const mach_o::Reader &macho_reader, + const mach_o::SectionMap &dwarf_sections) const { + // Build a byte reader of the appropriate endianness. + ByteReader byte_reader(macho_reader.big_endian() + ? dwarf2reader::ENDIANNESS_BIG + : dwarf2reader::ENDIANNESS_LITTLE); + + // Construct a context for this file. + DwarfCUToModule::FileContext file_context(selected_object_name_, + module); + + // Build a dwarf2reader::SectionMap from our mach_o::SectionMap. + for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin(); + it != dwarf_sections.end(); it++) { + file_context.section_map[it->first] = + make_pair(reinterpret_cast<const char *>(it->second.contents.start), + it->second.contents.Size()); + } + + // Find the __debug_info section. + std::pair<const char *, uint64> debug_info_section + = file_context.section_map["__debug_info"]; + // There had better be a __debug_info section! + if (!debug_info_section.first) { + fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n", + selected_object_name_.c_str()); + return false; + } + + // Build a line-to-module loader for the root handler to use. + DumperLineToModule line_to_module(&byte_reader); + + // Walk the __debug_info section, one compilation unit at a time. + uint64 debug_info_length = debug_info_section.second; + for (uint64 offset = 0; offset < debug_info_length;) { + // Make a handler for the root DIE that populates MODULE with the + // debug info. 
+ DwarfCUToModule::WarningReporter reporter(selected_object_name_, + offset); + DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter); + // Make a Dwarf2Handler that drives our DIEHandler. + dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); + // Make a DWARF parser for the compilation unit at OFFSET. + dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map, + offset, + &byte_reader, + &die_dispatcher); + // Process the entire compilation unit; get the offset of the next. + offset += dwarf_reader.Start(); + } + + return true; +} - if (archs[i].cputype & CPU_ARCH_ABI64) - result = [self loadHeader64:bytes offset:archs[i].offset]; +bool DumpSymbols::ReadCFI(google_breakpad::Module *module, + const mach_o::Reader &macho_reader, + const mach_o::Section §ion, + bool eh_frame) const { + // Find the appropriate set of register names for this file's + // architecture. + vector<string> register_names; + switch (macho_reader.cpu_type()) { + case CPU_TYPE_X86: + register_names = DwarfCFIToModule::RegisterNames::I386(); + break; + case CPU_TYPE_X86_64: + register_names = DwarfCFIToModule::RegisterNames::X86_64(); + break; + case CPU_TYPE_ARM: + register_names = DwarfCFIToModule::RegisterNames::ARM(); + break; + default: { + const NXArchInfo *arch = + NXGetArchInfoFromCpuType(macho_reader.cpu_type(), + macho_reader.cpu_subtype()); + fprintf(stderr, "%s: cannot convert DWARF call frame information for ", + selected_object_name_.c_str()); + if (arch) + fprintf(stderr, "architecture '%s'", arch->name); else - result = [self loadHeader:bytes offset:archs[i].offset]; + fprintf(stderr, "architecture %d,%d", + macho_reader.cpu_type(), macho_reader.cpu_subtype()); + fprintf(stderr, " to Breakpad symbol file: no register name table\n"); + return false; } - } else if (is32) { - result = [self loadHeader:bytes offset:0]; - } else { - result = [self loadHeader64:bytes offset:0]; - } - - [data release]; - return result; -} - 
-//============================================================================= -static BOOL WriteFormat(int fd, const char *fmt, ...) { - va_list list; - char buffer[4096]; - ssize_t expected, written; - - va_start(list, fmt); - vsnprintf(buffer, sizeof(buffer), fmt, list); - expected = strlen(buffer); - written = write(fd, buffer, strlen(buffer)); - va_end(list); - - return expected == written; -} - -//============================================================================= -- (BOOL)outputSymbolFile:(int)fd { - // Get the baseAddress for this architecture - NSDictionary *archDict = [headers_ objectForKey:architecture_]; - NSNumber *baseAddressNum = [archDict objectForKey:kHeaderBaseAddressKey]; - uint64_t baseAddress = - baseAddressNum ? [baseAddressNum unsignedLongLongValue] : 0; - NSNumber *moduleSizeNum = [archDict objectForKey:kHeaderSizeKey]; - uint64_t moduleSize = - moduleSizeNum ? [moduleSizeNum unsignedLongLongValue] : 0; - - // UUID - FileID file_id([sourcePath_ fileSystemRepresentation]); - unsigned char identifier[16]; - char identifierStr[40]; - const char *moduleName = [[sourcePath_ lastPathComponent] UTF8String]; - int cpu_type = [[archDict objectForKey:kHeaderCPUTypeKey] unsignedLongValue]; - if (file_id.MachoIdentifier(cpu_type, identifier)) { - FileID::ConvertIdentifierToString(identifier, identifierStr, - sizeof(identifierStr)); - } - else { - fprintf(stderr, "Unable to calculate UUID of mach-o binary!\n"); - return NO; } - // keep track exclusively of function addresses - // for sanity checking function lengths - functionAddresses_ = [[NSMutableSet alloc] init]; - - // Gather the information - [self loadSymbolInfoForArchitecture]; - - NSArray *sortedAddresses = [[addresses_ allKeys] - sortedArrayUsingSelector:@selector(compare:)]; + // Find the call frame information and its size. 
+ const char *cfi = reinterpret_cast<const char *>(section.contents.start); + size_t cfi_size = section.contents.Size(); - NSArray *sortedFunctionAddresses = [[functionAddresses_ allObjects] - sortedArrayUsingSelector:@selector(compare:)]; - - // position ourselves at the 2nd function - unsigned int funcIndex = 1; - - // Remove the dashes from the string - NSMutableString *compactedStr = - [NSMutableString stringWithCString:identifierStr encoding:NSASCIIStringEncoding]; - [compactedStr replaceOccurrencesOfString:@"-" withString:@"" options:0 - range:NSMakeRange(0, [compactedStr length])]; - - if (!WriteFormat(fd, "MODULE mac %s %s0 %s\n", [architecture_ UTF8String], - [compactedStr UTF8String], moduleName)) { - return NO; - } - - // Sources ordered by address - NSArray *sources = [[sources_ allKeys] - sortedArrayUsingSelector:@selector(compare:)]; - NSMutableDictionary *fileNameToFileIndex = [[NSMutableDictionary alloc] init]; - unsigned int sourceCount = [sources count]; - for (unsigned int i = 0; i < sourceCount; ++i) { - NSString *file = [sources_ objectForKey:[sources objectAtIndex:i]]; - if (!WriteFormat(fd, "FILE %d %s\n", i + 1, [file UTF8String])) - return NO; - - [fileNameToFileIndex setObject:[NSNumber numberWithUnsignedInt:i+1] - forKey:file]; - } - - // Symbols - char terminatingChar = '\n'; - uint32_t fileIdx = 0, nextFileIdx = 0; - uint64_t nextSourceFileAddress = 0; - NSNumber *nextAddress; - uint64_t nextAddressVal; - unsigned int addressCount = [sortedAddresses count]; - - bool insideFunction = false; - - for (unsigned int i = 0; i < addressCount; ++i) { - NSNumber *address = [sortedAddresses objectAtIndex:i]; - // skip sources that have a starting address of 0 - if ([address unsignedLongValue] == 0) { - continue; - } - - uint64_t addressVal = [address unsignedLongLongValue] - baseAddress; - - // Get the next address to calculate the length - if (i + 1 < addressCount) { - nextAddress = [sortedAddresses objectAtIndex:i + 1]; - nextAddressVal = 
[nextAddress unsignedLongLongValue] - baseAddress; - } else { - nextAddressVal = baseAddress + moduleSize; - // The symbol reader doesn't want a trailing newline - terminatingChar = '\0'; - } + // Plug together the parser, handler, and their entourages. + DwarfCFIToModule::Reporter module_reporter(selected_object_name_, + section.section_name); + DwarfCFIToModule handler(module, register_names, &module_reporter); + dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ? + dwarf2reader::ENDIANNESS_BIG : + dwarf2reader::ENDIANNESS_LITTLE); + byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4); + // At the moment, according to folks at Apple and some cursory + // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so + // this is the only base address the CFI parser will need. + byte_reader.SetCFIDataBase(section.address, cfi); - NSDictionary *dict = [addresses_ objectForKey:address]; - NSNumber *line = [dict objectForKey:kAddressSourceLineKey]; - NSString *symbol = [dict objectForKey:kAddressConvertedSymbolKey]; - - if (!symbol) - symbol = [dict objectForKey:kAddressSymbolKey]; - - // sanity check the function length by making sure it doesn't - // run beyond the next function entry - uint64_t nextFunctionAddress = 0; - if (symbol && funcIndex < [sortedFunctionAddresses count]) { - nextFunctionAddress = [[sortedFunctionAddresses objectAtIndex:funcIndex] - unsignedLongLongValue] - baseAddress; - ++funcIndex; - } - - // Skip some symbols - if ([symbol hasPrefix:@"vtable for"]) - continue; - - if ([symbol hasPrefix:@"__static_initialization_and_destruction_0"]) - continue; - - if ([symbol hasPrefix:@"_GLOBAL__I_"]) - continue; - - if ([symbol hasPrefix:@"__func__."]) - continue; - - if ([symbol hasPrefix:@"__gnu"]) - continue; - - if ([symbol hasPrefix:@"typeinfo "]) - continue; - - if ([symbol hasPrefix:@"EH_frame"]) - continue; - - if ([symbol hasPrefix:@"GCC_except_table"]) - continue; - - if ([symbol hasPrefix:@"__tcf"]) - continue; - - 
if ([symbol hasPrefix:@"non-virtual thunk"]) - continue; - - // Find the source file (if any) that contains this address - while (sourceCount && (addressVal >= nextSourceFileAddress)) { - fileIdx = nextFileIdx; - - if (nextFileIdx < sourceCount) { - NSNumber *addr = [sources objectAtIndex:nextFileIdx]; - ++nextFileIdx; - nextSourceFileAddress = [addr unsignedLongLongValue] - baseAddress; - } else { - nextSourceFileAddress = baseAddress + moduleSize; - break; - } - } - - NSNumber *functionLength = [dict objectForKey:kFunctionSizeKey]; - - if (line) { - if (symbol && functionLength) { - - uint64_t functionLengthVal = [functionLength unsignedLongLongValue]; - - insideFunction = true; - // sanity check to make sure the length we were told does not exceed - // the space between this function and the next - if (nextFunctionAddress != 0) { - uint64_t functionLengthVal2 = nextFunctionAddress - addressVal; - - if(functionLengthVal > functionLengthVal2 ) { - functionLengthVal = functionLengthVal2; - } - } - - // Function - if (!WriteFormat(fd, "FUNC %llx %llx 0 %s\n", addressVal, - functionLengthVal, [symbol UTF8String])) - return NO; - } - - // Throw out line number information that doesn't correspond to - // any function - if (insideFunction) { - // Source line - uint64_t length = nextAddressVal - addressVal; - - // if fileNameToFileIndex/dict has an entry for the - // file/kFunctionFileKey, we're processing DWARF and have stored - // files for each program counter. If there is no entry, we're - // processing STABS and can use the old method of mapping - // addresses to files(which was basically iterating over a set - // of addresses until we reached one that was greater than the - // high PC of the current file, then moving on to the next file) - NSNumber *fileIndex = [fileNameToFileIndex objectForKey:[dict objectForKey:kFunctionFileKey]]; - if (!WriteFormat(fd, "%llx %llx %d %d\n", addressVal, length, - [line unsignedIntValue], fileIndex ? 
[fileIndex unsignedIntValue] : fileIdx)) - return NO; - } - } else { - // PUBLIC <address> <stack-size> <name> - if (!WriteFormat(fd, "PUBLIC %llx 0 %s\n", addressVal, - [symbol UTF8String])) - return NO; - insideFunction = false; - } - } - - return YES; + dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_, + section.section_name); + dwarf2reader::CallFrameInfo parser(cfi, cfi_size, + &byte_reader, &handler, &dwarf_reporter, + eh_frame); + parser.Start(); + return true; } -//============================================================================= -- (id)initWithContentsOfFile:(NSString *)path { - if ((self = [super init])) { - - if (![[NSFileManager defaultManager] fileExistsAtPath:path]) { - [self autorelease]; - return nil; - } - - sourcePath_ = [path copy]; - - // Test for .DSYM bundle - NSBundle *dsymBundle = [NSBundle bundleWithPath:sourcePath_]; - - if (dsymBundle) { - - // we need to take the DSYM bundle path and remove it's - // extension to get the name of the file inside the resources - // directory of the bundle that actually has the DWARF - // information - // But, Xcode supports something called "Wrapper extension"(see - // build settings), which would make the bundle name - // /tmp/foo/test.kext.dSYM, but the dwarf binary name would - // still be "test". so, now we loop through until deleting the - // extension doesn't change the string - - // e.g. 
suppose sourcepath_ is /tmp/foo/test.dSYM - - NSString *dwarfBinName = [sourcePath_ lastPathComponent]; - NSString *dwarfBinPath; - - // We use a do/while loop so we can handle files without an extension - do { - dwarfBinName = [dwarfBinName stringByDeletingPathExtension]; - // now, dwarfBinName is "test" - dwarfBinPath = [dsymBundle pathForResource:dwarfBinName ofType:nil inDirectory:@"DWARF"]; - if (dwarfBinPath != nil) - break; - } while (![[dwarfBinName stringByDeletingPathExtension] isEqualToString:dwarfBinName]); - - if (dwarfBinPath == nil) { - NSLog(@"The bundle passed on the command line does not appear to be a DWARF dSYM bundle"); - [self autorelease]; - return nil; - } - - // otherwise we're good to go - [sourcePath_ release]; - - sourcePath_ = [dwarfBinPath copy]; - NSLog(@"Loading DWARF dSYM file from %@", sourcePath_); - } - - sectionsForArch_ = new ArchSectionMap(); - - if (![self loadModuleInfo]) { - [self autorelease]; - return nil; - } - - // If there's more than one, use the native one - if ([headers_ count] > 1) { - const NXArchInfo *localArchInfo = NXGetLocalArchInfo(); - - if (localArchInfo) { - cpu_type_t cpu = localArchInfo->cputype; - NSString *arch; +// A LoadCommandHandler that loads whatever debugging data it finds into a +// Module. +class DumpSymbols::LoadCommandDumper: + public mach_o::Reader::LoadCommandHandler { + public: + // Create a load command dumper handling load commands from READER's + // file, and adding data to MODULE. 
+ LoadCommandDumper(const DumpSymbols &dumper, + google_breakpad::Module *module, + const mach_o::Reader &reader) + : dumper_(dumper), module_(module), reader_(reader) { } + + bool SegmentCommand(const mach_o::Segment &segment); + bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings); + + private: + const DumpSymbols &dumper_; + google_breakpad::Module *module_; // WEAK + const mach_o::Reader &reader_; +}; + +bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) { + mach_o::SectionMap section_map; + if (!reader_.MapSegmentSections(segment, §ion_map)) + return false; + + if (segment.name == "__TEXT") { + module_->SetLoadAddress(segment.vmaddr); + mach_o::SectionMap::const_iterator eh_frame = + section_map.find("__eh_frame"); + if (eh_frame != section_map.end()) { + // If there is a problem reading this, don't treat it as a fatal error. + dumper_.ReadCFI(module_, reader_, eh_frame->second, true); + } + return true; + } + + if (segment.name == "__DWARF") { + if (!dumper_.ReadDwarf(module_, reader_, section_map)) + return false; + mach_o::SectionMap::const_iterator debug_frame + = section_map.find("__debug_frame"); + if (debug_frame != section_map.end()) { + // If there is a problem reading this, don't treat it as a fatal error. + dumper_.ReadCFI(module_, reader_, debug_frame->second, false); + } + } + + return true; +} - if (cpu & CPU_ARCH_ABI64) - arch = ((cpu & ~CPU_ARCH_ABI64) == CPU_TYPE_X86) ? - @"x86_64" : @"ppc64"; - else - arch = (cpu == CPU_TYPE_X86) ? @"x86" : @"ppc"; +bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries, + const ByteBuffer &strings) { + StabsToModule stabs_to_module(module_); + // Mac OS X STABS are never "unitized", and the size of the 'value' field + // matches the address size of the executable. + StabsReader stabs_reader(entries.start, entries.Size(), + strings.start, strings.Size(), + reader_.big_endian(), + reader_.bits_64() ? 
8 : 4, + true, + &stabs_to_module); + if (!stabs_reader.Process()) + return false; + stabs_to_module.Finalize(); + return true; +} - [self setArchitecture:arch]; +bool DumpSymbols::WriteSymbolFile(FILE *stream) { + // Select an object file, if SetArchitecture hasn't been called to set one + // explicitly. + if (!selected_object_file_) { + // If there's only one architecture, that's the one. + if (object_files_.size() == 1) + selected_object_file_ = &object_files_[0]; + else { + // Look for an object file whose architecture matches our own. + const NXArchInfo *local_arch = NXGetLocalArchInfo(); + if (!SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) { + fprintf(stderr, "%s: object file contains more than one" + " architecture, none of which match the current" + " architecture; specify an architecture explicitly" + " with '-a ARCH' to resolve the ambiguity\n", + [object_filename_ fileSystemRepresentation]); + return false; } - } else { - // Specify the default architecture - [self setArchitecture:[[headers_ allKeys] objectAtIndex:0]]; } } - return self; -} - -//============================================================================= -- (NSArray *)availableArchitectures { - return [headers_ allKeys]; -} + assert(selected_object_file_); -//============================================================================= -- (void)dealloc { - [sourcePath_ release]; - [architecture_ release]; - [addresses_ release]; - [functionAddresses_ release]; - [sources_ release]; - [headers_ release]; - delete sectionsForArch_; - - [super dealloc]; -} + // Find the name of the selected file's architecture, to appear in + // the MODULE record and in error messages. 
+ const NXArchInfo *selected_arch_info + = NXGetArchInfoFromCpuType(selected_object_file_->cputype, + selected_object_file_->cpusubtype); -//============================================================================= -- (BOOL)setArchitecture:(NSString *)architecture { - NSString *normalized = [architecture lowercaseString]; - BOOL isValid = NO; - - if ([normalized isEqualToString:@"ppc"]) { - isValid = YES; - } - else if ([normalized isEqualToString:@"i386"]) { - normalized = @"x86"; - isValid = YES; - } - else if ([normalized isEqualToString:@"x86"]) { - isValid = YES; - } - else if ([normalized isEqualToString:@"ppc64"]) { - isValid = YES; - } - else if ([normalized isEqualToString:@"x86_64"]) { - isValid = YES; + // Produce a name to use in error messages that includes the + // filename, and the architecture, if there is more than one. + selected_object_name_ = [object_filename_ UTF8String]; + if (object_files_.size() > 1) { + selected_object_name_ += ", architecture "; + selected_object_name_ + selected_arch_info->name; } - if (isValid) { - if (![headers_ objectForKey:normalized]) - return NO; + // Compute a module name, to appear in the MODULE record. + NSString *module_name = [object_filename_ lastPathComponent]; - [architecture_ autorelease]; - architecture_ = [normalized copy]; - } + // Choose an identifier string, to appear in the MODULE record. + string identifier = Identifier(); + if (identifier.empty()) + return false; + identifier += "0"; - return isValid; -} - -//============================================================================= -- (NSString *)architecture { - return architecture_; -} + // Create a module to hold the debugging information. 
+ Module module([module_name UTF8String], "mac", selected_arch_info->name, + identifier); -//============================================================================= -- (BOOL)writeSymbolFile:(NSString *)destinationPath { - const char *dest = [destinationPath fileSystemRepresentation]; - int fd; + // Parse the selected object file. + mach_o::Reader::Reporter reporter(selected_object_name_); + mach_o::Reader reader(&reporter); + if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]) + + selected_object_file_->offset, + selected_object_file_->size, + selected_object_file_->cputype, + selected_object_file_->cpusubtype)) + return false; - if ([[destinationPath substringToIndex:1] isEqualToString:@"-"]) - fd = STDOUT_FILENO; - else - fd = open(dest, O_WRONLY | O_CREAT | O_TRUNC, 0666); + // Walk its load commands, and deal with whatever is there. + LoadCommandDumper load_command_dumper(*this, &module, reader); + if (!reader.WalkLoadCommands(&load_command_dumper)) + return false; - if (fd == -1) - return NO; - - BOOL result = [self outputSymbolFile:fd]; - - close(fd); - - return result; + return module.Write(stream); } -@end - -@implementation MachSection - -- (id)initWithMachSection:(section *)sect andNumber:(uint32_t)sectionNumber { - if ((self = [super init])) { - sect_ = sect; - sectionNumber_ = sectionNumber; - } - - return self; -} - -- (section*)sectionPointer { - return sect_; -} - -- (uint32_t)sectionNumber { - return sectionNumber_; -} -@end +} // namespace google_breakpad diff --git a/src/common/mac/macho_walker.cc b/src/common/mac/macho_walker.cc index 4e1d9f16..ecea8997 100644 --- a/src/common/mac/macho_walker.cc +++ b/src/common/mac/macho_walker.cc @@ -62,22 +62,11 @@ MachoWalker::~MachoWalker() { } int MachoWalker::ValidateCPUType(int cpu_type) { - // If the user didn't specify, try to use the local architecture. If that - // fails, use the base type for the executable. + // If the user didn't specify, use the local architecture. 
if (cpu_type == 0) { const NXArchInfo *arch = NXGetLocalArchInfo(); - if (arch) - cpu_type = arch->cputype; - else -#if __ppc__ - cpu_type = CPU_TYPE_POWERPC; -#elif __i386__ - cpu_type = CPU_TYPE_X86; -#elif __x86_64__ - cpu_type = CPU_TYPE_X86_64; -#else -#error Unknown architecture -- are you on a PDP-11? -#endif + assert(arch); + cpu_type = arch->cputype; } return cpu_type; |