diff options
author | nealsid <nealsid@4c0a9323-5329-0410-9bdc-e9ce6186880e> | 2008-10-08 23:56:02 +0000 |
---|---|---|
committer | nealsid <nealsid@4c0a9323-5329-0410-9bdc-e9ce6186880e> | 2008-10-08 23:56:02 +0000 |
commit | cb4aa6b804d9c4a5d43aa50bfbc46257bc55c001 (patch) | |
tree | 43e29af9b6c3a85db4964e3b53b4e8ad64f2ca6b /src/common/mac | |
parent | Generate Windows full memory dumps as a separate file. Patch by Huan Ren (diff) | |
download | breakpad-cb4aa6b804d9c4a5d43aa50bfbc46257bc55c001.tar.xz |
Added DWARF support to Breakpad client. Thanks to Google for open sourcing their DWARF code!
Modified dump_syms to detect dSYM bundles or a binary with DWARF data appropriately, and convert data from DWARF reader to dump_syms native structures
R=danny.berlin (original writer of DWARF code)
git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@286 4c0a9323-5329-0410-9bdc-e9ce6186880e
Diffstat (limited to 'src/common/mac')
-rw-r--r-- | src/common/mac/dump_syms.h | 21 | ||||
-rw-r--r-- | src/common/mac/dump_syms.mm | 327 | ||||
-rw-r--r-- | src/common/mac/dwarf/bytereader-inl.h | 141 | ||||
-rw-r--r-- | src/common/mac/dwarf/bytereader.cc | 62 | ||||
-rw-r--r-- | src/common/mac/dwarf/bytereader.h | 132 | ||||
-rw-r--r-- | src/common/mac/dwarf/dwarf2enums.h | 490 | ||||
-rw-r--r-- | src/common/mac/dwarf/dwarf2reader.cc | 830 | ||||
-rw-r--r-- | src/common/mac/dwarf/dwarf2reader.h | 393 | ||||
-rw-r--r-- | src/common/mac/dwarf/functioninfo.cc | 198 | ||||
-rw-r--r-- | src/common/mac/dwarf/functioninfo.h | 175 | ||||
-rw-r--r-- | src/common/mac/dwarf/line_state_machine.h | 61 | ||||
-rw-r--r-- | src/common/mac/dwarf/types.h | 46 |
12 files changed, 2862 insertions, 14 deletions
diff --git a/src/common/mac/dump_syms.h b/src/common/mac/dump_syms.h index 1b2bedc7..e234a6a6 100644 --- a/src/common/mac/dump_syms.h +++ b/src/common/mac/dump_syms.h @@ -30,9 +30,14 @@ // dump_syms.h: Interface for DumpSymbols. This class will take a mach-o file // and extract the symbol information and write it to a file using the // breakpad symbol file format. -// NOTE: Only Stabs format is currently supported -- not DWARF. #import <Foundation/Foundation.h> +#include <mach-o/loader.h> +#include "common/mac/dwarf/dwarf2reader.h" + +// This will map from an architecture string to a SectionMap, which +// will contain the offsets for all the sections in the dictionary +typedef hash_map<string, dwarf2reader::SectionMap *> ArchSectionMap; @interface DumpSymbols : NSObject { @protected @@ -43,8 +48,9 @@ NSMutableDictionary *sources_; // Address and Source file paths (STRONG) NSMutableArray *cppAddresses_; // Addresses of C++ symbols (STRONG) NSMutableDictionary *headers_; // Mach-o header information (STRONG) - NSMutableDictionary *sectionNumbers_; // Keyed by seg/sect name (STRONG) + NSMutableDictionary *sectionData_; // Keyed by seg/sect name (STRONG) uint32_t lastStartAddress_; + ArchSectionMap *sectionsForArch_; } - (id)initWithContentsOfFile:(NSString *)machoFile; @@ -61,3 +67,14 @@ - (BOOL)writeSymbolFile:(NSString *)symbolFilePath; @end + +@interface MachSection : NSObject { + @protected + struct section *sect_; + uint32_t sectionNumber_; +} +- (id)initWithMachSection:(struct section *)sect andNumber:(uint32_t)sectionNumber; +- (struct section*)sectionPointer; +- (uint32_t)sectionNumber; + +@end diff --git a/src/common/mac/dump_syms.mm b/src/common/mac/dump_syms.mm index 133c6ab2..d5ae55f8 100644 --- a/src/common/mac/dump_syms.mm +++ b/src/common/mac/dump_syms.mm @@ -47,6 +47,9 @@ #import "dump_syms.h" #import "common/mac/file_id.h" #import "common/mac/macho_utilities.h" +#import "common/mac/dwarf/dwarf2reader.h" +#import "common/mac/dwarf/functioninfo.h" 
+#import "common/mac/dwarf/bytereader.h" using google_breakpad::FileID; @@ -65,6 +68,40 @@ static NSString *kUnknownSymbol = @"???"; // for pruning out extraneous non-function symbols. static const int kTextSection = 1; +namespace __gnu_cxx { +template<> + struct hash<std::string> { + size_t operator()(const std::string& k) const { + return hash< const char* >()( k.c_str() ); + } +}; +} + +// Dump FunctionMap to stdout. Print address, function name, file +// name, line number, lowpc, and highpc if available. +void DumpFunctionMap(const dwarf2reader::FunctionMap function_map) { + for (dwarf2reader::FunctionMap::const_iterator iter = function_map.begin(); + iter != function_map.end(); ++iter) { + if (iter->second->name.empty()) { + continue; + } + printf("%08llx: %s", iter->first, + iter->second->name.data()); + if (!iter->second->file.empty()) { + printf(" - %s", iter->second->file.data()); + if (iter->second->line != 0) { + printf(":%u", iter->second->line); + } + } + if (iter->second->lowpc != 0 && iter->second->highpc != 0) { + printf(" (%08llx - %08llx)\n", + iter->second->lowpc, + iter->second->highpc); + } + } +} + + @interface DumpSymbols(PrivateMethods) - (NSArray *)convertCPlusPlusSymbols:(NSArray *)symbols; - (void)convertSymbols; @@ -73,10 +110,17 @@ static const int kTextSection = 1; - (BOOL)loadSymbolInfo:(void *)base offset:(uint32_t)offset; - (BOOL)loadSymbolInfo64:(void *)base offset:(uint32_t)offset; - (BOOL)loadSymbolInfoForArchitecture; +- (BOOL)loadDWARFSymbolInfo:(void *)base offset:(uint32_t)offset; +- (BOOL)loadSTABSSymbolInfo:(void *)base offset:(uint32_t)offset; - (void)generateSectionDictionary:(struct mach_header*)header; - (BOOL)loadHeader:(void *)base offset:(uint32_t)offset; - (BOOL)loadHeader64:(void *)base offset:(uint32_t)offset; - (BOOL)loadModuleInfo; +- (void)processDWARFLineNumberInfo:(dwarf2reader::LineMap*)line_map; +- (void)processDWARFFunctionInfo:(dwarf2reader::FunctionMap*)address_to_funcinfo; +- 
(void)processDWARFSourceFileInfo:(vector<dwarf2reader::SourceFileInfo>*) files; +- (BOOL)loadSymbolInfo:(void *)base offset:(uint32_t)offset; +- (dwarf2reader::SectionMap*)getSectionMapForArchitecture:(NSString*)architecture; @end @implementation DumpSymbols @@ -241,6 +285,7 @@ static const int kTextSection = 1; if (line && ![dict objectForKey:kAddressSourceLineKey]) [dict setObject:[NSNumber numberWithUnsignedInt:line] forKey:kAddressSourceLineKey]; + } //============================================================================= @@ -277,7 +322,9 @@ static const int kTextSection = 1; int line = list->n_desc; // __TEXT __text section - uint32_t mainSection = [[sectionNumbers_ objectForKey:@"__TEXT__text" ] unsignedLongValue]; + NSMutableDictionary *archSections = [sectionData_ objectForKey:architecture_]; + + uint32_t mainSection = [[archSections objectForKey:@"__TEXT__text" ] sectionNumber]; // Extract debugging information: // Doc: http://developer.apple.com/documentation/DeveloperTools/gdb/stabs/stabs_toc.html @@ -303,7 +350,7 @@ static const int kTextSection = 1; sources_ = [[NSMutableDictionary alloc] init]; // Save the source associated with an address [sources_ setObject:src forKey:address]; - + NSLog(@"Setting source %@ for %@", src, address); result = YES; } } else if (list->n_type == N_FUN) { @@ -321,7 +368,7 @@ static const int kTextSection = 1; [self addFunction:fn line:line address:list->n_value section:list->n_sect ]; result = YES; - } else if (list->n_type == N_SLINE && list->n_sect == mainSection ) { + } else if (list->n_type == N_SLINE && list->n_sect == mainSection) { [self addFunction:nil line:line address:list->n_value section:list->n_sect ]; result = YES; } else if (((list->n_type & N_TYPE) == N_SECT) && !(list->n_type & N_STAB)) { @@ -339,8 +386,155 @@ static const int kTextSection = 1; #define SwapLongIfNeeded(a) (swap ? NXSwapLong(a) : (a)) #define SwapIntIfNeeded(a) (swap ? NXSwapInt(a) : (a)) #define SwapShortIfNeeded(a) (swap ? 
NXSwapShort(a) : (a)) + //============================================================================= - (BOOL)loadSymbolInfo:(void *)base offset:(uint32_t)offset { + NSMutableDictionary *archSections = [sectionData_ objectForKey:architecture_]; + if ([archSections objectForKey:@"__DWARF__debug_info"]) { + // Treat this this as debug information + return [self loadDWARFSymbolInfo:base offset:offset]; + } + + return [self loadSTABSSymbolInfo:base offset:offset]; +} + +//============================================================================= +- (BOOL)loadDWARFSymbolInfo:(void *)base offset:(uint32_t)offset { + + struct mach_header *header = (struct mach_header *) + ((uint32_t)base + offset); + BOOL swap = (header->magic == MH_CIGAM); + + NSMutableDictionary *archSections = [sectionData_ objectForKey:architecture_]; + assert (archSections != nil); + section *dbgInfoSection = [[archSections objectForKey:@"__DWARF__debug_info"] sectionPointer]; + uint32_t debugInfoSize = SwapLongIfNeeded(dbgInfoSection->size); + + // i think this will break if run on a big-endian machine + dwarf2reader::ByteReader byte_reader(swap ? + dwarf2reader::ENDIANNESS_BIG : + dwarf2reader::ENDIANNESS_LITTLE); + + uint64_t dbgOffset = 0; + + dwarf2reader::SectionMap* oneArchitectureSectionMap = [self getSectionMapForArchitecture:architecture_]; + + while (dbgOffset < debugInfoSize) { + // Prepare necessary objects. 
+ dwarf2reader::FunctionMap off_to_funcinfo; + dwarf2reader::FunctionMap address_to_funcinfo; + dwarf2reader::LineMap line_map; + vector<dwarf2reader::SourceFileInfo> files; + vector<string> dirs; + + dwarf2reader::CULineInfoHandler line_info_handler(&files, &dirs, + &line_map); + + dwarf2reader::CUFunctionInfoHandler function_info_handler(&files, &dirs, + &line_map, + &off_to_funcinfo, + &address_to_funcinfo, + &line_info_handler, + *oneArchitectureSectionMap, + &byte_reader); + + dwarf2reader::CompilationUnit compilation_unit(*oneArchitectureSectionMap, + dbgOffset, + &byte_reader, + &function_info_handler); + + dbgOffset += compilation_unit.Start(); + + // The next 3 functions take the info that the dwarf reader + // gives and massages them into the data structures that + // dump_syms uses + [self processDWARFSourceFileInfo:&files]; + [self processDWARFFunctionInfo:&address_to_funcinfo]; + [self processDWARFLineNumberInfo:&line_map]; + } + + return YES; +} + +- (void)processDWARFSourceFileInfo:(vector<dwarf2reader::SourceFileInfo>*) files { + if (!sources_) + sources_ = [[NSMutableDictionary alloc] init]; + // Save the source associated with an address + vector<dwarf2reader::SourceFileInfo>::const_iterator iter = files->begin(); + for (; iter != files->end(); iter++) { + NSString *sourceFile = [NSString stringWithUTF8String:(*iter).name.c_str()]; + if ((*iter).lowpc != ULLONG_MAX) { + NSNumber *address = [NSNumber numberWithUnsignedLongLong:(*iter).lowpc]; + [sources_ setObject:sourceFile forKey:address]; + } + } +} + +- (void)processDWARFFunctionInfo:(dwarf2reader::FunctionMap*)address_to_funcinfo { + for (dwarf2reader::FunctionMap::const_iterator iter = address_to_funcinfo->begin(); + iter != address_to_funcinfo->end(); ++iter) { + if (iter->second->name.empty()) { + continue; + } + + if (!addresses_) + addresses_ = [[NSMutableDictionary alloc] init]; + + NSNumber *addressNum = [NSNumber numberWithUnsignedLongLong:(*iter).second->lowpc]; + + 
[functionAddresses_ addObject:addressNum]; + + NSMutableDictionary *dict = [addresses_ objectForKey:addressNum]; + + if (!dict) { + dict = [[NSMutableDictionary alloc] init]; + [addresses_ setObject:dict forKey:addressNum]; + [dict release]; + } + + // set name of function if it isn't already set + if (![dict objectForKey:kAddressSymbolKey]) { + NSString *symbolName = [NSString stringWithUTF8String:iter->second->name.c_str()]; + [dict setObject:symbolName forKey:kAddressSymbolKey]; + } + + // set line number for beginning of function + if (![dict objectForKey:kAddressSourceLineKey]) + [dict setObject:[NSNumber numberWithUnsignedInt:iter->second->line] + forKey:kAddressSourceLineKey]; + + // set function size by subtracting low PC from high PC + if (![dict objectForKey:kFunctionSizeKey]) { + [dict setObject:[NSNumber numberWithUnsignedLongLong:iter->second->highpc - iter->second->lowpc] + forKey:kFunctionSizeKey]; + } + + } +} + +- (void)processDWARFLineNumberInfo:(dwarf2reader::LineMap*)line_map { + for (dwarf2reader::LineMap::const_iterator iter = line_map->begin(); + iter != line_map->end(); + ++iter) { + + NSNumber *addressNum = [NSNumber numberWithUnsignedLongLong:iter->first]; + NSMutableDictionary *dict = [addresses_ objectForKey:addressNum]; + + if (!dict) { + dict = [[NSMutableDictionary alloc] init]; + [addresses_ setObject:dict forKey:addressNum]; + [dict release]; + } + + if (![dict objectForKey:kAddressSourceLineKey]) { + [dict setObject:[NSNumber numberWithUnsignedInt:iter->second.second] + forKey:kAddressSourceLineKey]; + } + } +} + +//============================================================================= +- (BOOL)loadSTABSSymbolInfo:(void *)base offset:(uint32_t)offset { struct mach_header *header = (struct mach_header *)((uint32_t)base + offset); BOOL swap = (header->magic == MH_CIGAM); uint32_t count = SwapLongIfNeeded(header->ncmds); @@ -434,6 +628,7 @@ static const int kTextSection = 1; - (BOOL)loadSymbolInfoForArchitecture { NSMutableData 
*data = [[NSMutableData alloc] initWithContentsOfMappedFile:sourcePath_]; + NSDictionary *headerInfo = [headers_ objectForKey:architecture_]; void *base = [data mutableBytes]; uint32_t offset = @@ -446,10 +641,28 @@ static const int kTextSection = 1; return result; } +- (dwarf2reader::SectionMap*)getSectionMapForArchitecture:(NSString*)architecture { + + string currentArch([architecture UTF8String]); + dwarf2reader::SectionMap *oneArchitectureSectionMap; + + ArchSectionMap::const_iterator iter = sectionsForArch_->find(currentArch); + + if (iter == sectionsForArch_->end()) { + oneArchitectureSectionMap = new dwarf2reader::SectionMap(); + sectionsForArch_->insert(make_pair(currentArch, oneArchitectureSectionMap)); + } else { + oneArchitectureSectionMap = iter->second; + } + + return oneArchitectureSectionMap; +} + //============================================================================= // build a dictionary of section numbers keyed off a string // which is the concatenation of the segment name and the section name - (void)generateSectionDictionary:(struct mach_header*)header { + BOOL swap = (header->magic == MH_CIGAM); uint32_t count = SwapLongIfNeeded(header->ncmds); struct load_command *cmd = @@ -457,8 +670,29 @@ static const int kTextSection = 1; uint32_t segmentCommand = SwapLongIfNeeded(LC_SEGMENT); uint32_t sectionNumber = 1; // section numbers are counted from 1 - if (!sectionNumbers_) - sectionNumbers_ = [[NSMutableDictionary alloc] init]; + cpu_type_t cpu = SwapIntIfNeeded(header->cputype); + + NSString *arch; + + if (cpu & CPU_ARCH_ABI64) + arch = ((cpu & ~CPU_ARCH_ABI64) == CPU_TYPE_X86) ? + @"x86_64" : @"ppc64"; + else + arch = (cpu == CPU_TYPE_X86) ? 
@"x86" : @"ppc"; + + NSMutableDictionary *archSections; + + if (!sectionData_) { + sectionData_ = [[NSMutableDictionary alloc] init]; + } + + if (![sectionData_ objectForKey:architecture_]) { + [sectionData_ setObject:[[NSMutableDictionary alloc] init] forKey:arch]; + } + + archSections = [sectionData_ objectForKey:arch]; + + dwarf2reader::SectionMap* oneArchitectureSectionMap = [self getSectionMapForArchitecture:arch]; // loop through every segment command, then through every section // contained inside each of them @@ -469,13 +703,18 @@ static const int kTextSection = 1; uint32_t nsects = SwapLongIfNeeded(seg->nsects); for (uint32_t j = 0; j < nsects; ++j) { - //printf("%d: %s %s\n", sectionNumber, seg->segname, sect->sectname ); NSString *segSectName = [NSString stringWithFormat:@"%s%s", - seg->segname, sect->sectname ]; - - [sectionNumbers_ setValue:[NSNumber numberWithUnsignedLong:sectionNumber] - forKey:segSectName ]; + seg->segname, sect->sectname]; + [archSections setObject:[[MachSection alloc] initWithMachSection:sect andNumber:sectionNumber] + forKey:segSectName]; + + // filter out sections with size 0, offset 0 + if (sect->offset != 0 && sect->size != 0) { + // fill sectionmap for dwarf reader + oneArchitectureSectionMap->insert(make_pair(sect->sectname,make_pair(((const char*)header) + SwapLongIfNeeded(sect->offset), (size_t)SwapLongIfNeeded(sect->size)))); + } + ++sect; ++sectionNumber; } @@ -825,6 +1064,49 @@ static BOOL WriteFormat(int fd, const char *fmt, ...) 
{ sourcePath_ = [path copy]; + // Test for .DSYM bundle + NSBundle *dsymBundle = [NSBundle bundleWithPath:sourcePath_]; + + if (dsymBundle) { + + // we need to take the DSYM bundle path and remove it's + // extension to get the name of the file inside the resources + // directory of the bundle that actually has the DWARF + // information + // But, Xcode supports something called "Wrapper extension"(see + // build settings), which would make the bundle name + // /tmp/foo/test.kext.dSYM, but the dwarf binary name would + // still be "test". so, now we loop through until deleting the + // extension doesn't change the string + + // e.g. suppose sourcepath_ is /tmp/foo/test.dSYM + + NSString *dwarfBinName = [[sourcePath_ lastPathComponent] stringByDeletingPathExtension]; + + // now, dwarfBinName is "test" + + while (![[dwarfBinName stringByDeletingPathExtension] isEqualToString:dwarfBinName]) { + dwarfBinName = [dwarfBinName stringByDeletingPathExtension]; + } + + NSString *dwarfBinPath; + dwarfBinPath = [dsymBundle pathForResource:dwarfBinName ofType:nil inDirectory:@"DWARF"]; + + if (dwarfBinPath == nil) { + NSLog(@"The bundle passed on the command line does not appear to be a DWARF dSYM bundle"); + [self autorelease]; + return nil; + } + + // otherwise we're good to go + [sourcePath_ release]; + + sourcePath_ = [dwarfBinPath copy]; + NSLog(@"Loading DWARF dSYM file from %@", sourcePath_); + } + + sectionsForArch_ = new ArchSectionMap(); + if (![self loadModuleInfo]) { [self autorelease]; return nil; @@ -868,7 +1150,8 @@ static BOOL WriteFormat(int fd, const char *fmt, ...) { [functionAddresses_ release]; [sources_ release]; [headers_ release]; - + delete sectionsForArch_; + [super dealloc]; } @@ -899,7 +1182,7 @@ static BOOL WriteFormat(int fd, const char *fmt, ...) 
{ return NO; [architecture_ autorelease]; - architecture_ = [architecture copy]; + architecture_ = [normalized copy]; } return isValid; @@ -931,3 +1214,23 @@ static BOOL WriteFormat(int fd, const char *fmt, ...) { } @end + +@implementation MachSection + +- (id)initWithMachSection:(section *)sect andNumber:(uint32_t)sectionNumber { + if ((self = [super init])) { + sect_ = sect; + sectionNumber_ = sectionNumber; + } + + return self; +} + +- (section*)sectionPointer { + return sect_; +} + +- (uint32_t)sectionNumber { + return sectionNumber_; +} +@end diff --git a/src/common/mac/dwarf/bytereader-inl.h b/src/common/mac/dwarf/bytereader-inl.h new file mode 100644 index 00000000..7ccd0fe6 --- /dev/null +++ b/src/common/mac/dwarf/bytereader-inl.h @@ -0,0 +1,141 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef UTIL_DEBUGINFO_BYTEREADER_INL_H__ +#define UTIL_DEBUGINFO_BYTEREADER_INL_H__ + +#include "common/mac/dwarf/bytereader.h" + +namespace dwarf2reader { + +inline uint8 ByteReader::ReadOneByte(const char* buffer) const { + return buffer[0]; +} + +inline uint16 ByteReader::ReadTwoBytes(const char* buffer) const { + const uint16 buffer0 = static_cast<uint16>(buffer[0]); + const uint16 buffer1 = static_cast<uint16>(buffer[1]); + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8; + } else { + return buffer1 | buffer0 << 8; + } +} + +inline uint64 ByteReader::ReadFourBytes(const char* buffer) const { + const uint32 buffer0 = static_cast<uint32>(buffer[0]); + const uint32 buffer1 = static_cast<uint32>(buffer[1]); + const uint32 buffer2 = static_cast<uint32>(buffer[2]); + const uint32 buffer3 = static_cast<uint32>(buffer[3]); + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; + } else { + return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; + } +} + +inline uint64 ByteReader::ReadEightBytes(const char* buffer) const { + const uint64 buffer0 = static_cast<uint64>(buffer[0]); + const uint64 buffer1 = static_cast<uint64>(buffer[1]); + const uint64 buffer2 = static_cast<uint64>(buffer[2]); + const uint64 buffer3 = static_cast<uint64>(buffer[3]); + const uint64 buffer4 = static_cast<uint64>(buffer[4]); + const uint64 buffer5 = static_cast<uint64>(buffer[5]); + 
const uint64 buffer6 = static_cast<uint64>(buffer[6]); + const uint64 buffer7 = static_cast<uint64>(buffer[7]); + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 | + buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56; + } else { + return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 | + buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56; + } +} + +// Read an unsigned LEB128 number. Each byte contains 7 bits of +// information, plus one bit saying whether the number continues or +// not. + +inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer, + size_t* len) const { + uint64 result = 0; + size_t num_read = 0; + unsigned int shift = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + + result |= (static_cast<uint64>(byte & 0x7f)) << shift; + + shift += 7; + + } while (byte & 0x80); + + *len = num_read; + + return result; +} + +// Read a signed LEB128 number. These are like regular LEB128 +// numbers, except the last byte may have a sign bit set. 
+ +inline int64 ByteReader::ReadSignedLEB128(const char* buffer, + size_t* len) const { + int64 result = 0; + unsigned int shift = 0; + size_t num_read = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + result |= (static_cast<uint64>(byte & 0x7f) << shift); + shift += 7; + } while (byte & 0x80); + + if ((shift < 8 * sizeof (result)) && (byte & 0x40)) + result |= -((static_cast<int64>(1)) << shift); + *len = num_read; + return result; +} + +inline uint64 ByteReader::ReadOffset(const char* buffer) const { + assert(this->offset_reader_); + return (this->*offset_reader_)(buffer); +} + +inline uint64 ByteReader::ReadAddress(const char* buffer) const { + assert(this->address_reader_); + return (this->*address_reader_)(buffer); +} + +} // namespace dwarf2reader + +#endif // UTIL_DEBUGINFO_BYTEREADER_INL_H__ diff --git a/src/common/mac/dwarf/bytereader.cc b/src/common/mac/dwarf/bytereader.cc new file mode 100644 index 00000000..823cf2b6 --- /dev/null +++ b/src/common/mac/dwarf/bytereader.cc @@ -0,0 +1,62 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "common/mac/dwarf/bytereader-inl.h" + +#include "common/mac/dwarf/bytereader.h" + +namespace dwarf2reader { + +ByteReader::ByteReader(enum Endianness endian) + :offset_reader_(NULL), address_reader_(NULL), endian_(endian), + address_size_(0), offset_size_(0) +{ } + +ByteReader::~ByteReader() { } + +void ByteReader::SetOffsetSize(uint8 size) { + offset_size_ = size; + assert(size == 4 || size == 8); + if (size == 4) { + this->offset_reader_ = &ByteReader::ReadFourBytes; + } else { + this->offset_reader_ = &ByteReader::ReadEightBytes; + } +} + +void ByteReader::SetAddressSize(uint8 size) { + address_size_ = size; + assert(size == 4 || size == 8); + if (size == 4) { + this->address_reader_ = &ByteReader::ReadFourBytes; + } else { + this->address_reader_ = &ByteReader::ReadEightBytes; + } +} + +} // namespace dwarf2reader diff --git a/src/common/mac/dwarf/bytereader.h b/src/common/mac/dwarf/bytereader.h new file mode 100644 index 00000000..299dd9ac --- /dev/null +++ b/src/common/mac/dwarf/bytereader.h @@ -0,0 +1,132 @@ +// Copyright 2006 Google Inc. All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_MAC_DWARF_BYTEREADER_H__ +#define COMMON_MAC_DWARF_BYTEREADER_H__ + +#include <string> +#include "common/mac/dwarf/types.h" + +namespace dwarf2reader { + +// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN +// because it conflicts with a macro +enum Endianness { + ENDIANNESS_BIG, + ENDIANNESS_LITTLE +}; + +// Class that knows how to read both big endian and little endian +// numbers, for use in DWARF2/3 reader. +// Takes an endianness argument. 
+// To read addresses and offsets, SetAddressSize and SetOffsetSize +// must be called first. +class ByteReader { + public: + explicit ByteReader(enum Endianness endian); + virtual ~ByteReader(); + + // Set the address size to SIZE, which sets up the ReadAddress member + // so that it works. + void SetAddressSize(uint8 size); + + // Set the offset size to SIZE, which sets up the ReadOffset member + // so that it works. + void SetOffsetSize(uint8 size); + + // Return the current offset size + uint8 OffsetSize() const { return offset_size_; } + + // Return the current address size + uint8 AddressSize() const { return address_size_; } + + // Read a single byte from BUFFER and return it as an unsigned 8 bit + // number. + uint8 ReadOneByte(const char* buffer) const; + + // Read two bytes from BUFFER and return it as an unsigned 16 bit + // number. + uint16 ReadTwoBytes(const char* buffer) const; + + // Read four bytes from BUFFER and return it as an unsigned 32 bit + // number. This function returns a uint64 so that it is compatible + // with ReadAddress and ReadOffset. The number it returns will + // never be outside the range of an unsigned 32 bit integer. + uint64 ReadFourBytes(const char* buffer) const; + + // Read eight bytes from BUFFER and return it as an unsigned 64 bit + // number + uint64 ReadEightBytes(const char* buffer) const; + + // Read an unsigned LEB128 (Little Endian Base 128) number from + // BUFFER and return it as an unsigned 64 bit integer. LEN is set + // to the length read. Everybody seems to reinvent LEB128 as a + // variable size integer encoding, DWARF has had it for a long time. + uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; + + // Read a signed LEB128 number from BUFFER and return it as an + // signed 64 bit integer. LEN is set to the length read. + int64 ReadSignedLEB128(const char* buffer, size_t* len) const; + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer. 
DWARF2/3 define offsets as either 4 or 8 bytes, + // generally depending on the amount of DWARF2/3 info present. + uint64 ReadOffset(const char* buffer) const; + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 allow addresses to be any size from 0-255 + // bytes currently. Internally we support 4 and 8 byte addresses, + // and will CHECK on anything else. + uint64 ReadAddress(const char* buffer) const; + + private: + + // Function pointer type for our address and offset readers. + typedef uint64 (ByteReader::*AddressReader)(const char*) const; + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 define offsets as either 4 or 8 bytes, + // generally depending on the amount of DWARF2/3 info present. + // This function pointer gets set by SetOffsetSize. + AddressReader offset_reader_; + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 allow addresses to be any size from 0-255 + // bytes currently. Internally we support 4 and 8 byte addresses, + // and will CHECK on anything else. + // This function pointer gets set by SetAddressSize. + AddressReader address_reader_; + + Endianness endian_; + uint8 address_size_; + uint8 offset_size_; +}; + +} // namespace dwarf2reader + +#endif // COMMON_MAC_DWARF_BYTEREADER_H__ diff --git a/src/common/mac/dwarf/dwarf2enums.h b/src/common/mac/dwarf/dwarf2enums.h new file mode 100644 index 00000000..c52e2b47 --- /dev/null +++ b/src/common/mac/dwarf/dwarf2enums.h @@ -0,0 +1,490 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_MAC_DWARF_DWARF2ENUMS_H__ +#define COMMON_MAC_DWARF_DWARF2ENUMS_H__ + +namespace dwarf2reader { + +// These enums do not follow the google3 style only because they are +// known universally (specs, other implementations) by the names in +// exactly this capitalization. +// Tag names and codes. 
+enum DwarfTag { + DW_TAG_padding = 0x00, + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, + DW_TAG_template_type_param = 0x2f, + DW_TAG_template_value_param = 0x30, + DW_TAG_thrown_type = 0x31, + DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + // DWARF 3. + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + // SGI/MIPS Extensions. + DW_TAG_MIPS_loop = 0x4081, + // HP extensions. See: + // ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz + DW_TAG_HP_array_descriptor = 0x4090, + // GNU extensions. + DW_TAG_format_label = 0x4101, // For FORTRAN 77 and Fortran 90. 
+ DW_TAG_function_template = 0x4102, // For C++. + DW_TAG_class_template = 0x4103, // For C++. + DW_TAG_GNU_BINCL = 0x4104, + DW_TAG_GNU_EINCL = 0x4105, + // Extensions for UPC. See: http://upc.gwu.edu/~upc. + DW_TAG_upc_shared_type = 0x8765, + DW_TAG_upc_strict_type = 0x8766, + DW_TAG_upc_relaxed_type = 0x8767, + // PGI (STMicroelectronics) extensions. No documentation available. + DW_TAG_PGI_kanji_type = 0xA000, + DW_TAG_PGI_interface_block = 0xA020 +}; + + +enum DwarfHasChild { + DW_children_no = 0, + DW_children_yes = 1 +}; + +// Form names and codes. +enum DwarfForm { + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16 +}; + +// Attribute names and codes +enum DwarfAttribute { + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_stride_size = 0x2e, + 
DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_items = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + // DWARF 3 values. + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + // SGI/MIPS extensions. + DW_AT_MIPS_fde = 0x2001, + DW_AT_MIPS_loop_begin = 0x2002, + DW_AT_MIPS_tail_loop_begin = 0x2003, + DW_AT_MIPS_epilog_begin = 0x2004, + DW_AT_MIPS_loop_unroll_factor = 0x2005, + DW_AT_MIPS_software_pipeline_depth = 0x2006, + DW_AT_MIPS_linkage_name = 0x2007, + DW_AT_MIPS_stride = 0x2008, + DW_AT_MIPS_abstract_name = 0x2009, + DW_AT_MIPS_clone_origin = 0x200a, + DW_AT_MIPS_has_inlines = 0x200b, + // HP extensions. + DW_AT_HP_block_index = 0x2000, + DW_AT_HP_unmodifiable = 0x2001, // Same as DW_AT_MIPS_fde. 
+ DW_AT_HP_actuals_stmt_list = 0x2010, + DW_AT_HP_proc_per_section = 0x2011, + DW_AT_HP_raw_data_ptr = 0x2012, + DW_AT_HP_pass_by_reference = 0x2013, + DW_AT_HP_opt_level = 0x2014, + DW_AT_HP_prof_version_id = 0x2015, + DW_AT_HP_opt_flags = 0x2016, + DW_AT_HP_cold_region_low_pc = 0x2017, + DW_AT_HP_cold_region_high_pc = 0x2018, + DW_AT_HP_all_variables_modifiable = 0x2019, + DW_AT_HP_linkage_name = 0x201a, + DW_AT_HP_prof_flags = 0x201b, // In comp unit of procs_info for -g. + // GNU extensions. + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + // VMS extensions. + DW_AT_VMS_rtnbeg_pd_address = 0x2201, + // UPC extension. + DW_AT_upc_threads_scaled = 0x3210, + // PGI (STMicroelectronics) extensions. + DW_AT_PGI_lbase = 0x3a00, + DW_AT_PGI_soffset = 0x3a01, + DW_AT_PGI_lstride = 0x3a02 +}; + + +// Line number opcodes. +enum DwarfLineNumberOps { + DW_LNS_extended_op = 0, + DW_LNS_copy = 1, + DW_LNS_advance_pc = 2, + DW_LNS_advance_line = 3, + DW_LNS_set_file = 4, + DW_LNS_set_column = 5, + DW_LNS_negate_stmt = 6, + DW_LNS_set_basic_block = 7, + DW_LNS_const_add_pc = 8, + DW_LNS_fixed_advance_pc = 9, + // DWARF 3. + DW_LNS_set_prologue_end = 10, + DW_LNS_set_epilogue_begin = 11, + DW_LNS_set_isa = 12 +}; + +// Line number extended opcodes. +enum DwarfLineNumberExtendedOps { + DW_LNE_end_sequence = 1, + DW_LNE_set_address = 2, + DW_LNE_define_file = 3, + // HP extensions. 
+ DW_LNE_HP_negate_is_UV_update = 0x11, + DW_LNE_HP_push_context = 0x12, + DW_LNE_HP_pop_context = 0x13, + DW_LNE_HP_set_file_line_column = 0x14, + DW_LNE_HP_set_routine_name = 0x15, + DW_LNE_HP_set_sequence = 0x16, + DW_LNE_HP_negate_post_semantics = 0x17, + DW_LNE_HP_negate_function_exit = 0x18, + DW_LNE_HP_negate_front_end_logical = 0x19, + DW_LNE_HP_define_proc = 0x20 +}; + +// Type encoding names and codes +enum DwarfEncoding { + DW_ATE_address =0x1, + DW_ATE_boolean =0x2, + DW_ATE_complex_float =0x3, + DW_ATE_float =0x4, + DW_ATE_signed =0x5, + DW_ATE_signed_char =0x6, + DW_ATE_unsigned =0x7, + DW_ATE_unsigned_char =0x8, + // DWARF3/DWARF3f + DW_ATE_imaginary_float =0x9, + DW_ATE_packed_decimal =0xa, + DW_ATE_numeric_string =0xb, + DW_ATE_edited =0xc, + DW_ATE_signed_fixed =0xd, + DW_ATE_unsigned_fixed =0xe, + DW_ATE_decimal_float =0xf, + DW_ATE_lo_user =0x80, + DW_ATE_hi_user =0xff +}; + +// Location virtual machine opcodes +enum DwarfOpcode { + DW_OP_addr =0x03, + DW_OP_deref =0x06, + DW_OP_const1u =0x08, + DW_OP_const1s =0x09, + DW_OP_const2u =0x0a, + DW_OP_const2s =0x0b, + DW_OP_const4u =0x0c, + DW_OP_const4s =0x0d, + DW_OP_const8u =0x0e, + DW_OP_const8s =0x0f, + DW_OP_constu =0x10, + DW_OP_consts =0x11, + DW_OP_dup =0x12, + DW_OP_drop =0x13, + DW_OP_over =0x14, + DW_OP_pick =0x15, + DW_OP_swap =0x16, + DW_OP_rot =0x17, + DW_OP_xderef =0x18, + DW_OP_abs =0x19, + DW_OP_and =0x1a, + DW_OP_div =0x1b, + DW_OP_minus =0x1c, + DW_OP_mod =0x1d, + DW_OP_mul =0x1e, + DW_OP_neg =0x1f, + DW_OP_not =0x20, + DW_OP_or =0x21, + DW_OP_plus =0x22, + DW_OP_plus_uconst =0x23, + DW_OP_shl =0x24, + DW_OP_shr =0x25, + DW_OP_shra =0x26, + DW_OP_xor =0x27, + DW_OP_bra =0x28, + DW_OP_eq =0x29, + DW_OP_ge =0x2a, + DW_OP_gt =0x2b, + DW_OP_le =0x2c, + DW_OP_lt =0x2d, + DW_OP_ne =0x2e, + DW_OP_skip =0x2f, + DW_OP_lit0 =0x30, + DW_OP_lit1 =0x31, + DW_OP_lit2 =0x32, + DW_OP_lit3 =0x33, + DW_OP_lit4 =0x34, + DW_OP_lit5 =0x35, + DW_OP_lit6 =0x36, + DW_OP_lit7 =0x37, + DW_OP_lit8 =0x38, + 
DW_OP_lit9 =0x39, + DW_OP_lit10 =0x3a, + DW_OP_lit11 =0x3b, + DW_OP_lit12 =0x3c, + DW_OP_lit13 =0x3d, + DW_OP_lit14 =0x3e, + DW_OP_lit15 =0x3f, + DW_OP_lit16 =0x40, + DW_OP_lit17 =0x41, + DW_OP_lit18 =0x42, + DW_OP_lit19 =0x43, + DW_OP_lit20 =0x44, + DW_OP_lit21 =0x45, + DW_OP_lit22 =0x46, + DW_OP_lit23 =0x47, + DW_OP_lit24 =0x48, + DW_OP_lit25 =0x49, + DW_OP_lit26 =0x4a, + DW_OP_lit27 =0x4b, + DW_OP_lit28 =0x4c, + DW_OP_lit29 =0x4d, + DW_OP_lit30 =0x4e, + DW_OP_lit31 =0x4f, + DW_OP_reg0 =0x50, + DW_OP_reg1 =0x51, + DW_OP_reg2 =0x52, + DW_OP_reg3 =0x53, + DW_OP_reg4 =0x54, + DW_OP_reg5 =0x55, + DW_OP_reg6 =0x56, + DW_OP_reg7 =0x57, + DW_OP_reg8 =0x58, + DW_OP_reg9 =0x59, + DW_OP_reg10 =0x5a, + DW_OP_reg11 =0x5b, + DW_OP_reg12 =0x5c, + DW_OP_reg13 =0x5d, + DW_OP_reg14 =0x5e, + DW_OP_reg15 =0x5f, + DW_OP_reg16 =0x60, + DW_OP_reg17 =0x61, + DW_OP_reg18 =0x62, + DW_OP_reg19 =0x63, + DW_OP_reg20 =0x64, + DW_OP_reg21 =0x65, + DW_OP_reg22 =0x66, + DW_OP_reg23 =0x67, + DW_OP_reg24 =0x68, + DW_OP_reg25 =0x69, + DW_OP_reg26 =0x6a, + DW_OP_reg27 =0x6b, + DW_OP_reg28 =0x6c, + DW_OP_reg29 =0x6d, + DW_OP_reg30 =0x6e, + DW_OP_reg31 =0x6f, + DW_OP_breg0 =0x70, + DW_OP_breg1 =0x71, + DW_OP_breg2 =0x72, + DW_OP_breg3 =0x73, + DW_OP_breg4 =0x74, + DW_OP_breg5 =0x75, + DW_OP_breg6 =0x76, + DW_OP_breg7 =0x77, + DW_OP_breg8 =0x78, + DW_OP_breg9 =0x79, + DW_OP_breg10 =0x7a, + DW_OP_breg11 =0x7b, + DW_OP_breg12 =0x7c, + DW_OP_breg13 =0x7d, + DW_OP_breg14 =0x7e, + DW_OP_breg15 =0x7f, + DW_OP_breg16 =0x80, + DW_OP_breg17 =0x81, + DW_OP_breg18 =0x82, + DW_OP_breg19 =0x83, + DW_OP_breg20 =0x84, + DW_OP_breg21 =0x85, + DW_OP_breg22 =0x86, + DW_OP_breg23 =0x87, + DW_OP_breg24 =0x88, + DW_OP_breg25 =0x89, + DW_OP_breg26 =0x8a, + DW_OP_breg27 =0x8b, + DW_OP_breg28 =0x8c, + DW_OP_breg29 =0x8d, + DW_OP_breg30 =0x8e, + DW_OP_breg31 =0x8f, + DW_OP_regX =0x90, + DW_OP_fbreg =0x91, + DW_OP_bregX =0x92, + DW_OP_piece =0x93, + DW_OP_deref_size =0x94, + DW_OP_xderef_size =0x95, + DW_OP_nop =0x96, + // 
DWARF3/DWARF3f + DW_OP_push_object_address =0x97, + DW_OP_call2 =0x98, + DW_OP_call4 =0x99, + DW_OP_call_ref =0x9a, + DW_OP_form_tls_address =0x9b, + DW_OP_call_frame_cfa =0x9c, + DW_OP_bit_piece =0x9d, + DW_OP_lo_user =0xe0, + DW_OP_hi_user =0xff, + // GNU extensions + DW_OP_GNU_push_tls_address =0xe0 +}; + +} // namespace dwarf2reader +#endif // COMMON_MAC_DWARF_DWARF2ENUMS_H__ diff --git a/src/common/mac/dwarf/dwarf2reader.cc b/src/common/mac/dwarf/dwarf2reader.cc new file mode 100644 index 00000000..6e7a2f1d --- /dev/null +++ b/src/common/mac/dwarf/dwarf2reader.cc @@ -0,0 +1,830 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <ext/hash_map> +#include <stack> +#include <utility> + +#include "common/mac/dwarf/bytereader-inl.h" +#include "common/mac/dwarf/dwarf2reader.h" +#include "common/mac/dwarf/bytereader.h" +#include "common/mac/dwarf/line_state_machine.h" + +namespace __gnu_cxx +{ + template<> struct hash< std::string > + { + size_t operator()( const std::string& x ) const + { + return hash< const char* >()( x.c_str() ); + } + }; +} + +namespace dwarf2reader { + +// Read a DWARF2/3 initial length field from START, using READER, and +// report the length in LEN. Return the actual initial length. + +static uint64 ReadInitialLength(const char* start, + ByteReader* reader, size_t* len) { + const uint64 initial_length = reader->ReadFourBytes(start); + start += 4; + + // In DWARF2/3, if the initial length is all 1 bits, then the offset + // size is 8 and we need to read the next 8 bytes for the real length. + if (initial_length == 0xffffffff) { + reader->SetOffsetSize(8); + *len = 12; + return reader->ReadOffset(start); + } else { + reader->SetOffsetSize(4); + *len = 4; + } + return initial_length; +} + +CompilationUnit::CompilationUnit(const SectionMap& sections, uint64 offset, + ByteReader* reader, Dwarf2Handler* handler) + : offset_from_section_start_(offset), reader_(reader), + sections_(sections), handler_(handler), abbrevs_(NULL), + string_buffer_(NULL), string_buffer_length_(0) {} + +// Read a DWARF2/3 abbreviation section. 
+// Each abbrev consists of a abbreviation number, a tag, a byte +// specifying whether the tag has children, and a list of +// attribute/form pairs. +// The list of forms is terminated by a 0 for the attribute, and a +// zero for the form. The entire abbreviation section is terminated +// by a zero for the code. + +void CompilationUnit::ReadAbbrevs() { + if (abbrevs_) + return; + + // First get the debug_abbrev section + SectionMap::const_iterator iter = sections_.find("__debug_abbrev"); + assert(iter != sections_.end()); + + abbrevs_ = new vector<Abbrev>; + abbrevs_->resize(1); + + // The only way to check whether we are reading over the end of the + // buffer would be to first compute the size of the leb128 data by + // reading it, then go back and read it again. + const char* abbrev_start = iter->second.first + + header_.abbrev_offset; + const char* abbrevptr = abbrev_start; + const uint64 abbrev_length = iter->second.second - header_.abbrev_offset; + + while (1) { + CompilationUnit::Abbrev abbrev; + size_t len; + const uint32 number = reader_->ReadUnsignedLEB128(abbrevptr, &len); + + if (number == 0) + break; + abbrev.number = number; + abbrevptr += len; + + assert(abbrevptr < abbrev_start + abbrev_length); + const uint32 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + abbrev.tag = static_cast<enum DwarfTag>(tag); + + assert(abbrevptr < abbrev_start + abbrev_length); + abbrev.has_children = reader_->ReadOneByte(abbrevptr); + abbrevptr += 1; + + assert(abbrevptr < abbrev_start + abbrev_length); + + while (1) { + const uint32 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + + assert(abbrevptr < abbrev_start + abbrev_length); + const uint32 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + if (nametemp == 0 && formtemp == 0) + break; + + const enum DwarfAttribute name = + static_cast<enum DwarfAttribute>(nametemp); + const enum DwarfForm form = static_cast<enum 
DwarfForm>(formtemp); + abbrev.attributes.push_back(make_pair(name, form)); + } + assert(abbrev.number == abbrevs_->size()); + abbrevs_->push_back(abbrev); + } +} + +// Skips a single DIE's attributes. +const char* CompilationUnit::SkipDIE(const char* start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = SkipAttribute(start, i->second); + } + return start; +} + +// Skips a single attribute form's data. +const char* CompilationUnit::SkipAttribute(const char* start, + enum DwarfForm form) { + size_t len; + + switch (form) { + case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return SkipAttribute(start, form); + break; + + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + return start + 1; + break; + case DW_FORM_ref2: + case DW_FORM_data2: + return start + 2; + break; + case DW_FORM_ref4: + case DW_FORM_data4: + return start + 4; + break; + case DW_FORM_ref8: + case DW_FORM_data8: + return start + 8; + break; + case DW_FORM_string: + return start + strlen(start) + 1; + break; + case DW_FORM_udata: + case DW_FORM_ref_udata: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + break; + + case DW_FORM_sdata: + reader_->ReadSignedLEB128(start, &len); + return start + len; + break; + case DW_FORM_addr: + return start + reader_->AddressSize(); + break; + case DW_FORM_ref_addr: + // DWARF2 and 3 differ on whether ref_addr is address size or + // offset size. 
+ assert(header_.version == 2 || header_.version == 3); + if (header_.version == 2) { + return start + reader_->AddressSize(); + } else if (header_.version == 3) { + return start + reader_->OffsetSize(); + } + break; + + case DW_FORM_block1: + return start + 1 + reader_->ReadOneByte(start); + break; + case DW_FORM_block2: + return start + 2 + reader_->ReadTwoBytes(start); + break; + case DW_FORM_block4: + return start + 4 + reader_->ReadFourBytes(start); + break; + case DW_FORM_block: { + uint64 size = reader_->ReadUnsignedLEB128(start, &len); + return start + size + len; + } + break; + case DW_FORM_strp: + return start + reader_->OffsetSize(); + break; + default: + fprintf(stderr,"Unhandled form type"); + } + fprintf(stderr,"Unhandled form type"); + return NULL; +} + +// Read a DWARF2/3 header. +// The header is variable length in DWARF3 (and DWARF2 as extended by +// most compilers), and consists of an length field, a version number, +// the offset in the .debug_abbrev section for our abbrevs, and an +// address size. +void CompilationUnit::ReadHeader() { + const char* headerptr = buffer_; + size_t initial_length_size; + + assert(headerptr + 4 < buffer_ + buffer_length_); + const uint64 initial_length = ReadInitialLength(headerptr, reader_, + &initial_length_size); + headerptr += initial_length_size; + header_.length = initial_length; + + assert(headerptr + 2 < buffer_ + buffer_length_); + header_.version = reader_->ReadTwoBytes(headerptr); + headerptr += 2; + + assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_); + header_.abbrev_offset = reader_->ReadOffset(headerptr); + headerptr += reader_->OffsetSize(); + + assert(headerptr + 1 < buffer_ + buffer_length_); + header_.address_size = reader_->ReadOneByte(headerptr); + reader_->SetAddressSize(header_.address_size); + headerptr += 1; + + after_header_ = headerptr; + + // This check ensures that we don't have to do checking during the + // reading of DIEs. 
header_.length does not include the size of the + // initial length. + assert(buffer_ + initial_length_size + header_.length <= + buffer_ + buffer_length_); +} + +uint64 CompilationUnit::Start() { + // First get the debug_info section + SectionMap::const_iterator iter = sections_.find("__debug_info"); + assert(iter != sections_.end()); + + // Set up our buffer + buffer_ = iter->second.first + offset_from_section_start_; + buffer_length_ = iter->second.second - offset_from_section_start_; + + // Read the header + ReadHeader(); + + // Figure out the real length from the end of the initial length to + // the end of the compilation unit, since that is the value we + // return. + uint64 ourlength = header_.length; + if (reader_->OffsetSize() == 8) + ourlength += 12; + else + ourlength += 4; + + // See if the user wants this compilation unit, and if not, just return. + if (!handler_->StartCompilationUnit(offset_from_section_start_, + reader_->AddressSize(), + reader_->OffsetSize(), + header_.length, + header_.version)) + return ourlength; + + // Otherwise, continue by reading our abbreviation entries. + ReadAbbrevs(); + + // Set the string section if we have one. + iter = sections_.find("__debug_str"); + if (iter != sections_.end()) { + string_buffer_ = iter->second.first; + string_buffer_length_ = iter->second.second; + } + + // Now that we have our abbreviations, start processing DIE's. + ProcessDIEs(); + + return ourlength; +} + +// If one really wanted, you could merge SkipAttribute and +// ProcessAttribute +// This is all boring data manipulation and calling of the handler. +const char* CompilationUnit::ProcessAttribute( + uint64 dieoffset, const char* start, enum DwarfAttribute attr, + enum DwarfForm form) { + size_t len; + + switch (form) { + // DW_FORM_indirect is never used because it is such a space + // waster. 
+ case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return ProcessAttribute(dieoffset, start, attr, form); + break; + + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOneByte(start)); + return start + 1; + break; + case DW_FORM_ref2: + case DW_FORM_data2: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadTwoBytes(start)); + return start + 2; + break; + case DW_FORM_ref4: + case DW_FORM_data4: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadFourBytes(start)); + return start + 4; + break; + case DW_FORM_ref8: + case DW_FORM_data8: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadEightBytes(start)); + return start + 8; + break; + case DW_FORM_string: { + const char* str = start; + handler_->ProcessAttributeString(dieoffset, attr, form, + str); + return start + strlen(str) + 1; + } + break; + case DW_FORM_udata: + case DW_FORM_ref_udata: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, + &len)); + return start + len; + break; + + case DW_FORM_sdata: + handler_->ProcessAttributeSigned(dieoffset, attr, form, + reader_->ReadSignedLEB128(start, &len)); + return start + len; + break; + case DW_FORM_addr: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + break; + case DW_FORM_ref_addr: + // DWARF2 and 3 differ on whether ref_addr is address size or + // offset size. 
+ assert(header_.version == 2 || header_.version == 3); + if (header_.version == 2) { + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + } else if (header_.version == 3) { + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + } + break; + + case DW_FORM_block1: { + uint64 datalen = reader_->ReadOneByte(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1, + datalen); + return start + 1 + datalen; + } + break; + case DW_FORM_block2: { + uint64 datalen = reader_->ReadTwoBytes(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2, + datalen); + return start + 2 + datalen; + } + break; + case DW_FORM_block4: { + uint64 datalen = reader_->ReadFourBytes(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4, + datalen); + return start + 4 + datalen; + } + break; + case DW_FORM_block: { + uint64 datalen = reader_->ReadUnsignedLEB128(start, &len); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len, + datalen); + return start + datalen + len; + } + break; + case DW_FORM_strp: { + assert(string_buffer_ != NULL); + + const uint64 offset = reader_->ReadOffset(start); + assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_); + + const char* str = string_buffer_ + offset; + handler_->ProcessAttributeString(dieoffset, attr, form, + str); + return start + reader_->OffsetSize(); + } + break; + default: + fprintf(stderr, "Unhandled form type"); + } + fprintf(stderr, "Unhandled form type"); + return NULL; +} + +const char* CompilationUnit::ProcessDIE(uint64 dieoffset, + const char* start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = ProcessAttribute(dieoffset, start, i->first, i->second); + } + return start; +} + +void 
CompilationUnit::ProcessDIEs() { + const char* dieptr = after_header_; + size_t len; + + // lengthstart is the place the length field is based on. + // It is the point in the header after the initial length field + const char* lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. + if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + // we need semantics of boost scoped_ptr here - no intention of trasnferring + // ownership of the stack. use const, but then we limit ourselves to not + // ever being able to call .reset() on the smart pointer. + auto_ptr<stack<uint64> > const die_stack(new stack<uint64>); + + while (dieptr < (lengthstart + header_.length)) { + // We give the user the absolute offset from the beginning of + // debug_info, since they need it to deal with ref_addr forms. + uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_; + + uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len); + + dieptr += len; + + // Abbrev == 0 represents the end of a list of children. 
+ if (abbrev_num == 0) { + const uint64 offset = die_stack->top(); + die_stack->pop(); + handler_->EndDIE(offset); + continue; + } + + const Abbrev& abbrev = abbrevs_->at(abbrev_num); + const enum DwarfTag tag = abbrev.tag; + if (!handler_->StartDIE(absolute_offset, tag, abbrev.attributes)) { + dieptr = SkipDIE(dieptr, abbrev); + } else { + dieptr = ProcessDIE(absolute_offset, dieptr, abbrev); + } + + if (abbrev.has_children) { + die_stack->push(absolute_offset); + } else { + handler_->EndDIE(absolute_offset); + } + } +} + +LineInfo::LineInfo(const char* buffer, uint64 buffer_length, + ByteReader* reader, LineInfoHandler* handler): + handler_(handler), reader_(reader), buffer_(buffer), + buffer_length_(buffer_length) { + header_.std_opcode_lengths = NULL; +} + +uint64 LineInfo::Start() { + ReadHeader(); + ReadLines(); + return after_header_ - buffer_; +} + +// The header for a debug_line section is mildly complicated, because +// the line info is very tightly encoded. +void LineInfo::ReadHeader() { + const char* lineptr = buffer_; + size_t initial_length_size; + + const uint64 initial_length = ReadInitialLength(lineptr, reader_, + &initial_length_size); + + lineptr += initial_length_size; + header_.total_length = initial_length; + assert(buffer_ + initial_length_size + header_.total_length <= + buffer_ + buffer_length_); + + // Address size *must* be set by CU ahead of time. 
+ assert(reader_->AddressSize() != 0); + + header_.version = reader_->ReadTwoBytes(lineptr); + lineptr += 2; + + header_.prologue_length = reader_->ReadOffset(lineptr); + lineptr += reader_->OffsetSize(); + + header_.min_insn_length = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.default_is_stmt = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.line_base = *reinterpret_cast<const int8*>(lineptr); + lineptr += 1; + + header_.line_range = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.opcode_base = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.std_opcode_lengths = new vector<unsigned char>; + header_.std_opcode_lengths->resize(header_.opcode_base + 1); + (*header_.std_opcode_lengths)[0] = 0; + for (int i = 1; i < header_.opcode_base; i++) { + (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr); + lineptr += 1; + } + + // It is legal for the directory entry table to be empty. + if (*lineptr) { + uint32 dirindex = 1; + while (*lineptr) { + const char* dirname = lineptr; + handler_->DefineDir(dirname, dirindex); + lineptr += strlen(dirname) + 1; + dirindex++; + } + } + lineptr++; + + // It is also legal for the file entry table to be empty. 
+ if (*lineptr) { + uint32 fileindex = 1; + size_t len; + while (*lineptr) { + const char* filename = lineptr; + lineptr += strlen(filename) + 1; + + uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + handler_->DefineFile(filename, fileindex, dirindex, mod_time, + filelength); + fileindex++; + } + } + lineptr++; + + after_header_ = lineptr; +} + +/* static */ +bool LineInfo::ProcessOneOpcode(ByteReader* reader, + LineInfoHandler* handler, + const struct LineInfoHeader &header, + const char* start, + struct LineStateMachine* lsm, + size_t* len, + uintptr_t pc, + bool *lsm_passes_pc) { + size_t oplen = 0; + size_t templen; + uint8 opcode = reader->ReadOneByte(start); + oplen++; + start++; + + // If the opcode is great than the opcode_base, it is a special + // opcode. Most line programs consist mainly of special opcodes. + if (opcode >= header.opcode_base) { + opcode -= header.opcode_base; + const int64 advance_address = (opcode / header.line_range) + * header.min_insn_length; + const int64 advance_line = (opcode % header.line_range) + + header.line_base; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + lsm->line_num += advance_line; + lsm->basic_block = true; + *len = oplen; + return true; + } + + // Otherwise, we have the regular opcodes + switch (opcode) { + case DW_LNS_copy: { + lsm->basic_block = false; + *len = oplen; + return true; + } + + case DW_LNS_advance_pc: { + uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + + // Check if the lsm passes "pc". If so, mark it as passed. 
+ if (lsm_passes_pc && lsm->address <= pc && + pc < lsm->address + header.min_insn_length * advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += header.min_insn_length * advance_address; + } + break; + case DW_LNS_advance_line: { + const int64 advance_line = reader->ReadSignedLEB128(start, &templen); + oplen += templen; + lsm->line_num += advance_line; + + // With gcc 4.2.1, we can get the line_no here for the first time + // since DW_LNS_advance_line is called after DW_LNE_set_address is + // called. So we check if the lsm passes "pc" here, not in + // DW_LNE_set_address. + if (lsm_passes_pc && lsm->address == pc) { + *lsm_passes_pc = true; + } + } + break; + case DW_LNS_set_file: { + const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->file_num = fileno; + } + break; + case DW_LNS_set_column: { + const uint64 colno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->column_num = colno; + } + break; + case DW_LNS_negate_stmt: { + lsm->is_stmt = !lsm->is_stmt; + } + break; + case DW_LNS_set_basic_block: { + lsm->basic_block = true; + } + break; + case DW_LNS_fixed_advance_pc: { + const uint16 advance_address = reader->ReadTwoBytes(start); + oplen += 2; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + } + break; + case DW_LNS_const_add_pc: { + const int64 advance_address = header.min_insn_length + * ((255 - header.opcode_base) + / header.line_range); + + // Check if the lsm passes "pc". If so, mark it as passed. 
+ if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + } + break; + case DW_LNS_extended_op: { + const size_t extended_op_len = reader->ReadUnsignedLEB128(start, + &templen); + start += templen; + oplen += templen + extended_op_len; + + const uint64 extended_op = reader->ReadOneByte(start); + start++; + + switch (extended_op) { + case DW_LNE_end_sequence: { + lsm->end_sequence = true; + *len = oplen; + return true; + } + break; + case DW_LNE_set_address: { + // With gcc 4.2.1, we cannot tell the line_no here since + // DW_LNE_set_address is called before DW_LNS_advance_line is + // called. So we do not check if the lsm passes "pc" here. See + // also the comment in DW_LNS_advance_line. + uint64 address = reader->ReadAddress(start); + lsm->address = address; + } + break; + case DW_LNE_define_file: { + const char* filename = start; + + templen = strlen(filename) + 1; + start += templen; + + uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + + const uint64 mod_time = reader->ReadUnsignedLEB128(start, + &templen); + oplen += templen; + + const uint64 filelength = reader->ReadUnsignedLEB128(start, + &templen); + oplen += templen; + + if (handler) { + handler->DefineFile(filename, -1, dirindex, mod_time, + filelength); + } + } + break; + } + } + break; + + default: { + // Ignore unknown opcode silently + if (header.std_opcode_lengths) { + for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) { + size_t templen; + reader->ReadUnsignedLEB128(start, &templen); + start += templen; + oplen += templen; + } + } + } + break; + } + *len = oplen; + return false; +} + +void LineInfo::ReadLines() { + struct LineStateMachine lsm; + + // lengthstart is the place the length field is based on. 
+ // It is the point in the header after the initial length field + const char* lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. + if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + const char* lineptr = after_header_; + while (lineptr < lengthstart + header_.total_length) { + lsm.Reset(header_.default_is_stmt); + while (!lsm.end_sequence) { + size_t oplength; + bool add_line = ProcessOneOpcode(reader_, handler_, header_, + lineptr, &lsm, &oplength, (uintptr_t)-1, NULL); + if (add_line) + handler_->AddLine(lsm.address, lsm.file_num, lsm.line_num, + lsm.column_num); + lineptr += oplength; + } + } + + after_header_ = lengthstart + header_.total_length; +} + +} // namespace dwarf2reader diff --git a/src/common/mac/dwarf/dwarf2reader.h b/src/common/mac/dwarf/dwarf2reader.h new file mode 100644 index 00000000..f27cdac7 --- /dev/null +++ b/src/common/mac/dwarf/dwarf2reader.h @@ -0,0 +1,393 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file contains definitions related to the DWARF2/3 reader and +// it's handler interfaces. +// The DWARF2/3 specification can be found at +// http://dwarf.freestandards.org and should be considered required +// reading if you wish to modify the implementation. +// Only a cursory attempt is made to explain terminology that is +// used here, as it is much better explained in the standard documents +#ifndef COMMON_MAC_DWARF_DWARF2READER_H__ +#define COMMON_MAC_DWARF_DWARF2READER_H__ + +#include <ext/hash_map> +#include <list> +#include <string> +#include <utility> +#include <vector> + +#include "common/mac/dwarf/dwarf2enums.h" +#include "common/mac/dwarf/types.h" + +using namespace std; +using namespace __gnu_cxx; + +namespace dwarf2reader { +struct LineStateMachine; +class ByteReader; +class Dwarf2Handler; +class LineInfoHandler; + +// This maps from a string naming a section to a pair containing a +// the data for the section, and the size of the section. 
+typedef hash_map<string, pair<const char*, uint64> > SectionMap; +typedef list<pair<enum DwarfAttribute, enum DwarfForm> > AttributeList; +typedef AttributeList::iterator AttributeIterator; +typedef AttributeList::const_iterator ConstAttributeIterator; + +struct LineInfoHeader { + uint64 total_length; + uint16 version; + uint64 prologue_length; + uint8 min_insn_length; // insn stands for instructin + bool default_is_stmt; // stmt stands for statement + int8 line_base; + uint8 line_range; + uint8 opcode_base; + // Use a pointer so that signalsafe_addr2line is able to use this structure + // without heap allocation problem. + vector<unsigned char> *std_opcode_lengths; +}; + +class LineInfo { + public: + + // Initializes a .debug_line reader. Buffer and buffer length point + // to the beginning and length of the line information to read. + // Reader is a ByteReader class that has the endianness set + // properly. + LineInfo(const char* buffer_, uint64 buffer_length, + ByteReader* reader, LineInfoHandler* handler); + + virtual ~LineInfo() { + if (header_.std_opcode_lengths) { + delete header_.std_opcode_lengths; + } + } + + // Start processing line info, and calling callbacks in the handler. + // Consumes the line number information for a single compilation unit. + // Returns the number of bytes processed. + uint64 Start(); + + // Process a single line info opcode at START using the state + // machine at LSM. Return true if we should define a line using the + // current state of the line state machine. Place the length of the + // opcode in LEN. + // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm + // passes the address of PC. In other words, LSM_PASSES_PC will be + // set to true, if the following condition is met. 
+ // + // lsm's old address < PC <= lsm's new address + static bool ProcessOneOpcode(ByteReader* reader, + LineInfoHandler* handler, + const struct LineInfoHeader &header, + const char* start, + struct LineStateMachine* lsm, + size_t* len, + uintptr_t pc, + bool *lsm_passes_pc); + + private: + // Reads the DWARF2/3 header for this line info. + void ReadHeader(); + + // Reads the DWARF2/3 line information + void ReadLines(); + + // The associated handler to call processing functions in + LineInfoHandler* handler_; + + // The associated ByteReader that handles endianness issues for us + ByteReader* reader_; + + // A DWARF2/3 line info header. This is not the same size as + // in the actual file, as the one in the file may have a 32 bit or + // 64 bit lengths + + struct LineInfoHeader header_; + + // buffer is the buffer for our line info, starting at exactly where + // the line info to read is. after_header is the place right after + // the end of the line information header. + const char* buffer_; + uint64 buffer_length_; + const char* after_header_; +}; + +// This class is the main interface between the line info reader and +// the client. The virtual functions inside this get called for +// interesting events that happen during line info reading. The +// default implementation does nothing + +class LineInfoHandler { + public: + LineInfoHandler() { } + + virtual ~LineInfoHandler() { } + + // Called when we define a directory. NAME is the directory name, + // DIR_NUM is the directory number + virtual void DefineDir(const string& name, uint32 dir_num) { } + + // Called when we define a filename. 
NAME is the filename, FILE_NUM + // is the file number which is -1 if the file index is the next + // index after the last numbered index (this happens when files are + // dynamically defined by the line program), DIR_NUM is the + // directory index for the directory name of this file, MOD_TIME is + // the modification time of the file, and LENGTH is the length of + // the file + virtual void DefineFile(const string& name, int32 file_num, + uint32 dir_num, uint64 mod_time, + uint64 length) { } + + // Called when the line info reader has a new line, address pair + // ready for us. ADDRESS is the address of the code, FILE_NUM is + // the file number containing the code, LINE_NUM is the line number in + // that file for the code, and COLUMN_NUM is the column number the code + // starts at, if we know it (0 otherwise). + virtual void AddLine(uint64 address, uint32 file_num, uint32 line_num, + uint32 column_num) { } +}; + +// The base of DWARF2/3 debug info is a DIE (Debugging Information +// Entry. +// DWARF groups DIE's into a tree and calls the root of this tree a +// "compilation unit". Most of the time, their is one compilation +// unit in the .debug_info section for each file that had debug info +// generated. +// Each DIE consists of + +// 1. a tag specifying a thing that is being described (ie +// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc +// 2. attributes (such as DW_AT_location for location in memory, +// DW_AT_name for name), and data for each attribute. +// 3. A flag saying whether the DIE has children or not + +// In order to gain some amount of compression, the format of +// each DIE (tag name, attributes and data forms for the attributes) +// are stored in a separate table called the "abbreviation table". +// This is done because a large number of DIEs have the exact same tag +// and list of attributes, but different data for those attributes. 
+// As a result, the .debug_info section is just a stream of data, and +// requires reading of the .debug_abbrev section to say what the data +// means. + +// As a warning to the user, it should be noted that the reason for +// using absolute offsets from the beginning of .debug_info is that +// DWARF2/3 support referencing DIE's from other DIE's by their offset +// from either the current compilation unit start, *or* the beginning +// of the .debug_info section. This means it is possible to reference +// a DIE in one compilation unit from a DIE in another compilation +// unit. This style of reference is usually used to eliminate +// duplicated information that occurs across compilation +// units, such as base types, etc. GCC 3.4+ support this with +// -feliminate-dwarf2-dups. Other toolchains will sometimes do +// duplicate elimination in the linker. + +class CompilationUnit { + public: + + // Initialize a compilation unit. This requires a map of sections, + // the offset of this compilation unit in the debug_info section, a + // ByteReader, and a Dwarf2Handler class to call callbacks in. + CompilationUnit(const SectionMap& sections, uint64 offset, + ByteReader* reader, Dwarf2Handler* handler); + virtual ~CompilationUnit() { + if (abbrevs_) delete abbrevs_; + } + + // Begin reading a Dwarf2 compilation unit, and calling the + // callbacks in the Dwarf2Handler + // Return the offset of the end of the compilation unit - the passed + // in offset. + uint64 Start(); + + private: + + // This struct represents a single DWARF2/3 abbreviation + // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a + // tag and a list of attributes, as well as the data form of each attribute. + struct Abbrev { + uint32 number; + enum DwarfTag tag; + bool has_children; + AttributeList attributes; + }; + + // A DWARF2/3 compilation unit header. This is not the same size as + // in the actual file, as the one in the file may have a 32 bit or + // 64 bit length. 
+ struct CompilationUnitHeader { + uint64 length; + uint16 version; + uint64 abbrev_offset; + uint8 address_size; + } header_; + + // Reads the DWARF2/3 header for this compilation unit. + void ReadHeader(); + + // Reads the DWARF2/3 abbreviations for this compilation unit + void ReadAbbrevs(); + + // Processes a single DIE for this compilation unit and return a new + // pointer just past the end of it + const char* ProcessDIE(uint64 dieoffset, + const char* start, + const Abbrev& abbrev); + + // Processes a single attribute and return a new pointer just past the + // end of it + const char* ProcessAttribute(uint64 dieoffset, + const char* start, + enum DwarfAttribute attr, + enum DwarfForm form); + + // Processes all DIEs for this compilation unit + void ProcessDIEs(); + + // Skips the die with attributes specified in ABBREV starting at + // START, and return the new place to position the stream to. + const char* SkipDIE(const char* start, + const Abbrev& abbrev); + + // Skips the attribute starting at START, with FORM, and return the + // new place to position the stream to. + const char* SkipAttribute(const char* start, + enum DwarfForm form); + + // Offset from section start is the offset of this compilation unit + // from the beginning of the .debug_info section. + uint64 offset_from_section_start_; + + // buffer is the buffer for our CU, starting at .debug_info + offset + // passed in from constructor. + // after_header points to right after the compilation unit header. + const char* buffer_; + uint64 buffer_length_; + const char* after_header_; + + // The associated ByteReader that handles endianness issues for us + ByteReader* reader_; + + // The map of sections in our file to buffers containing their data + const SectionMap& sections_; + + // The associated handler to call processing functions in + Dwarf2Handler* handler_; + + // Set of DWARF2/3 abbreviations for this compilation unit. 
Indexed + // by abbreviation number, which means that abbrevs_[0] is not + // valid. + vector<Abbrev>* abbrevs_; + + // String section buffer and length, if we have a string section. + // This is here to avoid doing a section lookup for strings in + // ProcessAttribute, which is in the hot path for DWARF2 reading. + const char* string_buffer_; + uint64 string_buffer_length_; +}; + +// This class is the main interface between the reader and the +// client. The virtual functions inside this get called for +// interesting events that happen during DWARF2 reading. +// The default implementation skips everything. + +class Dwarf2Handler { + public: + Dwarf2Handler() { } + + virtual ~Dwarf2Handler() { } + + // Start to process a compilation unit at OFFSET from the beginning of the + // debug_info section. Return false if you would like + // to skip this compilation unit. + virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version) { return false; } + + // Start to process a DIE at OFFSET from the beginning of the + // debug_info section. Return false if you would like to skip this + // DIE. + virtual bool StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs) { return false; } + + // Called when we have an attribute with unsigned data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + virtual void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { } + + // Called when we have an attribute with signed data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. 
+ virtual void ProcessAttributeSigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { } + + // Called when we have an attribute with a buffer of data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA, and the + // length of the buffer is LENGTH. The buffer is owned by the + // caller, not the callee, and may not persist for very long. If + // you want the data to be available later, it needs to be copied. + virtual void ProcessAttributeBuffer(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char* data, + uint64 len) { } + + // Called when we have an attribute with string data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + virtual void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string& data) { } + + // Called when finished processing the DIE at OFFSET. + // Because DWARF2/3 specifies a tree of DIEs, you may get starts + // before ends of the previous DIE, as we process children before + // ending the parent. + virtual void EndDIE(uint64 offset) { } + +}; + + +} // namespace dwarf2reader + +#endif // UTIL_DEBUGINFO_DWARF2READER_H__ diff --git a/src/common/mac/dwarf/functioninfo.cc b/src/common/mac/dwarf/functioninfo.cc new file mode 100644 index 00000000..e9d33b83 --- /dev/null +++ b/src/common/mac/dwarf/functioninfo.cc @@ -0,0 +1,198 @@ +// Copyright 2006 Google Inc. All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This is a client for the dwarf2reader to extract function and line +// information from the debug info. 
+ +#include <map> +#include <queue> +#include <vector> + + +#include "common/mac/dwarf/functioninfo.h" + +#include "common/mac/dwarf/bytereader.h" + + +namespace __gnu_cxx +{ + template<> + struct hash<std::string> + { + size_t operator()(const std::string& k) const; + }; +} + + +namespace dwarf2reader { + +CULineInfoHandler::CULineInfoHandler(vector<SourceFileInfo>* files, + vector<string>* dirs, + LineMap* linemap):linemap_(linemap), + files_(files), + dirs_(dirs) { + // The dirs and files are 1 indexed, so just make sure we put + // nothing in the 0 vector. + assert(dirs->size() == 0); + assert(files->size() == 0); + dirs->push_back(""); + SourceFileInfo s; + s.name = ""; + s.lowpc = ULLONG_MAX; + files->push_back(s); +} + +void CULineInfoHandler::DefineDir(const string& name, uint32 dir_num) { + // These should never come out of order, actually + assert(dir_num == dirs_->size()); + dirs_->push_back(name); +} + +void CULineInfoHandler::DefineFile(const string& name, + int32 file_num, uint32 dir_num, + uint64 mod_time, uint64 length) { + assert(dir_num >= 0); + assert(dir_num < dirs_->size()); + + // These should never come out of order, actually. 
+ if (file_num == (int32)files_->size() || file_num == -1) { + string dir = dirs_->at(dir_num); + + SourceFileInfo s; + s.lowpc = ULLONG_MAX; + + if (dir == "") { + s.name = name; + } else { + s.name = dir + "/" + name; + } + + files_->push_back(s); + } else { + fprintf(stderr, "error in DefineFile"); + } +} + +void CULineInfoHandler::AddLine(uint64 address, uint32 file_num, + uint32 line_num, uint32 column_num) { + if (file_num < files_->size()) { + linemap_->insert(make_pair(address, make_pair(files_->at(file_num).name.c_str(), + line_num))); + + if(address < files_->at(file_num).lowpc) { + files_->at(file_num).lowpc = address; + } + } else { + fprintf(stderr,"error in AddLine"); + } +} + +bool CUFunctionInfoHandler::StartCompilationUnit(uint64 offset, + uint8 address_size, + uint8 offset_size, + uint64 cu_length, + uint8 dwarf_version) { + return true; +} + + +// For function info, we only care about subprograms and inlined +// subroutines. For line info, the DW_AT_stmt_list lives in the +// compile unit tag. 
+ +bool CUFunctionInfoHandler::StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs) { + switch (tag) { + case DW_TAG_subprogram: + case DW_TAG_inlined_subroutine: { + current_function_info_ = new FunctionInfo; + current_function_info_->lowpc = current_function_info_->highpc = 0; + current_function_info_->name = ""; + current_function_info_->line = 0; + current_function_info_->file = ""; + offset_to_funcinfo_->insert(make_pair(offset, current_function_info_)); + }; + // FALLTHROUGH + case DW_TAG_compile_unit: + return true; + default: + return false; + } + return false; +} + +// Only care about the name attribute for functions + +void CUFunctionInfoHandler::ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string &data) { + if (attr == DW_AT_name && current_function_info_) + current_function_info_->name = data; +} + +void CUFunctionInfoHandler::ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + if (attr == DW_AT_stmt_list) { + SectionMap::const_iterator iter = sections_.find("__debug_line"); + assert(iter != sections_.end()); + + // this should be a scoped_ptr but we dont' use boost :-( + auto_ptr<LineInfo> lireader(new LineInfo(iter->second.first + data, + iter->second.second - data, + reader_, linehandler_)); + lireader->Start(); + } else if (current_function_info_) { + switch (attr) { + case DW_AT_low_pc: + current_function_info_->lowpc = data; + break; + case DW_AT_high_pc: + current_function_info_->highpc = data; + break; + case DW_AT_decl_line: + current_function_info_->line = data; + break; + case DW_AT_decl_file: + current_function_info_->file = files_->at(data).name; + break; + default: + break; + } + } +} + +void CUFunctionInfoHandler::EndDIE(uint64 offset) { + if (current_function_info_ && current_function_info_->lowpc) + address_to_funcinfo_->insert(make_pair(current_function_info_->lowpc, + current_function_info_)); +} + +} // 
namespace dwarf2reader diff --git a/src/common/mac/dwarf/functioninfo.h b/src/common/mac/dwarf/functioninfo.h new file mode 100644 index 00000000..f529f092 --- /dev/null +++ b/src/common/mac/dwarf/functioninfo.h @@ -0,0 +1,175 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// This file contains the definitions for a DWARF2/3 information +// collector that uses the DWARF2/3 reader interface to build a mapping +// of addresses to files, lines, and functions. 
+ +#ifndef COMMON_MAC_DWARF_FUNCTIONINFO_H__ +#define COMMON_MAC_DWARF_FUNCTIONINFO_H__ + +#include <map> +#include <string> +#include <utility> +#include <vector> + +#include "common/mac/dwarf/dwarf2reader.h" + + +namespace dwarf2reader { + +struct FunctionInfo { + // Name of the function + string name; + // File containing this function + string file; + // Line number for start of function. + uint32 line; + // Beginning address for this function + uint64 lowpc; + // End address for this function. + uint64 highpc; +}; + +struct SourceFileInfo { + // Name of the source file name + string name; + // Low address of source file name + uint64 lowpc; +}; + +typedef map<uint64, FunctionInfo*> FunctionMap; +typedef map<uint64, pair<string, uint32> > LineMap; + +// This class is a basic line info handler that fills in the dirs, +// file, and linemap passed into it with the data produced from the +// LineInfoHandler. +class CULineInfoHandler: public LineInfoHandler { + public: + + // + CULineInfoHandler(vector<SourceFileInfo>* files, + vector<string>* dirs, + LineMap* linemap); + virtual ~CULineInfoHandler() { } + + // Called when we define a directory. We just place NAME into dirs_ + // at position DIR_NUM. + virtual void DefineDir(const string& name, uint32 dir_num); + + // Called when we define a filename. We just place + // concat(dirs_[DIR_NUM], NAME) into files_ at position FILE_NUM. + virtual void DefineFile(const string& name, int32 file_num, + uint32 dir_num, uint64 mod_time, uint64 length); + + + // Called when the line info reader has a new line, address pair + // ready for us. ADDRESS is the address of the code, FILE_NUM is + // the file number containing the code, LINE_NUM is the line number + // in that file for the code, and COLUMN_NUM is the column number + // the code starts at, if we know it (0 otherwise). 
+ virtual void AddLine(uint64 address, uint32 file_num, uint32 line_num, + uint32 column_num); + + + private: + LineMap* linemap_; + vector<SourceFileInfo>* files_; + vector<string>* dirs_; +}; + +class CUFunctionInfoHandler: public Dwarf2Handler { + public: + CUFunctionInfoHandler(vector<SourceFileInfo>* files, + vector<string>* dirs, + LineMap* linemap, + FunctionMap* offset_to_funcinfo, + FunctionMap* address_to_funcinfo, + CULineInfoHandler* linehandler, + const SectionMap& sections, + ByteReader* reader) + : files_(files), dirs_(dirs), linemap_(linemap), + offset_to_funcinfo_(offset_to_funcinfo), + address_to_funcinfo_(address_to_funcinfo), + linehandler_(linehandler), sections_(sections), + reader_(reader), current_function_info_(NULL) { } + + virtual ~CUFunctionInfoHandler() { } + + // Start to process a compilation unit at OFFSET from the beginning of the + // debug_info section. We want to see all compilation units, so we + // always return true. + + virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version); + + // Start to process a DIE at OFFSET from the beginning of the + // debug_info section. We only care about function related DIE's. + virtual bool StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs); + + // Called when we have an attribute with unsigned data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + virtual void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + + // Called when we have an attribute with string data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. 
+ virtual void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string& data); + + // Called when finished processing the DIE at OFFSET. + // Because DWARF2/3 specifies a tree of DIEs, you may get starts + // before ends of the previous DIE, as we process children before + // ending the parent. + virtual void EndDIE(uint64 offset); + + private: + vector<SourceFileInfo>* files_; + vector<string>* dirs_; + LineMap* linemap_; + FunctionMap* offset_to_funcinfo_; + FunctionMap* address_to_funcinfo_; + CULineInfoHandler* linehandler_; + const SectionMap& sections_; + ByteReader* reader_; + FunctionInfo* current_function_info_; +}; + +} // namespace dwarf2reader +#endif // COMMON_MAC_DWARF_FUNCTIONINFO_H__ diff --git a/src/common/mac/dwarf/line_state_machine.h b/src/common/mac/dwarf/line_state_machine.h new file mode 100644 index 00000000..4ba98f74 --- /dev/null +++ b/src/common/mac/dwarf/line_state_machine.h @@ -0,0 +1,61 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef COMMON_MAC_DWARF_LINE_STATE_MACHINE_H__ +#define COMMON_MAC_DWARF_LINE_STATE_MACHINE_H__ + +namespace dwarf2reader { + +// This is the format of a DWARF2/3 line state machine that we process +// opcodes using. There is no need for anything outside the lineinfo +// processor to know how this works. +struct LineStateMachine { + void Reset(bool default_is_stmt) { + file_num = 1; + address = 0; + line_num = 1; + column_num = 0; + is_stmt = default_is_stmt; + basic_block = false; + end_sequence = false; + } + + uint32 file_num; + uint64 address; + uint64 line_num; + uint32 column_num; + bool is_stmt; // stmt means statement. + bool basic_block; + bool end_sequence; +}; + +} // namespace dwarf2reader + + +#endif // COMMON_MAC_DWARF_LINE_STATE_MACHINE_H__ diff --git a/src/common/mac/dwarf/types.h b/src/common/mac/dwarf/types.h new file mode 100644 index 00000000..76541597 --- /dev/null +++ b/src/common/mac/dwarf/types.h @@ -0,0 +1,46 @@ +// Copyright 2008 Google, Inc. 
// All Rights reserved
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


// This file contains typedefs for the basic integer types used by the
// DWARF reader.
//
// The aliases are named after a bit width but are plain typedefs of the
// built-in types.  They presume the usual sizes (char 8, short 16,
// int 32, long long 64 bits); nothing in this header checks that.


#ifndef COMMON_MAC_DWARF_TYPES_H__
#define COMMON_MAC_DWARF_TYPES_H__

// Signed integers.
typedef signed char         int8;
typedef short               int16;
typedef int                 int32;
typedef long long           int64;

// Unsigned integers.
typedef unsigned char       uint8;
typedef unsigned short      uint16;
typedef unsigned int        uint32;
typedef unsigned long long  uint64;

#endif // COMMON_MAC_DWARF_TYPES_H__ |