From b223627d81c083a64f2ccecf2651a18111421280 Mon Sep 17 00:00:00 2001 From: "ted.mielczarek" Date: Thu, 8 Apr 2010 23:06:23 +0000 Subject: provide a network source line resolver + server. r=mark,jimb at http://breakpad.appspot.com/36001 git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@569 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/processor/basic_source_line_resolver.cc | 265 +++++++++++----------------- 1 file changed, 99 insertions(+), 166 deletions(-) (limited to 'src/processor/basic_source_line_resolver.cc') diff --git a/src/processor/basic_source_line_resolver.cc b/src/processor/basic_source_line_resolver.cc index 0385f89f..52044dbf 100644 --- a/src/processor/basic_source_line_resolver.cc +++ b/src/processor/basic_source_line_resolver.cc @@ -44,10 +44,11 @@ #include "google_breakpad/processor/basic_source_line_resolver.h" #include "google_breakpad/processor/code_module.h" #include "google_breakpad/processor/stack_frame.h" +#include "processor/cfi_frame_info.h" #include "processor/linked_ptr.h" #include "processor/scoped_ptr.h" #include "processor/windows_frame_info.h" -#include "processor/cfi_frame_info.h" +#include "processor/tokenize.h" using std::map; using std::vector; @@ -55,6 +56,8 @@ using std::make_pair; namespace google_breakpad { +static const char *kWhitespace = " \r\n"; + struct BasicSourceLineResolver::Line { Line(MemAddr addr, MemAddr code_size, int file_id, int source_line) : address(addr) @@ -135,32 +138,6 @@ class BasicSourceLineResolver::Module { friend class BasicSourceLineResolver; typedef map FileMap; - // The types for windows_frame_info_. This is equivalent to MS DIA's - // StackFrameTypeEnum. Each identifies a different type of frame - // information, although all are represented in the symbol file in the - // same format. These are used as indices to the windows_frame_info_ array. - enum WindowsFrameInfoTypes { - WINDOWS_FRAME_INFO_FPO = 0, - WINDOWS_FRAME_INFO_TRAP, // not used here - WINDOWS_FRAME_INFO_TSS, // not used here - WINDOWS_FRAME_INFO_STANDARD, - WINDOWS_FRAME_INFO_FRAME_DATA, - WINDOWS_FRAME_INFO_LAST, // must be the last sequentially-numbered item - WINDOWS_FRAME_INFO_UNKNOWN = -1 - }; - - // Splits line into at most max_tokens space-separated tokens, placing - // them in the tokens vector. line is a 0-terminated string that - // optionally ends with a newline character or combination, which will - // be removed. line must not contain any embedded '\n' or '\r' characters. - // If more tokens than max_tokens are present, the final token is placed - // into the vector without splitting it up at all. This modifies line as - // a side effect. Returns true if exactly max_tokens tokens are returned, - // and false if fewer are returned. This is not considered a failure of - // Tokenize, but may be treated as a failure if the caller expects an - // exact, as opposed to maximum, number of tokens. - static bool Tokenize(char *line, int max_tokens, vector *tokens); - // Parses a file declaration bool ParseFile(char *file_line); @@ -178,9 +155,6 @@ class BasicSourceLineResolver::Module { // it in the appropriate table. bool ParseStackInfo(char *stack_info_line); - // Parses a STACK WIN record, storing it in windows_frame_info_. - bool ParseWindowsFrameInfo(char *stack_info_line); - // Parses a STACK CFI record, storing it in cfi_frame_info_. bool ParseCFIFrameInfo(char *stack_info_line); @@ -198,7 +172,7 @@ class BasicSourceLineResolver::Module { // there may be overlaps between maps of different types, but some // information is only available as certain types. ContainedRangeMap< MemAddr, linked_ptr > - windows_frame_info_[WINDOWS_FRAME_INFO_LAST]; + windows_frame_info_[WindowsFrameInfo::STACK_INFO_LAST]; // DWARF CFI stack walking data. The Module stores the initial rule sets // and rule deltas as strings, just as they appear in the symbol file: @@ -230,53 +204,75 @@ BasicSourceLineResolver::~BasicSourceLineResolver() { delete modules_; } -bool BasicSourceLineResolver::LoadModule(const string &module_name, +bool BasicSourceLineResolver::LoadModule(const CodeModule *module, const string &map_file) { + if (module == NULL) + return false; + // Make sure we don't already have a module with the given name. - if (modules_->find(module_name) != modules_->end()) { - BPLOG(INFO) << "Symbols for module " << module_name << " already loaded"; + if (modules_->find(module->code_file()) != modules_->end()) { + BPLOG(INFO) << "Symbols for module " << module->code_file() + << " already loaded"; return false; } - BPLOG(INFO) << "Loading symbols for module " << module_name << " from " << - map_file; + BPLOG(INFO) << "Loading symbols for module " << module->code_file() + << " from " << map_file; - Module *module = new Module(module_name); - if (!module->LoadMap(map_file)) { - delete module; + Module *basic_module = new Module(module->code_file()); + if (!basic_module->LoadMap(map_file)) { + delete basic_module; return false; } - modules_->insert(make_pair(module_name, module)); + modules_->insert(make_pair(module->code_file(), basic_module)); return true; } bool BasicSourceLineResolver::LoadModuleUsingMapBuffer( - const string &module_name, + const CodeModule *module, const string &map_buffer) { + if (!module) + return false; + // Make sure we don't already have a module with the given name. - if (modules_->find(module_name) != modules_->end()) { - BPLOG(INFO) << "Symbols for module " << module_name << " already loaded"; + if (modules_->find(module->code_file()) != modules_->end()) { + BPLOG(INFO) << "Symbols for module " << module->code_file() + << " already loaded"; return false; } - BPLOG(INFO) << "Loading symbols for module " << module_name << " from buffer"; + BPLOG(INFO) << "Loading symbols for module " << module->code_file() + << " from buffer"; - Module *module = new Module(module_name); - if (!module->LoadMapFromBuffer(map_buffer)) { - delete module; + Module *basic_module = new Module(module->code_file()); + if (!basic_module->LoadMapFromBuffer(map_buffer)) { + delete basic_module; return false; } - modules_->insert(make_pair(module_name, module)); + modules_->insert(make_pair(module->code_file(), basic_module)); return true; } -bool BasicSourceLineResolver::HasModule(const string &module_name) const { - return modules_->find(module_name) != modules_->end(); +void BasicSourceLineResolver::UnloadModule(const CodeModule *module) +{ + if (!module) + return; + + ModuleMap::iterator iter = modules_->find(module->code_file()); + if (iter != modules_->end()) { + modules_->erase(iter); + } +} + +bool BasicSourceLineResolver::HasModule(const CodeModule *module) { + if (!module) + return false; + return modules_->find(module->code_file()) != modules_->end(); } -void BasicSourceLineResolver::FillSourceLineInfo(StackFrame *frame) const { +void BasicSourceLineResolver::FillSourceLineInfo(StackFrame *frame) { if (frame->module) { ModuleMap::const_iterator it = modules_->find(frame->module->code_file()); if (it != modules_->end()) { @@ -286,7 +282,7 @@ void BasicSourceLineResolver::FillSourceLineInfo(StackFrame *frame) const { } WindowsFrameInfo *BasicSourceLineResolver::FindWindowsFrameInfo( - const StackFrame *frame) const { + const StackFrame *frame) { if (frame->module) { ModuleMap::const_iterator it = modules_->find(frame->module->code_file()); if (it != modules_->end()) { @@ -297,7 +293,7 @@ WindowsFrameInfo *BasicSourceLineResolver::FindWindowsFrameInfo( } CFIFrameInfo *BasicSourceLineResolver::FindCFIFrameInfo( - const StackFrame *frame) const { + const StackFrame *frame) { if (frame->module) { ModuleMap::const_iterator it = modules_->find(frame->module->code_file()); if (it != modules_->end()) { @@ -516,15 +512,16 @@ WindowsFrameInfo *BasicSourceLineResolver::Module::FindWindowsFrameInfo( MemAddr address = frame->instruction - frame->module->base_address(); scoped_ptr result(new WindowsFrameInfo()); - // We only know about WINDOWS_FRAME_INFO_FRAME_DATA and - // WINDOWS_FRAME_INFO_FPO. Prefer them in this order. - // WINDOWS_FRAME_INFO_FRAME_DATA is the newer type that includes its - // own program string. WINDOWS_FRAME_INFO_FPO is the older type + // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and + // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order. + // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that + // includes its own program string. + // WindowsFrameInfo::STACK_INFO_FPO is the older type // corresponding to the FPO_DATA struct. See stackwalker_x86.cc. linked_ptr frame_info; - if ((windows_frame_info_[WINDOWS_FRAME_INFO_FRAME_DATA] + if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA] .RetrieveRange(address, &frame_info)) - || (windows_frame_info_[WINDOWS_FRAME_INFO_FPO] + || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO] .RetrieveRange(address, &frame_info))) { result->CopyFrom(*frame_info.get()); return result.release(); @@ -600,40 +597,12 @@ bool BasicSourceLineResolver::Module::ParseCFIRuleSet( return parser.Parse(rule_set); } -// static -bool BasicSourceLineResolver::Module::Tokenize(char *line, int max_tokens, - vector *tokens) { - tokens->clear(); - tokens->reserve(max_tokens); - - int remaining = max_tokens; - - // Split tokens on the space character. Look for newlines too to - // strip them out before exhausting max_tokens. - char *save_ptr; - char *token = strtok_r(line, " \r\n", &save_ptr); - while (token && --remaining > 0) { - tokens->push_back(token); - if (remaining > 1) - token = strtok_r(NULL, " \r\n", &save_ptr); - } - - // If there's anything left, just add it as a single token. - if (!remaining > 0) { - if ((token = strtok_r(NULL, "\r\n", &save_ptr))) { - tokens->push_back(token); - } - } - - return tokens->size() == static_cast(max_tokens); -} - bool BasicSourceLineResolver::Module::ParseFile(char *file_line) { // FILE file_line += 5; // skip prefix vector tokens; - if (!Tokenize(file_line, 2, &tokens)) { + if (!Tokenize(file_line, kWhitespace, 2, &tokens)) { return false; } @@ -657,7 +626,7 @@ BasicSourceLineResolver::Module::ParseFunction(char *function_line) { function_line += 5; // skip prefix vector tokens; - if (!Tokenize(function_line, 4, &tokens)) { + if (!Tokenize(function_line, kWhitespace, 4, &tokens)) { return NULL; } @@ -673,7 +642,7 @@ BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine( char *line_line) { //
vector tokens; - if (!Tokenize(line_line, 4, &tokens)) { + if (!Tokenize(line_line, kWhitespace, 4, &tokens)) { return NULL; } @@ -695,7 +664,7 @@ bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) { public_line += 7; vector tokens; - if (!Tokenize(public_line, 3, &tokens)) { + if (!Tokenize(public_line, kWhitespace, 3, &tokens)) { return false; } @@ -727,87 +696,51 @@ bool BasicSourceLineResolver::Module::ParseStackInfo(char *stack_info_line) { while (*stack_info_line == ' ') stack_info_line++; const char *platform = stack_info_line; - while (!strchr(" \r\n", *stack_info_line)) + while (!strchr(kWhitespace, *stack_info_line)) stack_info_line++; *stack_info_line++ = '\0'; // MSVC stack frame info. - if (strcmp(platform, "WIN") == 0) - return ParseWindowsFrameInfo(stack_info_line); - - // DWARF CFI stack frame info - else if (strcmp(platform, "CFI") == 0) + if (strcmp(platform, "WIN") == 0) { + int type; + u_int64_t rva, code_size; + linked_ptr + stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line, + type, + rva, + code_size)); + if (stack_frame_info == NULL) + return false; + + // TODO(mmentovai): I wanted to use StoreRange's return value as this + // method's return value, but MSVC infrequently outputs stack info that + // violates the containment rules. This happens with a section of code + // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks + // like this: + // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...) + // STACK WIN 4 4243 2e 9 0 ... + // ContainedRangeMap treats these two blocks as conflicting. In reality, + // when the prolog lengths are taken into account, the actual code of + // these blocks doesn't conflict. However, we can't take the prolog lengths + // into account directly here because we'd wind up with a different set + // of range conflicts when MSVC outputs stack info like this: + // STACK WIN 4 1040 73 33 0 ... + // STACK WIN 4 105a 59 19 0 ... + // because in both of these entries, the beginning of the code after the + // prolog is at 0x1073, and the last byte of contained code is at 0x10b2. + // Perhaps we could get away with storing ranges by rva + prolog_size + // if ContainedRangeMap were modified to allow replacement of + // already-stored values. + + windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info); + return true; + } else if (strcmp(platform, "CFI") == 0) { + // DWARF CFI stack frame info return ParseCFIFrameInfo(stack_info_line); - - // Something unrecognized. - else - return false; -} - -bool BasicSourceLineResolver::Module::ParseWindowsFrameInfo( - char *stack_info_line) { - // The format of a STACK WIN record is documented at: - // - // http://code.google.com/p/google-breakpad/wiki/SymbolFiles - - vector tokens; - if (!Tokenize(stack_info_line, 11, &tokens)) - return false; - - int type = strtol(tokens[0], NULL, 16); - if (type < 0 || type > WINDOWS_FRAME_INFO_LAST - 1) - return false; - - u_int64_t rva = strtoull(tokens[1], NULL, 16); - u_int64_t code_size = strtoull(tokens[2], NULL, 16); - u_int32_t prolog_size = strtoul(tokens[3], NULL, 16); - u_int32_t epilog_size = strtoul(tokens[4], NULL, 16); - u_int32_t parameter_size = strtoul(tokens[5], NULL, 16); - u_int32_t saved_register_size = strtoul(tokens[6], NULL, 16); - u_int32_t local_size = strtoul(tokens[7], NULL, 16); - u_int32_t max_stack_size = strtoul(tokens[8], NULL, 16); - int has_program_string = strtoul(tokens[9], NULL, 16); - - const char *program_string = ""; - int allocates_base_pointer = 0; - if (has_program_string) { - program_string = tokens[10]; } else { - allocates_base_pointer = strtoul(tokens[10], NULL, 16); - } - - // TODO(mmentovai): I wanted to use StoreRange's return value as this - // method's return value, but MSVC infrequently outputs stack info that - // violates the containment rules. This happens with a section of code - // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks - // like this: - // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...) - // STACK WIN 4 4243 2e 9 0 ... - // ContainedRangeMap treats these two blocks as conflicting. In reality, - // when the prolog lengths are taken into account, the actual code of - // these blocks doesn't conflict. However, we can't take the prolog lengths - // into account directly here because we'd wind up with a different set - // of range conflicts when MSVC outputs stack info like this: - // STACK WIN 4 1040 73 33 0 ... - // STACK WIN 4 105a 59 19 0 ... - // because in both of these entries, the beginning of the code after the - // prolog is at 0x1073, and the last byte of contained code is at 0x10b2. - // Perhaps we could get away with storing ranges by rva + prolog_size - // if ContainedRangeMap were modified to allow replacement of - // already-stored values. - - linked_ptr stack_frame_info( - new WindowsFrameInfo(prolog_size, - epilog_size, - parameter_size, - saved_register_size, - local_size, - max_stack_size, - allocates_base_pointer, - program_string)); - windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info); - - return true; + // Something unrecognized. + return false; + } } bool BasicSourceLineResolver::Module::ParseCFIFrameInfo( -- cgit v1.2.1