From fd38d48e6d5e56cb66b0fa0f7e25f840a83dac5c Mon Sep 17 00:00:00 2001 From: bryner Date: Mon, 11 Dec 2006 23:22:54 +0000 Subject: Add an abstract interface to SourceLineResolver, and allow any implementation to be used with MinidumpProcessor. The basic SourceLineResolver is now a public interface (#89) git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@83 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/processor/source_line_resolver.cc | 561 ---------------------------------- 1 file changed, 561 deletions(-) delete mode 100644 src/processor/source_line_resolver.cc (limited to 'src/processor/source_line_resolver.cc') diff --git a/src/processor/source_line_resolver.cc b/src/processor/source_line_resolver.cc deleted file mode 100644 index 1ebc1bf4..00000000 --- a/src/processor/source_line_resolver.cc +++ /dev/null @@ -1,561 +0,0 @@ -// Copyright (c) 2006, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include - -#include -#include -#include - -#include "processor/address_map-inl.h" -#include "processor/contained_range_map-inl.h" -#include "processor/range_map-inl.h" - -#include "processor/source_line_resolver.h" -#include "google_airbag/processor/code_module.h" -#include "google_airbag/processor/stack_frame.h" -#include "processor/linked_ptr.h" -#include "processor/scoped_ptr.h" -#include "processor/stack_frame_info.h" - -using std::map; -using std::vector; -using std::make_pair; -using __gnu_cxx::hash; - -namespace google_airbag { - -struct SourceLineResolver::Line { - Line(MemAddr addr, MemAddr code_size, int file_id, int source_line) - : address(addr) - , size(code_size) - , source_file_id(file_id) - , line(source_line) { } - - MemAddr address; - MemAddr size; - int source_file_id; - int line; -}; - -struct SourceLineResolver::Function { - Function(const string &function_name, - MemAddr function_address, - MemAddr code_size, - int set_parameter_size) - : name(function_name), address(function_address), size(code_size), - parameter_size(set_parameter_size) { } - - string name; - MemAddr address; - MemAddr size; - - // The size of parameters passed to this function on the stack. - int parameter_size; - - RangeMap< MemAddr, linked_ptr > lines; -}; - -struct SourceLineResolver::PublicSymbol { - PublicSymbol(const string& set_name, - MemAddr set_address, - int set_parameter_size) - : name(set_name), - address(set_address), - parameter_size(set_parameter_size) {} - - string name; - MemAddr address; - - // If the public symbol is used as a function entry point, parameter_size - // is set to the size of the parameters passed to the funciton on the - // stack, if known. - int parameter_size; -}; - -class SourceLineResolver::Module { - public: - Module(const string &name) : name_(name) { } - - // Loads the given map file, returning true on success. - bool LoadMap(const string &map_file); - - // Looks up the given relative address, and fills the StackFrame struct - // with the result. Additional debugging information, if available, is - // returned. If no additional information is available, returns NULL. - // A NULL return value is not an error. The caller takes ownership of - // any returned StackFrameInfo object. - StackFrameInfo* LookupAddress(StackFrame *frame) const; - - private: - friend class SourceLineResolver; - typedef hash_map FileMap; - - // The types for stack_info_. This is equivalent to MS DIA's - // StackFrameTypeEnum. Each identifies a different type of frame - // information, although all are represented in the symbol file in the - // same format. These are used as indices to the stack_info_ array. - enum StackInfoTypes { - STACK_INFO_FPO = 0, - STACK_INFO_TRAP, // not used here - STACK_INFO_TSS, // not used here - STACK_INFO_STANDARD, - STACK_INFO_FRAME_DATA, - STACK_INFO_LAST, // must be the last sequentially-numbered item - STACK_INFO_UNKNOWN = -1 - }; - - // Splits line into at most max_tokens space-separated tokens, placing - // them in the tokens vector. line is a 0-terminated string that - // optionally ends with a newline character or combination, which will - // be removed. line must not contain any embedded '\n' or '\r' characters. - // If more tokens than max_tokens are present, the final token is placed - // into the vector without splitting it up at all. This modifies line as - // a side effect. Returns true if exactly max_tokens tokens are returned, - // and false if fewer are returned. This is not considered a failure of - // Tokenize, but may be treated as a failure if the caller expects an - // exact, as opposed to maximum, number of tokens. - static bool Tokenize(char *line, int max_tokens, vector *tokens); - - // Parses a file declaration - void ParseFile(char *file_line); - - // Parses a function declaration, returning a new Function object. - Function* ParseFunction(char *function_line); - - // Parses a line declaration, returning a new Line object. - Line* ParseLine(char *line_line); - - // Parses a PUBLIC symbol declaration, storing it in public_symbols_. - // Returns false if an error occurs. - bool ParsePublicSymbol(char *public_line); - - // Parses a stack frame info declaration, storing it in stack_info_. - bool ParseStackInfo(char *stack_info_line); - - string name_; - FileMap files_; - RangeMap< MemAddr, linked_ptr > functions_; - AddressMap< MemAddr, linked_ptr > public_symbols_; - - // Each element in the array is a ContainedRangeMap for a type listed in - // StackInfoTypes. These are split by type because there may be overlaps - // between maps of different types, but some information is only available - // as certain types. - ContainedRangeMap< MemAddr, linked_ptr > - stack_info_[STACK_INFO_LAST]; -}; - -SourceLineResolver::SourceLineResolver() : modules_(new ModuleMap) { -} - -SourceLineResolver::~SourceLineResolver() { - ModuleMap::iterator it; - for (it = modules_->begin(); it != modules_->end(); ++it) { - delete it->second; - } - delete modules_; -} - -bool SourceLineResolver::LoadModule(const string &module_name, - const string &map_file) { - // Make sure we don't already have a module with the given name. - if (modules_->find(module_name) != modules_->end()) { - return false; - } - - Module *module = new Module(module_name); - if (!module->LoadMap(map_file)) { - delete module; - return false; - } - - modules_->insert(make_pair(module_name, module)); - return true; -} - -bool SourceLineResolver::HasModule(const string &module_name) const { - return modules_->find(module_name) != modules_->end(); -} - -StackFrameInfo* SourceLineResolver::FillSourceLineInfo( - StackFrame *frame) const { - if (frame->module) { - ModuleMap::const_iterator it = modules_->find(frame->module->code_file()); - if (it != modules_->end()) { - return it->second->LookupAddress(frame); - } - } - return NULL; -} - -bool SourceLineResolver::Module::LoadMap(const string &map_file) { - FILE *f = fopen(map_file.c_str(), "r"); - if (!f) { - return false; - } - - // TODO(mmentovai): this might not be large enough to handle really long - // lines, which might be present for FUNC lines of highly-templatized - // code. - char buffer[8192]; - Function *cur_func = NULL; - - while (fgets(buffer, sizeof(buffer), f)) { - if (strncmp(buffer, "FILE ", 5) == 0) { - ParseFile(buffer); - } else if (strncmp(buffer, "STACK ", 6) == 0) { - if (!ParseStackInfo(buffer)) { - return false; - } - } else if (strncmp(buffer, "FUNC ", 5) == 0) { - cur_func = ParseFunction(buffer); - if (!cur_func) { - return false; - } - if (!functions_.StoreRange(cur_func->address, cur_func->size, - linked_ptr(cur_func))) { - return false; - } - } else if (strncmp(buffer, "PUBLIC ", 7) == 0) { - // Clear cur_func: public symbols don't contain line number information. - cur_func = NULL; - - if (!ParsePublicSymbol(buffer)) { - return false; - } - } else if (strncmp(buffer, "MODULE ", 7) == 0) { - // Ignore these. They're not of any use to SourceLineResolver, which - // is fed modules by a SymbolSupplier. These lines are present to - // aid other tools in properly placing symbol files so that they can - // be accessed by a SymbolSupplier. - // - // MODULE - } else { - if (!cur_func) { - return false; - } - Line *line = ParseLine(buffer); - if (!line) { - return false; - } - cur_func->lines.StoreRange(line->address, line->size, - linked_ptr(line)); - } - } - - fclose(f); - return true; -} - -StackFrameInfo* SourceLineResolver::Module::LookupAddress(StackFrame *frame) - const { - MemAddr address = frame->instruction - frame->module->base_address(); - - linked_ptr retrieved_info; - // Check for debugging info first, before any possible early returns. - // - // We only know about STACK_INFO_FRAME_DATA and STACK_INFO_FPO. Prefer - // them in this order. STACK_INFO_FRAME_DATA is the newer type that - // includes its own program string. STACK_INFO_FPO is the older type - // corresponding to the FPO_DATA struct. See stackwalker_x86.cc. - if (!stack_info_[STACK_INFO_FRAME_DATA].RetrieveRange(address, - &retrieved_info)) { - stack_info_[STACK_INFO_FPO].RetrieveRange(address, &retrieved_info); - } - - scoped_ptr frame_info; - if (retrieved_info.get()) { - frame_info.reset(new StackFrameInfo()); - frame_info->CopyFrom(*retrieved_info.get()); - } - - // First, look for a matching FUNC range. Use RetrieveNearestRange instead - // of RetrieveRange so that the nearest function can be compared to the - // nearest PUBLIC symbol if the address does not lie within the function. - // Having access to the highest function below address, even when address - // is outside of the function, is useful: if the function is higher than - // the nearest PUBLIC symbol, then it means that the PUBLIC symbols is not - // valid for the address, and no function information should be filled in. - // Using RetrieveNearestRange instead of RetrieveRange means that we need - // to verify that address is within the range before using a FUNC. - // - // If no FUNC containing the address is found, look for the nearest PUBLIC - // symbol, being careful not to use a public symbol at a lower address than - // the nearest FUNC. - int parameter_size = 0; - linked_ptr func; - linked_ptr public_symbol; - MemAddr function_base; - MemAddr function_size; - MemAddr public_address; - if (functions_.RetrieveNearestRange(address, &func, - &function_base, &function_size) && - address >= function_base && address < function_base + function_size) { - parameter_size = func->parameter_size; - - frame->function_name = func->name; - frame->function_base = frame->module->base_address() + function_base; - - linked_ptr line; - MemAddr line_base; - if (func->lines.RetrieveRange(address, &line, &line_base, NULL)) { - FileMap::const_iterator it = files_.find(line->source_file_id); - if (it != files_.end()) { - frame->source_file_name = files_.find(line->source_file_id)->second; - } - frame->source_line = line->line; - frame->source_line_base = frame->module->base_address() + line_base; - } - } else if (public_symbols_.Retrieve(address, - &public_symbol, &public_address) && - (!func.get() || public_address > function_base + function_size)) { - parameter_size = public_symbol->parameter_size; - - frame->function_name = public_symbol->name; - frame->function_base = frame->module->base_address() + public_address; - } else { - // No FUNC or PUBLIC data available. - return frame_info.release(); - } - - if (!frame_info.get()) { - // Even without a relevant STACK line, many functions contain information - // about how much space their parameters consume on the stack. Prefer - // the STACK stuff (above), but if it's not present, take the - // information from the FUNC or PUBLIC line. - frame_info.reset(new StackFrameInfo()); - frame_info->parameter_size = parameter_size; - frame_info->valid |= StackFrameInfo::VALID_PARAMETER_SIZE; - } - - return frame_info.release(); -} - -// static -bool SourceLineResolver::Module::Tokenize(char *line, int max_tokens, - vector *tokens) { - tokens->clear(); - tokens->reserve(max_tokens); - - int remaining = max_tokens; - - // Split tokens on the space character. Look for newlines too to - // strip them out before exhausting max_tokens. - char *save_ptr; - char *token = strtok_r(line, " \r\n", &save_ptr); - while (token && --remaining > 0) { - tokens->push_back(token); - if (remaining > 1) - token = strtok_r(NULL, " \r\n", &save_ptr); - } - - // If there's anything left, just add it as a single token. - if (!remaining > 0) { - if ((token = strtok_r(NULL, "\r\n", &save_ptr))) { - tokens->push_back(token); - } - } - - return tokens->size() == static_cast(max_tokens); -} - -void SourceLineResolver::Module::ParseFile(char *file_line) { - // FILE - file_line += 5; // skip prefix - - vector tokens; - if (!Tokenize(file_line, 2, &tokens)) { - return; - } - - int index = atoi(tokens[0]); - if (index < 0) { - return; - } - - char *filename = tokens[1]; - if (filename) { - files_.insert(make_pair(index, string(filename))); - } -} - -SourceLineResolver::Function* SourceLineResolver::Module::ParseFunction( - char *function_line) { - // FUNC
- function_line += 5; // skip prefix - - vector tokens; - if (!Tokenize(function_line, 4, &tokens)) { - return NULL; - } - - u_int64_t address = strtoull(tokens[0], NULL, 16); - u_int64_t size = strtoull(tokens[1], NULL, 16); - int stack_param_size = strtoull(tokens[2], NULL, 16); - char *name = tokens[3]; - - return new Function(name, address, size, stack_param_size); -} - -SourceLineResolver::Line* SourceLineResolver::Module::ParseLine( - char *line_line) { - //
- vector tokens; - if (!Tokenize(line_line, 4, &tokens)) { - return NULL; - } - - u_int64_t address = strtoull(tokens[0], NULL, 16); - u_int64_t size = strtoull(tokens[1], NULL, 16); - int line_number = atoi(tokens[2]); - int source_file = atoi(tokens[3]); - if (line_number <= 0) { - return NULL; - } - - return new Line(address, size, source_file, line_number); -} - -bool SourceLineResolver::Module::ParsePublicSymbol(char *public_line) { - // PUBLIC
- - // Skip "PUBLIC " prefix. - public_line += 7; - - vector tokens; - if (!Tokenize(public_line, 3, &tokens)) { - return false; - } - - u_int64_t address = strtoull(tokens[0], NULL, 16); - int stack_param_size = strtoull(tokens[1], NULL, 16); - char *name = tokens[2]; - - // A few public symbols show up with an address of 0. This has been seen - // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow, - // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict - // with one another if they were allowed into the public_symbols_ map, - // but since the address is obviously invalid, gracefully accept them - // as input without putting them into the map. - if (address == 0) { - return true; - } - - linked_ptr symbol(new PublicSymbol(name, address, - stack_param_size)); - return public_symbols_.Store(address, symbol); -} - -bool SourceLineResolver::Module::ParseStackInfo(char *stack_info_line) { - // STACK WIN - // - // - // - // If has_program_string is 1, the rest of the line is a program string. - // Otherwise, the final token tells whether the stack info indicates that - // a base pointer has been allocated. - // - // Expect has_program_string to be 1 when type is STACK_INFO_FRAME_DATA and - // 0 when type is STACK_INFO_FPO, but don't enforce this. - - // Skip "STACK " prefix. - stack_info_line += 6; - - vector tokens; - if (!Tokenize(stack_info_line, 12, &tokens)) - return false; - - // Only MSVC stack frame info is understood for now. - const char *platform = tokens[0]; - if (strcmp(platform, "WIN") != 0) - return false; - - int type = strtol(tokens[1], NULL, 16); - if (type < 0 || type > STACK_INFO_LAST - 1) - return false; - - u_int64_t rva = strtoull(tokens[2], NULL, 16); - u_int64_t code_size = strtoull(tokens[3], NULL, 16); - u_int32_t prolog_size = strtoul(tokens[4], NULL, 16); - u_int32_t epilog_size = strtoul(tokens[5], NULL, 16); - u_int32_t parameter_size = strtoul(tokens[6], NULL, 16); - u_int32_t saved_register_size = strtoul(tokens[7], NULL, 16); - u_int32_t local_size = strtoul(tokens[8], NULL, 16); - u_int32_t max_stack_size = strtoul(tokens[9], NULL, 16); - int has_program_string = strtoul(tokens[10], NULL, 16); - - const char *program_string = ""; - int allocates_base_pointer = 0; - if (has_program_string) { - program_string = tokens[11]; - } else { - allocates_base_pointer = strtoul(tokens[11], NULL, 16); - } - - // TODO(mmentovai): I wanted to use StoreRange's return value as this - // method's return value, but MSVC infrequently outputs stack info that - // violates the containment rules. This happens with a section of code - // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks - // like this: - // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...) - // STACK WIN 4 4243 2e 9 0 ... - // ContainedRangeMap treats these two blocks as conflicting. In reality, - // when the prolog lengths are taken into account, the actual code of - // these blocks doesn't conflict. However, we can't take the prolog lengths - // into account directly here because we'd wind up with a different set - // of range conflicts when MSVC outputs stack info like this: - // STACK WIN 4 1040 73 33 0 ... - // STACK WIN 4 105a 59 19 0 ... - // because in both of these entries, the beginning of the code after the - // prolog is at 0x1073, and the last byte of contained code is at 0x10b2. - // Perhaps we could get away with storing ranges by rva + prolog_size - // if ContainedRangeMap were modified to allow replacement of - // already-stored values. - - linked_ptr stack_frame_info( - new StackFrameInfo(prolog_size, - epilog_size, - parameter_size, - saved_register_size, - local_size, - max_stack_size, - allocates_base_pointer, - program_string)); - stack_info_[type].StoreRange(rva, code_size, stack_frame_info); - - return true; -} - -size_t SourceLineResolver::HashString::operator()(const string &s) const { - return hash()(s.c_str()); -} - -} // namespace google_airbag -- cgit v1.2.1