diff options
Diffstat (limited to 'src/processor/stackwalker_x86.cc')
-rw-r--r-- | src/processor/stackwalker_x86.cc | 293 |
1 files changed, 241 insertions, 52 deletions
diff --git a/src/processor/stackwalker_x86.cc b/src/processor/stackwalker_x86.cc index a5819c86..b865ac7d 100644 --- a/src/processor/stackwalker_x86.cc +++ b/src/processor/stackwalker_x86.cc @@ -35,7 +35,10 @@ #include "processor/stackwalker_x86.h" +#include "google/call_stack.h" +#include "google/stack_frame_cpu.h" #include "processor/minidump.h" +#include "processor/postfix_evaluator-inl.h" namespace google_airbag { @@ -54,63 +57,249 @@ StackwalkerX86::StackwalkerX86(const MDRawContextX86 *context, } -bool StackwalkerX86::GetContextFrame(StackFrame *frame) { - if (!context_ || !memory_ || !frame) - return false; +StackFrame* StackwalkerX86::GetContextFrame() { + if (!context_ || !memory_) + return NULL; - // The frame and instruction pointers are stored directly in registers, - // so pull them straight out of the CPU context structure. - frame->frame_pointer = context_->ebp; - frame->instruction = context_->eip; + StackFrameX86 *frame = new StackFrameX86(); - return true; + // The instruction pointer is stored directly in a register, so pull it + // straight out of the CPU context structure. + frame->context = *context_; + frame->context_validity = StackFrameX86::CONTEXT_VALID_ALL; + frame->instruction = frame->context.eip; + + return frame; } -bool StackwalkerX86::GetCallerFrame(StackFrame *frame, - const StackFrames *walked_frames) { - if (!memory_ || !frame || !walked_frames) - return false; - - // The frame and instruction pointers for previous frames are saved on the - // stack. The typical x86 calling convention, when frame pointers are - // present, is for the calling procedure to use CALL, which pushes the - // return address onto the stack and sets the instruction pointer (%eip) - // to the entry point of the called routine. The called routine's then - // PUSHes the calling routine's frame pointer (%ebp) onto the stack before - // copying the stack pointer (%esp) to the frame pointer (%ebp). Therefore, - // the calling procedure's frame pointer is always available by - // dereferencing the called procedure's frame pointer, and the return - // address is always available at the memory location immediately above - // the address pointed to by the called procedure's frame pointer. - - // If there is no frame pointer, determining the layout of the stack is - // considerably more difficult, requiring debugging information. This - // stackwalker doesn't attempt to solve that problem (at this point). - - u_int32_t last_frame_pointer = walked_frames->back().frame_pointer; - - // Don't pass frame.frame_pointer or frame.instruction directly - // ReadMemory, because their types are too wide (64-bit), and we - // specifically want to read 32-bit quantities for both. - u_int32_t frame_pointer; - if (!memory_->GetMemoryAtAddress(last_frame_pointer, &frame_pointer)) - return false; - - // A caller frame must reside higher in memory than its callee frames. - // Anything else is an error, or an indication that we've reached the - // end of the stack. - if (frame_pointer <= last_frame_pointer) - return false; - - u_int32_t instruction; - if (!memory_->GetMemoryAtAddress(last_frame_pointer + 4, &instruction)) - return false; - - frame->frame_pointer = frame_pointer; - frame->instruction = instruction; - - return true; +StackFrame* StackwalkerX86::GetCallerFrame(const CallStack *stack) { + if (!memory_ || !stack) + return NULL; + + StackFrameX86 *last_frame = static_cast<StackFrameX86*>( + stack->frames()->back()); + StackFrameInfo *last_frame_info = &stack_frame_info_.back(); + + // This stackwalker sets each frame's %esp to its value immediately prior + // to the CALL into the callee. This means that %esp points to the last + // callee argument pushed onto the stack, which may not be where %esp points + // after the callee returns. Specifically, the value is correct for the + // cdecl calling convention, but not other conventions. The cdecl + // convention requires a caller to pop its callee's arguments from the + // stack after the callee returns. This is usually accomplished by adding + // the known size of the arguments to %esp. Other calling conventions, + // including stdcall, thiscall, and fastcall, require the callee to pop any + // parameters stored on the stack before returning. This is usually + // accomplished by using the RET n instruction, which pops n bytes off + // the stack after popping the return address. + // + // Because each frame's %esp will point to a location on the stack after + // callee arguments have been PUSHed, when locating things in a stack frame + // relative to %esp, the size of the arguments to the callee need to be + // taken into account. This seems a little bit unclean, but it's better + // than the alternative, which would need to take these same things into + // account, but only for cdecl functions. With this implementation, we get + // to be agnostic about each function's calling convention. Furthermore, + // this is how Windows debugging tools work, so it means that the %esp + // values produced by this stackwalker directly correspond to the %esp + // values you'll see there. + // + // If the last frame has no callee (because it's the context frame), just + // set the callee parameter size to 0: the stack pointer can't point to + // callee arguments because there's no callee. This is correct as long + // as the context wasn't captured while arguments were being pushed for + // a function call. Note that there may be functions whose parameter sizes + // are unknown, 0 is also used in that case. When that happens, it should + // be possible to walk to the next frame without reference to %esp. + + int frames_already_walked = stack_frame_info_.size(); + u_int32_t last_frame_callee_parameter_size = 0; + if (frames_already_walked >= 2) { + StackFrameInfo *last_frame_callee_info = + &stack_frame_info_[frames_already_walked - 2]; + if (last_frame_callee_info->valid & StackFrameInfo::VALID_PARAMETER_SIZE) { + last_frame_callee_parameter_size = + last_frame_callee_info->parameter_size; + } + } + + // Set up the dictionary for the PostfixEvaluator. %ebp and %esp are used + // in each program string, and their previous values are known, so set them + // here. .cbCalleeParams is an Airbag extension that allows us to use + // the PostfixEvaluator engine when certain types of debugging information + // are present without having to write the constants into the program string + // as literals. + PostfixEvaluator<u_int32_t>::DictionaryType dictionary; + dictionary["$ebp"] = last_frame->context.ebp; + dictionary["$esp"] = last_frame->context.esp; + dictionary[".cbCalleeParams"] = last_frame_callee_parameter_size; + + if (last_frame_info->valid == StackFrameInfo::VALID_ALL) { + // FPO debugging data is available. Initialize constants. + dictionary[".cbSavedRegs"] = last_frame_info->saved_register_size; + dictionary[".cbLocals"] = last_frame_info->local_size; + dictionary[".raSearchStart"] = last_frame->context.esp + + last_frame_callee_parameter_size + + last_frame_info->local_size + + last_frame_info->saved_register_size; + } + if (last_frame_info->valid & StackFrameInfo::VALID_PARAMETER_SIZE) { + // This is treated separately because it can either come from FPO data or + // from other debugging data. + dictionary[".cbParams"] = last_frame_info->parameter_size; + } + + // Decide what type of program string to use. The program string is in + // postfix notation and will be passed to PostfixEvaluator::Evaluate. + // Given the dictionary and the program string, it is possible to compute + // the return address and the values of other registers in the calling + // function. + string program_string; + if (last_frame_info->valid == StackFrameInfo::VALID_ALL) { + // FPO data available. + if (!last_frame_info->program_string.empty()) { + // The FPO data has its own program string, which will tell us how to + // get to the caller frame, and may even fill in the values of + // nonvolatile registers and provide pointers to local variables and + // parameters. + program_string = last_frame_info->program_string; + } else if (last_frame_info->allocates_base_pointer) { + // The function corresponding to the last frame doesn't use the frame + // pointer for conventional purposes, but it does allocate a new + // frame pointer and use it for its own purposes. Its callee's + // information is still accessed relative to %esp, and the previous + // value of %ebp can be recovered from a location in its stack frame, + // within the saved-register area. + // + // Functions that fall into this category use the %ebp register for + // a purpose other than the frame pointer. They restore the caller's + // %ebp before returning. These functions create their stack frame + // after a CALL by decrementing the stack pointer in an amount + // sufficient to store local variables, and then PUSHing saved + // registers onto the stack. Arguments to a callee function, if any, + // are PUSHed after that. Walking up to the caller, therefore, + // can be done solely with calculations relative to the stack pointer + // (%esp). The return address is recovered from the memory location + // above the known sizes of the callee's parameters, saved registers, + // and locals. The caller's stack pointer (the value of %esp when + // the caller executed CALL) is the location immediately above the + // saved return address. The saved value of %ebp to be restored for + // the caller is at a known location in the saved-register area of + // the stack frame. + // + // %eip_new = *(%esp_old + callee_params + saved_regs + locals) + // %ebp_new = *(%esp_old + callee_params + saved_regs - 8) + // %esp_new = %esp_old + callee_params + saved_regs + locals + 4 + program_string = "$eip .raSearchStart ^ = " + "$ebp $esp .cbCalleeParams + .cbSavedRegs + 8 - ^ = " + "$esp .raSearchStart 4 + ="; + } else { + // The function corresponding to the last frame doesn't use %ebp at + // all. The callee frame is located relative to %esp. %ebp is reset + // to itself only to cause it to appear to have been set in + // dictionary_validity. + // + // The called procedure's instruction pointer and stack pointer are + // recovered in the same way as the case above, except that no + // frame pointer (%ebp) is used at all, so it is not saved anywhere + // in the callee's stack frame and does not need to be recovered. + // Because %ebp wasn't used in the callee, whatever value it has + // is the value that it had in the caller, so it can be carried + // straight through without bringing its validity into question. + // + // %eip_new = *(%esp_old + callee_params + saved_regs + locals) + // %esp_new = %esp_old + callee_params + saved_regs + locals + 4 + // %ebp_new = %ebp_old + program_string = "$eip .raSearchStart ^ = " + "$esp .raSearchStart 4 + = " + "$ebp $ebp ="; + } + } else { + // No FPO information is available for the last frame. Assume that the + // standard %ebp-using x86 calling convention is in use. + // + // The typical x86 calling convention, when frame pointers are present, + // is for the calling procedure to use CALL, which pushes the return + // address onto the stack and sets the instruction pointer (%eip) to + // the entry point of the called routine. The called routine then + // PUSHes the calling routine's frame pointer (%ebp) onto the stack + // before copying the stack pointer (%esp) to the frame pointer (%ebp). + // Therefore, the calling procedure's frame pointer is always available + // by dereferencing the called procedure's frame pointer, and the return + // address is always available at the memory location immediately above + // the address pointed to by the called procedure's frame pointer. The + // calling procedure's stack pointer (%esp) is 8 higher than the value + // of the called procedure's frame pointer at the time the calling + // procedure made the CALL: 4 bytes for the return address pushed by the + // CALL itself, and 4 bytes for the callee's PUSH of the caller's frame + // pointer. + // + // %eip_new = *(%ebp_old + 4) + // %esp_new = %ebp_old + 8 + // %ebp_new = *(%ebp_old) + program_string = "$eip $ebp 4 + ^ = " + "$esp $ebp 8 + = " + "$ebp $ebp ^ ="; + } + + // Now crank it out, making sure that the program string set the three + // required variables. + PostfixEvaluator<u_int32_t> evaluator = + PostfixEvaluator<u_int32_t>(&dictionary, memory_); + PostfixEvaluator<u_int32_t>::DictionaryValidityType dictionary_validity; + if (!evaluator.Evaluate(program_string, &dictionary_validity) || + dictionary_validity.find("$eip") == dictionary_validity.end() || + dictionary_validity.find("$esp") == dictionary_validity.end() || + dictionary_validity.find("$ebp") == dictionary_validity.end()) { + return NULL; + } + + // Treat an instruction address of 0 as end-of-stack. Treat incorrect stack + // direction as end-of-stack to enforce progress and avoid infinite loops. + if (dictionary["$eip"] == 0 || + dictionary["$esp"] <= last_frame->context.esp) { + return NULL; + } + + // Create a new stack frame (ownership will be transferred to the caller) + // and fill it in. + StackFrameX86 *frame = new StackFrameX86(); + + frame->context = last_frame->context; + frame->context.eip = dictionary["$eip"]; + frame->context.esp = dictionary["$esp"]; + frame->context.ebp = dictionary["$ebp"]; + frame->context_validity = StackFrameX86::CONTEXT_VALID_EIP | + StackFrameX86::CONTEXT_VALID_ESP | + StackFrameX86::CONTEXT_VALID_EBP; + + // These are nonvolatile (callee-save) registers, and the program string + // may have filled them in. + if (dictionary_validity.find("$ebx") == dictionary_validity.end()) { + frame->context.ebx = dictionary["$ebx"]; + frame->context_validity |= StackFrameX86::CONTEXT_VALID_EBX; + } + if (dictionary_validity.find("$esi") == dictionary_validity.end()) { + frame->context.esi = dictionary["$esi"]; + frame->context_validity |= StackFrameX86::CONTEXT_VALID_ESI; + } + if (dictionary_validity.find("$edi") == dictionary_validity.end()) { + frame->context.edi = dictionary["$edi"]; + frame->context_validity |= StackFrameX86::CONTEXT_VALID_EDI; + } + + // frame->context.eip is the return address, which is one instruction + // past the CALL that caused us to arrive at the callee. Set + // frame->instruction to one less than that. This won't reference the + // beginning of the CALL instruction, but it's guaranteed to be within the + // CALL, which is sufficient to get the source line information to match up + // with the line that contains a function call. Callers that require the + // exact return address value may access the context.eip field of + // StackFrameX86. + frame->instruction = frame->context.eip - 1; + + return frame; } |