From b934bb974afdc018252e15248455c4cc7730caa0 Mon Sep 17 00:00:00 2001 From: mmentovai Date: Fri, 22 Sep 2006 00:42:23 +0000 Subject: Handle frame pointer omission (#21), part 2: PostfixEvaluator. r=bryner. - A postfix (reverse-Polish) notation expression evaluator that we can feed stack-frame debug information into, crank, and get pointers to the calling frame from. http://groups.google.com/group/airbag-dev/browse_thread/thread/a1d6c5d1fd5fdcb9 git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@32 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/processor/postfix_evaluator-inl.h | 238 ++++++++++++++++++++++++++ src/processor/postfix_evaluator.h | 141 ++++++++++++++++ src/processor/postfix_evaluator_unittest.cc | 248 ++++++++++++++++++++++++++++ 3 files changed, 627 insertions(+) create mode 100644 src/processor/postfix_evaluator-inl.h create mode 100644 src/processor/postfix_evaluator.h create mode 100644 src/processor/postfix_evaluator_unittest.cc (limited to 'src/processor') diff --git a/src/processor/postfix_evaluator-inl.h b/src/processor/postfix_evaluator-inl.h new file mode 100644 index 00000000..a45a9a08 --- /dev/null +++ b/src/processor/postfix_evaluator-inl.h @@ -0,0 +1,238 @@ +// Copyright (C) 2006 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// postfix_evaluator-inl.h: Postfix (reverse Polish) notation expression +// evaluator. +// +// Documentation in postfix_evaluator.h. 
+//
+// Author: Mark Mentovai
+
+#ifndef PROCESSOR_POSTFIX_EVALUATOR_INL_H__
+#define PROCESSOR_POSTFIX_EVALUATOR_INL_H__
+
+
+#include <cstdio>
+#include <sstream>
+
+#include "processor/postfix_evaluator.h"
+#include "processor/memory_region.h"
+
+namespace google_airbag {
+
+using std::istringstream;
+using std::ostringstream;
+
+
+// A small class used in Evaluate to make sure to clean up the stack
+// before returning, whether evaluation succeeded or failed.  It does not
+// own |stack|; it only calls clear() on it from its destructor.
+class AutoStackClearer {
+ public:
+  explicit AutoStackClearer(vector<string> *stack) : stack_(stack) {}
+  ~AutoStackClearer() { stack_->clear(); }
+
+ private:
+  vector<string> *stack_;
+};
+
+
+// Evaluates |expression|, a whitespace-delimited postfix program.  Returns
+// true only if every operation succeeded and the stack is empty once the
+// last token has been processed.  Returns false on any failure, including
+// division or modulus by zero, a dereference without a MemoryRegion, and an
+// assignment or identifier lookup without a dictionary.
+template<typename ValueType>
+bool PostfixEvaluator<ValueType>::Evaluate(const string &expression) {
+  // Ensure that the stack is cleared before returning.
+  AutoStackClearer clearer(&stack_);
+
+  // Tokenize, splitting on whitespace.
+  istringstream stream(expression);
+  string token;
+  while (stream >> token) {
+    // There are enough binary operations that do exactly the same thing
+    // (other than the specific operation, of course) that it makes sense
+    // to share as much code as possible.
+    enum BinaryOperation {
+      BINARY_OP_NONE = 0,
+      BINARY_OP_ADD,
+      BINARY_OP_SUBTRACT,
+      BINARY_OP_MULTIPLY,
+      BINARY_OP_DIVIDE_QUOTIENT,
+      BINARY_OP_DIVIDE_MODULUS
+    };
+
+    BinaryOperation operation = BINARY_OP_NONE;
+    if (token == "+")
+      operation = BINARY_OP_ADD;
+    else if (token == "-")
+      operation = BINARY_OP_SUBTRACT;
+    else if (token == "*")
+      operation = BINARY_OP_MULTIPLY;
+    else if (token == "/")
+      operation = BINARY_OP_DIVIDE_QUOTIENT;
+    else if (token == "%")
+      operation = BINARY_OP_DIVIDE_MODULUS;
+
+    if (operation != BINARY_OP_NONE) {
+      // Get the operands.
+      ValueType operand1, operand2;
+      if (!PopValues(&operand1, &operand2))
+        return false;
+
+      // Perform the operation.
+      ValueType result;
+      switch (operation) {
+        case BINARY_OP_ADD:
+          result = operand1 + operand2;
+          break;
+        case BINARY_OP_SUBTRACT:
+          result = operand1 - operand2;
+          break;
+        case BINARY_OP_MULTIPLY:
+          result = operand1 * operand2;
+          break;
+        case BINARY_OP_DIVIDE_QUOTIENT:
+          // Dividing by zero is undefined for integer types.  The
+          // expression comes from outside the program, so treat a zero
+          // divisor as an evaluation failure instead of performing it.
+          if (operand2 == 0)
+            return false;
+          result = operand1 / operand2;
+          break;
+        case BINARY_OP_DIVIDE_MODULUS:
+          // Same restriction as the quotient: no modulus by zero.
+          if (operand2 == 0)
+            return false;
+          result = operand1 % operand2;
+          break;
+        case BINARY_OP_NONE:
+          // This will not happen, but compilers will want a default or
+          // BINARY_OP_NONE case.
+          return false;
+          break;
+      }
+
+      // Save the result.
+      PushValue(result);
+    } else if (token == "^") {
+      // ^ for unary dereference.  Can't dereference without memory.
+      if (!memory_)
+        return false;
+
+      ValueType address;
+      if (!PopValue(&address))
+        return false;
+
+      ValueType value;
+      if (!memory_->GetMemoryAtAddress(address, &value))
+        return false;
+
+      PushValue(value);
+    } else if (token == "=") {
+      // = for assignment.
+      ValueType value;
+      if (!PopValue(&value))
+        return false;
+
+      // Assignment is only meaningful when assigning into an identifier.
+      // The identifier must name a variable, not a constant.  Variables
+      // begin with '$'.
+      string identifier;
+      if (PopValueOrIdentifier(NULL, &identifier) != POP_RESULT_IDENTIFIER)
+        return false;
+      if (identifier.empty() || identifier[0] != '$')
+        return false;
+
+      // The dictionary pointer is allowed to be NULL; there is nowhere to
+      // store the result in that case, so fail rather than dereference it.
+      if (!dictionary_)
+        return false;
+
+      (*dictionary_)[identifier] = value;
+    } else {
+      // The token is not an operator, it's a literal value or an identifier.
+      // Push it onto the stack as-is.  Use push_back instead of PushValue
+      // because PushValue pushes ValueType as a string, but token is already
+      // a string.
+      stack_.push_back(token);
+    }
+  }
+
+  // If there's anything left on the stack, it indicates incomplete execution.
+  // This is a failure case.  If the stack is empty, evaluation was complete
+  // and successful.
+  return stack_.empty();
+}
+
+
+// Pops the topmost stack entry and classifies it.  If the whole entry
+// parses as a ValueType it is a literal: |value| (when non-NULL) receives
+// it and POP_RESULT_VALUE is returned.  Otherwise |identifier| (when
+// non-NULL) receives the raw token and POP_RESULT_IDENTIFIER is returned.
+// Returns POP_RESULT_FAIL without touching either argument if the stack is
+// empty.
+template<typename ValueType>
+typename PostfixEvaluator<ValueType>::PopResult
+PostfixEvaluator<ValueType>::PopValueOrIdentifier(
+    ValueType *value, string *identifier) {
+  // There needs to be at least one element on the stack to pop.
+  if (stack_.empty())
+    return POP_RESULT_FAIL;
+
+  string token = stack_.back();
+  stack_.pop_back();
+
+  // First, try to treat the value as a literal.  In order for this to
+  // succeed, the entire string must be parseable as ValueType.  If this
+  // isn't possible, it can't be a literal, so treat it as an identifier
+  // instead.
+  istringstream token_stream(token);
+  ValueType literal;
+  if (token_stream >> literal && token_stream.peek() == EOF) {
+    if (value) {
+      *value = literal;
+    }
+    return POP_RESULT_VALUE;
+  } else {
+    if (identifier) {
+      *identifier = token;
+    }
+    return POP_RESULT_IDENTIFIER;
+  }
+}
+
+
+// Pops the topmost stack entry into |value|, resolving it through the
+// dictionary if it is an identifier.  Returns false if the stack is empty,
+// if there is no dictionary to resolve an identifier with, or if the
+// identifier is not present in the dictionary.
+template<typename ValueType>
+bool PostfixEvaluator<ValueType>::PopValue(ValueType *value) {
+  ValueType literal;
+  string token;
+  PopResult result;
+  if ((result = PopValueOrIdentifier(&literal, &token)) == POP_RESULT_FAIL) {
+    return false;
+  } else if (result == POP_RESULT_VALUE) {
+    // This is the easy case.
+    *value = literal;
+  } else {  // result == POP_RESULT_IDENTIFIER
+    // An identifier can't be resolved without a dictionary, and the
+    // dictionary pointer is allowed to be NULL.
+    if (!dictionary_)
+      return false;
+
+    // There was an identifier at the top of the stack.  Resolve it to a
+    // value by looking it up in the dictionary.
+    typename DictionaryType::const_iterator iterator =
+        dictionary_->find(token);
+    if (iterator == dictionary_->end()) {
+      // The identifier wasn't found in the dictionary.  Don't imply any
+      // default value, just fail.
+      return false;
+    }
+
+    *value = iterator->second;
+  }
+
+  return true;
+}
+
+
+// Pops two values in the style of PopValue.  |value2| is popped first, so
+// |value1| is the entry that was pushed earlier.  If popping |value2| fails,
+// |value1| is not popped.
+template<typename ValueType>
+bool PostfixEvaluator<ValueType>::PopValues(ValueType *value1,
+                                            ValueType *value2) {
+  return PopValue(value2) && PopValue(value1);
+}
+
+
+// Pushes |value| onto the stack in its string (operator<<) representation.
+template<typename ValueType>
+void PostfixEvaluator<ValueType>::PushValue(const ValueType &value) {
+  ostringstream token_stream;
+  token_stream << value;
+  stack_.push_back(token_stream.str());
+}
+
+
+}  // namespace google_airbag
+
+
+#endif  // PROCESSOR_POSTFIX_EVALUATOR_INL_H__
diff --git a/src/processor/postfix_evaluator.h b/src/processor/postfix_evaluator.h
new file mode 100644
index 00000000..28d8023d
--- /dev/null
+++ b/src/processor/postfix_evaluator.h
@@ -0,0 +1,141 @@
+// Copyright (C) 2006 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// postfix_evaluator.h: Postfix (reverse Polish) notation expression evaluator.
+//
+// PostfixEvaluator evaluates an expression, using the expression itself
+// in postfix (reverse Polish) notation and a dictionary mapping constants
+// and variables to their values.  The evaluator supports standard
+// arithmetic operations, assignment into variables, and when an optional
+// MemoryRegion is provided, dereferencing.  (Any unary key-to-value operation
+// may be used with a MemoryRegion implementation that returns the appropriate
+// values, but PostfixEvaluator was written with dereferencing in mind.)
+//
+// The expression language is simple.
+// Expressions are supplied as strings,
+// with operands and operators delimited by whitespace.  Operands may be
+// either literal values suitable for ValueType, or constants or variables,
+// which reference the dictionary.  The supported binary operators are +
+// (addition), - (subtraction), * (multiplication), / (quotient of division),
+// and % (modulus of division).  The unary ^ (dereference) operator is also
+// provided.  These operators allow any operand to be either a literal
+// value, constant, or variable.  Assignment (=) of any type of operand into
+// a variable is also supported.
+//
+// The dictionary is provided as a map with string keys.  Keys beginning
+// with the '$' character are treated as variables.  All other keys are
+// treated as constants.  Any results must be assigned into variables in the
+// dictionary.  These variables do not need to exist prior to calling
+// Evaluate, unless used in an expression prior to being assigned to.  The
+// internal stack state is not made available after evaluation, and any
+// values remaining on the stack are treated as evidence of incomplete
+// execution and cause the evaluator to indicate failure.
+//
+// PostfixEvaluator is intended to support evaluation of "program strings"
+// obtained from MSVC frame data debugging information in pdb files as
+// returned by the DIA APIs.
+//
+// Author: Mark Mentovai
+
+#ifndef PROCESSOR_POSTFIX_EVALUATOR_H__
+#define PROCESSOR_POSTFIX_EVALUATOR_H__
+
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "processor/memory_region.h"
+
+namespace google_airbag {
+
+using std::map;
+using std::string;
+using std::vector;
+
+class MemoryRegion;
+
+template<typename ValueType>
+class PostfixEvaluator {
+ public:
+  typedef map<string, ValueType> DictionaryType;
+
+  // Create a PostfixEvaluator object that may be used (with Evaluate) on
+  // one or more expressions.  PostfixEvaluator does not take ownership of
+  // either argument.  |memory| may be NULL, in which case dereferencing
+  // (^) will not be supported.  |dictionary| may be NULL, but evaluation
+  // will fail in that case unless set_dictionary is used before calling
+  // Evaluate.
+  PostfixEvaluator(DictionaryType *dictionary, MemoryRegion *memory)
+      : dictionary_(dictionary), memory_(memory), stack_() {}
+
+  // Evaluate the expression.  The results of execution will be stored
+  // in one (or more) variables in the dictionary.  Returns false if any
+  // failures occur during execution, leaving variables in the dictionary
+  // in an indeterminate state.
+  bool Evaluate(const string &expression);
+
+  DictionaryType* dictionary() const { return dictionary_; }
+
+  // Reset the dictionary.  PostfixEvaluator does not take ownership.
+  void set_dictionary(DictionaryType *dictionary) { dictionary_ = dictionary; }
+
+ private:
+  // Return values for PopValueOrIdentifier
+  enum PopResult {
+    POP_RESULT_FAIL = 0,
+    POP_RESULT_VALUE,
+    POP_RESULT_IDENTIFIER
+  };
+
+  // Retrieves the topmost literal value, constant, or variable from the
+  // stack.  Returns POP_RESULT_VALUE if the topmost entry is a literal
+  // value, and sets |value| accordingly.  Returns POP_RESULT_IDENTIFIER
+  // if the topmost entry is a constant or variable identifier, and sets
+  // |identifier| accordingly.  Returns POP_RESULT_FAIL on failure, such
+  // as when the stack is empty.
+  PopResult PopValueOrIdentifier(ValueType *value, string *identifier);
+
+  // Retrieves the topmost value on the stack.  If the topmost entry is
+  // an identifier, the dictionary is queried for the identifier's value.
+  // Returns false on failure, such as when the stack is empty or when
+  // a nonexistent identifier is named.
+  bool PopValue(ValueType *value);
+
+  // Retrieves the top two values on the stack, in the style of PopValue.
+  // value2 is popped before value1, so that value1 corresponds to the
+  // entry that was pushed prior to value2.  Returns false on failure.
+  bool PopValues(ValueType *value1, ValueType *value2);
+
+  // Pushes a new value onto the stack.
+  void PushValue(const ValueType &value);
+
+  // The dictionary mapping constant and variable identifiers (strings) to
+  // values.  Keys beginning with '$' are treated as variable names, and
+  // PostfixEvaluator is free to create and modify these keys.  Weak pointer.
+  DictionaryType *dictionary_;
+
+  // If non-NULL, the MemoryRegion used for dereference (^) operations.
+  // If NULL, dereferencing is unsupported and will fail.  Weak pointer.
+  MemoryRegion *memory_;
+
+  // The stack contains state information as execution progresses.  Values
+  // are pushed on to it as the expression string is read and as operations
+  // yield values; values are popped when used as operands to operators.
+  vector<string> stack_;
+};
+
+}  // namespace google_airbag
+
+
+#endif  // PROCESSOR_POSTFIX_EVALUATOR_H__
diff --git a/src/processor/postfix_evaluator_unittest.cc b/src/processor/postfix_evaluator_unittest.cc
new file mode 100644
index 00000000..5db2e27c
--- /dev/null
+++ b/src/processor/postfix_evaluator_unittest.cc
@@ -0,0 +1,248 @@
+// Copyright (C) 2006 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// postfix_evaluator_unittest.cc: Unit tests for PostfixEvaluator.
+//
+// Author: Mark Mentovai
+
+#include <cstdio>
+#include <map>
+#include <string>
+
+#include "google/airbag_types.h"
+#include "processor/memory_region.h"
+#include "processor/postfix_evaluator-inl.h"
+
+
+using std::map;
+using std::string;
+using google_airbag::MemoryRegion;
+using google_airbag::PostfixEvaluator;
+
+
+// FakeMemoryRegion is used to test PostfixEvaluator's dereference (^)
+// operator.  The result of dereferencing a value is one greater than
+// the value.
+class FakeMemoryRegion : public MemoryRegion {
+ public:
+  virtual u_int64_t GetBase() { return 0; }
+  virtual u_int32_t GetSize() { return 0; }
+  virtual bool GetMemoryAtAddress(u_int64_t address, u_int8_t  *value)
+    { *value = address + 1; return true; }
+  virtual bool GetMemoryAtAddress(u_int64_t address, u_int16_t *value)
+    { *value = address + 1; return true; }
+  virtual bool GetMemoryAtAddress(u_int64_t address, u_int32_t *value)
+    { *value = address + 1; return true; }
+  virtual bool GetMemoryAtAddress(u_int64_t address, u_int64_t *value)
+    { *value = address + 1; return true; }
+};
+
+
+struct EvaluateTest {
+  // Expression passed to PostfixEvaluator::Evaluate.
+  const string expression;
+
+  // True if the expression is expected to be evaluable, false if evaluation
+  // is expected to fail.
+  bool evaluable;
+};
+
+
+struct EvaluateTestSet {
+  // The dictionary used for all tests in the set.
+  map<string, unsigned int> *dictionary;
+
+  // The list of tests.
+  const EvaluateTest *evaluate_tests;
+
+  // The number of tests.
+  unsigned int evaluate_test_count;
+
+  // Identifiers and their expected values upon completion of the Evaluate
+  // tests in the set.
+  map<string, unsigned int> *validate_data;
+};
+
+
+bool RunTests() {
+  // The first test set checks the basic operations and failure modes.
+  map<string, unsigned int> dictionary_0;
+  const EvaluateTest evaluate_tests_0[] = {
+    { "$rAdd 2 2 + =",      true },   // $rAdd = 2 + 2 = 4
+    { "$rAdd $rAdd 2 + =",  true },   // $rAdd = $rAdd + 2 = 6
+    { "$rAdd 2 $rAdd + =",  true },   // $rAdd = 2 + $rAdd = 8
+    { "99",                 false },  // put some junk on the stack...
+    { "$rAdd2 2 2 + =",     true },   // ...and make sure things still work
+    { "$rAdd2\t2\n2 + =",   true },   // same but with different whitespace
+    { "$rAdd2 2 2 + = ",    true },   // trailing whitespace
+    { " $rAdd2 2 2 + =",    true },   // leading whitespace
+    { "$rAdd2  2 2 +   =",  true },   // extra whitespace
+    { "$T0 2 = +",          false },  // too few operands for add
+    { "2 + =",              false },  // too few operands for add
+    { "2 +",                false },  // too few operands for add
+    { "+",                  false },  // too few operands for add
+    { "^",                  false },  // too few operands for dereference
+    { "=",                  false },  // too few operands for assignment
+    { "2 =",                false },  // too few operands for assignment
+    { "2 2 + =",            false },  // too few operands for assignment
+    { "2 2 =",              false },  // can't assign into a literal
+    { "k 2 =",              false },  // can't assign into a constant
+    { "2",                  false },  // leftover data on stack
+    { "2 2 +",              false },  // leftover data on stack
+    { "$rAdd",              false },  // leftover data on stack
+    { "0 $T1 0 0 + =",      false },  // leftover data on stack
+    { "$T2 $T2 2 + =",      false },  // can't operate on an undefined value
+    { "$rMul 9 6 * =",      true },   // $rMul = 9 * 6 = 54
+    { "$rSub 9 6 - =",      true },   // $rSub = 9 - 6 = 3
+    { "$rDivQ 9 6 / =",     true },   // $rDivQ = 9 / 6 = 1
+    { "$rDivM 9 6 % =",     true },   // $rDivM = 9 % 6 = 3
+    { "$rDeref 9 ^ =",      true }    // $rDeref = ^9 = 10 (FakeMemoryRegion)
+  };
+  map<string, unsigned int> validate_data_0;
+  validate_data_0["$rAdd"]   = 8;
+  validate_data_0["$rAdd2"]  = 4;
+  validate_data_0["$rSub"]   = 3;
+  validate_data_0["$rMul"]   = 54;
+  validate_data_0["$rDivQ"]  = 1;
+  validate_data_0["$rDivM"]  = 3;
+  validate_data_0["$rDeref"] = 10;
+
+  // The second test set simulates a couple of MSVC program strings.
+  // The data is fudged a little bit because the tests use FakeMemoryRegion
+  // instead of a real stack snapshot, but the program strings are real and
+  // the implementation doesn't know or care that the data is not real.
+  map<string, unsigned int> dictionary_1;
+  dictionary_1["$ebp"] = 0xbfff0010;
+  dictionary_1["$eip"] = 0x10000000;
+  dictionary_1["$esp"] = 0xbfff0000;
+  dictionary_1[".cbSavedRegs"] = 4;
+  dictionary_1[".cbParams"] = 4;
+  dictionary_1[".raSearchStart"] = 0xbfff0020;
+  const EvaluateTest evaluate_tests_1[] = {
+    { "$T0 $ebp = $eip $T0 4 + ^ = $ebp $T0 ^ = $esp $T0 8 + = "
+      "$L $T0 .cbSavedRegs - = $P $T0 8 + .cbParams + =", true },
+    // Intermediate state: $T0  = 0xbfff0010, $eip = 0xbfff0015,
+    //                     $ebp = 0xbfff0011, $esp = 0xbfff0018,
+    //                     $L   = 0xbfff000c, $P   = 0xbfff001c
+    { "$T0 $ebp = $eip $T0 4 + ^ = $ebp $T0 ^ = $esp $T0 8 + = "
+      "$L $T0 .cbSavedRegs - = $P $T0 8 + .cbParams + = $ebx $T0 28 - ^ =",
+      true },
+    // Intermediate state: $T0  = 0xbfff0011, $eip = 0xbfff0016,
+    //                     $ebp = 0xbfff0012, $esp = 0xbfff0019,
+    //                     $L   = 0xbfff000d, $P   = 0xbfff001d,
+    //                     $ebx = 0xbffefff6
+    { "$T0 $ebp = $T2 $esp = $T1 .raSearchStart = $eip $T1 ^ = $ebp $T0 = "
+      "$esp $T1 4 + = $L $T0 .cbSavedRegs - = $P $T1 4 + .cbParams + = "
+      "$ebx $T0 28 - ^ =",
+      true }
+  };
+  map<string, unsigned int> validate_data_1;
+  validate_data_1["$T0"]  = 0xbfff0012;
+  validate_data_1["$T1"]  = 0xbfff0020;
+  validate_data_1["$T2"]  = 0xbfff0019;
+  validate_data_1["$eip"] = 0xbfff0021;
+  validate_data_1["$ebp"] = 0xbfff0012;
+  validate_data_1["$esp"] = 0xbfff0024;
+  validate_data_1["$L"]   = 0xbfff000e;
+  validate_data_1["$P"]   = 0xbfff0028;
+  validate_data_1["$ebx"] = 0xbffefff7;
+  validate_data_1[".cbSavedRegs"] = 4;
+  validate_data_1[".cbParams"] = 4;
+
+  EvaluateTestSet evaluate_test_sets[] = {
+    { &dictionary_0, evaluate_tests_0,
+          sizeof(evaluate_tests_0) / sizeof(EvaluateTest), &validate_data_0 },
+    { &dictionary_1, evaluate_tests_1,
+          sizeof(evaluate_tests_1) / sizeof(EvaluateTest), &validate_data_1 },
+  };
+
+  unsigned int evaluate_test_set_count = sizeof(evaluate_test_sets) /
+                                         sizeof(EvaluateTestSet);
+
+  FakeMemoryRegion fake_memory;
+  PostfixEvaluator<unsigned int> postfix_evaluator =
+      PostfixEvaluator<unsigned int>(NULL, &fake_memory);
+
+  for (unsigned int evaluate_test_set_index = 0;
+       evaluate_test_set_index < evaluate_test_set_count;
+       ++evaluate_test_set_index) {
+    EvaluateTestSet *evaluate_test_set =
+        &evaluate_test_sets[evaluate_test_set_index];
+    const EvaluateTest *evaluate_tests = evaluate_test_set->evaluate_tests;
+    unsigned int evaluate_test_count = evaluate_test_set->evaluate_test_count;
+
+    // The same dictionary will be used for each test in the set.  Earlier
+    // tests can affect the state of the dictionary for later tests.
+    postfix_evaluator.set_dictionary(evaluate_test_set->dictionary);
+
+    for (unsigned int evaluate_test_index = 0;
+         evaluate_test_index < evaluate_test_count;
+         ++evaluate_test_index) {
+      const EvaluateTest *evaluate_test = &evaluate_tests[evaluate_test_index];
+
+      // Do the test.
+      bool result = postfix_evaluator.Evaluate(evaluate_test->expression);
+      if (result != evaluate_test->evaluable) {
+        fprintf(stderr, "FAIL: evaluate set %u/%u, test %u/%u, "
+                        "expression \"%s\", expected %s, observed %s\n",
+                evaluate_test_set_index, evaluate_test_set_count,
+                evaluate_test_index, evaluate_test_count,
+                evaluate_test->expression.c_str(),
+                evaluate_test->evaluable ? "evaluable" : "not evaluable",
+                result ? "evaluated" : "not evaluated");
+        return false;
+      }
+    }
+
+    // Validate the results.
+    for (map<string, unsigned int>::const_iterator validate_iterator =
+            evaluate_test_set->validate_data->begin();
+        validate_iterator != evaluate_test_set->validate_data->end();
+        ++validate_iterator) {
+      const string &identifier = validate_iterator->first;
+      unsigned int expected_value = validate_iterator->second;
+
+      map<string, unsigned int>::const_iterator dictionary_iterator =
+          evaluate_test_set->dictionary->find(identifier);
+
+      // The identifier must exist in the dictionary.
+      if (dictionary_iterator == evaluate_test_set->dictionary->end()) {
+        fprintf(stderr, "FAIL: evaluate test set %u/%u, "
+                        "validate identifier \"%s\", "
+                        "expected %u, observed not found\n",
+                evaluate_test_set_index, evaluate_test_set_count,
+                identifier.c_str(), expected_value);
+        return false;
+      }
+
+      // The value in the dictionary must be the same as the expected value.
+      unsigned int observed_value = dictionary_iterator->second;
+      if (expected_value != observed_value) {
+        fprintf(stderr, "FAIL: evaluate test set %u/%u, "
+                        "validate identifier \"%s\", "
+                        "expected %u, observed %u\n",
+                evaluate_test_set_index, evaluate_test_set_count,
+                identifier.c_str(), expected_value, observed_value);
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+
+int main(int argc, char **argv) {
+  return RunTests() ? 0 : 1;
+}
-- 
cgit v1.2.1