diff options
author | Mike Wittman <wittman@chromium.org> | 2017-11-29 13:29:37 -0800 |
---|---|---|
committer | Mark Mentovai <mark@chromium.org> | 2017-11-29 21:33:23 +0000 |
commit | b1226959a25b6a5311801d6f204b088c706e7c25 (patch) | |
tree | 3dfc65dab8bb0dcd929748873de179ce259030f5 /src/processor | |
parent | Update test data for identical-code-folded symbol changes (diff) | |
download | breakpad-b1226959a25b6a5311801d6f204b088c706e7c25.tar.xz |
Add optional field indicating multiple symbols at an address
Adds an optional 'm' as the first field in FUNC and PUBLIC records
to indicate that the address corresponds to more than one symbol.
The new field is controlled by a command line flag for now, to give
symbol file users a chance to update.
Also reduces the number of IDiaSymbols retained in memory to one per
address. This reduces memory consumption by 8% when processing
chrome.dll.pdb.
Updates the processor to parse the new optional field.
Bug: google-breakpad:751
Change-Id: I6503edaf057312d21a1d63d9c84e5a4fa019dc46
Reviewed-on: https://chromium-review.googlesource.com/773418
Reviewed-by: Mark Mentovai <mark@chromium.org>
Diffstat (limited to 'src/processor')
-rw-r--r-- | src/processor/basic_source_line_resolver.cc | 85 | ||||
-rw-r--r-- | src/processor/basic_source_line_resolver_types.h | 12 | ||||
-rw-r--r-- | src/processor/basic_source_line_resolver_unittest.cc | 149 | ||||
-rw-r--r-- | src/processor/source_line_resolver_base_types.h | 17 |
4 files changed, 190 insertions, 73 deletions
diff --git a/src/processor/basic_source_line_resolver.cc b/src/processor/basic_source_line_resolver.cc index aa66e159..c4aa949c 100644 --- a/src/processor/basic_source_line_resolver.cc +++ b/src/processor/basic_source_line_resolver.cc @@ -62,6 +62,42 @@ namespace google_breakpad { #define strtoull _strtoui64 #endif +namespace { + +// Utility function to tokenize given the presence of an optional initial +// field. In this case, optional_field is the expected string for the optional +// field, and max_tokens is the maximum number of tokens including the optional +// field. Refer to the documentation for Tokenize for descriptions of the other +// arguments. +bool TokenizeWithOptionalField(char *line, + const char *optional_field, + const char *separators, + int max_tokens, + vector<char*> *tokens) { + // First tokenize assuming the optional field is not present. If we then see + // the optional field, additionally tokenize the last token into two tokens. + if (!Tokenize(line, separators, max_tokens - 1, tokens)) { + return false; + } + + if (strcmp(tokens->front(), optional_field) == 0) { + // The optional field is present. Split the last token in two to recover the + // field prior to the last. + vector<char*> last_tokens; + if (!Tokenize(tokens->back(), separators, 2, &last_tokens)) { + return false; + } + // Replace the previous last token with the two new tokens. 
+ tokens->pop_back(); + tokens->push_back(last_tokens[0]); + tokens->push_back(last_tokens[1]); + } + + return true; +} + +} // namespace + static const char *kWhitespace = " \r\n"; static const int kMaxErrorsPrinted = 5; static const int kMaxErrorsBeforeBailing = 100; @@ -323,13 +359,14 @@ bool BasicSourceLineResolver::Module::ParseFile(char *file_line) { BasicSourceLineResolver::Function* BasicSourceLineResolver::Module::ParseFunction(char *function_line) { + bool is_multiple; uint64_t address; uint64_t size; long stack_param_size; char *name; - if (SymbolParseHelper::ParseFunction(function_line, &address, &size, - &stack_param_size, &name)) { - return new Function(name, address, size, stack_param_size); + if (SymbolParseHelper::ParseFunction(function_line, &is_multiple, &address, + &size, &stack_param_size, &name)) { + return new Function(name, address, size, stack_param_size, is_multiple); } return NULL; } @@ -349,11 +386,12 @@ BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine( } bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) { + bool is_multiple; uint64_t address; long stack_param_size; char *name; - if (SymbolParseHelper::ParsePublicSymbol(public_line, &address, + if (SymbolParseHelper::ParsePublicSymbol(public_line, &is_multiple, &address, &stack_param_size, &name)) { // A few public symbols show up with an address of 0. 
This has been seen // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow, @@ -366,7 +404,8 @@ bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) { } linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address, - stack_param_size)); + stack_param_size, + is_multiple)); return public_symbols_.Store(address, symbol); } return false; @@ -491,36 +530,39 @@ bool SymbolParseHelper::ParseFile(char *file_line, long *index, } // static -bool SymbolParseHelper::ParseFunction(char *function_line, uint64_t *address, - uint64_t *size, long *stack_param_size, - char **name) { - // FUNC <address> <size> <stack_param_size> <name> +bool SymbolParseHelper::ParseFunction(char *function_line, bool *is_multiple, + uint64_t *address, uint64_t *size, + long *stack_param_size, char **name) { + // FUNC [<multiple>] <address> <size> <stack_param_size> <name> assert(strncmp(function_line, "FUNC ", 5) == 0); function_line += 5; // skip prefix vector<char*> tokens; - if (!Tokenize(function_line, kWhitespace, 4, &tokens)) { + if (!TokenizeWithOptionalField(function_line, "m", kWhitespace, 5, &tokens)) { return false; } + *is_multiple = strcmp(tokens[0], "m") == 0; + int next_token = *is_multiple ? 
1 : 0; + char *after_number; - *address = strtoull(tokens[0], &after_number, 16); + *address = strtoull(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *address == std::numeric_limits<unsigned long long>::max()) { return false; } - *size = strtoull(tokens[1], &after_number, 16); + *size = strtoull(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *size == std::numeric_limits<unsigned long long>::max()) { return false; } - *stack_param_size = strtol(tokens[2], &after_number, 16); + *stack_param_size = strtol(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *stack_param_size == std::numeric_limits<long>::max() || *stack_param_size < 0) { return false; } - *name = tokens[3]; + *name = tokens[next_token++]; return true; } @@ -571,32 +613,35 @@ bool SymbolParseHelper::ParseLine(char *line_line, uint64_t *address, } // static -bool SymbolParseHelper::ParsePublicSymbol(char *public_line, +bool SymbolParseHelper::ParsePublicSymbol(char *public_line, bool *is_multiple, uint64_t *address, long *stack_param_size, char **name) { - // PUBLIC <address> <stack_param_size> <name> + // PUBLIC [<multiple>] <address> <stack_param_size> <name> assert(strncmp(public_line, "PUBLIC ", 7) == 0); public_line += 7; // skip prefix vector<char*> tokens; - if (!Tokenize(public_line, kWhitespace, 3, &tokens)) { + if (!TokenizeWithOptionalField(public_line, "m", kWhitespace, 4, &tokens)) { return false; } + *is_multiple = strcmp(tokens[0], "m") == 0; + int next_token = *is_multiple ? 
1 : 0; + char *after_number; - *address = strtoull(tokens[0], &after_number, 16); + *address = strtoull(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *address == std::numeric_limits<unsigned long long>::max()) { return false; } - *stack_param_size = strtol(tokens[1], &after_number, 16); + *stack_param_size = strtol(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *stack_param_size == std::numeric_limits<long>::max() || *stack_param_size < 0) { return false; } - *name = tokens[2]; + *name = tokens[next_token++]; return true; } diff --git a/src/processor/basic_source_line_resolver_types.h b/src/processor/basic_source_line_resolver_types.h index a022bc0d..89eb57e8 100644 --- a/src/processor/basic_source_line_resolver_types.h +++ b/src/processor/basic_source_line_resolver_types.h @@ -60,11 +60,13 @@ BasicSourceLineResolver::Function : public SourceLineResolverBase::Function { Function(const string &function_name, MemAddr function_address, MemAddr code_size, - int set_parameter_size) : Base(function_name, - function_address, - code_size, - set_parameter_size), - lines() { } + int set_parameter_size, + bool is_mutiple) : Base(function_name, + function_address, + code_size, + set_parameter_size, + is_mutiple), + lines() { } RangeMap< MemAddr, linked_ptr<Line> > lines; private: typedef SourceLineResolverBase::Function Base; diff --git a/src/processor/basic_source_line_resolver_unittest.cc b/src/processor/basic_source_line_resolver_unittest.cc index 9fab8ca6..90c34172 100644 --- a/src/processor/basic_source_line_resolver_unittest.cc +++ b/src/processor/basic_source_line_resolver_unittest.cc @@ -455,16 +455,19 @@ TEST(SymbolParseHelper, ParseFileInvalid) { } // Test parsing of valid FUNC lines. 
The format is: -// FUNC <address> <size> <stack_param_size> <name> +// FUNC [<multiple>] <address> <size> <stack_param_size> <name> TEST(SymbolParseHelper, ParseFunctionValid) { + bool multiple; uint64_t address; uint64_t size; long stack_param_size; char *name; char kTestLine[] = "FUNC 1 2 3 function name"; - ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine, &address, &size, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine, &multiple, &address, + &size, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(1ULL, address); EXPECT_EQ(2ULL, size); EXPECT_EQ(3, stack_param_size); @@ -472,25 +475,41 @@ TEST(SymbolParseHelper, ParseFunctionValid) { // Test hex address, size, and param size. char kTestLine1[] = "FUNC a1 a2 a3 function name"; - ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine1, &address, &size, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine1, &multiple, &address, + &size, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(0xa1ULL, address); EXPECT_EQ(0xa2ULL, size); EXPECT_EQ(0xa3, stack_param_size); EXPECT_EQ("function name", string(name)); char kTestLine2[] = "FUNC 0 0 0 function name"; - ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine2, &address, &size, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine2, &multiple, &address, + &size, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(0ULL, address); EXPECT_EQ(0ULL, size); EXPECT_EQ(0, stack_param_size); EXPECT_EQ("function name", string(name)); + + // Test optional multiple field. 
+ char kTestLine3[] = "FUNC m a1 a2 a3 function name"; + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine3, &multiple, &address, + &size, &stack_param_size, + &name)); + EXPECT_TRUE(multiple); + EXPECT_EQ(0xa1ULL, address); + EXPECT_EQ(0xa2ULL, size); + EXPECT_EQ(0xa3, stack_param_size); + EXPECT_EQ("function name", string(name)); } // Test parsing of invalid FUNC lines. The format is: -// FUNC <address> <size> <stack_param_size> <name> +// FUNC [<multiple>] <address> <size> <stack_param_size> <name> TEST(SymbolParseHelper, ParseFunctionInvalid) { + bool multiple; uint64_t address; uint64_t size; long stack_param_size; @@ -498,36 +517,49 @@ TEST(SymbolParseHelper, ParseFunctionInvalid) { // Test missing function name. char kTestLine[] = "FUNC 1 2 3 "; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine, &multiple, &address, + &size, &stack_param_size, + &name)); // Test bad address. char kTestLine1[] = "FUNC 1z 2 3 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine1, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine1, &multiple, &address, + &size, &stack_param_size, + &name)); // Test large address. char kTestLine2[] = "FUNC 123123123123123123123123123 2 3 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine2, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine2, &multiple, &address, + &size, &stack_param_size, + &name)); // Test bad size. char kTestLine3[] = "FUNC 1 z2 3 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine3, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine3, &multiple, &address, + &size, &stack_param_size, + &name)); // Test large size. 
char kTestLine4[] = "FUNC 1 231231231231231231231231232 3 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine4, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine4, &multiple, &address, + &size, &stack_param_size, + &name)); // Test bad param size. char kTestLine5[] = "FUNC 1 2 3z function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine5, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine5, &multiple, &address, + &size, &stack_param_size, + &name)); // Test large param size. char kTestLine6[] = "FUNC 1 2 312312312312312312312312323 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine6, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine6, &multiple, &address, + &size, &stack_param_size, + &name)); // Negative param size. char kTestLine7[] = "FUNC 1 2 -5 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine7, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine7, &multiple, &address, + &size, &stack_param_size, + &name)); + // Test invalid optional field. + char kTestLine8[] = "FUNC x 1 2 5 function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine8, &multiple, &address, + &size, &stack_param_size, + &name)); } // Test parsing of valid lines. The format is: @@ -612,67 +644,96 @@ TEST(SymbolParseHelper, ParseLineInvalid) { } // Test parsing of valid PUBLIC lines. 
The format is: -// PUBLIC <address> <stack_param_size> <name> +// PUBLIC [<multiple>] <address> <stack_param_size> <name> TEST(SymbolParseHelper, ParsePublicSymbolValid) { + bool multiple; uint64_t address; long stack_param_size; char *name; char kTestLine[] = "PUBLIC 1 2 3"; - ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &address, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &multiple, + &address, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(1ULL, address); EXPECT_EQ(2, stack_param_size); EXPECT_EQ("3", string(name)); // Test hex size and address. char kTestLine1[] = "PUBLIC a1 a2 function name"; - ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &address, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &multiple, + &address, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(0xa1ULL, address); EXPECT_EQ(0xa2, stack_param_size); EXPECT_EQ("function name", string(name)); // Test 0 is a valid address. char kTestLine2[] = "PUBLIC 0 a2 function name"; - ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &address, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &multiple, + &address, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(0ULL, address); EXPECT_EQ(0xa2, stack_param_size); EXPECT_EQ("function name", string(name)); + + // Test optional multiple field. + char kTestLine3[] = "PUBLIC m a1 a2 function name"; + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &multiple, + &address, &stack_param_size, + &name)); + EXPECT_TRUE(multiple); + EXPECT_EQ(0xa1ULL, address); + EXPECT_EQ(0xa2, stack_param_size); + EXPECT_EQ("function name", string(name)); } // Test parsing of invalid PUBLIC lines. 
The format is: -// PUBLIC <address> <stack_param_size> <name> +// PUBLIC [<multiple>] <address> <stack_param_size> <name> TEST(SymbolParseHelper, ParsePublicSymbolInvalid) { + bool multiple; uint64_t address; long stack_param_size; char *name; // Test missing source function name. char kTestLine[] = "PUBLIC 1 2 "; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &multiple, + &address, &stack_param_size, + &name)); // Test bad address. char kTestLine1[] = "PUBLIC 1z 2 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &multiple, + &address, &stack_param_size, + &name)); // Test large address. char kTestLine2[] = "PUBLIC 123123123123123123123123 2 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &multiple, + &address, &stack_param_size, + &name)); // Test bad param stack size. char kTestLine3[] = "PUBLIC 1 z2 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &multiple, + &address, &stack_param_size, + &name)); // Test large param stack size. char kTestLine4[] = "PUBLIC 1 123123123123123123123123123 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine4, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine4, &multiple, + &address, &stack_param_size, + &name)); // Test negative param stack size. 
char kTestLine5[] = "PUBLIC 1 -5 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine5, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine5, &multiple, + &address, &stack_param_size, + &name)); + // Test invalid optional field. + char kTestLine6[] = "PUBLIC x 1 5 3"; + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine6, &multiple, + &address, &stack_param_size, + &name)); } } // namespace diff --git a/src/processor/source_line_resolver_base_types.h b/src/processor/source_line_resolver_base_types.h index 4a9dfb3c..ca744e00 100644 --- a/src/processor/source_line_resolver_base_types.h +++ b/src/processor/source_line_resolver_base_types.h @@ -85,9 +85,10 @@ struct SourceLineResolverBase::Function { Function(const string &function_name, MemAddr function_address, MemAddr code_size, - int set_parameter_size) + int set_parameter_size, + bool is_multiple) : name(function_name), address(function_address), size(code_size), - parameter_size(set_parameter_size) { } + parameter_size(set_parameter_size), is_multiple(is_multiple) { } string name; MemAddr address; @@ -95,16 +96,21 @@ struct SourceLineResolverBase::Function { // The size of parameters passed to this function on the stack. int32_t parameter_size; + + // If the function's instructions correspond to multiple symbols. + bool is_multiple; }; struct SourceLineResolverBase::PublicSymbol { PublicSymbol() { } PublicSymbol(const string& set_name, MemAddr set_address, - int set_parameter_size) + int set_parameter_size, + bool is_multiple) : name(set_name), address(set_address), - parameter_size(set_parameter_size) {} + parameter_size(set_parameter_size), + is_multiple(is_multiple) {} string name; MemAddr address; @@ -113,6 +119,9 @@ struct SourceLineResolverBase::PublicSymbol { // is set to the size of the parameters passed to the funciton on the // stack, if known. 
int32_t parameter_size; + + // If the function's instructions correspond to multiple symbols. + bool is_multiple; }; class SourceLineResolverBase::Module { |