From bd71bdd74252585a1d567a480fa97e19884fa3fb Mon Sep 17 00:00:00 2001 From: "ivan.penkov@gmail.com" Date: Wed, 25 Sep 2013 18:25:13 +0000 Subject: Adding stricter validation checks to various symbol parser functions. More specifically, the validation of the following record types is improved: - FILE records - FUNC records - Line record - PUBLIC records Adding unittests. Review URL: https://breakpad.appspot.com/632003 git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@1217 4c0a9323-5329-0410-9bdc-e9ce6186880e --- .../processor/basic_source_line_resolver.h | 58 +++++ src/processor/basic_source_line_resolver.cc | 269 +++++++++++++++------ .../basic_source_line_resolver_unittest.cc | 263 ++++++++++++++++++++ 3 files changed, 511 insertions(+), 79 deletions(-) (limited to 'src') diff --git a/src/google_breakpad/processor/basic_source_line_resolver.h b/src/google_breakpad/processor/basic_source_line_resolver.h index c1946ca4..6bb6d863 100644 --- a/src/google_breakpad/processor/basic_source_line_resolver.h +++ b/src/google_breakpad/processor/basic_source_line_resolver.h @@ -81,6 +81,64 @@ class BasicSourceLineResolver : public SourceLineResolverBase { void operator=(const BasicSourceLineResolver&); }; +// Helper class, containing useful methods for parsing of Breakpad symbol files. +class SymbolParseHelper { + public: + // Parses a |file_line| declaration. Returns true on success. + // Format: FILE . + // Notice, that this method modifies the input |file_line| which is why it + // can't be const. On success, , and are stored in |*index|, + // and |*filename|. No allocation is done, |*filename| simply points inside + // |file_line|. + static bool ParseFile(char *file_line, // in + long *index, // out + char **filename); // out + + // Parses a |function_line| declaration. Returns true on success. + // Format: FUNC
. + // Notice, that this method modifies the input |function_line| which is why it + // can't be const. On success,
, , , and + // are stored in |*address|, |*size|, |*stack_param_size|, and |*name|. + // No allocation is done, |*name| simply points inside |function_line|. + static bool ParseFunction(char *function_line, // in + uint64_t *address, // out + uint64_t *size, // out + long *stack_param_size, // out + char **name); // out + + // Parses a |line| declaration. Returns true on success. + // Format:
+ // Notice, that this method modifies the input |function_line| which is why + // it can't be const. On success,
, , , and + // are stored in |*address|, |*size|, |*line_number|, and + // |*source_file|. + static bool ParseLine(char *line_line, // in + uint64_t *address, // out + uint64_t *size, // out + long *line_number, // out + long *source_file); // out + + // Parses a |public_line| declaration. Returns true on success. + // Format: PUBLIC
+ // Notice, that this method modifies the input |function_line| which is why + // it can't be const. On success,
, , + // are stored in |*address|, |*stack_param_size|, and |*name|. + // No allocation is done, |*name| simply points inside |public_line|. + static bool ParsePublicSymbol(char *public_line, // in + uint64_t *address, // out + long *stack_param_size, // out + char **name); // out + + private: + // Used for success checks after strtoull and strtol. + static bool IsValidAfterNumber(char *after_number); + + // Only allow static methods. + SymbolParseHelper(); + SymbolParseHelper(const SymbolParseHelper&); + void operator=(const SymbolParseHelper&); +}; + } // namespace google_breakpad #endif // GOOGLE_BREAKPAD_PROCESSOR_BASIC_SOURCE_LINE_RESOLVER_H__ diff --git a/src/processor/basic_source_line_resolver.cc b/src/processor/basic_source_line_resolver.cc index 799045ac..62aa4138 100644 --- a/src/processor/basic_source_line_resolver.cc +++ b/src/processor/basic_source_line_resolver.cc @@ -32,12 +32,13 @@ // See basic_source_line_resolver.h and basic_source_line_resolver_types.h // for documentation. - +#include #include #include #include #include +#include #include #include #include @@ -308,100 +309,64 @@ CFIFrameInfo *BasicSourceLineResolver::Module::FindCFIFrameInfo( } bool BasicSourceLineResolver::Module::ParseFile(char *file_line) { - // FILE - file_line += 5; // skip prefix - - vector tokens; - if (!Tokenize(file_line, kWhitespace, 2, &tokens)) { - return false; - } - - int index = atoi(tokens[0]); - if (index < 0) { - return false; - } - - char *filename = tokens[1]; - if (!filename) { - return false; + long index; + char *filename; + if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) { + files_.insert(make_pair(index, string(filename))); + return true; } - - files_.insert(make_pair(index, string(filename))); - return true; + return false; } BasicSourceLineResolver::Function* BasicSourceLineResolver::Module::ParseFunction(char *function_line) { - // FUNC
- function_line += 5; // skip prefix - - vector tokens; - if (!Tokenize(function_line, kWhitespace, 4, &tokens)) { - return NULL; + uint64_t address; + uint64_t size; + long stack_param_size; + char *name; + if (SymbolParseHelper::ParseFunction(function_line, &address, &size, + &stack_param_size, &name)) { + return new Function(name, address, size, stack_param_size); } - - uint64_t address = strtoull(tokens[0], NULL, 16); - uint64_t size = strtoull(tokens[1], NULL, 16); - int stack_param_size = strtoull(tokens[2], NULL, 16); - char *name = tokens[3]; - - return new Function(name, address, size, stack_param_size); + return NULL; } BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine( char *line_line) { - //
- vector tokens; - if (!Tokenize(line_line, kWhitespace, 4, &tokens)) { - return NULL; + uint64_t address; + uint64_t size; + long line_number; + long source_file; + + if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number, + &source_file)) { + return new Line(address, size, source_file, line_number); } - - uint64_t address = strtoull(tokens[0], NULL, 16); - uint64_t size = strtoull(tokens[1], NULL, 16); - int line_number = atoi(tokens[2]); - int source_file = atoi(tokens[3]); - - // Valid line numbers normally start from 1, however there are functions that - // are associated with a source file but not associated with any line number - // (block helper function) and for such functions the symbol file contains 0 - // for the line numbers. Hence, 0 shoud be treated as a valid line number. - // For more information on block helper functions, please, take a look at: - // http://clang.llvm.org/docs/Block-ABI-Apple.html - if (line_number < 0) { - return NULL; - } - - return new Line(address, size, source_file, line_number); + return NULL; } bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) { - // PUBLIC
- - // Skip "PUBLIC " prefix. - public_line += 7; - - vector tokens; - if (!Tokenize(public_line, kWhitespace, 3, &tokens)) { - return false; - } - - uint64_t address = strtoull(tokens[0], NULL, 16); - int stack_param_size = strtoull(tokens[1], NULL, 16); - char *name = tokens[2]; + uint64_t address; + long stack_param_size; + char *name; + + if (SymbolParseHelper::ParsePublicSymbol(public_line, &address, + &stack_param_size, &name)) { + // A few public symbols show up with an address of 0. This has been seen + // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow, + // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict + // with one another if they were allowed into the public_symbols_ map, + // but since the address is obviously invalid, gracefully accept them + // as input without putting them into the map. + if (address == 0) { + return true; + } - // A few public symbols show up with an address of 0. This has been seen - // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow, - // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict - // with one another if they were allowed into the public_symbols_ map, - // but since the address is obviously invalid, gracefully accept them - // as input without putting them into the map. - if (address == 0) { - return true; + linked_ptr symbol(new PublicSymbol(name, address, + stack_param_size)); + return public_symbols_.Store(address, symbol); } - - linked_ptr symbol(new PublicSymbol(name, address, - stack_param_size)); - return public_symbols_.Store(address, symbol); + return false; } bool BasicSourceLineResolver::Module::ParseStackInfo(char *stack_info_line) { @@ -495,4 +460,150 @@ bool BasicSourceLineResolver::Module::ParseCFIFrameInfo( return true; } +// static +bool SymbolParseHelper::ParseFile(char *file_line, long *index, + char **filename) { + // FILE + assert(strncmp(file_line, "FILE ", 5) == 0); + file_line += 5; // skip prefix + + vector tokens; + if (!Tokenize(file_line, kWhitespace, 2, &tokens)) { + return false; + } + + char *after_number; + *index = strtol(tokens[0], &after_number, 10); + if (!IsValidAfterNumber(after_number) || *index < 0 || + *index == std::numeric_limits::max()) { + return false; + } + + *filename = tokens[1]; + if (!filename) { + return false; + } + + return true; +} + +// static +bool SymbolParseHelper::ParseFunction(char *function_line, uint64_t *address, + uint64_t *size, long *stack_param_size, + char **name) { + // FUNC
+ assert(strncmp(function_line, "FUNC ", 5) == 0); + function_line += 5; // skip prefix + + vector tokens; + if (!Tokenize(function_line, kWhitespace, 4, &tokens)) { + return false; + } + + char *after_number; + *address = strtoull(tokens[0], &after_number, 16); + if (!IsValidAfterNumber(after_number) || + *address == std::numeric_limits::max()) { + return false; + } + *size = strtoull(tokens[1], &after_number, 16); + if (!IsValidAfterNumber(after_number) || + *size == std::numeric_limits::max()) { + return false; + } + *stack_param_size = strtol(tokens[2], &after_number, 16); + if (!IsValidAfterNumber(after_number) || + *stack_param_size == std::numeric_limits::max() || + *stack_param_size < 0) { + return false; + } + *name = tokens[3]; + + return true; +} + +// static +bool SymbolParseHelper::ParseLine(char *line_line, uint64_t *address, + uint64_t *size, long *line_number, + long *source_file) { + //
+ vector tokens; + if (!Tokenize(line_line, kWhitespace, 4, &tokens)) { + return false; + } + + char *after_number; + *address = strtoull(tokens[0], &after_number, 16); + if (!IsValidAfterNumber(after_number) || + *address == std::numeric_limits::max()) { + return false; + } + *size = strtoull(tokens[1], &after_number, 16); + if (!IsValidAfterNumber(after_number) || + *size == std::numeric_limits::max()) { + return false; + } + *line_number = strtol(tokens[2], &after_number, 10); + if (!IsValidAfterNumber(after_number) || + *line_number == std::numeric_limits::max()) { + return false; + } + *source_file = strtol(tokens[3], &after_number, 10); + if (!IsValidAfterNumber(after_number) || *source_file < 0 || + *source_file == std::numeric_limits::max()) { + return false; + } + + // Valid line numbers normally start from 1, however there are functions that + // are associated with a source file but not associated with any line number + // (block helper function) and for such functions the symbol file contains 0 + // for the line numbers. Hence, 0 should be treated as a valid line number. + // For more information on block helper functions, please, take a look at: + // http://clang.llvm.org/docs/Block-ABI-Apple.html + if (*line_number < 0) { + return false; + } + + return true; +} + +// static +bool SymbolParseHelper::ParsePublicSymbol(char *public_line, + uint64_t *address, + long *stack_param_size, + char **name) { + // PUBLIC
+ assert(strncmp(public_line, "PUBLIC ", 7) == 0); + public_line += 7; // skip prefix + + vector tokens; + if (!Tokenize(public_line, kWhitespace, 3, &tokens)) { + return false; + } + + char *after_number; + *address = strtoull(tokens[0], &after_number, 16); + if (!IsValidAfterNumber(after_number) || + *address == std::numeric_limits::max()) { + return false; + } + *stack_param_size = strtol(tokens[1], &after_number, 16); + if (!IsValidAfterNumber(after_number) || + *stack_param_size == std::numeric_limits::max() || + *stack_param_size < 0) { + return false; + } + *name = tokens[2]; + + return true; +} + +// static +bool SymbolParseHelper::IsValidAfterNumber(char *after_number) { + if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) { + return true; + } + return false; +} + } // namespace google_breakpad diff --git a/src/processor/basic_source_line_resolver_unittest.cc b/src/processor/basic_source_line_resolver_unittest.cc index 1a760bbf..fcf3fa48 100644 --- a/src/processor/basic_source_line_resolver_unittest.cc +++ b/src/processor/basic_source_line_resolver_unittest.cc @@ -53,6 +53,7 @@ using google_breakpad::StackFrame; using google_breakpad::WindowsFrameInfo; using google_breakpad::linked_ptr; using google_breakpad::scoped_ptr; +using google_breakpad::SymbolParseHelper; class TestCodeModule : public CodeModule { public: @@ -405,6 +406,268 @@ TEST_F(TestBasicSourceLineResolver, TestUnload) ASSERT_TRUE(resolver.HasModule(&module1)); } +// Test parsing of valid FILE lines. The format is: +// FILE +TEST(SymbolParseHelper, ParseFileValid) { + long index; + char *filename; + + char kTestLine[] = "FILE 1 file name"; + ASSERT_TRUE(SymbolParseHelper::ParseFile(kTestLine, &index, &filename)); + EXPECT_EQ(1, index); + EXPECT_EQ("file name", string(filename)); + + // 0 is a valid index. + char kTestLine1[] = "FILE 0 file name"; + ASSERT_TRUE(SymbolParseHelper::ParseFile(kTestLine1, &index, &filename)); + EXPECT_EQ(0, index); + EXPECT_EQ("file name", string(filename)); +} + +// Test parsing of invalid FILE lines. The format is: +// FILE +TEST(SymbolParseHelper, ParseFileInvalid) { + long index; + char *filename; + + // Test missing file name. + char kTestLine[] = "FILE 1 "; + ASSERT_FALSE(SymbolParseHelper::ParseFile(kTestLine, &index, &filename)); + + // Test bad index. + char kTestLine1[] = "FILE x1 file name"; + ASSERT_FALSE(SymbolParseHelper::ParseFile(kTestLine1, &index, &filename)); + + // Test large index. + char kTestLine2[] = "FILE 123123123123123123123123 file name"; + ASSERT_FALSE(SymbolParseHelper::ParseFile(kTestLine2, &index, &filename)); + + // Test negative index. + char kTestLine3[] = "FILE -2 file name"; + ASSERT_FALSE(SymbolParseHelper::ParseFile(kTestLine3, &index, &filename)); +} + +// Test parsing of valid FUNC lines. The format is: +// FUNC
+TEST(SymbolParseHelper, ParseFunctionValid) { + uint64_t address; + uint64_t size; + long stack_param_size; + char *name; + + char kTestLine[] = "FUNC 1 2 3 function name"; + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine, &address, &size, + &stack_param_size, &name)); + EXPECT_EQ(1ULL, address); + EXPECT_EQ(2ULL, size); + EXPECT_EQ(3, stack_param_size); + EXPECT_EQ("function name", string(name)); + + // Test hex address, size, and param size. + char kTestLine1[] = "FUNC a1 a2 a3 function name"; + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine1, &address, &size, + &stack_param_size, &name)); + EXPECT_EQ(0xa1ULL, address); + EXPECT_EQ(0xa2ULL, size); + EXPECT_EQ(0xa3, stack_param_size); + EXPECT_EQ("function name", string(name)); + + char kTestLine2[] = "FUNC 0 0 0 function name"; + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine2, &address, &size, + &stack_param_size, &name)); + EXPECT_EQ(0ULL, address); + EXPECT_EQ(0ULL, size); + EXPECT_EQ(0, stack_param_size); + EXPECT_EQ("function name", string(name)); +} + +// Test parsing of invalid FUNC lines. The format is: +// FUNC
+TEST(SymbolParseHelper, ParseFunctionInvalid) { + uint64_t address; + uint64_t size; + long stack_param_size; + char *name; + + // Test missing function name. + char kTestLine[] = "FUNC 1 2 3 "; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine, &address, &size, + &stack_param_size, &name)); + // Test bad address. + char kTestLine1[] = "FUNC 1z 2 3 function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine1, &address, &size, + &stack_param_size, &name)); + // Test large address. + char kTestLine2[] = "FUNC 123123123123123123123123123 2 3 function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine2, &address, &size, + &stack_param_size, &name)); + // Test bad size. + char kTestLine3[] = "FUNC 1 z2 3 function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine3, &address, &size, + &stack_param_size, &name)); + // Test large size. + char kTestLine4[] = "FUNC 1 231231231231231231231231232 3 function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine4, &address, &size, + &stack_param_size, &name)); + // Test bad param size. + char kTestLine5[] = "FUNC 1 2 3z function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine5, &address, &size, + &stack_param_size, &name)); + // Test large param size. + char kTestLine6[] = "FUNC 1 2 312312312312312312312312323 function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine6, &address, &size, + &stack_param_size, &name)); + // Negative param size. + char kTestLine7[] = "FUNC 1 2 -5 function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine7, &address, &size, + &stack_param_size, &name)); +} + +// Test parsing of valid lines. The format is: +//
+TEST(SymbolParseHelper, ParseLineValid) { + uint64_t address; + uint64_t size; + long line_number; + long source_file; + + char kTestLine[] = "1 2 3 4"; + ASSERT_TRUE(SymbolParseHelper::ParseLine(kTestLine, &address, &size, + &line_number, &source_file)); + EXPECT_EQ(1ULL, address); + EXPECT_EQ(2ULL, size); + EXPECT_EQ(3, line_number); + EXPECT_EQ(4, source_file); + + // Test hex size and address. + char kTestLine1[] = "a1 a2 3 4 // some comment"; + ASSERT_TRUE(SymbolParseHelper::ParseLine(kTestLine1, &address, &size, + &line_number, &source_file)); + EXPECT_EQ(0xa1ULL, address); + EXPECT_EQ(0xa2ULL, size); + EXPECT_EQ(3, line_number); + EXPECT_EQ(4, source_file); + + // 0 is a valid line number. + char kTestLine2[] = "a1 a2 0 4 // some comment"; + ASSERT_TRUE(SymbolParseHelper::ParseLine(kTestLine2, &address, &size, + &line_number, &source_file)); + EXPECT_EQ(0xa1ULL, address); + EXPECT_EQ(0xa2ULL, size); + EXPECT_EQ(0, line_number); + EXPECT_EQ(4, source_file); +} + +// Test parsing of invalid lines. The format is: +//
+TEST(SymbolParseHelper, ParseLineInvalid) { + uint64_t address; + uint64_t size; + long line_number; + long source_file; + + // Test missing source file id. + char kTestLine[] = "1 2 3"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine, &address, &size, + &line_number, &source_file)); + // Test bad address. + char kTestLine1[] = "1z 2 3 4"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine1, &address, &size, + &line_number, &source_file)); + // Test large address. + char kTestLine2[] = "123123123123123123123123 2 3 4"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine2, &address, &size, + &line_number, &source_file)); + // Test bad size. + char kTestLine3[] = "1 z2 3 4"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine3, &address, &size, + &line_number, &source_file)); + // Test large size. + char kTestLine4[] = "1 123123123123123123123123 3 4"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine4, &address, &size, + &line_number, &source_file)); + // Test bad line number. + char kTestLine5[] = "1 2 z3 4"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine5, &address, &size, + &line_number, &source_file)); + // Test negative line number. + char kTestLine6[] = "1 2 -1 4"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine6, &address, &size, + &line_number, &source_file)); + // Test large line number. + char kTestLine7[] = "1 2 123123123123123123123 4"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine7, &address, &size, + &line_number, &source_file)); + // Test bad source file id. + char kTestLine8[] = "1 2 3 f"; + ASSERT_FALSE(SymbolParseHelper::ParseLine(kTestLine8, &address, &size, + &line_number, &source_file)); +} + +// Test parsing of valid PUBLIC lines. The format is: +// PUBLIC
+TEST(SymbolParseHelper, ParsePublicSymbolValid) { + uint64_t address; + long stack_param_size; + char *name; + + char kTestLine[] = "PUBLIC 1 2 3"; + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &address, + &stack_param_size, &name)); + EXPECT_EQ(1ULL, address); + EXPECT_EQ(2, stack_param_size); + EXPECT_EQ("3", string(name)); + + // Test hex size and address. + char kTestLine1[] = "PUBLIC a1 a2 function name"; + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &address, + &stack_param_size, &name)); + EXPECT_EQ(0xa1ULL, address); + EXPECT_EQ(0xa2, stack_param_size); + EXPECT_EQ("function name", string(name)); + + // Test 0 is a valid address. + char kTestLine2[] = "PUBLIC 0 a2 function name"; + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &address, + &stack_param_size, &name)); + EXPECT_EQ(0ULL, address); + EXPECT_EQ(0xa2, stack_param_size); + EXPECT_EQ("function name", string(name)); +} + +// Test parsing of invalid PUBLIC lines. The format is: +// PUBLIC
+TEST(SymbolParseHelper, ParsePublicSymbolInvalid) { + uint64_t address; + long stack_param_size; + char *name; + + // Test missing source function name. + char kTestLine[] = "PUBLIC 1 2 "; + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &address, + &stack_param_size, &name)); + // Test bad address. + char kTestLine1[] = "PUBLIC 1z 2 3"; + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &address, + &stack_param_size, &name)); + // Test large address. + char kTestLine2[] = "PUBLIC 123123123123123123123123 2 3"; + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &address, + &stack_param_size, &name)); + // Test bad param stack size. + char kTestLine3[] = "PUBLIC 1 z2 3"; + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &address, + &stack_param_size, &name)); + // Test large param stack size. + char kTestLine4[] = "PUBLIC 1 123123123123123123123123123 3"; + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine4, &address, + &stack_param_size, &name)); + // Test negative param stack size. + char kTestLine5[] = "PUBLIC 1 -5 3"; + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine5, &address, + &stack_param_size, &name)); +} + } // namespace int main(int argc, char *argv[]) { -- cgit v1.2.1