From 7c2799f3ba6f8a8186c8883b213c3e59768b1287 Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Tue, 31 Jan 2017 13:42:52 +0000 Subject: Sanitize dumped stacks to remove data that may be identifiable. In order to sanitize the stack contents we erase any pointer-aligned word that could not be interpreted as a pointer into one of the processes' memory mappings, or a small integer (+/-4096). This still retains enough information to unwind stack frames, and also to recover some register values. BUG=682278 Change-Id: I541a13b2e92a9d1aea2c06a50bd769a9e25601d3 Reviewed-on: https://chromium-review.googlesource.com/430050 Reviewed-by: Robert Sesek --- src/client/linux/minidump_writer/linux_dumper.cc | 105 +++++++++++- src/client/linux/minidump_writer/linux_dumper.h | 13 ++ .../linux_ptrace_dumper_unittest.cc | 181 +++++++++++++++------ 3 files changed, 245 insertions(+), 54 deletions(-) (limited to 'src/client/linux/minidump_writer') diff --git a/src/client/linux/minidump_writer/linux_dumper.cc b/src/client/linux/minidump_writer/linux_dumper.cc index 4a15f0cc..e1e7b49c 100644 --- a/src/client/linux/minidump_writer/linux_dumper.cc +++ b/src/client/linux/minidump_writer/linux_dumper.cc @@ -84,10 +84,15 @@ inline static bool IsMappedFileOpenUnsafe( namespace google_breakpad { -#if defined(__CHROMEOS__) - namespace { +bool MappingContainsAddress(const MappingInfo& mapping, uintptr_t address) { + return mapping.system_mapping_info.start_addr <= address && + address < mapping.system_mapping_info.end_addr; +} + +#if defined(__CHROMEOS__) + // Recover memory mappings before writing dump on ChromeOS // // On Linux, breakpad relies on /proc/[pid]/maps to associate symbols from @@ -248,9 +253,10 @@ void CrOSPostProcessMappings(wasteful_vector& mappings) { mappings.resize(f); } -} // namespace #endif // __CHROMEOS__ +} // namespace + // All interesting auvx entry types are below AT_SYSINFO_EHDR #define AT_MAX AT_SYSINFO_EHDR @@ -705,6 +711,99 @@ bool LinuxDumper::GetStackInfo(const void** stack, size_t* stack_len, return true; } +void LinuxDumper::SanitizeStackCopy(uint8_t* stack_copy, size_t stack_len, + uintptr_t stack_pointer, + uintptr_t sp_offset) { + // We optimize the search for containing mappings in three ways: + // 1) We expect that pointers into the stack mapping will be common, so + // we cache that address range. + // 2) The last referenced mapping is a reasonable predictor for the next + // referenced mapping, so we test that first. + // 3) We precompute a bitfield based upon bits 32:32-n of the start and + // stop addresses, and use that to short circuit any values that can + // not be pointers. (n=11) + const uintptr_t defaced = +#if defined(__LP64__) + 0x0defaced0defaced; +#else + 0x0defaced; +#endif + // the bitfield length is 2^test_bits long. + const unsigned int test_bits = 11; + // byte length of the corresponding array. + const unsigned int array_size = 1 << (test_bits - 3); + const unsigned int array_mask = array_size - 1; + // The amount to right shift pointers by. This captures the top bits + // on 32 bit architectures. On 64 bit architectures this would be + // uninformative so we take the same range of bits. + const unsigned int shift = 32 - 11; + const MappingInfo* last_hit_mapping = nullptr; + const MappingInfo* hit_mapping = nullptr; + const MappingInfo* stack_mapping = FindMappingNoBias(stack_pointer); + // The magnitude below which integers are considered to be to be + // 'small', and not constitute a PII risk. These are included to + // avoid eliding useful register values. + const ssize_t small_int_magnitude = 4096; + + char could_hit_mapping[array_size]; + my_memset(could_hit_mapping, 0, array_size); + + // Initialize the bitfield such that if the (pointer >> shift)'th + // bit, modulo the bitfield size, is not set then there does not + // exist a mapping in mappings_ that would contain that pointer. + for (size_t i = 0; i < mappings_.size(); ++i) { + // For each mapping, work out the (unmodulo'ed) range of bits to + // set. + uintptr_t start = mappings_[i]->start_addr; + uintptr_t end = start + mappings_[i]->size; + start >>= shift; + end >>= shift; + for (size_t bit = start; bit <= end; ++bit) { + // Set each bit in the range, applying the modulus. + could_hit_mapping[(bit >> 3) & array_mask] |= 1 << (bit & 7); + } + } + + // Zero memory that is below the current stack pointer. + const uintptr_t offset = + (sp_offset + sizeof(uintptr_t) - 1) & ~(sizeof(uintptr_t) - 1); + if (offset) { + my_memset(stack_copy, 0, offset); + } + + // Apply sanitization to each complete pointer-aligned word in the + // stack. + uint8_t* sp; + for (sp = stack_copy + offset; + sp <= stack_copy + stack_len - sizeof(uintptr_t); + sp += sizeof(uintptr_t)) { + uintptr_t addr; + my_memcpy(&addr, sp, sizeof(uintptr_t)); + if (static_cast(addr) <= small_int_magnitude && + static_cast(addr) >= -small_int_magnitude) { + continue; + } + if (stack_mapping && MappingContainsAddress(*stack_mapping, addr)) { + continue; + } + if (last_hit_mapping && MappingContainsAddress(*last_hit_mapping, addr)) { + continue; + } + uintptr_t test = addr >> shift; + if (could_hit_mapping[(test >> 3) & array_mask] & (1 << (test & 7)) && + (hit_mapping = FindMappingNoBias(addr)) != nullptr) { + last_hit_mapping = hit_mapping; + continue; + } + my_memcpy(sp, &defaced, sizeof(uintptr_t)); + } + // Zero any partial word at the top of the stack, if alignment is + // such that that is required. + if (sp < stack_copy + stack_len) { + my_memset(sp, 0, stack_copy + stack_len - sp); + } +} + bool LinuxDumper::StackHasPointerToMapping(const uint8_t* stack_copy, size_t stack_len, uintptr_t sp_offset, diff --git a/src/client/linux/minidump_writer/linux_dumper.h b/src/client/linux/minidump_writer/linux_dumper.h index 0e20209b..23c78e08 100644 --- a/src/client/linux/minidump_writer/linux_dumper.h +++ b/src/client/linux/minidump_writer/linux_dumper.h @@ -116,6 +116,19 @@ class LinuxDumper { // stack_top: the current top of the stack bool GetStackInfo(const void** stack, size_t* stack_len, uintptr_t stack_top); + // Sanitize a copy of the stack by overwriting words that are not + // pointers with a sentinel (0x0defaced). + // stack_copy: a copy of the stack to sanitize. |stack_copy| might + // not be word aligned, but it represents word aligned + // data copied from another location. + // stack_len: the length of the allocation pointed to by |stack_copy|. + // stack_pointer: the address of the stack pointer (used to locate + // the stack mapping, as an optimization). + // sp_offset: the offset relative to stack_copy that reflects the + // current value of the stack pointer. + void SanitizeStackCopy(uint8_t* stack_copy, size_t stack_len, + uintptr_t stack_pointer, uintptr_t sp_offset); + // Test whether |stack_copy| contains a pointer-aligned word that // could be an address within a given mapping. // stack_copy: a copy of the stack to check. |stack_copy| might diff --git a/src/client/linux/minidump_writer/linux_ptrace_dumper_unittest.cc b/src/client/linux/minidump_writer/linux_ptrace_dumper_unittest.cc index be533e15..ae30e606 100644 --- a/src/client/linux/minidump_writer/linux_ptrace_dumper_unittest.cc +++ b/src/client/linux/minidump_writer/linux_ptrace_dumper_unittest.cc @@ -66,6 +66,62 @@ using namespace google_breakpad; namespace { +pid_t SetupChildProcess(int number_of_threads) { + char kNumberOfThreadsArgument[2]; + sprintf(kNumberOfThreadsArgument, "%d", number_of_threads); + + int fds[2]; + EXPECT_NE(-1, pipe(fds)); + + pid_t child_pid = fork(); + if (child_pid == 0) { + // In child process. + close(fds[0]); + + string helper_path(GetHelperBinary()); + if (helper_path.empty()) { + ADD_FAILURE() << "Couldn't find helper binary"; + return -1; + } + + // Pass the pipe fd and the number of threads as arguments. + char pipe_fd_string[8]; + sprintf(pipe_fd_string, "%d", fds[1]); + execl(helper_path.c_str(), + "linux_dumper_unittest_helper", + pipe_fd_string, + kNumberOfThreadsArgument, + NULL); + // Kill if we get here. + printf("Errno from exec: %d", errno); + ADD_FAILURE() << "Exec of " << helper_path << " failed: " << strerror(errno); + return -1; + } + close(fds[1]); + + // Wait for all child threads to indicate that they have started + for (int threads = 0; threads < number_of_threads; threads++) { + struct pollfd pfd; + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = fds[0]; + pfd.events = POLLIN | POLLERR; + + const int r = HANDLE_EINTR(poll(&pfd, 1, 1000)); + EXPECT_EQ(1, r); + EXPECT_TRUE(pfd.revents & POLLIN); + uint8_t junk; + EXPECT_EQ(read(fds[0], &junk, sizeof(junk)), + static_cast(sizeof(junk))); + } + close(fds[0]); + + // There is a race here because we may stop a child thread before + // it is actually running the busy loop. Empirically this sleep + // is sufficient to avoid the race. + usleep(100000); + return child_pid; +} + typedef wasteful_vector id_vector; typedef testing::Test LinuxPtraceDumperTest; @@ -370,58 +426,9 @@ TEST_F(LinuxPtraceDumperChildTest, FileIDsMatch) { TEST(LinuxPtraceDumperTest, VerifyStackReadWithMultipleThreads) { static const int kNumberOfThreadsInHelperProgram = 5; - char kNumberOfThreadsArgument[2]; - sprintf(kNumberOfThreadsArgument, "%d", kNumberOfThreadsInHelperProgram); - int fds[2]; - ASSERT_NE(-1, pipe(fds)); - - pid_t child_pid = fork(); - if (child_pid == 0) { - // In child process. - close(fds[0]); - - string helper_path(GetHelperBinary()); - if (helper_path.empty()) { - FAIL() << "Couldn't find helper binary"; - exit(1); - } - - // Pass the pipe fd and the number of threads as arguments. - char pipe_fd_string[8]; - sprintf(pipe_fd_string, "%d", fds[1]); - execl(helper_path.c_str(), - "linux_dumper_unittest_helper", - pipe_fd_string, - kNumberOfThreadsArgument, - NULL); - // Kill if we get here. - printf("Errno from exec: %d", errno); - FAIL() << "Exec of " << helper_path << " failed: " << strerror(errno); - exit(0); - } - close(fds[1]); - - // Wait for all child threads to indicate that they have started - for (int threads = 0; threads < kNumberOfThreadsInHelperProgram; threads++) { - struct pollfd pfd; - memset(&pfd, 0, sizeof(pfd)); - pfd.fd = fds[0]; - pfd.events = POLLIN | POLLERR; - - const int r = HANDLE_EINTR(poll(&pfd, 1, 1000)); - ASSERT_EQ(1, r); - ASSERT_TRUE(pfd.revents & POLLIN); - uint8_t junk; - ASSERT_EQ(read(fds[0], &junk, sizeof(junk)), - static_cast(sizeof(junk))); - } - close(fds[0]); - - // There is a race here because we may stop a child thread before - // it is actually running the busy loop. Empirically this sleep - // is sufficient to avoid the race. - usleep(100000); + pid_t child_pid = SetupChildProcess(kNumberOfThreadsInHelperProgram); + ASSERT_NE(child_pid, -1); // Children are ready now. LinuxPtraceDumper dumper(child_pid); @@ -468,3 +475,75 @@ TEST(LinuxPtraceDumperTest, VerifyStackReadWithMultipleThreads) { ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_EQ(SIGKILL, WTERMSIG(status)); } + +TEST_F(LinuxPtraceDumperTest, SanitizeStackCopy) { + static const int kNumberOfThreadsInHelperProgram = 1; + + pid_t child_pid = SetupChildProcess(kNumberOfThreadsInHelperProgram); + ASSERT_NE(child_pid, -1); + + LinuxPtraceDumper dumper(child_pid); + ASSERT_TRUE(dumper.Init()); + EXPECT_TRUE(dumper.ThreadsSuspend()); + + ThreadInfo thread_info; + EXPECT_TRUE(dumper.GetThreadInfoByIndex(0, &thread_info)); + + const void* stack; + size_t stack_len; + EXPECT_TRUE(dumper.GetStackInfo(&stack, &stack_len, thread_info.stack_pointer)); + + uint8_t* stack_copy = new uint8_t[stack_len]; + dumper.CopyFromProcess(stack_copy, child_pid, stack, stack_len); + + size_t stack_offset = + thread_info.stack_pointer - reinterpret_cast(stack); + + const size_t word_count = (stack_len - stack_offset) / sizeof(uintptr_t); + uintptr_t* stack_words = new uintptr_t[word_count]; + + memcpy(stack_words, stack_copy + stack_offset, word_count * sizeof(uintptr_t)); + std::map pre_sanitization_words; + for (size_t i = 0; i < word_count; ++i) + ++pre_sanitization_words[stack_words[i]]; + + fprintf(stderr, "stack_offset=%lu stack_len=%lu stack=%p\n", stack_offset, stack_len, stack); + dumper.SanitizeStackCopy(stack_copy, stack_len, thread_info.stack_pointer, + stack_offset); + + // Memory below the stack pointer should be zeroed. + for (size_t i = 0; i < stack_offset; ++i) { + ASSERT_EQ(0, stack_copy[i]); + } + + memcpy(stack_words, stack_copy + stack_offset, word_count * sizeof(uintptr_t)); + std::map post_sanitization_words; + for (size_t i = 0; i < word_count; ++i) + ++post_sanitization_words[stack_words[i]]; + + std::set words; + for (auto &word : pre_sanitization_words) words.insert(word.first); + for (auto &word : post_sanitization_words) words.insert(word.first); + + for (auto word : words) { + if (word == static_cast(0X0DEFACED0DEFACEDull)) { + continue; + } + + bool should_be_sanitized = true; + if (static_cast(word) <= 4096 && + static_cast(word) >= -4096) should_be_sanitized = false; + if (dumper.FindMappingNoBias(word)) should_be_sanitized = false; + + ASSERT_EQ(should_be_sanitized, post_sanitization_words[word] == 0); + } + + EXPECT_TRUE(dumper.ThreadsResume()); + kill(child_pid, SIGKILL); + + // Reap child + int status; + ASSERT_NE(-1, HANDLE_EINTR(waitpid(child_pid, &status, 0))); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_EQ(SIGKILL, WTERMSIG(status)); +} -- cgit v1.2.1