From 8507f63d38371a217fd0d8dacc8bbfee8c41b187 Mon Sep 17 00:00:00 2001 From: "chrisha@chromium.org" Date: Wed, 1 May 2013 18:18:46 +0000 Subject: Add explicit OMAP support to dump_syms. This CL adds new utilities to common/windows for handling OMAP information in PDB files. It then augments PdbSourceLineWriter with explicit OMAP knowledge so that symbolization will proceed more cleanly for images whose PDB files contain OMAP information. This makes breakpad handle OMAPped symbol files as cleanly as WinDbg. Review URL: https://breakpad.appspot.com/570002/ git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@1167 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/common/windows/common_windows.gyp | 87 ++++ src/common/windows/dia_util.cc | 92 ++++ src/common/windows/dia_util.h | 59 +++ src/common/windows/omap.cc | 694 +++++++++++++++++++++++++++ src/common/windows/omap.h | 72 +++ src/common/windows/omap_internal.h | 137 ++++++ src/common/windows/omap_unittest.cc | 330 +++++++++++++ src/common/windows/pdb_source_line_writer.cc | 159 ++++-- src/common/windows/pdb_source_line_writer.h | 5 + 9 files changed, 1588 insertions(+), 47 deletions(-) create mode 100644 src/common/windows/common_windows.gyp create mode 100644 src/common/windows/dia_util.cc create mode 100644 src/common/windows/dia_util.h create mode 100644 src/common/windows/omap.cc create mode 100644 src/common/windows/omap.h create mode 100644 src/common/windows/omap_internal.h create mode 100644 src/common/windows/omap_unittest.cc (limited to 'src/common/windows') diff --git a/src/common/windows/common_windows.gyp b/src/common/windows/common_windows.gyp new file mode 100644 index 00000000..6b877759 --- /dev/null +++ b/src/common/windows/common_windows.gyp @@ -0,0 +1,87 @@ +# Copyright 2013 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +{ + 'includes': [ + '../../client/windows/build/common.gypi', + ], + 'targets': [ + { + 'target_name': 'dia_sdk', + 'type': 'none', + 'all_dependent_settings': { + 'include_dirs': [ + '<(DEPTH)', + '$(VSInstallDir)\DIA SDK\include', + ], + 'msvs_settings': { + 'VCLinkerTool': { + 'AdditionalDependencies': [ + '$(VSInstallDir)\DIA SDK\lib\diaguids.lib', + 'imagehlp.lib', + ], + }, + }, + }, + }, + { + 'target_name': 'common_windows_lib', + 'type': 'static_library', + 'sources': [ + 'dia_util.cc', + 'dia_util.h', + 'guid_string.cc', + 'guid_string.h', + 'http_upload.cc', + 'http_upload.h', + 'omap.cc', + 'omap.h', + 'omap_internal.h', + 'pdb_source_line_writer.cc', + 'pdb_source_line_writer.h', + 'string_utils.cc', + 'string_utils-inl.h', + ], + 'dependencies': [ + 'dia_sdk', + ], + }, + { + 'target_name': 'common_windows_unittests', + 'type': 'executable', + 'sources': [ + 'omap_unittest.cc', + ], + 'dependencies': [ + '<(DEPTH)/client/windows/unittests/testing.gyp:gmock', + '<(DEPTH)/client/windows/unittests/testing.gyp:gtest', + 'common_windows_lib', + ], + }, + ], +} diff --git a/src/common/windows/dia_util.cc b/src/common/windows/dia_util.cc new file mode 100644 index 00000000..25a9a33f --- /dev/null +++ b/src/common/windows/dia_util.cc @@ -0,0 +1,92 @@ +// Copyright 2013 Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "common/windows/dia_util.h" + +#include + +namespace google_breakpad { + +bool FindDebugStream(const wchar_t* name, + IDiaSession* session, + IDiaEnumDebugStreamData** debug_stream) { + CComPtr enum_debug_streams; + if (FAILED(session->getEnumDebugStreams(&enum_debug_streams))) { + fprintf(stderr, "IDiaSession::getEnumDebugStreams failed\n"); + return false; + } + + CComPtr temp_debug_stream; + ULONG fetched = 0; + while (SUCCEEDED(enum_debug_streams->Next(1, &temp_debug_stream, &fetched)) && + fetched == 1) { + CComBSTR stream_name; + if (FAILED(temp_debug_stream->get_name(&stream_name))) { + fprintf(stderr, "IDiaEnumDebugStreamData::get_name failed\n"); + return false; + } + + // Found the stream? + if (wcsncmp((LPWSTR)stream_name, name, stream_name.Length()) == 0) { + *debug_stream = temp_debug_stream.Detach(); + return true; + } + + temp_debug_stream.Release(); + } + + // No table was found. + return false; +} + +bool FindTable(REFIID iid, IDiaSession* session, void** table) { + // Get the table enumerator. + CComPtr enum_tables; + if (FAILED(session->getEnumTables(&enum_tables))) { + fprintf(stderr, "IDiaSession::getEnumTables failed\n"); + return false; + } + + // Iterate through the tables. + CComPtr temp_table; + ULONG fetched = 0; + while (SUCCEEDED(enum_tables->Next(1, &temp_table, &fetched)) && + fetched == 1) { + void* temp = NULL; + if (SUCCEEDED(temp_table->QueryInterface(iid, &temp))) { + *table = temp; + return true; + } + temp_table.Release(); + } + + // The table was not found. + return false; +} + +} // namespace google_breakpad diff --git a/src/common/windows/dia_util.h b/src/common/windows/dia_util.h new file mode 100644 index 00000000..4b259b45 --- /dev/null +++ b/src/common/windows/dia_util.h @@ -0,0 +1,59 @@ +// Copyright 2013 Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Utilities for loading debug streams and tables from a PDB file. + +#include +#include + +namespace google_breakpad { + +// Find the debug stream of the given |name| in the given |session|. Returns +// true on success, false on error of if the stream does not exist. On success +// the stream will be returned via |debug_stream|. +bool FindDebugStream(const wchar_t* name, + IDiaSession* session, + IDiaEnumDebugStreamData** debug_stream); + +// Finds the first table implementing the COM interface with ID |iid| in the +// given |session|. Returns true on success, false on error or if no such +// table is found. On success the table will be returned via |table|. +bool FindTable(REFIID iid, IDiaSession* session, void** table); + +// A templated version of FindTable. Finds the first table implementing type +// |InterfaceType| in the given |session|. Returns true on success, false on +// error or if no such table is found. On success the table will be returned via +// |table|. +template +bool FindTable(IDiaSession* session, InterfaceType** table) { + return FindTable(__uuidof(InterfaceType), + session, + reinterpret_cast(table)); +} + +} // namespace google_breakpad diff --git a/src/common/windows/omap.cc b/src/common/windows/omap.cc new file mode 100644 index 00000000..129ee4ae --- /dev/null +++ b/src/common/windows/omap.cc @@ -0,0 +1,694 @@ +// Copyright 2013 Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This contains a suite of tools for transforming symbol information when +// when that information has been extracted from a PDB containing OMAP +// information. + +// OMAP information is a lightweight description of a mapping between two +// address spaces. It consists of two streams, each of them a vector 2-tuples. +// The OMAPTO stream contains tuples of the form +// +// (RVA in transformed image, RVA in original image) +// +// while the OMAPFROM stream contains tuples of the form +// +// (RVA in original image, RVA in transformed image) +// +// The entries in each vector are sorted by the first value of the tuple, and +// the lengths associated with a mapping are implicit as the distance between +// two successive addresses in the vector. + +// Consider a trivial 10-byte function described by the following symbol: +// +// Function: RVA 0x00001000, length 10, "foo" +// +// Now consider the same function, but with 5-bytes of instrumentation injected +// at offset 5. The OMAP streams describing this would look like: +// +// OMAPTO : [ [0x00001000, 0x00001000], +// [0x00001005, 0xFFFFFFFF], +// [0x0000100a, 0x00001005] ] +// OMAPFROM: [ [0x00001000, 0x00001000], +// [0x00001005, 0x0000100a] ] +// +// In this case the injected code has been marked as not originating in the +// source image, and thus it will have no symbol information at all. However, +// the injected code may also be associated with an original address range; +// for example, when prepending instrumentation to a basic block the +// instrumentation can be labelled as originating from the same source BB such +// that symbol resolution will still find the appropriate source code line +// number. In this case the OMAP stream would look like: +// +// OMAPTO : [ [0x00001000, 0x00001000], +// [0x00001005, 0x00001005], +// [0x0000100a, 0x00001005] ] +// OMAPFROM: [ [0x00001000, 0x00001000], +// [0x00001005, 0x0000100a] ] +// +// Suppose we asked DIA to lookup the symbol at location 0x0000100a of the +// instrumented image. It would first run this through the OMAPTO table and +// translate that address to 0x00001005. It would then lookup the symbol +// at that address and return the symbol for the function "foo". This is the +// correct result. +// +// However, if we query DIA for the length and address of the symbol it will +// tell us that it has length 10 and is at RVA 0x00001000. The location is +// correct, but the length doesn't take into account the 5-bytes of injected +// code. Symbol resolution works (starting from an instrumented address, +// mapping to an original address, and looking up a symbol), but the symbol +// metadata is incorrect. +// +// If we dump the symbols using DIA they will have their addresses +// appropriately transformed and reflect positions in the instrumented image. +// However, if we try to do a lookup using those symbols resolution can fail. +// For example, the address 0x0000100a will not map to the symbol for "foo", +// because DIA tells us it is at location 0x00001000 (correct) with length +// 10 (incorrect). The problem is one of order of operations: in this case +// we're attempting symbol resolution by looking up an instrumented address +// in the table of translated symbols. +// +// One way to handle this is to dump the OMAP information as part of the +// breakpad symbols. This requires the rest of the toolchain to be aware of +// OMAP information and to use it when present prior to performing lookup. The +// other option is to properly transform the symbols (updating length as well as +// position) so that resolution will work as expected for translated addresses. +// This is transparent to the rest of the toolchain. + +#include "common/windows/omap.h" + +#include + +#include +#include +#include + +#include "common/windows/dia_util.h" + +namespace google_breakpad { + +namespace { + +static const wchar_t kOmapToDebugStreamName[] = L"OMAPTO"; +static const wchar_t kOmapFromDebugStreamName[] = L"OMAPFROM"; + +// Dependending on where this is used in breakpad we sometimes get min/max from +// windef, and other times from algorithm. To get around this we simply +// define our own min/max functions. +template +const T& Min(const T& t1, const T& t2) { return t1 < t2 ? t1 : t2; } +template +const T& Max(const T& t1, const T& t2) { return t1 > t2 ? t1 : t2; } + +// It makes things more readable to have two different OMAP types. We cast +// normal OMAPs into these. They must be the same size as the OMAP structure +// for this to work, hence the static asserts. +struct OmapOrigToTran { + DWORD rva_original; + DWORD rva_transformed; +}; +struct OmapTranToOrig { + DWORD rva_transformed; + DWORD rva_original; +}; +static_assert(sizeof(OmapOrigToTran) == sizeof(OMAP), + "OmapOrigToTran must have same size as OMAP."); +static_assert(sizeof(OmapTranToOrig) == sizeof(OMAP), + "OmapTranToOrig must have same size as OMAP."); +typedef std::vector OmapFromTable; +typedef std::vector OmapToTable; + +// Used for sorting and searching through a Mapping. +bool MappedRangeOriginalLess(const MappedRange& lhs, const MappedRange& rhs) { + if (lhs.rva_original < rhs.rva_original) + return true; + if (lhs.rva_original > rhs.rva_original) + return false; + return lhs.length < rhs.length; +} +bool MappedRangeMappedLess(const MappedRange& lhs, const MappedRange& rhs) { + if (lhs.rva_transformed < rhs.rva_transformed) + return true; + if (lhs.rva_transformed > rhs.rva_transformed) + return false; + return lhs.length < rhs.length; +} + +// Used for searching through the EndpointIndexMap. +bool EndpointIndexLess(const EndpointIndex& ei1, const EndpointIndex& ei2) { + return ei1.endpoint < ei2.endpoint; +} + +// Finds the debug stream with the given |name| in the given |session|, and +// populates |table| with its contents. Casts the data directly into OMAP +// structs. +bool FindAndLoadOmapTable(const wchar_t* name, + IDiaSession* session, + OmapTable* table) { + assert(name != NULL); + assert(session != NULL); + assert(table != NULL); + + CComPtr stream; + if (!FindDebugStream(name, session, &stream)) + return false; + assert(stream.p != NULL); + + LONG count = 0; + if (FAILED(stream->get_Count(&count))) { + fprintf(stderr, "IDiaEnumDebugStreamData::get_Count failed for stream " + "\"%ws\"\n", name); + return false; + } + + // Get the length of the stream in bytes. + DWORD bytes_read = 0; + ULONG count_read = 0; + if (FAILED(stream->Next(count, 0, &bytes_read, NULL, &count_read))) { + fprintf(stderr, "IDiaEnumDebugStreamData::Next failed while reading " + "length of stream \"%ws\"\n", name); + return false; + } + + // Ensure it's consistent with the OMAP data type. + DWORD bytes_expected = count * sizeof(OmapTable::value_type); + if (count * sizeof(OmapTable::value_type) != bytes_read) { + fprintf(stderr, "DIA debug stream \"%ws\" has an unexpected length", name); + return false; + } + + // Read the table. + table->resize(count); + bytes_read = 0; + count_read = 0; + if (FAILED(stream->Next(count, bytes_expected, &bytes_read, + reinterpret_cast(&table->at(0)), + &count_read))) { + fprintf(stderr, "IDiaEnumDebugStreamData::Next failed while reading " + "data from stream \"%ws\"\n"); + return false; + } + + return true; +} + +// This determines the original image length by looking through the segment +// table. +bool GetOriginalImageLength(IDiaSession* session, DWORD* image_length) { + assert(session != NULL); + assert(image_length != NULL); + + CComPtr enum_segments; + if (!FindTable(session, &enum_segments)) + return false; + assert(enum_segments.p != NULL); + + DWORD temp_image_length = 0; + CComPtr segment; + ULONG fetched = 0; + while (SUCCEEDED(enum_segments->Next(1, &segment, &fetched)) && + fetched == 1) { + assert(segment.p != NULL); + + DWORD rva = 0; + DWORD length = 0; + DWORD frame = 0; + if (FAILED(segment->get_relativeVirtualAddress(&rva)) || + FAILED(segment->get_length(&length)) || + FAILED(segment->get_frame(&frame))) { + fprintf(stderr, "Failed to get basic properties for IDiaSegment\n"); + return false; + } + + if (frame > 0) { + DWORD segment_end = rva + length; + if (segment_end > temp_image_length) + temp_image_length = segment_end; + } + segment.Release(); + } + + *image_length = temp_image_length; + return true; +} + +// Detects regions of the original image that have been removed in the +// transformed image, and sets the 'removed' property on all mapped ranges +// immediately preceding a gap. The mapped ranges must be sorted by +// 'rva_original'. +void FillInRemovedLengths(Mapping* mapping) { + assert(mapping != NULL); + + // Find and fill gaps. We do this with two sweeps. We first sweep forward + // looking for gaps. When we identify a gap we then sweep forward with a + // second scan and set the 'removed' property for any intervals that + // immediately precede the gap. + // + // Gaps are typically between two successive intervals, but not always: + // + // Range 1: --------------- + // Range 2: ------- + // Range 3: ------------- + // Gap : ****** + // + // In the above example the gap is between range 1 and range 3. A forward + // sweep finds the gap, and a second forward sweep identifies that range 1 + // immediately precedes the gap and sets its 'removed' property. + + size_t fill = 0; + DWORD rva_front = 0; + for (size_t find = 0; find < mapping->size(); ++find) { +#ifndef NDEBUG + // We expect the mapped ranges to be sorted by 'rva_original'. + if (find > 0) { + assert(mapping->at(find - 1).rva_original <= + mapping->at(find).rva_original); + } +#endif + + if (rva_front < mapping->at(find).rva_original) { + // We've found a gap. Fill it in by setting the 'removed' property for + // any affected intervals. + DWORD removed = mapping->at(find).rva_original - rva_front; + for (; fill < find; ++fill) { + if (mapping->at(fill).rva_original + mapping->at(fill).length != + rva_front) { + continue; + } + + // This interval ends right where the gap starts. It needs to have its + // 'removed' information filled in. + mapping->at(fill).removed = removed; + } + } + + // Advance the front that indicates the covered portion of the image. + rva_front = mapping->at(find).rva_original + mapping->at(find).length; + } +} + +// Builds a unified view of the mapping between the original and transformed +// image space by merging OMAPTO and OMAPFROM data. +void BuildMapping(const OmapData& omap_data, Mapping* mapping) { + assert(mapping != NULL); + + mapping->clear(); + + if (omap_data.omap_from.empty() || omap_data.omap_to.empty()) + return; + + // The names 'omap_to' and 'omap_from' are awfully confusing, so we make + // ourselves more explicit here. This cast is only safe because the underlying + // types have the exact same size. + const OmapToTable& tran2orig = + reinterpret_cast(omap_data.omap_to); + const OmapFromTable& orig2tran = reinterpret_cast( + omap_data.omap_from); + + // Handle the range of data at the beginning of the image. This is not usually + // specified by the OMAP data. + if (tran2orig[0].rva_transformed > 0 && orig2tran[0].rva_original > 0) { + DWORD header_transformed = tran2orig[0].rva_transformed; + DWORD header_original = orig2tran[0].rva_original; + DWORD header = Min(header_transformed, header_original); + + MappedRange mr = {}; + mr.length = header; + mr.injected = header_transformed - header; + mr.removed = header_original - header; + mapping->push_back(mr); + } + + // Convert the implied lengths to explicit lengths, and infer which content + // has been injected into the transformed image. Injected content is inferred + // as regions of the transformed address space that does not map back to + // known valid content in the original image. + for (size_t i = 0; i < tran2orig.size(); ++i) { + const OmapTranToOrig& o1 = tran2orig[i]; + + // This maps to content that is outside the original image, thus it + // describes injected content. We can skip this entry. + if (o1.rva_original >= omap_data.length_original) + continue; + + // Calculate the length of the current OMAP entry. This is implicit as the + // distance between successive |rva| values, capped at the end of the + // original image. + DWORD length = 0; + if (i + 1 < tran2orig.size()) { + const OmapTranToOrig& o2 = tran2orig[i + 1]; + + // We expect the table to be sorted by rva_transformed. + assert(o1.rva_transformed <= o2.rva_transformed); + + length = o2.rva_transformed - o1.rva_transformed; + if (o1.rva_original + length > omap_data.length_original) { + length = omap_data.length_original - o1.rva_original; + } + } else { + length = omap_data.length_original - o1.rva_original; + } + + // Zero-length entries don't describe anything and can be ignored. + if (length == 0) + continue; + + // Any gaps in the transformed address-space are due to injected content. + if (!mapping->empty()) { + MappedRange& prev_mr = mapping->back(); + prev_mr.injected += o1.rva_transformed - + (prev_mr.rva_transformed + prev_mr.length); + } + + MappedRange mr = {}; + mr.rva_original = o1.rva_original; + mr.rva_transformed = o1.rva_transformed; + mr.length = length; + mapping->push_back(mr); + } + + // Sort based on the original image addresses. + std::sort(mapping->begin(), mapping->end(), MappedRangeOriginalLess); + + // Fill in the 'removed' lengths by looking for gaps in the coverage of the + // original address space. + FillInRemovedLengths(mapping); + + return; +} + +void BuildEndpointIndexMap(ImageMap* image_map) { + assert(image_map != NULL); + + if (image_map->mapping.size() == 0) + return; + + const Mapping& mapping = image_map->mapping; + EndpointIndexMap& eim = image_map->endpoint_index_map; + + // Get the unique set of interval endpoints. + std::set endpoints; + for (size_t i = 0; i < mapping.size(); ++i) { + endpoints.insert(mapping[i].rva_original); + endpoints.insert(mapping[i].rva_original + + mapping[i].length + + mapping[i].removed); + } + + // Use the endpoints to initialize the secondary search structure for the + // mapping. + eim.resize(endpoints.size()); + std::set::const_iterator it = endpoints.begin(); + for (size_t i = 0; it != endpoints.end(); ++it, ++i) { + eim[i].endpoint = *it; + eim[i].index = mapping.size(); + } + + // For each endpoint we want the smallest index of any interval containing + // it. We iterate over the intervals and update the indices associated with + // each interval endpoint contained in the current interval. In the general + // case of an arbitrary set of intervals this is O(n^2), but the structure of + // OMAP data makes this O(n). + for (size_t i = 0; i < mapping.size(); ++i) { + EndpointIndex ei1 = { mapping[i].rva_original, 0 }; + EndpointIndexMap::iterator it1 = std::lower_bound( + eim.begin(), eim.end(), ei1, EndpointIndexLess); + + EndpointIndex ei2 = { mapping[i].rva_original + mapping[i].length + + mapping[i].removed, 0 }; + EndpointIndexMap::iterator it2 = std::lower_bound( + eim.begin(), eim.end(), ei2, EndpointIndexLess); + + for (; it1 != it2; ++it1) + it1->index = Min(i, it1->index); + } +} + +// Clips the given mapped range. +void ClipMappedRangeOriginal(const AddressRange& clip_range, + MappedRange* mapped_range) { + assert(mapped_range != NULL); + + // The clipping range is entirely outside of the mapped range. + if (clip_range.end() <= mapped_range->rva_original || + mapped_range->rva_original + mapped_range->length + + mapped_range->removed <= clip_range.rva) { + mapped_range->length = 0; + mapped_range->injected = 0; + mapped_range->removed = 0; + return; + } + + // Clip the left side. + if (mapped_range->rva_original < clip_range.rva) { + DWORD clip_left = clip_range.rva - mapped_range->rva_original; + mapped_range->rva_original += clip_left; + mapped_range->rva_transformed += clip_left; + + if (clip_left > mapped_range->length) { + // The left clipping boundary entirely erases the content section of the + // range. + DWORD trim = clip_left - mapped_range->length; + mapped_range->length = 0; + mapped_range->injected -= Min(trim, mapped_range->injected); + // We know that trim <= mapped_range->remove. + mapped_range->removed -= trim; + } else { + // The left clipping boundary removes some, but not all, of the content. + // As such it leaves the removed/injected component intact. + mapped_range->length -= clip_left; + } + } + + // Clip the right side. + DWORD end_original = mapped_range->rva_original + mapped_range->length; + if (clip_range.end() < end_original) { + // The right clipping boundary lands in the 'content' section of the range, + // entirely clearing the injected/removed portion. + DWORD clip_right = end_original - clip_range.end(); + mapped_range->length -= clip_right; + mapped_range->injected = 0; + mapped_range->removed = 0; + return; + } else { + // The right clipping boundary is outside of the content, but may affect + // the removed/injected portion of the range. + DWORD end_removed = end_original + mapped_range->removed; + if (clip_range.end() < end_removed) + mapped_range->removed = clip_range.end() - end_original; + + DWORD end_injected = end_original + mapped_range->injected; + if (clip_range.end() < end_injected) + mapped_range->injected = clip_range.end() - end_original; + } + + return; +} + +} // namespace + +int AddressRange::Compare(const AddressRange& rhs) const { + if (end() <= rhs.rva) + return -1; + if (rhs.end() <= rva) + return 1; + return 0; +} + +bool GetOmapDataAndDisableTranslation(IDiaSession* session, + OmapData* omap_data) { + assert(session != NULL); + assert(omap_data != NULL); + + CComPtr address_map; + if (FAILED(session->QueryInterface(&address_map))) { + fprintf(stderr, "IDiaSession::QueryInterface(IDiaAddressMap) failed\n"); + return false; + } + assert(address_map.p != NULL); + + BOOL omap_enabled = FALSE; + if (FAILED(address_map->get_addressMapEnabled(&omap_enabled))) { + fprintf(stderr, "IDiaAddressMap::get_addressMapEnabled failed\n"); + return false; + } + + if (!omap_enabled) { + // We indicate the non-presence of OMAP data by returning empty tables. + omap_data->omap_from.clear(); + omap_data->omap_to.clear(); + omap_data->length_original = 0; + return true; + } + + // OMAP data is present. Disable translation. + if (FAILED(address_map->put_addressMapEnabled(FALSE))) { + fprintf(stderr, "IDiaAddressMap::put_addressMapEnabled failed\n"); + return false; + } + + // Read the OMAP streams. + if (!FindAndLoadOmapTable(kOmapFromDebugStreamName, + session, + &omap_data->omap_from)) { + return false; + } + if (!FindAndLoadOmapTable(kOmapToDebugStreamName, + session, + &omap_data->omap_to)) { + return false; + } + + // Get the lengths of the address spaces. + if (!GetOriginalImageLength(session, &omap_data->length_original)) + return false; + + return true; +} + +void BuildImageMap(const OmapData& omap_data, ImageMap* image_map) { + assert(image_map != NULL); + + BuildMapping(omap_data, &image_map->mapping); + BuildEndpointIndexMap(image_map); +} + +void MapAddressRange(const ImageMap& image_map, + const AddressRange& original_range, + AddressRangeVector* mapped_ranges) { + assert(mapped_ranges != NULL); + + const Mapping& map = image_map.mapping; + + // Handle the trivial case of an empty image_map. This means that there is + // no transformation to be applied, and we can simply return the original + // range. + if (map.empty()) { + mapped_ranges->push_back(original_range); + return; + } + + // If we get a query of length 0 we need to handle it by using a non-zero + // query length. + AddressRange query_range(original_range); + if (query_range.length == 0) + query_range.length = 1; + + // Find the range of intervals that can potentially intersect our query range. + size_t imin = 0; + size_t imax = 0; + { + // The index of the earliest possible range that can affect is us done by + // searching through the secondary indexing structure. + const EndpointIndexMap& eim = image_map.endpoint_index_map; + EndpointIndex q1 = { query_range.rva, 0 }; + EndpointIndexMap::const_iterator it1 = std::lower_bound( + eim.begin(), eim.end(), q1, EndpointIndexLess); + if (it1 == eim.end()) { + imin = map.size(); + } else { + // Backup to find the interval that contains our query point. + if (it1 != eim.begin() && query_range.rva < it1->endpoint) + --it1; + imin = it1->index; + } + + // The first range that can't possibly intersect us is found by searching + // through the image map directly as it is already sorted by interval start + // point. + MappedRange q2 = { query_range.end(), 0 }; + Mapping::const_iterator it2 = std::lower_bound( + map.begin(), map.end(), q2, MappedRangeOriginalLess); + imax = it2 - map.begin(); + } + + // Find all intervals that intersect the query range. + Mapping temp_map; + for (size_t i = imin; i < imax; ++i) { + MappedRange mr = map[i]; + ClipMappedRangeOriginal(query_range, &mr); + if (mr.length + mr.injected > 0) + temp_map.push_back(mr); + } + + // If there are no intersecting ranges then the query range has been removed + // from the image in question. + if (temp_map.empty()) + return; + + // Sort based on transformed addresses. + std::sort(temp_map.begin(), temp_map.end(), MappedRangeMappedLess); + + // Zero-length queries can't actually be merged. We simply output the set of + // unique RVAs that correspond to the query RVA. + if (original_range.length == 0) { + mapped_ranges->push_back(AddressRange(temp_map[0].rva_transformed, 0)); + for (size_t i = 1; i < temp_map.size(); ++i) { + if (temp_map[i].rva_transformed > mapped_ranges->back().rva) + mapped_ranges->push_back(AddressRange(temp_map[i].rva_transformed, 0)); + } + return; + } + + // Merge any ranges that are consecutive in the mapped image. We merge over + // injected content if it makes ranges contiguous, but we ignore any injected + // content at the tail end of a range. This allows us to detect symbols that + // have been lengthened by injecting content in the middle. However, it + // misses the case where content has been injected at the head or the tail. + // The problem is that it doesn't know whether to attribute it to the + // preceding or following symbol. It is up to the author of the transform to + // output explicit OMAP info in these cases to ensure full coverage of the + // transformed address space. + DWORD rva_begin = temp_map[0].rva_transformed; + DWORD rva_cur_content = rva_begin + temp_map[0].length; + DWORD rva_cur_injected = rva_cur_content + temp_map[0].injected; + for (size_t i = 1; i < temp_map.size(); ++i) { + if (rva_cur_injected < temp_map[i].rva_transformed) { + // This marks the end of a continuous range in the image. Output the + // current range and start a new one. + if (rva_begin < rva_cur_content) { + mapped_ranges->push_back( + AddressRange(rva_begin, rva_cur_content - rva_begin)); + } + rva_begin = temp_map[i].rva_transformed; + } + + rva_cur_content = temp_map[i].rva_transformed + temp_map[i].length; + rva_cur_injected = rva_cur_content + temp_map[i].injected; + } + + // Output the range in progress. + if (rva_begin < rva_cur_content) { + mapped_ranges->push_back( + AddressRange(rva_begin, rva_cur_content - rva_begin)); + } + + return; +} + +} // namespace google_breakpad diff --git a/src/common/windows/omap.h b/src/common/windows/omap.h new file mode 100644 index 00000000..88e8589e --- /dev/null +++ b/src/common/windows/omap.h @@ -0,0 +1,72 @@ +// Copyright 2013 Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Provides an API for mapping symbols through OMAP information, if a PDB file +// is augmented with it. This allows breakpad to work with addresses in +// transformed images by transforming the symbols themselves, rather than +// transforming addresses prior to querying symbols (the way it is typically +// done by Windows-native tools, including the DIA). + +#ifndef COMMON_WINDOWS_OMAP_H__ +#define COMMON_WINDOWS_OMAP_H__ + +#include "common/windows/omap_internal.h" + +namespace google_breakpad { + +// If the given session contains OMAP data this extracts it, populating +// |omap_data|, and then disabling automatic translation for the session. +// OMAP data is present in the PDB if |omap_data| is not empty. This returns +// true on success, false otherwise. +bool GetOmapDataAndDisableTranslation(IDiaSession* dia_session, + OmapData* omap_data); + +// Given raw OMAP data builds an ImageMap. This can be used to query individual +// image ranges using MapAddressRange. +// |omap_data|| is the OMAP data extracted from the PDB. +// |image_map| will be populated with a description of the image mapping. If +// |omap_data| is empty then this will also be empty. +void BuildImageMap(const OmapData& omap_data, ImageMap* image_map); + +// Given an address range in the original image space determines how exactly it +// has been tranformed. +// |omap_data| is the OMAP data extracted from the PDB, which must not be +// empty. +// |original_range| is the address range in the original image being queried. +// |mapped_ranges| will be populated with a full description of the mapping. +// They may be disjoint in the transformed image so a vector is needed to +// fully represent the mapping. This will be appended to if it is not +// empty. If |omap_data| is empty then |mapped_ranges| will simply be +// populated with a copy of |original_range| (the identity transform). +void MapAddressRange(const ImageMap& image_map, + const AddressRange& original_range, + AddressRangeVector* mapped_ranges); + +} // namespace google_breakpad + +#endif // COMMON_WINDOWS_OMAP_H__ diff --git a/src/common/windows/omap_internal.h b/src/common/windows/omap_internal.h new file mode 100644 index 00000000..1364eb9e --- /dev/null +++ b/src/common/windows/omap_internal.h @@ -0,0 +1,137 @@ +// Copyright 2013 Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Declares internal implementation details for functionality in omap.h and +// omap.cc. + +#ifndef COMMON_WINDOWS_OMAP_INTERNAL_H__ +#define COMMON_WINDOWS_OMAP_INTERNAL_H__ + +#include +#include + +#include + +namespace google_breakpad { + +// The OMAP struct is defined by debughlp.h, which doesn't play nicely with +// imagehlp.h. We simply redefine it. +struct OMAP { + DWORD rva; + DWORD rvaTo; +}; +static_assert(sizeof(OMAP) == 8, "Wrong size for OMAP structure."); +typedef std::vector OmapTable; + +// This contains the OMAP data extracted from an image. +struct OmapData { + // The table of OMAP entries describing the transformation from the + // original image to the transformed image. + OmapTable omap_from; + // The table of OMAP entries describing the transformation from the + // instrumented image to the original image. + OmapTable omap_to; + // The length of the original untransformed image. + DWORD length_original; + + OmapData() : length_original(0) { } +}; + +// This represents a range of addresses in an image. +struct AddressRange { + DWORD rva; + DWORD length; + + AddressRange() : rva(0), length(0) { } + AddressRange(DWORD rva, DWORD length) : rva(rva), length(length) { } + + // Returns the end address of this range. + DWORD end() const { return rva + length; } + + // Addreses only compare as less-than or greater-than if they are not + // overlapping. Otherwise, they compare equal. + int Compare(const AddressRange& rhs) const; + bool operator<(const AddressRange& rhs) const { return Compare(rhs) == -1; } + bool operator>(const AddressRange& rhs) const { return Compare(rhs) == 1; } + + // Equality operators compare exact values. + bool operator==(const AddressRange& rhs) const { + return rva == rhs.rva && length == rhs.length; + } + bool operator!=(const AddressRange& rhs) const { return !((*this) == rhs); } +}; + +typedef std::vector AddressRangeVector; + +// This represents an address range in an original image, and its corresponding +// range in the transformed image. +struct MappedRange { + // An address in the original image. + DWORD rva_original; + // The corresponding addresses in the transformed image. + DWORD rva_transformed; + // The length of the address range. + DWORD length; + // It is possible for code to be injected into a transformed image, for which + // there is no corresponding code in the original image. If this range of + // transformed image is immediately followed by such injected code we maintain + // a record of its length here. + DWORD injected; + // It is possible for code to be removed from the original image. This happens + // for things like padding between blocks. There is no actual content lost, + // but the spacing between items may be lost. This keeps track of any removed + // content immediately following the |original| range. + DWORD removed; +}; +// A vector of mapped ranges is used as a more useful representation of +// OMAP data. +typedef std::vector Mapping; + +// Used as a secondary search structure accompanying a Mapping. +struct EndpointIndex { + DWORD endpoint; + size_t index; +}; +typedef std::vector EndpointIndexMap; + +// An ImageMap is vector of mapped ranges, plus a secondary index into it for +// doing interval searches. (An interval tree would also work, but is overkill +// because we don't need insertion and deletion.) +struct ImageMap { + // This is a description of the mapping between original and transformed + // image, sorted by addresses in the original image. + Mapping mapping; + // For all interval endpoints in |mapping| this stores the minimum index of + // an interval in |mapping| that contains the endpoint. Useful for doing + // interval intersection queries. + EndpointIndexMap endpoint_index_map; +}; + +} // namespace google_breakpad + +#endif // COMMON_WINDOWS_OMAP_INTERNAL_H__ diff --git a/src/common/windows/omap_unittest.cc b/src/common/windows/omap_unittest.cc new file mode 100644 index 00000000..960a33f4 --- /dev/null +++ b/src/common/windows/omap_unittest.cc @@ -0,0 +1,330 @@ +// Copyright 2013 Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Unittests for OMAP related functions. + +#include "common/windows/omap.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace google_breakpad { + +// Equality operators for ContainerEq. These must be outside of the anonymous +// namespace in order for them to be found. +bool operator==(const MappedRange& mr1, const MappedRange& mr2) { + return mr1.rva_original == mr2.rva_original && + mr1.rva_transformed == mr2.rva_transformed && + mr1.length == mr2.length && + mr1.injected == mr2.injected && + mr1.removed == mr2.removed; +} +bool operator==(const EndpointIndex& ei1, const EndpointIndex& ei2) { + return ei1.endpoint == ei2.endpoint && ei1.index == ei2.index; +} + +// Pretty printers for more meaningful error messages. Also need to be outside +// the anonymous namespace. +std::ostream& operator<<(std::ostream& os, const MappedRange& mr) { + os << "MappedRange(rva_original=" << mr.rva_original + << ", rva_transformed=" << mr.rva_transformed + << ", length=" << mr.length + << ", injected=" << mr.injected + << ", removed=" << mr.removed << ")"; + return os; +} +std::ostream& operator<<(std::ostream& os, const EndpointIndex& ei) { + os << "EndpointIndex(endpoint=" << ei.endpoint + << ", index=" << ei.index << ")"; + return os; +} +std::ostream& operator<<(std::ostream& os, const AddressRange& ar) { + os << "AddressRange(rva=" << ar.rva << ", length=" << ar.length << ")"; + return os; +} + +namespace { + +OMAP CreateOmap(DWORD rva, DWORD rvaTo) { + OMAP o = { rva, rvaTo }; + return o; +} + +MappedRange CreateMappedRange(DWORD rva_original, + DWORD rva_transformed, + DWORD length, + DWORD injected, + DWORD removed) { + MappedRange mr = { rva_original, rva_transformed, length, injected, removed }; + return mr; +} + +EndpointIndex CreateEndpointIndex(DWORD endpoint, size_t index) { + EndpointIndex ei = { endpoint, index }; + return ei; +} + +// (C is removed) +// Original : A B C D E F G H +// Transformed: A B D F E * H1 G1 G2 H2 +// (* is injected, G is copied, H is split) +// A is implied. + +// Layout of the original image. +const AddressRange B(100, 15); +const AddressRange C(B.end(), 10); +const AddressRange D(C.end(), 25); +const AddressRange E(D.end(), 10); +const AddressRange F(E.end(), 40); +const AddressRange G(F.end(), 3); +const AddressRange H(G.end(), 7); + +// Layout of the transformed image. +const AddressRange Bt(100, 15); +const AddressRange Dt(Bt.end(), 20); // D is shortened. +const AddressRange Ft(Dt.end(), F.length); +const AddressRange Et(Ft.end(), E.length); +const AddressRange injected(Et.end(), 5); +const AddressRange H1t(injected.end(), 4); // H is split. +const AddressRange G1t(H1t.end(), G.length); // G is copied. +const AddressRange G2t(G1t.end(), G.length); // G is copied. +const AddressRange H2t(G2t.end(), 3); // H is split. + +class BuildImageMapTest : public testing::Test { + public: + static const DWORD kInvalidAddress = 0xFFFFFFFF; + + void InitOmapData() { + omap_data.length_original = H.end(); + + // Build the OMAPTO vector (from transformed to original). + omap_data.omap_to.push_back(CreateOmap(Bt.rva, B.rva)); + omap_data.omap_to.push_back(CreateOmap(Dt.rva, D.rva)); + omap_data.omap_to.push_back(CreateOmap(Ft.rva, F.rva)); + omap_data.omap_to.push_back(CreateOmap(Et.rva, E.rva)); + omap_data.omap_to.push_back(CreateOmap(injected.rva, kInvalidAddress)); + omap_data.omap_to.push_back(CreateOmap(H1t.rva, H.rva)); + omap_data.omap_to.push_back(CreateOmap(G1t.rva, G.rva)); + omap_data.omap_to.push_back(CreateOmap(G2t.rva, G.rva)); + omap_data.omap_to.push_back(CreateOmap(H2t.rva, H.rva + H1t.length)); + omap_data.omap_to.push_back(CreateOmap(H2t.end(), kInvalidAddress)); + + // Build the OMAPFROM vector (from original to transformed). + omap_data.omap_from.push_back(CreateOmap(B.rva, Bt.rva)); + omap_data.omap_from.push_back(CreateOmap(C.rva, kInvalidAddress)); + omap_data.omap_from.push_back(CreateOmap(D.rva, Dt.rva)); + omap_data.omap_from.push_back(CreateOmap(E.rva, Et.rva)); + omap_data.omap_from.push_back(CreateOmap(F.rva, Ft.rva)); + omap_data.omap_from.push_back(CreateOmap(G.rva, G1t.rva)); + omap_data.omap_from.push_back(CreateOmap(H.rva, H1t.rva)); + omap_data.omap_from.push_back(CreateOmap(H.rva + H1t.length, H2t.rva)); + omap_data.omap_from.push_back(CreateOmap(H.end(), kInvalidAddress)); + } + + OmapData omap_data; +}; + +} // namespace + +TEST_F(BuildImageMapTest, EmptyImageMapOnEmptyOmapData) { + ASSERT_EQ(0u, omap_data.omap_from.size()); + ASSERT_EQ(0u, omap_data.omap_to.size()); + ASSERT_EQ(0u, omap_data.length_original); + + ImageMap image_map; + BuildImageMap(omap_data, &image_map); + EXPECT_EQ(0u, image_map.mapping.size()); + EXPECT_EQ(0u, image_map.endpoint_index_map.size()); +} + +TEST_F(BuildImageMapTest, ImageMapIsCorrect) { + InitOmapData(); + ASSERT_LE(0u, omap_data.omap_from.size()); + ASSERT_LE(0u, omap_data.omap_to.size()); + ASSERT_LE(0u, omap_data.length_original); + + ImageMap image_map; + BuildImageMap(omap_data, &image_map); + EXPECT_LE(9u, image_map.mapping.size()); + EXPECT_LE(9u, image_map.endpoint_index_map.size()); + + Mapping mapping; + mapping.push_back(CreateMappedRange(0, 0, B.rva, 0, 0)); + // C is removed, and it originally comes immediately after B. + mapping.push_back(CreateMappedRange(B.rva, Bt.rva, B.length, 0, C.length)); + // D is shortened by a length of 5. + mapping.push_back(CreateMappedRange(D.rva, Dt.rva, Dt.length, 0, 5)); + // The injected content comes immediately after E in the transformed image. + mapping.push_back(CreateMappedRange(E.rva, Et.rva, E.length, injected.length, + 0)); + mapping.push_back(CreateMappedRange(F.rva, Ft.rva, F.length, 0, 0)); + // G is copied so creates two entries. + mapping.push_back(CreateMappedRange(G.rva, G1t.rva, G.length, 0, 0)); + mapping.push_back(CreateMappedRange(G.rva, G2t.rva, G.length, 0, 0)); + // H is split, so create two entries. + mapping.push_back(CreateMappedRange(H.rva, H1t.rva, H1t.length, 0, 0)); + mapping.push_back(CreateMappedRange(H.rva + H1t.length, H2t.rva, H2t.length, + 0, 0)); + EXPECT_THAT(mapping, + testing::ContainerEq(image_map.mapping)); + + EndpointIndexMap endpoint_index_map; + endpoint_index_map.push_back(CreateEndpointIndex(0, 0)); + endpoint_index_map.push_back(CreateEndpointIndex(B.rva, 1)); + endpoint_index_map.push_back(CreateEndpointIndex(D.rva, 2)); + endpoint_index_map.push_back(CreateEndpointIndex(E.rva, 3)); + endpoint_index_map.push_back(CreateEndpointIndex(F.rva, 4)); + // G is duplicated so 2 ranges map back to it, hence the skip from 5 to 7. + endpoint_index_map.push_back(CreateEndpointIndex(G.rva, 5)); + // H is split so we expect 2 endpoints to show up attributed to it. + endpoint_index_map.push_back(CreateEndpointIndex(H.rva, 7)); + endpoint_index_map.push_back(CreateEndpointIndex(H.rva + H1t.length, 8)); + endpoint_index_map.push_back(CreateEndpointIndex(H.end(), 9)); + EXPECT_THAT(endpoint_index_map, + testing::ContainerEq(image_map.endpoint_index_map)); +} + +namespace { + +class MapAddressRangeTest : public BuildImageMapTest { + public: + typedef BuildImageMapTest Super; + virtual void SetUp() { + Super::SetUp(); + InitOmapData(); + BuildImageMap(omap_data, &image_map); + } + + ImageMap image_map; + + private: + using BuildImageMapTest::InitOmapData; + using BuildImageMapTest::omap_data; +}; + +} // namespace + +TEST_F(MapAddressRangeTest, EmptyImageMapReturnsIdentity) { + ImageMap im; + AddressRangeVector mapped_ranges; + AddressRange ar(0, 1024); + MapAddressRange(im, ar, &mapped_ranges); + EXPECT_EQ(1u, mapped_ranges.size()); + EXPECT_EQ(ar, mapped_ranges[0]); +} + +TEST_F(MapAddressRangeTest, MapOutOfImage) { + AddressRangeVector mapped_ranges; + MapAddressRange(image_map, AddressRange(H.end() + 10, 10), &mapped_ranges); + EXPECT_EQ(0u, mapped_ranges.size()); +} + +TEST_F(MapAddressRangeTest, MapIdentity) { + AddressRangeVector mapped_ranges; + MapAddressRange(image_map, B, &mapped_ranges); + EXPECT_EQ(1u, mapped_ranges.size()); + EXPECT_THAT(mapped_ranges, testing::ElementsAre(B)); +} + +TEST_F(MapAddressRangeTest, MapReorderedContiguous) { + AddressRangeVector mapped_ranges; + + AddressRange DEF(D.rva, F.end() - D.rva); + MapAddressRange(image_map, DEF, &mapped_ranges); + EXPECT_EQ(1u, mapped_ranges.size()); + + AddressRange DFEt(Dt.rva, Et.end() - Dt.rva); + EXPECT_THAT(mapped_ranges, testing::ElementsAre(DFEt)); +} + +TEST_F(MapAddressRangeTest, MapEmptySingle) { + AddressRangeVector mapped_ranges; + MapAddressRange(image_map, AddressRange(D.rva, 0), &mapped_ranges); + EXPECT_EQ(1u, mapped_ranges.size()); + EXPECT_THAT(mapped_ranges, testing::ElementsAre(AddressRange(Dt.rva, 0))); +} + +TEST_F(MapAddressRangeTest, MapEmptyCopied) { + AddressRangeVector mapped_ranges; + MapAddressRange(image_map, AddressRange(G.rva, 0), &mapped_ranges); + EXPECT_EQ(2u, mapped_ranges.size()); + EXPECT_THAT(mapped_ranges, testing::ElementsAre(AddressRange(G1t.rva, 0), + AddressRange(G2t.rva, 0))); +} + +TEST_F(MapAddressRangeTest, MapCopiedContiguous) { + AddressRangeVector mapped_ranges; + MapAddressRange(image_map, G, &mapped_ranges); + EXPECT_EQ(1u, mapped_ranges.size()); + EXPECT_THAT(mapped_ranges, testing::ElementsAre( + AddressRange(G1t.rva, G2t.end() - G1t.rva))); +} + +TEST_F(MapAddressRangeTest, MapSplitDiscontiguous) { + AddressRangeVector mapped_ranges; + MapAddressRange(image_map, H, &mapped_ranges); + EXPECT_EQ(2u, mapped_ranges.size()); + EXPECT_THAT(mapped_ranges, testing::ElementsAre(H1t, H2t)); +} + +TEST_F(MapAddressRangeTest, MapInjected) { + AddressRangeVector mapped_ranges; + + AddressRange EFGH(E.rva, H.end() - E.rva); + MapAddressRange(image_map, EFGH, &mapped_ranges); + EXPECT_EQ(1u, mapped_ranges.size()); + + AddressRange FEHGGHt(Ft.rva, H2t.end() - Ft.rva); + EXPECT_THAT(mapped_ranges, testing::ElementsAre(FEHGGHt)); +} + +TEST_F(MapAddressRangeTest, MapRemovedEntirely) { + AddressRangeVector mapped_ranges; + MapAddressRange(image_map, C, &mapped_ranges); + EXPECT_EQ(0u, mapped_ranges.size()); +} + +TEST_F(MapAddressRangeTest, MapRemovedPartly) { + AddressRangeVector mapped_ranges; + MapAddressRange(image_map, D, &mapped_ranges); + EXPECT_EQ(1u, mapped_ranges.size()); + EXPECT_THAT(mapped_ranges, testing::ElementsAre(Dt)); +} + +TEST_F(MapAddressRangeTest, MapFull) { + AddressRangeVector mapped_ranges; + + AddressRange AH(0, H.end()); + MapAddressRange(image_map, AH, &mapped_ranges); + EXPECT_EQ(1u, mapped_ranges.size()); + + AddressRange AHt(0, H2t.end()); + EXPECT_THAT(mapped_ranges, testing::ElementsAre(AHt)); +} + +} // namespace google_breakpad diff --git a/src/common/windows/pdb_source_line_writer.cc b/src/common/windows/pdb_source_line_writer.cc index 8d8e55c9..dba2ea76 100644 --- a/src/common/windows/pdb_source_line_writer.cc +++ b/src/common/windows/pdb_source_line_writer.cc @@ -27,15 +27,18 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include "common/windows/pdb_source_line_writer.h" + +#include +#include #include #include #include #include -#include "common/windows/string_utils-inl.h" - -#include "common/windows/pdb_source_line_writer.h" +#include "common/windows/dia_util.h" #include "common/windows/guid_string.h" +#include "common/windows/string_utils-inl.h" // This constant may be missing from DbgHelp.h. See the documentation for // IDiaSymbol::get_undecoratedNameEx. @@ -45,6 +48,8 @@ namespace google_breakpad { +namespace { + using std::vector; // A helper class to scope a PLOADED_IMAGE. @@ -63,6 +68,8 @@ class AutoImage { PLOADED_IMAGE img_; }; +} // namespace + PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) { } @@ -109,7 +116,7 @@ bool PDBSourceLineWriter::Open(const wstring &file, FileFormat format) { fprintf(stderr, "loadDataForPdb and loadDataFromExe failed for %ws\n", file.c_str()); return false; } - code_file_ = file; + code_file_ = file; } break; default: @@ -157,7 +164,12 @@ bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) { return false; } - fprintf(output_, "%x %x %d %d\n", rva, length, line_num, source_id); + AddressRangeVector ranges; + MapAddressRange(image_map_, AddressRange(rva, length), &ranges); + for (size_t i = 0; i < ranges.size(); ++i) { + fprintf(output_, "%x %x %d %d\n", ranges[i].rva, ranges[i].length, + line_num, source_id); + } line.Release(); } return true; @@ -196,8 +208,13 @@ bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function, stack_param_size = GetFunctionStackParamSize(function); } - fprintf(output_, "FUNC %x %" WIN_STRING_FORMAT_LL "x %x %ws\n", - rva, length, stack_param_size, name); + AddressRangeVector ranges; + MapAddressRange(image_map_, AddressRange(rva, static_cast(length)), + &ranges); + for (size_t i = 0; i < ranges.size(); ++i) { + fprintf(output_, "FUNC %x %x %x %ws\n", + ranges[i].rva, ranges[i].length, stack_param_size, name); + } CComPtr lines; if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) { @@ -369,22 +386,8 @@ bool PDBSourceLineWriter::PrintFrameData() { // associated function, as is done with line numbers, but the DIA API // doesn't make it possible to get the frame data in that way. - CComPtr tables; - if (FAILED(session_->getEnumTables(&tables))) - return false; - - // Pick up the first table that supports IDiaEnumFrameData. CComPtr frame_data_enum; - CComPtr table; - ULONG count; - while (!frame_data_enum && - SUCCEEDED(tables->Next(1, &table, &count)) && - count == 1) { - table->QueryInterface(_uuidof(IDiaEnumFrameData), - reinterpret_cast(&frame_data_enum)); - table.Release(); - } - if (!frame_data_enum) + if (!FindTable(session_, &frame_data_enum)) return false; DWORD last_type = -1; @@ -393,6 +396,7 @@ bool PDBSourceLineWriter::PrintFrameData() { DWORD last_prolog_size = -1; CComPtr frame_data; + ULONG count = 0; while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) && count == 1) { DWORD type; @@ -411,9 +415,6 @@ bool PDBSourceLineWriter::PrintFrameData() { if (FAILED(frame_data->get_lengthProlog(&prolog_size))) return false; - // epliog_size is always 0. - DWORD epilog_size = 0; - // parameter_size is the size of parameters passed on the stack. If any // parameters are not passed on the stack (such as in registers), their // sizes will not be included in parameter_size. @@ -460,14 +461,67 @@ bool PDBSourceLineWriter::PrintFrameData() { // this check reduces the size of the dumped symbol file by a third. if (type != last_type || rva != last_rva || code_size != last_code_size || prolog_size != last_prolog_size) { - fprintf(output_, "STACK WIN %x %x %x %x %x %x %x %x %x %d ", - type, rva, code_size, prolog_size, epilog_size, - parameter_size, saved_register_size, local_size, max_stack_size, - program_string_result == S_OK); - if (program_string_result == S_OK) { - fprintf(output_, "%ws\n", program_string); + // The prolog and the code portions of the frame have to be treated + // independently as they may have independently changed in size, or may + // even have been split. + // NOTE: If epilog size is ever non-zero, we have to do something + // similar with it. + + // Figure out where the prolog bytes have landed. + AddressRangeVector prolog_ranges; + if (prolog_size > 0) { + MapAddressRange(image_map_, AddressRange(rva, prolog_size), + &prolog_ranges); + } + + // And figure out where the code bytes have landed. + AddressRangeVector code_ranges; + MapAddressRange(image_map_, + AddressRange(rva + prolog_size, + code_size - prolog_size), + &code_ranges); + + struct FrameInfo { + DWORD rva; + DWORD code_size; + DWORD prolog_size; + }; + std::vector frame_infos; + + // Special case: The prolog and the code bytes remain contiguous. This is + // only done for compactness of the symbol file, and we could actually + // be outputting independent frame info for the prolog and code portions. + if (prolog_ranges.size() == 1 && code_ranges.size() == 1 && + prolog_ranges[0].end() == code_ranges[0].rva) { + FrameInfo fi = { prolog_ranges[0].rva, + prolog_ranges[0].length + code_ranges[0].length, + prolog_ranges[0].length }; + frame_infos.push_back(fi); } else { - fprintf(output_, "%d\n", allocates_base_pointer); + // Otherwise we output the prolog and code frame info independently. + for (size_t i = 0; i < prolog_ranges.size(); ++i) { + FrameInfo fi = { prolog_ranges[i].rva, + prolog_ranges[i].length, + prolog_ranges[i].length }; + frame_infos.push_back(fi); + } + for (size_t i = 0; i < code_ranges.size(); ++i) { + FrameInfo fi = { code_ranges[i].rva, code_ranges[i].length, 0 }; + frame_infos.push_back(fi); + } + } + + for (size_t i = 0; i < frame_infos.size(); ++i) { + const FrameInfo& fi(frame_infos[i]); + fprintf(output_, "STACK WIN %x %x %x %x %x %x %x %x %x %d ", + type, fi.rva, fi.code_size, fi.prolog_size, + 0 /* epilog_size */, parameter_size, saved_register_size, + local_size, max_stack_size, program_string_result == S_OK); + if (program_string_result == S_OK) { + fprintf(output_, "%ws\n", program_string); + } else { + fprintf(output_, "%d\n", allocates_base_pointer); + } } last_type = type; @@ -502,8 +556,12 @@ bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol) { return false; } - fprintf(output_, "PUBLIC %x %x %ws\n", rva, - stack_param_size > 0 ? stack_param_size : 0, name); + AddressRangeVector ranges; + MapAddressRange(image_map_, AddressRange(rva, 1), &ranges); + for (size_t i = 0; i < ranges.size(); ++i) { + fprintf(output_, "PUBLIC %x %x %ws\n", ranges[i].rva, + stack_param_size > 0 ? stack_param_size : 0, name); + } return true; } @@ -530,8 +588,8 @@ bool PDBSourceLineWriter::PrintPEInfo() { } fprintf(output_, "INFO CODE_ID %ws %ws\n", - info.code_identifier.c_str(), - info.code_file.c_str()); + info.code_identifier.c_str(), + info.code_file.c_str()); return true; } @@ -588,12 +646,12 @@ bool PDBSourceLineWriter::FindPEFile() { for (int i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) { size_t dot_pos = file.find_last_of(L"."); if (dot_pos != wstring::npos) { - file.replace(dot_pos + 1, wstring::npos, extensions[i]); - // Check if this file exists. - if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) { - code_file_ = file; - return true; - } + file.replace(dot_pos + 1, wstring::npos, extensions[i]); + // Check if this file exists. + if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) { + code_file_ = file; + return true; + } } } } @@ -803,13 +861,20 @@ next_child: bool PDBSourceLineWriter::WriteMap(FILE *map_file) { output_ = map_file; + // Load the OMAP information, and disable auto-translation of addresses in + // preference of doing it ourselves. + OmapData omap_data; + if (!GetOmapDataAndDisableTranslation(session_, &omap_data)) + return false; + BuildImageMap(omap_data, &image_map_); + bool ret = PrintPDBInfo(); // This is not a critical piece of the symbol file. PrintPEInfo(); ret = ret && - PrintSourceFiles() && - PrintFunctions() && - PrintFrameData(); + PrintSourceFiles() && + PrintFunctions() && + PrintFrameData(); output_ = NULL; return ret; @@ -956,8 +1021,8 @@ bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo *info) { } wchar_t code_identifier[32]; swprintf(code_identifier, - sizeof(code_identifier) / sizeof(code_identifier[0]), - L"%08X%X", TimeDateStamp, SizeOfImage); + sizeof(code_identifier) / sizeof(code_identifier[0]), + L"%08X%X", TimeDateStamp, SizeOfImage); info->code_identifier = code_identifier; return true; diff --git a/src/common/windows/pdb_source_line_writer.h b/src/common/windows/pdb_source_line_writer.h index ef2a27db..be6121c3 100644 --- a/src/common/windows/pdb_source_line_writer.h +++ b/src/common/windows/pdb_source_line_writer.h @@ -38,6 +38,8 @@ #include #include +#include "common/windows/omap.h" + struct IDiaEnumLineNumbers; struct IDiaSession; struct IDiaSymbol; @@ -228,6 +230,9 @@ class PDBSourceLineWriter { // This maps unique filenames to file IDs. hash_map unique_files_; + // This is used for calculating post-transform symbol addresses and lengths. + ImageMap image_map_; + // Disallow copy ctor and operator= PDBSourceLineWriter(const PDBSourceLineWriter&); void operator=(const PDBSourceLineWriter&); -- cgit v1.2.1