diff options
author | Sterling Augustine <saugustine@google.com> | 2020-06-30 13:59:15 -0700 |
---|---|---|
committer | Sterling Augustine <saugustine@google.com> | 2020-07-14 00:46:58 +0000 |
commit | a7410275336a40323bac607a3aac97cc2a1fc491 (patch) | |
tree | bb6735781d669611b130e9fe59dbfa9974e548d4 /src/common/dwarf | |
parent | Move GetSectionByName out of CompilationUnit. (diff) | |
download | breakpad-a7410275336a40323bac607a3aac97cc2a1fc491.tar.xz |
Add support for dwarf5 line tables.
Change-Id: I2c0cd0e7163502e52fbf0745b611befb2e219071
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/2276193
Reviewed-by: Sterling Augustine <saugustine@google.com>
Diffstat (limited to 'src/common/dwarf')
-rw-r--r-- | src/common/dwarf/dwarf2enums.h | 8 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2reader.cc | 248 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2reader.h | 36 | ||||
-rw-r--r-- | src/common/dwarf/dwarf2reader_lineinfo_unittest.cc | 187 | ||||
-rw-r--r-- | src/common/dwarf/functioninfo.cc | 12 |
5 files changed, 445 insertions, 46 deletions
diff --git a/src/common/dwarf/dwarf2enums.h b/src/common/dwarf/dwarf2enums.h
index 2b93aba7..7bd39792 100644
--- a/src/common/dwarf/dwarf2enums.h
+++ b/src/common/dwarf/dwarf2enums.h
@@ -316,6 +316,14 @@ enum DwarfAttribute {
   DW_AT_PGI_lstride = 0x3a02
 };
 
+// Line number content type codes (DWARF 5).
+enum DwarfLineNumberContentType {
+  DW_LNCT_path = 1,
+  DW_LNCT_directory_index = 2,
+  DW_LNCT_timestamp = 3,
+  DW_LNCT_size = 4,
+  DW_LNCT_MD5 = 5,
+};
 
 // Line number opcodes.
 enum DwarfLineNumberOps {
diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc
index a7eedaa1..ad82a5f4 100644
--- a/src/common/dwarf/dwarf2reader.cc
+++ b/src/common/dwarf/dwarf2reader.cc
@@ -1039,10 +1039,17 @@ uint32_t DwpReader::LookupCUv2(uint64_t dwo_id) {
 }
 
 LineInfo::LineInfo(const uint8_t *buffer, uint64_t buffer_length,
-                   ByteReader* reader, LineInfoHandler* handler):
-    handler_(handler), reader_(reader), buffer_(buffer) {
+                   ByteReader* reader, const uint8_t* string_buffer,
+                   size_t string_buffer_length,
+                   const uint8_t* line_string_buffer,
+                   size_t line_string_buffer_length, LineInfoHandler* handler):
+    handler_(handler), reader_(reader), buffer_(buffer),
+    string_buffer_(string_buffer),
+    line_string_buffer_(line_string_buffer) {
 #ifndef NDEBUG
   buffer_length_ = buffer_length;
+  string_buffer_length_ = string_buffer_length;
+  line_string_buffer_length_ = line_string_buffer_length;
 #endif
   header_.std_opcode_lengths = NULL;
 }
@@ -1053,6 +1060,128 @@ uint64_t LineInfo::Start() {
   return after_header_ - buffer_;
 }
 
+void LineInfo::ReadTypesAndForms(const uint8_t** lineptr,
+                                 uint32_t* content_types,
+                                 uint32_t* content_forms,
+                                 uint32_t max_types,
+                                 uint32_t* format_count) {
+  size_t len;
+  *format_count = 0;  // Keep the out-param defined if the count is rejected.
+  uint32_t count = reader_->ReadUnsignedLEB128(*lineptr, &len);
+  *lineptr += len;
+  if (count < 1 || count > max_types) {
+    return;
+  }
+  for (uint32_t col = 0; col < count; ++col) {
+    content_types[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
+    *lineptr += len;
+    content_forms[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
+    *lineptr += len;
+  }
+  *format_count = count;
+}
+
+const char* LineInfo::ReadStringForm(uint32_t form, const uint8_t** lineptr) {
+  const char* name = nullptr;
+  if (form == DW_FORM_string) {
+    name = reinterpret_cast<const char*>(*lineptr);
+    *lineptr += strlen(name) + 1;
+    return name;
+  } else if (form == DW_FORM_strp) {
+    uint64_t offset = reader_->ReadOffset(*lineptr);
+    assert(offset < string_buffer_length_);
+    *lineptr += reader_->OffsetSize();
+    if (string_buffer_ != nullptr) {
+      name = reinterpret_cast<const char*>(string_buffer_) + offset;
+      return name;
+    }
+  } else if (form == DW_FORM_line_strp) {
+    uint64_t offset = reader_->ReadOffset(*lineptr);
+    assert(offset < line_string_buffer_length_);
+    *lineptr += reader_->OffsetSize();
+    if (line_string_buffer_ != nullptr) {
+      name = reinterpret_cast<const char*>(line_string_buffer_) + offset;
+      return name;
+    }
+  }
+  // Shouldn't be called with a non-string-form, and
+  // if there is a string form but no string buffer,
+  // that is a problem too.
+  assert(0);
+  return nullptr;
+}
+
+uint64_t LineInfo::ReadUnsignedData(uint32_t form, const uint8_t** lineptr) {
+  size_t len;
+  uint64_t value;
+
+  switch (form) {
+    case DW_FORM_data1:
+      value = reader_->ReadOneByte(*lineptr);
+      *lineptr += 1;
+      return value;
+    case DW_FORM_data2:
+      value = reader_->ReadTwoBytes(*lineptr);
+      *lineptr += 2;
+      return value;
+    case DW_FORM_data4:
+      value = reader_->ReadFourBytes(*lineptr);
+      *lineptr += 4;
+      return value;
+    case DW_FORM_data8:
+      value = reader_->ReadEightBytes(*lineptr);
+      *lineptr += 8;
+      return value;
+    case DW_FORM_udata:
+      value = reader_->ReadUnsignedLEB128(*lineptr, &len);
+      *lineptr += len;
+      return value;
+    default:
+      fprintf(stderr, "Unrecognized data form.");
+      return 0;
+  }
+}
+
+void LineInfo::ReadFileRow(const uint8_t** lineptr,
+                           const uint32_t* content_types,
+                           const uint32_t* content_forms, uint32_t row,
+                           uint32_t format_count) {
+  const char* filename = nullptr;
+  uint64_t dirindex = 0;
+  uint64_t mod_time = 0;
+  uint64_t filelength = 0;
+
+  for (uint32_t col = 0; col < format_count; ++col) {
+    switch (content_types[col]) {
+      case DW_LNCT_path:
+        filename = ReadStringForm(content_forms[col], lineptr);
+        break;
+      case DW_LNCT_directory_index:
+        dirindex = ReadUnsignedData(content_forms[col], lineptr);
+        break;
+      case DW_LNCT_timestamp:
+        mod_time = ReadUnsignedData(content_forms[col], lineptr);
+        break;
+      case DW_LNCT_size:
+        filelength = ReadUnsignedData(content_forms[col], lineptr);
+        break;
+      case DW_LNCT_MD5:
+        // MD5 entries help a debugger sort different versions of files with
+        // the same name. It is always paired with a DW_FORM_data16 and is
+        // unused in this case.
+        *lineptr += 16;  // Advance the read cursor, not the local pointer.
+        break;
+      default:
+        fprintf(stderr, "Unrecognized form in line table header. %u\n",
+                content_types[col]);
+        assert(false);
+        break;
+    }
+  }
+  assert(filename != nullptr);
+  handler_->DefineFile(filename, row, dirindex, mod_time, filelength);
+}
+
 // The header for a debug_line section is mildly complicated, because
 // the line info is very tightly encoded.
 void LineInfo::ReadHeader() {
@@ -1067,12 +1196,24 @@ void LineInfo::ReadHeader() {
   assert(buffer_ + initial_length_size + header_.total_length <=
         buffer_ + buffer_length_);
 
-  // Address size *must* be set by CU ahead of time.
-  assert(reader_->AddressSize() != 0);
 
   header_.version = reader_->ReadTwoBytes(lineptr);
   lineptr += 2;
 
+  if (header_.version >= 5) {
+    uint8_t address_size = reader_->ReadOneByte(lineptr);
+    reader_->SetAddressSize(address_size);
+    lineptr += 1;
+    uint8_t segment_selector_size = reader_->ReadOneByte(lineptr);
+    if (segment_selector_size != 0) {
+      fprintf(stderr,"No support for segmented memory.");
+    }
+    lineptr += 1;
+  } else {
+    // Address size *must* be set by CU ahead of time.
+    assert(reader_->AddressSize() != 0);
+  }
+
   header_.prologue_length = reader_->ReadOffset(lineptr);
   lineptr += reader_->OffsetSize();
 
@@ -1106,41 +1247,84 @@ void LineInfo::ReadHeader() {
     lineptr += 1;
   }
 
-  // It is legal for the directory entry table to be empty.
-  if (*lineptr) {
-    uint32_t dirindex = 1;
-    while (*lineptr) {
-      const char *dirname = reinterpret_cast<const char *>(lineptr);
-      handler_->DefineDir(dirname, dirindex);
-      lineptr += strlen(dirname) + 1;
-      dirindex++;
+  if (header_.version <= 4) {
+    // Directory zero is assumed to be the compilation directory and special
+    // cased where used. It is not actually stored in the dwarf data. But an
+    // empty entry here avoids off-by-one errors elsewhere in the code.
+    handler_->DefineDir("", 0);
+    // It is legal for the directory entry table to be empty.
+    if (*lineptr) {
+      uint32_t dirindex = 1;
+      while (*lineptr) {
+        const char* dirname = reinterpret_cast<const char*>(lineptr);
+        handler_->DefineDir(dirname, dirindex);
+        lineptr += strlen(dirname) + 1;
+        dirindex++;
+      }
     }
-  }
-  lineptr++;
-
-  // It is also legal for the file entry table to be empty.
-  if (*lineptr) {
-    uint32_t fileindex = 1;
+    lineptr++;
+    // It is also legal for the file entry table to be empty.
+
+    // Similarly for file zero.
+    handler_->DefineFile("", 0, 0, 0, 0);
+    if (*lineptr) {
+      uint32_t fileindex = 1;
+      size_t len;
+      while (*lineptr) {
+        const char* filename = ReadStringForm(DW_FORM_string, &lineptr);
+
+        uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
+        lineptr += len;
+
+        uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
+        lineptr += len;
+
+        uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
+        lineptr += len;
+        handler_->DefineFile(filename, fileindex,
+                             static_cast<uint32_t>(dirindex), mod_time,
+                             filelength);
+        fileindex++;
+      }
+    }
+    lineptr++;
+  } else {
+    // Read the DWARF-5 directory table.
+
+    // Dwarf5 supports five different types and forms per directory- and
+    // file-table entry. Theoretically, there could be duplicate entries
+    // in this table, but that would be quite unusual.
+    static const uint32_t kMaxTypesAndForms = 5;
+    uint32_t content_types[kMaxTypesAndForms];
+    uint32_t content_forms[kMaxTypesAndForms];
+    uint32_t format_count = 0;
     size_t len;
-    while (*lineptr) {
-      const char *filename = reinterpret_cast<const char *>(lineptr);
-      lineptr += strlen(filename) + 1;
 
-      uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
-      lineptr += len;
+    ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
+                      &format_count);
+    uint32_t entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
+    lineptr += len;
+    for (uint32_t row = 0; row < entry_count; ++row) {
+      const char* dirname = nullptr;
+      for (uint32_t col = 0; col < format_count; ++col) {
+        // The path is the only relevant content type for this implementation.
+        if (content_types[col] == DW_LNCT_path) {
+          dirname = ReadStringForm(content_forms[col], &lineptr);
+        }
+      }
+      handler_->DefineDir(dirname, row);
+    }
 
-      uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
-      lineptr += len;
+    // Read the DWARF-5 filename table.
+    ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
+                      &format_count);
+    entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
+    lineptr += len;
 
-      uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
-      lineptr += len;
-      handler_->DefineFile(filename, fileindex, static_cast<uint32_t>(dirindex),
-                           mod_time, filelength);
-      fileindex++;
+    for (uint32_t row = 0; row < entry_count; ++row) {
+      ReadFileRow(&lineptr, content_types, content_forms, row, format_count);
     }
   }
-  lineptr++;
-
   after_header_ = lineptr;
 }
 
diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h
index 228c867f..e52f74c5 100644
--- a/src/common/dwarf/dwarf2reader.h
+++ b/src/common/dwarf/dwarf2reader.h
@@ -98,8 +98,10 @@ class LineInfo {
   // to the beginning and length of the line information to read.
   // Reader is a ByteReader class that has the endianness set
   // properly.
-  LineInfo(const uint8_t *buffer_, uint64_t buffer_length,
-           ByteReader* reader, LineInfoHandler* handler);
+  LineInfo(const uint8_t* buffer, uint64_t buffer_length,
+           ByteReader* reader, const uint8_t* string_buffer,
+           size_t string_buffer_length, const uint8_t* line_string_buffer,
+           size_t line_string_buffer_length, LineInfoHandler* handler);
 
   virtual ~LineInfo() {
     if (header_.std_opcode_lengths) {
@@ -137,15 +139,32 @@ class LineInfo {
   // Reads the DWARF2/3 line information
   void ReadLines();
 
+  // Read the DWARF5 types and forms for the file and directory tables.
+  void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types,
+                         uint32_t* content_forms, uint32_t max_types,
+                         uint32_t* format_count);
+
+  // Read a row from the dwarf5 LineInfo file table.
+  void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types,
+                   const uint32_t* content_forms, uint32_t row,
+                   uint32_t format_count);
+
+  // Read and return the data at *lineptr according to form. Advance
+  // *lineptr appropriately.
+  uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr);
+
+  // Read and return the data at *lineptr according to form. Advance
+  // *lineptr appropriately.
+  const char* ReadStringForm(uint32_t form, const uint8_t** lineptr);
+
   // The associated handler to call processing functions in
   LineInfoHandler* handler_;
 
   // The associated ByteReader that handles endianness issues for us
   ByteReader* reader_;
 
-  // A DWARF2/3 line info header. This is not the same size as
-  // in the actual file, as the one in the file may have a 32 bit or
-  // 64 bit lengths
+  // A DWARF line info header. This is not the same size as in the actual file,
+  // as the one in the file may have a 32 bit or 64 bit lengths
 
   struct LineInfoHeader header_;
 
@@ -156,6 +175,13 @@ class LineInfo {
 #ifndef NDEBUG
   uint64_t buffer_length_;
 #endif
+  // Convenience pointers into .debug_str and .debug_line_str. These exactly
+  // correspond to those in the compilation unit.
+  const uint8_t* string_buffer_;
+  uint64_t string_buffer_length_;
+  const uint8_t* line_string_buffer_;
+  uint64_t line_string_buffer_length_;
+
   const uint8_t *after_header_;
 };
 
diff --git a/src/common/dwarf/dwarf2reader_lineinfo_unittest.cc b/src/common/dwarf/dwarf2reader_lineinfo_unittest.cc
new file mode 100644
index 00000000..99fc1a07
--- /dev/null
+++ b/src/common/dwarf/dwarf2reader_lineinfo_unittest.cc
@@ -0,0 +1,187 @@
+// Copyright (c) 2020, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Sterling Augustine <saugustine@google.com>
+
+// dwarf2reader_lineinfo_unittest.cc: Unit tests for dwarf2reader::LineInfo
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <string>
+#include <vector>
+
+#include "breakpad_googletest_includes.h"
+#include "common/dwarf/bytereader.h"
+#include "common/dwarf/dwarf2reader.h"
+#include "google_breakpad/common/breakpad_types.h"
+
+using std::vector;
+using testing::InSequence;
+using testing::Return;
+using testing::Sequence;
+using testing::Test;
+using testing::_;
+
+using namespace dwarf2reader;
+
+namespace {
+
+const uint8_t dwarf5_line_program[] = {
+  0x40, 0x0, 0x0, 0x0,  // unit_length (end - begin)
+  // begin
+  0x05, 0x0,  // version
+  0x8,  // address_size
+  0x0,  // segment_selector_size
+  0x25, 0x0, 0x0, 0x0,  // header_length (end_header_end - begin_header)
+  // begin_header:
+  0x1,  // minimum_instruction_length
+  0x1,  // maximum_operations_per_instruction
+  0x1,  // default_is_stmt
+  0xfb,  // line_base
+  0xe,  // line_range
+  0xd,  // opcode_base and lengths
+  0x0, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1,
+  0x1,  // directory entry format count
+  DW_LNCT_path, DW_FORM_strp,
+  0x1,  // directories count
+  0x1, 0x0, 0x0, 0x0,  // offset into .debug_str
+  0x2,  // file_name_entry_format_count
+  DW_LNCT_directory_index, DW_FORM_data1,
+  DW_LNCT_path, DW_FORM_line_strp,
+  0x1,  // filename count
+  0x0,  // directory index
+  0x1, 0x0, 0x0, 0x0,  // offset into .debug_line_str
+  // end_header
+  DW_LNS_set_file, 0x0,
+  // set address to 0x0
+  0x0, 0x9, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+  // Advance Address by 0 and line by 3
+  0x15,
+  // Advance PC by 1
+  0x2, 0x1,
+  0x0,
+  DW_LNE_end_sequence,
+  DW_LNE_end_sequence,
+  // end
+};
+
+const uint8_t dwarf4_line_program[] = {
+  0x37, 0x0, 0x0, 0x0,  // unit_length (end - begin)
+  // begin
+  0x04, 0x0,  // version
+  0x1e, 0x0, 0x0, 0x0,  // header_length (end_header - begin_header)
+  // begin_header:
+  0x1,  // minimum_instruction_length
+  0x1,  // maximum_operations_per_instruction
+  0x1,  // default_is_stmt
+  0xfb,  // line_base
+  0xe,  // line_range
+  0xd,  // opcode_base and lengths
+  0x0, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1,
+  '/', 'a', '\0',  // directory entry 1 (zeroth entry implied)
+  '\0',  // end of directory table
+  'b', '/', 'c', '\0',  // file entry 1 (zeroth entry implied)
+  0,  // file 1 directory
+  0,  // file 1 modification time
+  0,  // file 1 length
+  '\0',  // end of file table
+  // end_header
+  DW_LNS_set_file, 0x0,
+  // set address to 0x0
+  0x0, 0x9, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+  // Advance Address by 0 and line by 3
+  0x15,
+  // Advance PC by 1
+  0x2, 0x1,
+  0x0,
+  DW_LNE_end_sequence,
+  DW_LNE_end_sequence,
+  // end
+};
+
+class MockLineInfoHandler: public LineInfoHandler {
+ public:
+  MOCK_METHOD(void, DefineDir, (const string&, uint32_t dir_num), (override));
+  MOCK_METHOD(void, DefineFile, (const string& name, int32_t file_num,
+                                 uint32_t dir_num, uint64_t mod_time,
+                                 uint64_t length), (override));
+  MOCK_METHOD(void, AddLine, (uint64_t address, uint64_t length,
+                              uint32_t file_num, uint32_t line_num,
+                              uint32_t column_num), (override));
+};
+
+const uint8_t string_section[] = {'x', '/', 'a', '\0'};
+const uint8_t line_string_section[] = {'x', 'b', '/', 'c', '\0' };
+
+struct LineProgram: public Test {
+  MockLineInfoHandler handler_;
+};
+
+TEST_F(LineProgram, ReadLinesDwarf5) {
+  ByteReader byte_reader(ENDIANNESS_LITTLE);
+  // LineTables don't specify the offset size like Compilation Units do.
+  byte_reader.SetOffsetSize(4);
+  LineInfo line_reader(dwarf5_line_program,
+                       sizeof(dwarf5_line_program),
+                       &byte_reader,
+                       string_section,
+                       sizeof(string_section),
+                       line_string_section,
+                       sizeof(line_string_section),
+                       &handler_);
+  EXPECT_CALL(handler_, DefineDir("/a", 0)).Times(1);
+  EXPECT_CALL(handler_, DefineFile("b/c", 0, 0, 0, 0)).Times(1);
+  EXPECT_CALL(handler_, AddLine(0, 1, 0, 4, 0)).Times(1);
+  EXPECT_EQ(line_reader.Start(), sizeof(dwarf5_line_program));
+}
+
+TEST_F(LineProgram, ReadLinesDwarf4) {
+  ByteReader byte_reader(ENDIANNESS_LITTLE);
+  // LineTables don't specify the offset size like Compilation Units do.
+  byte_reader.SetOffsetSize(4);
+  // dwarf4 line info headers don't encode the address size.
+  byte_reader.SetAddressSize(8);
+  LineInfo line_reader(dwarf4_line_program,
+                       sizeof(dwarf4_line_program),
+                       &byte_reader,
+                       // dwarf4 line tables can't access the string sections
+                       // so pass values likely to make assertions fail if
+                       // the code uses them improperly.
+                       nullptr, 0, nullptr, 0,
+                       &handler_);
+  EXPECT_CALL(handler_, DefineDir("", 0)).Times(1);
+  EXPECT_CALL(handler_, DefineDir("/a", 1)).Times(1);
+  EXPECT_CALL(handler_, DefineFile("", 0, 0, 0, 0)).Times(1);
+  EXPECT_CALL(handler_, DefineFile("b/c", 1, 0, 0, 0)).Times(1);
+  EXPECT_CALL(handler_, AddLine(0, 1, 0, 4, 0)).Times(1);
+  EXPECT_EQ(line_reader.Start(), sizeof(dwarf4_line_program));
+}
+
+}  // anonymous namespace
diff --git a/src/common/dwarf/functioninfo.cc b/src/common/dwarf/functioninfo.cc
index edd1bb70..28c4f935 100644
--- a/src/common/dwarf/functioninfo.cc
+++ b/src/common/dwarf/functioninfo.cc
@@ -51,15 +51,9 @@ CULineInfoHandler::CULineInfoHandler(std::vector<SourceFileInfo>* files,
                                      LineMap* linemap):linemap_(linemap),
                                                        files_(files),
                                                        dirs_(dirs) {
-  // The dirs and files are 1 indexed, so just make sure we put
-  // nothing in the 0 vector.
-  assert(dirs->size() == 0);
-  assert(files->size() == 0);
-  dirs->push_back("");
-  SourceFileInfo s;
-  s.name = "";
-  s.lowpc = ULLONG_MAX;
-  files->push_back(s);
+  // In dwarf4, the dirs and files are 1 indexed, and in dwarf5 they are zero
+  // indexed. This is handled in the LineInfo reader, so empty files are not
+  // needed here.
 }
 
 void CULineInfoHandler::DefineDir(const string& name, uint32_t dir_num) {