diff options
author | jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e> | 2009-08-07 19:24:32 +0000 |
---|---|---|
committer | jimblandy@gmail.com <jimblandy@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e> | 2009-08-07 19:24:32 +0000 |
commit | 54bc5cfa2d594cc9719bef016657bc80a15d968c (patch) | |
tree | e58df570c01fca69b302486c57d6a0b0cfbd8700 | |
parent | Linux dumper: Make the 'name' field of FuncInfo a std::string instead of a char * (diff) | |
download | breakpad-54bc5cfa2d594cc9719bef016657bc80a15d968c.tar.xz |
Linux dumper: Move STABS parsing into its own class.
With this patch, dump_symbols.cc no longer knows about the details of
the STABS debugging format; that is handled by the StabsReader class.
dump_symbols.cc provides a subclass of StabsHandler that builds
dump_symbols' own representation of the data.
a=jimblandy
r=nealsid
git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@378 4c0a9323-5329-0410-9bdc-e9ce6186880e
-rw-r--r-- | src/common/linux/dump_symbols.cc | 242 | ||||
-rw-r--r-- | src/tools/linux/dump_syms/Makefile | 6 |
2 files changed, 112 insertions, 136 deletions
diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 505c517e..a87ab97e 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -27,7 +27,6 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include <a.out.h> #include <cstdarg> #include <cstdlib> #include <cstdio> @@ -37,7 +36,6 @@ #include <fcntl.h> #include <link.h> #include <sys/mman.h> -#include <stab.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> @@ -50,6 +48,7 @@ #include <map> #include <string.h> +#include "common/linux/stabs_reader.h" #include "common/linux/dump_symbols.h" #include "common/linux/file_id.h" #include "common/linux/guid_creator.h" @@ -154,10 +153,6 @@ struct SymbolInfo { // than create FuncInfoList for such entries, we record their // addresses here. These are not necessarily sorted. std::vector<ElfW(Addr)> file_boundaries; - - // The current source file, for line number information. This is - // persistent across functions. - SourceFileInfo *current_source_file; }; // Stab section name. @@ -252,108 +247,6 @@ static SourceFileInfo *FindSourceFileInfo(SymbolInfo *symbols, return file; } -static int LoadLineInfo(struct nlist *list, - struct nlist *list_end, - SymbolInfo *symbols, - struct SourceFileInfo *source_file_info, - struct FuncInfo *func_info, - const ElfW(Shdr) *stabstr_section) { - struct nlist *cur_list = list; - // The name of the file any subsequent lines would belong to. - const char *last_source_name = symbols->current_source_file->name; - do { - // Skip non line information. - while (cur_list < list_end && cur_list->n_type != N_SLINE) { - // Only exit when got another function, or source file. - if (cur_list->n_type == N_FUN || cur_list->n_type == N_SO) - return cur_list - list; - // N_SOL means source lines following it will be from another - // source file. 
But don't actually create a file entry yet; - // wait until we see executable code attributed to the file. - if (cur_list->n_type == N_SOL - && cur_list->n_un.n_strx > 0) - last_source_name = reinterpret_cast<char *>(cur_list->n_un.n_strx - + stabstr_section->sh_offset); - ++cur_list; - } - struct LineInfo line; - while (cur_list < list_end && cur_list->n_type == N_SLINE) { - // If this line is attributed to a new file, create its entry now. - if (last_source_name != symbols->current_source_file->name) { - symbols->current_source_file - = FindSourceFileInfo(symbols, last_source_name); - } - line.file = symbols->current_source_file; - line.rva_to_func = cur_list->n_value; - // n_desc is a signed short - line.line_num = (unsigned short)cur_list->n_desc; - // We will compute these later. For now, pacify compiler warnings. - line.size = 0; - line.rva_to_base = 0; - func_info->line_info.push_back(line); - ++cur_list; - } - } while (list < list_end); - - return cur_list - list; -} - -static int LoadFuncSymbols(struct nlist *list, - struct nlist *list_end, - SymbolInfo *symbols, - struct SourceFileInfo *source_file_info, - const ElfW(Shdr) *stabstr_section) { - struct nlist *cur_list = list; - assert(cur_list->n_type == N_SO); - ++cur_list; - source_file_info->func_info.clear(); - while (cur_list < list_end) { - // Go until the function symbol. - while (cur_list < list_end && cur_list->n_type != N_FUN) { - if (cur_list->n_type == N_SO) { - return cur_list - list; - } - ++cur_list; - continue; - } - if (cur_list->n_type == N_FUN) { - struct FuncInfo func_info; - // The STABS data for an N_FUN entry is the function's (mangled) - // name, followed by a colon, followed by type information. We - // want to retain the name only. - const char *stabs_name - = reinterpret_cast<char *>(cur_list->n_un.n_strx + - stabstr_section->sh_offset); - const char *name_end = strchr(stabs_name, ':'); - if (! 
name_end) - name_end = stabs_name + strlen(stabs_name); - func_info.name = std::string(stabs_name, name_end - stabs_name); - func_info.addr = cur_list->n_value; - func_info.rva_to_base = 0; - func_info.size = 0; - func_info.stack_param_size = 0; - cur_list++; - - // Line info. - cur_list += LoadLineInfo(cur_list, - list_end, - symbols, - source_file_info, - &func_info, - stabstr_section); - - // Functions in this module should have address bigger than the module - // startring address. - // There maybe a lot of duplicated entry for a function in the symbol, - // only one of them can met this. - if (func_info.addr >= source_file_info->addr) { - source_file_info->func_info.push_back(func_info); - } - } - } - return cur_list - list; -} - // Compute size and rva information based on symbols loaded from stab section. static bool ComputeSizeAndRVA(ElfW(Addr) loading_addr, struct SymbolInfo *symbols) { @@ -454,6 +347,104 @@ static bool ComputeSizeAndRVA(ElfW(Addr) loading_addr, return true; } +// Our handler class for STABS data. +class DumpStabsHandler: public google_breakpad::StabsHandler { + public: + DumpStabsHandler(struct SymbolInfo *symbols, ElfW(Addr) loading_addr): + symbols_(symbols), + loading_addr_(loading_addr), + current_comp_unit_(NULL), + current_source_file_(NULL) { + current_function_.addr = 0; + } + + bool StartCompilationUnit(const char *name, uint64_t address, + const char *build_directory); + bool EndCompilationUnit(uint64_t address); + bool StartFunction(const std::string &name, uint64_t address); + bool EndFunction(uint64_t address); + bool Line(uint64_t address, const char *name, int number); + + private: + // The symbol info we're contributing to. + struct SymbolInfo *symbols_; + + // The address at which this module gets loaded. + ElfW(Addr) loading_addr_; + + // The main file we're currently contributing functions/lines to. + struct SourceFileInfo *current_comp_unit_; + + // The function we're currently contributing lines to. 
+ // FIXME: This gets copied, along with all its lines. Should be a pointer. + struct FuncInfo current_function_; + + // The SourceFileInfo structure for the last file we got a line + // number in. Instead of hashing on the name ('s address) on every + // line, we just check whether the name is the same as this file's + // (which it usually is). + SourceFileInfo *current_source_file_; +}; + +bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address, + const char *build_directory) { + assert(! current_comp_unit_); + current_comp_unit_ = FindSourceFileInfo(symbols_, name); + current_source_file_ = current_comp_unit_; + // Add it to the list; use ADDR to tell whether we've already done so. + if (! current_comp_unit_->addr) + symbols_->main_files.push_back(current_comp_unit_); + current_comp_unit_->addr = address; + return true; +} + +bool DumpStabsHandler::EndCompilationUnit(uint64_t address) { + assert(current_comp_unit_); + // We compute everything's size later. + symbols_->file_boundaries.push_back(address); + current_comp_unit_ = NULL; + current_source_file_ = NULL; + return true; +} + +bool DumpStabsHandler::StartFunction(const std::string &name, + uint64_t address) { + assert(! current_function_.addr); + current_function_.name = name; + current_function_.rva_to_base = 0; + current_function_.addr = address; + current_function_.size = 0; + current_function_.stack_param_size = 0; + current_function_.line_info.clear(); + return true; +} + +bool DumpStabsHandler::EndFunction(uint64_t address) { + assert(current_function_.addr); + if (current_function_.addr >= current_comp_unit_->addr) + // This is a big copy, then free. Should use a pointer. 
+ current_comp_unit_->func_info.push_back(current_function_); + current_function_.addr = 0; + current_function_.line_info.clear(); + return true; +} + +bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) { + assert(current_function_.addr); + assert(current_source_file_); + if (name != current_source_file_->name) + current_source_file_ = FindSourceFileInfo(symbols_, name); + struct LineInfo line; + // FIXME: might as well set rva_to_base directly. + line.rva_to_func = address - current_function_.addr; + line.file = current_source_file_; + line.line_num = number; + line.size = 0; + line.rva_to_base = 0; + current_function_.line_info.push_back(line); + return true; +} + static bool LoadSymbols(const ElfW(Shdr) *stab_section, const ElfW(Shdr) *stabstr_section, ElfW(Addr) loading_addr, @@ -461,33 +452,14 @@ static bool LoadSymbols(const ElfW(Shdr) *stab_section, if (stab_section == NULL || stabstr_section == NULL) return false; - struct nlist *lists = - reinterpret_cast<struct nlist *>(stab_section->sh_offset); - int nstab = stab_section->sh_size / sizeof(struct nlist); - // First pass, load all symbols from the object file. - for (int i = 0; i < nstab; ) { - int step = 1; - struct nlist *cur_list = lists + i; - if (cur_list->n_type == N_SO) { - if (cur_list->n_un.n_strx) { - const char *name = reinterpret_cast<char *>(cur_list->n_un.n_strx - + stabstr_section->sh_offset); - struct SourceFileInfo *source_file_info - = FindSourceFileInfo(symbols, name); - // Add it to the list; use ADDR to tell whether we've already done so. - if (! source_file_info->addr) - symbols->main_files.push_back(source_file_info); - source_file_info->addr = cur_list->n_value; - symbols->current_source_file = source_file_info; - step = LoadFuncSymbols(cur_list, lists + nstab, symbols, - source_file_info, stabstr_section); - } else { - // N_SO entries with no name mark file boundary addresses. 
- symbols->file_boundaries.push_back(cur_list->n_value); - } - } - i += step; - } + uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset); + uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset); + DumpStabsHandler handler(symbols, loading_addr); + google_breakpad::StabsReader reader(stabs, stab_section->sh_size, + stabstr, stabstr_section->sh_size, + &handler); + if (! reader.Process()) + return false; // Second pass, compute the size of functions and lines. return ComputeSizeAndRVA(loading_addr, symbols); diff --git a/src/tools/linux/dump_syms/Makefile b/src/tools/linux/dump_syms/Makefile index 21d848d7..a5cf974a 100644 --- a/src/tools/linux/dump_syms/Makefile +++ b/src/tools/linux/dump_syms/Makefile @@ -16,7 +16,8 @@ BIN=dump_syms all:$(BIN) -DUMP_OBJ=dump_symbols.o guid_creator.o dump_syms.o file_id.o md5.o +DUMP_OBJ=dump_symbols.o guid_creator.o dump_syms.o file_id.o md5.o \ + stabs_reader.o dump_syms:$(DUMP_OBJ) $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ @@ -24,6 +25,9 @@ dump_syms:$(DUMP_OBJ) dump_symbols.o:../../../common/linux/dump_symbols.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^ +stabs_reader.o:../../../common/linux/stabs_reader.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^ + guid_creator.o:../../../common/linux/guid_creator.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^ |