// Copyright (c) 2013 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// exploitability_linux.cc: Linux specific exploitability engine.
//
// Provides a guess at the exploitability of the crash for the Linux
// platform given a minidump and process_state.
//
// Author: Matthew Riley

#include "processor/exploitability_linux.h"

#ifndef _WIN32
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sstream>
#include <iterator>
#endif  // _WIN32

#include "google_breakpad/common/minidump_exception_linux.h"
#include "google_breakpad/processor/call_stack.h"
#include "google_breakpad/processor/process_state.h"
#include "google_breakpad/processor/stack_frame.h"
#include "processor/logging.h"

namespace {

// This function in libc is called if the program was compiled with
// -fstack-protector and a function's stack canary changes.
const char kStackCheckFailureFunction[] = "__stack_chk_fail";

// This function in libc is called if the program was compiled with
// -D_FORTIFY_SOURCE=2, a function like strcpy() is called, and the runtime
// can determine that the call would overflow the target buffer.
const char kBoundsCheckFailureFunction[] = "__chk_fail";

#ifndef _WIN32
const unsigned int MAX_INSTRUCTION_LEN = 15;
const unsigned int MAX_OBJDUMP_BUFFER_LEN = 4096;
#endif  // _WIN32

}  // namespace

namespace google_breakpad {

ExploitabilityLinux::ExploitabilityLinux(Minidump *dump,
                                         ProcessState *process_state)
    : Exploitability(dump, process_state),
      enable_objdump_(false) { }

ExploitabilityLinux::ExploitabilityLinux(Minidump *dump,
                                         ProcessState *process_state,
                                         bool enable_objdump)
    : Exploitability(dump, process_state),
      enable_objdump_(enable_objdump) { }


ExploitabilityRating ExploitabilityLinux::CheckPlatformExploitability() {
  // Check the crashing thread for functions suggesting a buffer overflow or
  // stack smash.
  if (process_state_->requesting_thread() != -1) {
    CallStack* crashing_thread =
        process_state_->threads()->at(process_state_->requesting_thread());
    const vector<StackFrame*>& crashing_thread_frames =
        *crashing_thread->frames();
    for (size_t i = 0; i < crashing_thread_frames.size(); ++i) {
      if (crashing_thread_frames[i]->function_name ==
          kStackCheckFailureFunction) {
        return EXPLOITABILITY_HIGH;
      }

      if (crashing_thread_frames[i]->function_name ==
          kBoundsCheckFailureFunction) {
        return EXPLOITABILITY_HIGH;
      }
    }
  }

  // Getting exception data. (It should exist for all minidumps.)
  MinidumpException *exception = dump_->GetException();
  if (exception == NULL) {
    BPLOG(INFO) << "No exception record.";
    return EXPLOITABILITY_ERR_PROCESSING;
  }
  const MDRawExceptionStream *raw_exception_stream = exception->exception();
  if (raw_exception_stream == NULL) {
    BPLOG(INFO) << "No raw exception stream.";
    return EXPLOITABILITY_ERR_PROCESSING;
  }

  // Checking for benign exceptions that caused the crash.
  if (this->BenignCrashTrigger(raw_exception_stream)) {
    return EXPLOITABILITY_NONE;
  }

  // Check if the instruction pointer is in a valid instruction region
  // by finding if it maps to an executable part of memory.
  uint64_t instruction_ptr = 0;
  uint64_t stack_ptr = 0;

  const MinidumpContext *context = exception->GetContext();
  if (context == NULL) {
    BPLOG(INFO) << "No exception context.";
    return EXPLOITABILITY_ERR_PROCESSING;
  }

  // Getting the instruction pointer.
  if (!context->GetInstructionPointer(&instruction_ptr)) {
    BPLOG(INFO) << "Failed to retrieve instruction pointer.";
    return EXPLOITABILITY_ERR_PROCESSING;
  }

  // Getting the stack pointer.
  if (!context->GetStackPointer(&stack_ptr)) {
    BPLOG(INFO) << "Failed to retrieve stack pointer.";
    return EXPLOITABILITY_ERR_PROCESSING;
  }

  // Checking for the instruction pointer in a valid instruction region,
  // a misplaced stack pointer, and an executable stack or heap.
  if (!this->InstructionPointerInCode(instruction_ptr) ||
       this->StackPointerOffStack(stack_ptr) ||
       this->ExecutableStackOrHeap()) {
    return EXPLOITABILITY_HIGH;
  }

  // Check for write to read only memory or invalid memory, shelling out
  // to objdump is enabled.
  if (enable_objdump_ && this->EndedOnIllegalWrite(instruction_ptr)) {
    return EXPLOITABILITY_HIGH;
  }

  // There was no strong evidence suggesting exploitability, but the minidump
  // does not appear totally benign either.
  return EXPLOITABILITY_INTERESTING;
}

bool ExploitabilityLinux::EndedOnIllegalWrite(uint64_t instruction_ptr) {
#ifdef _WIN32
  BPLOG(INFO) << "MinGW does not support fork and exec. Terminating method.";
#else
  // Get memory region containing instruction pointer.
  MinidumpMemoryList *memory_list = dump_->GetMemoryList();
  MinidumpMemoryRegion *memory_region =
      memory_list ?
      memory_list->GetMemoryRegionForAddress(instruction_ptr) : NULL;
  if (!memory_region) {
    BPLOG(INFO) << "No memory region around instruction pointer.";
    return false;
  }

  // Get exception data to find architecture.
  string architecture = "";
  MinidumpException *exception = dump_->GetException();
  // This should never evaluate to true, since this should not be reachable
  // without checking for exception data earlier.
  if (!exception) {
    BPLOG(INFO) << "No exception data.";
    return false;
  }
  const MDRawExceptionStream *raw_exception_stream = exception->exception();
  const MinidumpContext *context = exception->GetContext();
  // This should not evaluate to true, for the same reason mentioned above.
  if (!raw_exception_stream || !context) {
    BPLOG(INFO) << "No exception or architecture data.";
    return false;
  }
  // Check architecture and set architecture variable to corresponding flag
  // in objdump.
  switch (context->GetContextCPU()) {
    case MD_CONTEXT_X86:
      architecture = "i386";
      break;
    case MD_CONTEXT_AMD64:
      architecture = "i386:x86-64";
      break;
    default:
      // Unsupported architecture. Note that ARM architectures are not
      // supported because objdump does not support ARM.
      return false;
      break;
  }

  // Get memory region around instruction pointer and the number of bytes
  // before and after the instruction pointer in the memory region.
  const uint8_t *raw_memory = memory_region->GetMemory();
  const uint64_t base = memory_region->GetBase();
  if (base > instruction_ptr) {
    BPLOG(ERROR) << "Memory region base value exceeds instruction pointer.";
    return false;
  }
  const uint64_t offset = instruction_ptr - base;
  if (memory_region->GetSize() < MAX_INSTRUCTION_LEN + offset) {
    BPLOG(INFO) << "Not enough bytes left to guarantee complete instruction.";
    return false;
  }

  // Convert bytes into objdump output.
  char objdump_output_buffer[MAX_OBJDUMP_BUFFER_LEN] = {0};
  DisassembleBytes(architecture,
                   raw_memory + offset,
                   MAX_OBJDUMP_BUFFER_LEN,
                   objdump_output_buffer);

  // Put buffer data into stream to output line-by-line.
  std::stringstream objdump_stream;
  objdump_stream.str(string(objdump_output_buffer));
  string line;

  // Pipe each output line into the string until the string contains
  // the first instruction from objdump.
  // Loop until the line shows the first instruction or there are no lines left.
  do {
    if (!getline(objdump_stream, line)) {
      BPLOG(INFO) << "Objdump instructions not found";
      return false;
    }
  } while (line.find("0:") == string::npos);
  // This first instruction contains the above substring.

  // Convert objdump instruction line into the operation and operands.
  string instruction = "";
  string dest = "";
  string src = "";
  TokenizeObjdumpInstruction(line, &instruction, &dest, &src);

  // Check if the operation is a write to memory. First, the instruction
  // must one that can write to memory. Second, the write destination
  // must be a spot in memory rather than a register. Since there are no
  // symbols from objdump, the destination will be enclosed by brackets.
  if (dest.size() > 2 && dest.at(0) == '[' && dest.at(dest.size() - 1) == ']' &&
      (!instruction.compare("mov") || !instruction.compare("inc") ||
       !instruction.compare("dec") || !instruction.compare("and") ||
       !instruction.compare("or") || !instruction.compare("xor") ||
       !instruction.compare("not") || !instruction.compare("neg") ||
       !instruction.compare("add") || !instruction.compare("sub") ||
       !instruction.compare("shl") || !instruction.compare("shr"))) {
    // Strip away enclosing brackets from the destination address.
    dest = dest.substr(1, dest.size() - 2);
    uint64_t write_address = 0;
    CalculateAddress(dest, *context, &write_address);

    // If the program crashed as a result of a write, the destination of
    // the write must have been an address that did not permit writing.
    // However, if the address is under 4k, due to program protections,
    // the crash does not suggest exploitability for writes with such a
    // low target address.
    return write_address > 4096;
  }
#endif  // _WIN32
  return false;
}

#ifndef _WIN32
bool ExploitabilityLinux::CalculateAddress(const string &address_expression,
                                           const DumpContext &context,
                                           uint64_t *write_address) {
  // The destination should be the format reg+a or reg-a, where reg
  // is a register and a is a hexadecimal constant. Although more complex
  // expressions can make valid instructions, objdump's disassembly outputs
  // it in this simpler format.
  // TODO(liuandrew): Handle more complex formats, should they arise.

  if (!write_address) {
    BPLOG(ERROR) << "Null parameter.";
    return false;
  }

  // Clone parameter into a non-const string.
  string expression = address_expression;

  // Parse out the constant that is added to the address (if it exists).
  size_t delim = expression.find('+');
  bool positive_add_constant = true;
  // Check if constant is subtracted instead of added.
  if (delim == string::npos) {
    positive_add_constant = false;
    delim = expression.find('-');
  }
  uint32_t add_constant = 0;
  // Save constant and remove it from the expression.
  if (delim != string::npos) {
    if (!sscanf(expression.substr(delim + 1).c_str(), "%x", &add_constant)) {
      BPLOG(ERROR) << "Failed to scan constant.";
      return false;
    }
    expression = expression.substr(0, delim);
  }

  // Set the the write address to the corresponding register.
  // TODO(liuandrew): Add support for partial registers, such as
  // the rax/eax/ax/ah/al chain.
  switch (context.GetContextCPU()) {
    case MD_CONTEXT_X86:
      if (!expression.compare("eax")) {
        *write_address = context.GetContextX86()->eax;
      } else if (!expression.compare("ebx")) {
        *write_address = context.GetContextX86()->ebx;
      } else if (!expression.compare("ecx")) {
        *write_address = context.GetContextX86()->ecx;
      } else if (!expression.compare("edx")) {
        *write_address = context.GetContextX86()->edx;
      } else if (!expression.compare("edi")) {
        *write_address = context.GetContextX86()->edi;
      } else if (!expression.compare("esi")) {
        *write_address = context.GetContextX86()->esi;
      } else if (!expression.compare("ebp")) {
        *write_address = context.GetContextX86()->ebp;
      } else if (!expression.compare("esp")) {
        *write_address = context.GetContextX86()->esp;
      } else if (!expression.compare("eip")) {
        *write_address = context.GetContextX86()->eip;
      } else {
        BPLOG(ERROR) << "Unsupported register";
        return false;
      }
      break;
    case MD_CONTEXT_AMD64:
      if (!expression.compare("rax")) {
        *write_address = context.GetContextAMD64()->rax;
      } else if (!expression.compare("rbx")) {
        *write_address = context.GetContextAMD64()->rbx;
      } else if (!expression.compare("rcx")) {
        *write_address = context.GetContextAMD64()->rcx;
      } else if (!expression.compare("rdx")) {
        *write_address = context.GetContextAMD64()->rdx;
      } else if (!expression.compare("rdi")) {
        *write_address = context.GetContextAMD64()->rdi;
      } else if (!expression.compare("rsi")) {
        *write_address = context.GetContextAMD64()->rsi;
      } else if (!expression.compare("rbp")) {
        *write_address = context.GetContextAMD64()->rbp;
      } else if (!expression.compare("rsp")) {
        *write_address = context.GetContextAMD64()->rsp;
      } else if (!expression.compare("rip")) {
        *write_address = context.GetContextAMD64()->rip;
      } else if (!expression.compare("r8")) {
        *write_address = context.GetContextAMD64()->r8;
      } else if (!expression.compare("r9")) {
        *write_address = context.GetContextAMD64()->r9;
      } else if (!expression.compare("r10")) {
        *write_address = context.GetContextAMD64()->r10;
      } else if (!expression.compare("r11")) {
        *write_address = context.GetContextAMD64()->r11;
      } else if (!expression.compare("r12")) {
        *write_address = context.GetContextAMD64()->r12;
      } else if (!expression.compare("r13")) {
        *write_address = context.GetContextAMD64()->r13;
      } else if (!expression.compare("r14")) {
        *write_address = context.GetContextAMD64()->r14;
      } else if (!expression.compare("r15")) {
        *write_address = context.GetContextAMD64()->r15;
      } else {
        BPLOG(ERROR) << "Unsupported register";
        return false;
      }
      break;
    default:
      // This should not occur since the same switch condition
      // should have terminated this method.
      return false;
      break;
  }

  // Add or subtract constant from write address (if applicable).
  *write_address =
      positive_add_constant ?
      *write_address + add_constant : *write_address - add_constant;

  return true;
}

bool ExploitabilityLinux::TokenizeObjdumpInstruction(const string &line,
                                                     string *operation,
                                                     string *dest,
                                                     string *src) {
  if (!operation || !dest || !src) {
    BPLOG(ERROR) << "Null parameters passed.";
    return false;
  }

  // Set all pointer values to empty strings.
  *operation = "";
  *dest = "";
  *src = "";

  // Tokenize the objdump line.
  vector<string> tokens;
  std::istringstream line_stream(line);
  copy(std::istream_iterator<string>(line_stream),
       std::istream_iterator<string>(),
       std::back_inserter(tokens));

  // Regex for the data in hex form. Each byte is two hex digits.
  regex_t regex;
  regcomp(&regex, "^[[:xdigit:]]{2}$", REG_EXTENDED | REG_NOSUB);

  // Find and set the location of the operator. The operator appears
  // directly after the chain of bytes that define the instruction. The
  // operands will be the last token, given that the instruction has operands.
  // If not, the operator is the last token. The loop skips the first token
  // because the first token is the instruction number (namely "0:").
  string operands = "";
  for (size_t i = 1; i < tokens.size(); i++) {
    // Check if current token no longer is in byte format.
    if (regexec(&regex, tokens[i].c_str(), 0, NULL, 0)) {
      // instruction = tokens[i];
      *operation = tokens[i];
      // If the operator is the last token, there are no operands.
      if (i != tokens.size() - 1) {
        operands = tokens[tokens.size() - 1];
      }
      break;
    }
  }
  regfree(&regex);

  if (operation->empty()) {
    BPLOG(ERROR) << "Failed to parse out operation from objdump instruction.";
    return false;
  }

  // Split operands into source and destination (if applicable).
  if (!operands.empty()) {
    size_t delim = operands.find(',');
    if (delim == string::npos) {
      *dest = operands;
    } else {
      *dest = operands.substr(0, delim);
      *src = operands.substr(delim + 1);
    }
  }
  return true;
}

bool ExploitabilityLinux::DisassembleBytes(const string &architecture,
                                           const uint8_t *raw_bytes,
                                           const unsigned int buffer_len,
                                           char *objdump_output_buffer) {
  if (!raw_bytes || !objdump_output_buffer) {
    BPLOG(ERROR) << "Bad input parameters.";
    return false;
  }

  // Write raw bytes around instruction pointer to a temporary file to
  // pass as an argument to objdump.
  char raw_bytes_tmpfile[] = "/tmp/breakpad_mem_region-raw_bytes-XXXXXX";
  int raw_bytes_fd = mkstemp(raw_bytes_tmpfile);
  if (raw_bytes_fd < 0) {
    BPLOG(ERROR) << "Failed to create tempfile.";
    unlink(raw_bytes_tmpfile);
    return false;
  }
  if (write(raw_bytes_fd, raw_bytes, MAX_INSTRUCTION_LEN)
      != MAX_INSTRUCTION_LEN) {
    BPLOG(ERROR) << "Writing of raw bytes failed.";
    unlink(raw_bytes_tmpfile);
    return false;
  }

  char cmd[1024] = {0};
  snprintf(cmd,
           1024,
           "objdump -D -b binary -M intel -m %s %s",
           architecture.c_str(),
           raw_bytes_tmpfile);
  FILE *objdump_fp = popen(cmd, "r");
  if (!objdump_fp) {
    fclose(objdump_fp);
    unlink(raw_bytes_tmpfile);
    BPLOG(ERROR) << "Failed to call objdump.";
    return false;
  }
  if (fread(objdump_output_buffer, 1, buffer_len, objdump_fp) <= 0) {
    fclose(objdump_fp);
    unlink(raw_bytes_tmpfile);
    BPLOG(ERROR) << "Failed to read objdump output.";
    return false;
  }
  fclose(objdump_fp);
  unlink(raw_bytes_tmpfile);
  return true;
}
#endif  // _WIN32

bool ExploitabilityLinux::StackPointerOffStack(uint64_t stack_ptr) {
  MinidumpLinuxMapsList *linux_maps_list = dump_->GetLinuxMapsList();
  // Inconclusive if there are no mappings available.
  if (!linux_maps_list) {
    return false;
  }
  const MinidumpLinuxMaps *linux_maps =
      linux_maps_list->GetLinuxMapsForAddress(stack_ptr);
  // Checks if the stack pointer maps to a valid mapping and if the mapping
  // is not the stack. If the mapping has no name, it is inconclusive whether
  // it is off the stack.
  return !linux_maps ||
         (linux_maps->GetPathname().compare("") &&
          linux_maps->GetPathname().compare("[stack]"));
}

bool ExploitabilityLinux::ExecutableStackOrHeap() {
  MinidumpLinuxMapsList *linux_maps_list = dump_->GetLinuxMapsList();
  if (linux_maps_list) {
    for (size_t i = 0; i < linux_maps_list->get_maps_count(); i++) {
      const MinidumpLinuxMaps *linux_maps =
          linux_maps_list->GetLinuxMapsAtIndex(i);
      // Check for executable stack or heap for each mapping.
      if (linux_maps &&
          (!linux_maps->GetPathname().compare("[stack]") ||
           !linux_maps->GetPathname().compare("[heap]")) &&
          linux_maps->IsExecutable()) {
        return true;
      }
    }
  }
  return false;
}

bool ExploitabilityLinux::InstructionPointerInCode(uint64_t instruction_ptr) {
  // Get Linux memory mapping from /proc/self/maps. Checking whether the
  // region the instruction pointer is in has executable permission can tell
  // whether it is in a valid code region. If there is no mapping for the
  // instruction pointer, it is indicative that the instruction pointer is
  // not within a module, which implies that it is outside a valid area.
  MinidumpLinuxMapsList *linux_maps_list = dump_->GetLinuxMapsList();
  const MinidumpLinuxMaps *linux_maps =
      linux_maps_list ?
      linux_maps_list->GetLinuxMapsForAddress(instruction_ptr) : NULL;
  return linux_maps ? linux_maps->IsExecutable() : false;
}

bool ExploitabilityLinux::BenignCrashTrigger(const MDRawExceptionStream
                                                  *raw_exception_stream) {
  // Check the cause of crash.
  // If the exception of the crash is a benign exception,
  // it is probably not exploitable.
  switch (raw_exception_stream->exception_record.exception_code) {
    case MD_EXCEPTION_CODE_LIN_SIGHUP:
    case MD_EXCEPTION_CODE_LIN_SIGINT:
    case MD_EXCEPTION_CODE_LIN_SIGQUIT:
    case MD_EXCEPTION_CODE_LIN_SIGTRAP:
    case MD_EXCEPTION_CODE_LIN_SIGABRT:
    case MD_EXCEPTION_CODE_LIN_SIGFPE:
    case MD_EXCEPTION_CODE_LIN_SIGKILL:
    case MD_EXCEPTION_CODE_LIN_SIGUSR1:
    case MD_EXCEPTION_CODE_LIN_SIGUSR2:
    case MD_EXCEPTION_CODE_LIN_SIGPIPE:
    case MD_EXCEPTION_CODE_LIN_SIGALRM:
    case MD_EXCEPTION_CODE_LIN_SIGTERM:
    case MD_EXCEPTION_CODE_LIN_SIGCHLD:
    case MD_EXCEPTION_CODE_LIN_SIGCONT:
    case MD_EXCEPTION_CODE_LIN_SIGSTOP:
    case MD_EXCEPTION_CODE_LIN_SIGTSTP:
    case MD_EXCEPTION_CODE_LIN_SIGTTIN:
    case MD_EXCEPTION_CODE_LIN_SIGTTOU:
    case MD_EXCEPTION_CODE_LIN_SIGURG:
    case MD_EXCEPTION_CODE_LIN_SIGXCPU:
    case MD_EXCEPTION_CODE_LIN_SIGXFSZ:
    case MD_EXCEPTION_CODE_LIN_SIGVTALRM:
    case MD_EXCEPTION_CODE_LIN_SIGPROF:
    case MD_EXCEPTION_CODE_LIN_SIGWINCH:
    case MD_EXCEPTION_CODE_LIN_SIGIO:
    case MD_EXCEPTION_CODE_LIN_SIGPWR:
    case MD_EXCEPTION_CODE_LIN_SIGSYS:
    case MD_EXCEPTION_CODE_LIN_DUMP_REQUESTED:
      return true;
      break;
    default:
      return false;
      break;
  }
}

}  // namespace google_breakpad