aboutsummaryrefslogtreecommitdiff
path: root/src/third_party/libdisasm/ia32_insn.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/libdisasm/ia32_insn.c')
-rw-r--r--src/third_party/libdisasm/ia32_insn.c625
1 files changed, 625 insertions, 0 deletions
diff --git a/src/third_party/libdisasm/ia32_insn.c b/src/third_party/libdisasm/ia32_insn.c
new file mode 100644
index 00000000..e4f42108
--- /dev/null
+++ b/src/third_party/libdisasm/ia32_insn.c
@@ -0,0 +1,625 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "qword.h"
+
+#include "ia32_insn.h"
+#include "ia32_opcode_tables.h"
+
+#include "ia32_reg.h"
+#include "ia32_operand.h"
+#include "ia32_implicit.h"
+#include "ia32_settings.h"
+
+#include "libdis.h"
+
+extern ia32_table_desc_t ia32_tables[];
+extern ia32_settings_t ia32_settings;
+
+#define IS_SP( op ) (op->type == op_register && \
+ (op->data.reg.id == REG_ESP_INDEX || \
+ op->data.reg.alias == REG_ESP_INDEX) )
+#define IS_IMM( op ) (op->type == op_immediate )
+
+#ifdef WIN32
+# define INLINE
+#else
+# define INLINE inline
+#endif
+
+/* for calculating stack modification based on an operand */
+static INLINE int32_t long_from_operand( x86_op_t *op ) {
+
+ if (! IS_IMM(op) ) {
+ return 0L;
+ }
+
+ switch ( op->datatype ) {
+ case op_byte:
+ return (int32_t) op->data.sbyte;
+ case op_word:
+ return (int32_t) op->data.sword;
+ case op_qword:
+ return (int32_t) op->data.sqword;
+ case op_dword:
+ return op->data.sdword;
+ default:
+ /* these are not used in stack insn */
+ break;
+ }
+
+ return 0L;
+}
+
+
+/* determine what this insn does to the stack */
+static void ia32_stack_mod(x86_insn_t *insn) {
+ x86_op_t *dest, *src = NULL;
+
+ if (! insn || ! insn->operands ) {
+ return;
+ }
+
+ dest = &insn->operands->op;
+ if ( dest ) {
+ src = &insn->operands->next->op;
+ }
+
+ insn->stack_mod = 0;
+ insn->stack_mod_val = 0;
+
+ switch ( insn->type ) {
+ case insn_call:
+ case insn_callcc:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = insn->addr_size * -1;
+ break;
+ case insn_push:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = insn->addr_size * -1;
+ break;
+ case insn_return:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = insn->addr_size;
+ case insn_int: case insn_intcc:
+ case insn_iret:
+ break;
+ case insn_pop:
+ insn->stack_mod = 1;
+ if (! IS_SP( dest ) ) {
+ insn->stack_mod_val = insn->op_size;
+ } /* else we don't know the stack change in a pop esp */
+ break;
+ case insn_enter:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = 0; /* TODO : FIX */
+ break;
+ case insn_leave:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = 0; /* TODO : FIX */
+ break;
+ case insn_pushregs:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = 0; /* TODO : FIX */
+ break;
+ case insn_popregs:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = 0; /* TODO : FIX */
+ break;
+ case insn_pushflags:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = 0; /* TODO : FIX */
+ break;
+ case insn_popflags:
+ insn->stack_mod = 1;
+ insn->stack_mod_val = 0; /* TODO : FIX */
+ break;
+ case insn_add:
+ if ( IS_SP( dest ) ) {
+ insn->stack_mod = 1;
+ insn->stack_mod_val = long_from_operand( src );
+ }
+ break;
+ case insn_sub:
+ if ( IS_SP( dest ) ) {
+ insn->stack_mod = 1;
+ insn->stack_mod_val = long_from_operand( src );
+ insn->stack_mod_val *= -1;
+ }
+ break;
+ case insn_inc:
+ if ( IS_SP( dest ) ) {
+ insn->stack_mod = 1;
+ insn->stack_mod_val = 1;
+ }
+ break;
+ case insn_dec:
+ if ( IS_SP( dest ) ) {
+ insn->stack_mod = 1;
+ insn->stack_mod_val = 1;
+ }
+ break;
+ case insn_mov: case insn_movcc:
+ case insn_xchg: case insn_xchgcc:
+ case insn_mul: case insn_div:
+ case insn_shl: case insn_shr:
+ case insn_rol: case insn_ror:
+ case insn_and: case insn_or:
+ case insn_not: case insn_neg:
+ case insn_xor:
+ if ( IS_SP( dest ) ) {
+ insn->stack_mod = 1;
+ }
+ break;
+ default:
+ break;
+ }
+ if (! strcmp("enter", insn->mnemonic) ) {
+ insn->stack_mod = 1;
+ } else if (! strcmp("leave", insn->mnemonic) ) {
+ insn->stack_mod = 1;
+ }
+
+ /* for mov, etc we return 0 -- unknown stack mod */
+
+ return;
+}
+
+/* get the cpu details for this insn from cpu flags int */
+static void ia32_handle_cpu( x86_insn_t *insn, unsigned int cpu ) {
+ insn->cpu = (enum x86_insn_cpu) CPU_MODEL(cpu);
+ insn->isa = (enum x86_insn_isa) (ISA_SUBSET(cpu)) >> 16;
+ return;
+}
+
+/* handle mnemonic type and group */
+static void ia32_handle_mnemtype(x86_insn_t *insn, unsigned int mnemtype) {
+ unsigned int type = mnemtype & ~INS_FLAG_MASK;
+ insn->group = (enum x86_insn_group) (INS_GROUP(type)) >> 12;
+ insn->type = (enum x86_insn_type) INS_TYPE(type);
+
+ return;
+}
+
+static void ia32_handle_notes(x86_insn_t *insn, unsigned int notes) {
+ insn->note = (enum x86_insn_note) notes;
+ return;
+}
+
+static void ia32_handle_eflags( x86_insn_t *insn, unsigned int eflags) {
+ unsigned int flags;
+
+ /* handle flags effected */
+ flags = INS_FLAGS_TEST(eflags);
+ /* handle weird OR cases */
+ /* these are either JLE (ZF | SF<>OF) or JBE (CF | ZF) */
+ if (flags & INS_TEST_OR) {
+ flags &= ~INS_TEST_OR;
+ if ( flags & INS_TEST_ZERO ) {
+ flags &= ~INS_TEST_ZERO;
+ if ( flags & INS_TEST_CARRY ) {
+ flags &= ~INS_TEST_CARRY ;
+ flags |= (int)insn_carry_or_zero_set;
+ } else if ( flags & INS_TEST_SFNEOF ) {
+ flags &= ~INS_TEST_SFNEOF;
+ flags |= (int)insn_zero_set_or_sign_ne_oflow;
+ }
+ }
+ }
+ insn->flags_tested = (enum x86_flag_status) flags;
+
+ insn->flags_set = (enum x86_flag_status) INS_FLAGS_SET(eflags) >> 16;
+
+ return;
+}
+
+static void ia32_handle_prefix( x86_insn_t *insn, unsigned int prefixes ) {
+
+ insn->prefix = (enum x86_insn_prefix) prefixes & PREFIX_MASK; // >> 20;
+ if (! (insn->prefix & PREFIX_PRINT_MASK) ) {
+ /* no printable prefixes */
+ insn->prefix = insn_no_prefix;
+ }
+
+ /* concat all prefix strings */
+ if ( (unsigned int)insn->prefix & PREFIX_LOCK ) {
+ strncat(insn->prefix_string, "lock ", 32 -
+ strlen(insn->prefix_string));
+ }
+
+ if ( (unsigned int)insn->prefix & PREFIX_REPNZ ) {
+ strncat(insn->prefix_string, "repnz ", 32 -
+ strlen(insn->prefix_string));
+ } else if ( (unsigned int)insn->prefix & PREFIX_REPZ ) {
+ strncat(insn->prefix_string, "repz ", 32 -
+ strlen(insn->prefix_string));
+ }
+
+ return;
+}
+
+
+static void reg_32_to_16( x86_op_t *op, x86_insn_t *insn, void *arg ) {
+
+ /* if this is a 32-bit register and it is a general register ... */
+ if ( op->type == op_register && op->data.reg.size == 4 &&
+ (op->data.reg.type & reg_gen) ) {
+ /* WORD registers are 8 indices off from DWORD registers */
+ ia32_handle_register( &(op->data.reg),
+ op->data.reg.id + 8 );
+ }
+}
+
+static void handle_insn_metadata( x86_insn_t *insn, ia32_insn_t *raw_insn ) {
+ ia32_handle_mnemtype( insn, raw_insn->mnem_flag );
+ ia32_handle_notes( insn, raw_insn->notes );
+ ia32_handle_eflags( insn, raw_insn->flags_effected );
+ ia32_handle_cpu( insn, raw_insn->cpu );
+ ia32_stack_mod( insn );
+}
+
+static size_t ia32_decode_insn( unsigned char *buf, size_t buf_len,
+ ia32_insn_t *raw_insn, x86_insn_t *insn,
+ unsigned int prefixes ) {
+ size_t size, op_size;
+ unsigned char modrm;
+
+ /* this should never happen, but just in case... */
+ if ( raw_insn->mnem_flag == INS_INVALID ) {
+ return 0;
+ }
+
+ if (ia32_settings.options & opt_16_bit) {
+ insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 4 : 2;
+ insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 4 : 2;
+ } else {
+ insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 2 : 4;
+ insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 2 : 4;
+ }
+
+
+ /* ++++ 1. Copy mnemonic and mnemonic-flags to CODE struct */
+ if ((ia32_settings.options & opt_att_mnemonics) && raw_insn->mnemonic_att[0]) {
+ strncpy( insn->mnemonic, raw_insn->mnemonic_att, 16 );
+ }
+ else {
+ strncpy( insn->mnemonic, raw_insn->mnemonic, 16 );
+ }
+ ia32_handle_prefix( insn, prefixes );
+
+ handle_insn_metadata( insn, raw_insn );
+
+ /* prefetch the next byte in case it is a modr/m byte -- saves
+ * worrying about whether the 'mod/rm' operand or the 'reg' operand
+ * occurs first */
+ modrm = GET_BYTE( buf, buf_len );
+
+ /* ++++ 2. Decode Explicit Operands */
+ /* Intel uses up to 3 explicit operands in its instructions;
+ * the first is 'dest', the second is 'src', and the third
+ * is an additional source value (usually an immediate value,
+ * e.g. in the MUL instructions). These three explicit operands
+ * are encoded in the opcode tables, even if they are not used
+ * by the instruction. Additional implicit operands are stored
+ * in a supplemental table and are handled later. */
+
+ op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->dest,
+ raw_insn->dest_flag, prefixes, modrm );
+ /* advance buffer, increase size if necessary */
+ buf += op_size;
+ buf_len -= op_size;
+ size = op_size;
+
+ op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->src,
+ raw_insn->src_flag, prefixes, modrm );
+ buf += op_size;
+ buf_len -= op_size;
+ size += op_size;
+
+ op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->aux,
+ raw_insn->aux_flag, prefixes, modrm );
+ size += op_size;
+
+
+ /* ++++ 3. Decode Implicit Operands */
+ /* apply implicit operands */
+ ia32_insn_implicit_ops( insn, raw_insn->implicit_ops );
+ /* we have one small inelegant hack here, to deal with
+ * the two prefixes that have implicit operands. If Intel
+ * adds more, we'll change the algorithm to suit :) */
+ if ( (prefixes & PREFIX_REPZ) || (prefixes & PREFIX_REPNZ) ) {
+ ia32_insn_implicit_ops( insn, IDX_IMPLICIT_REP );
+ }
+
+
+ /* 16-bit hack: foreach operand, if 32-bit reg, make 16-bit reg */
+ if ( insn->op_size == 2 ) {
+ x86_operand_foreach( insn, reg_32_to_16, NULL, op_any );
+ }
+
+ return size;
+}
+
+
+/* convenience routine */
+#define USES_MOD_RM(flag) \
+ (flag == ADDRMETH_E || flag == ADDRMETH_M || flag == ADDRMETH_Q || \
+ flag == ADDRMETH_W || flag == ADDRMETH_R)
+
+static int uses_modrm_flag( unsigned int flag ) {
+ unsigned int meth;
+ if ( flag == ARG_NONE ) {
+ return 0;
+ }
+ meth = (flag & ADDRMETH_MASK);
+ if ( USES_MOD_RM(meth) ) {
+ return 1;
+ }
+
+ return 0;
+}
+
+/* This routine performs the actual byte-by-byte opcode table lookup.
+ * Originally it was pretty simple: get a byte, adjust it to a proper
+ * index into the table, then check the table row at that index to
+ * determine what to do next. But is anything that simple with Intel?
+ * This is now a huge, convoluted mess, mostly of bitter comments. */
+/* buf: pointer to next byte to read from stream
+ * buf_len: length of buf
+ * table: index of table to use for lookups
+ * raw_insn: output pointer that receives opcode definition
+ * prefixes: output integer that is encoded with prefixes in insn
+ * returns : number of bytes consumed from stream during lookup */
+size_t ia32_table_lookup( unsigned char *buf, size_t buf_len,
+ unsigned int table, ia32_insn_t **raw_insn,
+ unsigned int *prefixes ) {
+ unsigned char *next, op = buf[0]; /* byte value -- 'opcode' */
+ size_t size = 1, sub_size = 0, next_len;
+ ia32_table_desc_t *table_desc;
+ unsigned int subtable, prefix = 0, recurse_table = 0;
+
+ table_desc = &ia32_tables[table];
+
+ op = GET_BYTE( buf, buf_len );
+
+ if ( table_desc->type == tbl_fpu && op > table_desc->maxlim) {
+ /* one of the fucking FPU tables out of the 00-BH range */
+ /* OK,. this is a bit of a hack -- the proper way would
+ * have been to use subtables in the 00-BF FPU opcode tables,
+ * but that is rather wasteful of space... */
+ table_desc = &ia32_tables[table +1];
+ }
+
+ /* PERFORM TABLE LOOKUP */
+
+ /* ModR/M trick: shift extension bits into lowest bits of byte */
+ /* Note: non-ModR/M tables have a shift value of 0 */
+ op >>= table_desc->shift;
+
+ /* ModR/M trick: mask out high bits to turn extension into an index */
+ /* Note: non-ModR/M tables have a mask value of 0xFF */
+ op &= table_desc->mask;
+
+
+ /* Sparse table trick: check that byte is <= max value */
+ /* Note: full (256-entry) tables have a maxlim of 155 */
+ if ( op > table_desc->maxlim ) {
+ /* this is a partial table, truncated at the tail,
+ and op is out of range! */
+ return INVALID_INSN;
+ }
+
+ /* Sparse table trick: check that byte is >= min value */
+ /* Note: full (256-entry) tables have a minlim of 0 */
+ if ( table_desc->minlim > op ) {
+ /* this is a partial table, truncated at the head,
+ and op is out of range! */
+ return INVALID_INSN;
+ }
+ /* adjust op to be an offset from table index 0 */
+ op -= table_desc->minlim;
+
+ /* Yay! 'op' is now fully adjusted to be an index into 'table' */
+ *raw_insn = &(table_desc->table[op]);
+ //printf("BYTE %X TABLE %d OP %X\n", buf[0], table, op );
+
+ if ( (*raw_insn)->mnem_flag & INS_FLAG_PREFIX ) {
+ prefix = (*raw_insn)->mnem_flag & PREFIX_MASK;
+ }
+
+
+ /* handle escape to a multibyte/coproc/extension/etc table */
+ /* NOTE: if insn is a prefix and has a subtable, then we
+ * only recurse if this is the first prefix byte --
+ * that is, if *prefixes is 0.
+ * NOTE also that suffix tables are handled later */
+ subtable = (*raw_insn)->table;
+
+ if ( subtable && ia32_tables[subtable].type != tbl_suffix &&
+ (! prefix || ! *prefixes) ) {
+
+ if ( ia32_tables[subtable].type == tbl_ext_ext ||
+ ia32_tables[subtable].type == tbl_fpu_ext ) {
+ /* opcode extension: reuse current byte in buffer */
+ next = buf;
+ next_len = buf_len;
+ } else {
+ /* "normal" opcode: advance to next byte in buffer */
+ if ( buf_len > 1 ) {
+ next = &buf[1];
+ next_len = buf_len - 1;
+ }
+ else {
+ // buffer is truncated
+ return INVALID_INSN;
+ }
+ }
+ /* we encountered a multibyte opcode: recurse using the
+ * table specified in the opcode definition */
+ sub_size = ia32_table_lookup( next, next_len, subtable,
+ raw_insn, prefixes );
+
+ /* SSE/prefix hack: if the original opcode def was a
+ * prefix that specified a subtable, and the subtable
+ * lookup returned a valid insn, then we have encountered
+ * an SSE opcode definition; otherwise, we pretend we
+ * never did the subtable lookup, and deal with the
+ * prefix normally later */
+ if ( prefix && ( sub_size == INVALID_INSN ||
+ INS_TYPE((*raw_insn)->mnem_flag) == INS_INVALID ) ) {
+ /* this is a prefix, not an SSE insn :
+ * lookup next byte in main table,
+ * subsize will be reset during the
+ * main table lookup */
+ recurse_table = 1;
+ } else {
+ /* this is either a subtable (two-byte) insn
+ * or an invalid insn: either way, set prefix
+ * to NULL and end the opcode lookup */
+ prefix = 0;
+ // short-circuit lookup on invalid insn
+ if (sub_size == INVALID_INSN) return INVALID_INSN;
+ }
+ } else if ( prefix ) {
+ recurse_table = 1;
+ }
+
+ /* by default, we assume that we have the opcode definition,
+ * and there is no need to recurse on the same table, but
+ * if we do then a prefix was encountered... */
+ if ( recurse_table ) {
+ /* this must have been a prefix: use the same table for
+ * lookup of the next byte */
+ sub_size = ia32_table_lookup( &buf[1], buf_len - 1, table,
+ raw_insn, prefixes );
+
+ // short-circuit lookup on invalid insn
+ if (sub_size == INVALID_INSN) return INVALID_INSN;
+
+ /* a bit of a hack for branch hints */
+ if ( prefix & BRANCH_HINT_MASK ) {
+ if ( INS_GROUP((*raw_insn)->mnem_flag) == INS_EXEC ) {
+ /* segment override prefixes are invalid for
+ * all branch instructions, so delete them */
+ prefix &= ~PREFIX_REG_MASK;
+ } else {
+ prefix &= ~BRANCH_HINT_MASK;
+ }
+ }
+
+ /* apply prefix to instruction */
+
+ /* TODO: implement something enforcing prefix groups */
+ (*prefixes) |= prefix;
+ }
+
+ /* if this lookup was in a ModR/M table, then an opcode byte is
+ * NOT consumed: subtract accordingly. NOTE that if none of the
+ * operands used the ModR/M, then we need to consume the byte
+ * here, but ONLY in the 'top-level' opcode extension table */
+
+ if ( table_desc->type == tbl_ext_ext ) {
+ /* extensions-to-extensions never consume a byte */
+ --size;
+ } else if ( (table_desc->type == tbl_extension ||
+ table_desc->type == tbl_fpu ||
+ table_desc->type == tbl_fpu_ext ) &&
+ /* extensions that have an operand encoded in ModR/M
+ * never consume a byte */
+ (uses_modrm_flag((*raw_insn)->dest_flag) ||
+ uses_modrm_flag((*raw_insn)->src_flag) ) ) {
+ --size;
+ }
+
+ size += sub_size;
+
+ return size;
+}
+
+static size_t handle_insn_suffix( unsigned char *buf, size_t buf_len,
+ ia32_insn_t *raw_insn, x86_insn_t * insn ) {
+ ia32_table_desc_t *table_desc;
+ ia32_insn_t *sfx_insn;
+ size_t size;
+ unsigned int prefixes = 0;
+
+ table_desc = &ia32_tables[raw_insn->table];
+ size = ia32_table_lookup( buf, buf_len, raw_insn->table, &sfx_insn,
+ &prefixes );
+ if (size == INVALID_INSN || sfx_insn->mnem_flag == INS_INVALID ) {
+ return 0;
+ }
+
+ strncpy( insn->mnemonic, sfx_insn->mnemonic, 16 );
+ handle_insn_metadata( insn, sfx_insn );
+
+ return 1;
+}
+
+/* invalid instructions are handled by returning 0 [error] from the
+ * function, setting the size of the insn to 1 byte, and copying
+ * the byte at the start of the invalid insn into the x86_insn_t.
+ * if the caller is saving the x86_insn_t for invalid instructions,
+ * instead of discarding them, this will maintain a consistent
+ * address space in the x86_insn_ts */
+
+/* this function is called by the controlling disassembler, so its name and
+ * calling convention cannot be changed */
+/* buf points to the loc of the current opcode (start of the
+ * instruction) in the instruction stream. The instruction
+ * stream is assumed to be a buffer of bytes read directly
+ * from the file for the purpose of disassembly; a mem-mapped
+ * file is ideal for * this.
+ * insn points to a code structure to be filled by instr_decode
+ * returns the size of the decoded instruction in bytes */
+size_t ia32_disasm_addr( unsigned char * buf, size_t buf_len,
+ x86_insn_t *insn ) {
+ ia32_insn_t *raw_insn = NULL;
+ unsigned int prefixes = 0;
+ size_t size, sfx_size;
+
+ if ( (ia32_settings.options & opt_ignore_nulls) && buf_len > 3 &&
+ !buf[0] && !buf[1] && !buf[2] && !buf[3]) {
+ /* IF IGNORE_NULLS is set AND
+ * first 4 bytes in the intruction stream are NULL
+ * THEN return 0 (END_OF_DISASSEMBLY) */
+ /* TODO: set errno */
+ MAKE_INVALID( insn, buf );
+ return 0; /* 4 00 bytes in a row? This isn't code! */
+ }
+
+ /* Perform recursive table lookup starting with main table (0) */
+ size = ia32_table_lookup(buf, buf_len, idx_Main, &raw_insn, &prefixes);
+ if ( size == INVALID_INSN || size > buf_len || raw_insn->mnem_flag == INS_INVALID ) {
+ MAKE_INVALID( insn, buf );
+ /* TODO: set errno */
+ return 0;
+ }
+
+ /* We now have the opcode itself figured out: we can decode
+ * the rest of the instruction. */
+ size += ia32_decode_insn( &buf[size], buf_len - size, raw_insn, insn,
+ prefixes );
+ if ( raw_insn->mnem_flag & INS_FLAG_SUFFIX ) {
+ /* AMD 3DNow! suffix -- get proper operand type here */
+ sfx_size = handle_insn_suffix( &buf[size], buf_len - size,
+ raw_insn, insn );
+ if (! sfx_size ) {
+ /* TODO: set errno */
+ MAKE_INVALID( insn, buf );
+ return 0;
+ }
+
+ size += sfx_size;
+ }
+
+ if (! size ) {
+ /* invalid insn */
+ MAKE_INVALID( insn, buf );
+ return 0;
+ }
+
+
+ insn->size = size;
+ return size; /* return size of instruction in bytes */
+}