1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "qword.h"
#include "ia32_insn.h"
#include "ia32_opcode_tables.h"
#include "ia32_reg.h"
#include "ia32_operand.h"
#include "ia32_implicit.h"
#include "ia32_settings.h"
#include "libdis.h"
extern ia32_table_desc_t ia32_tables[];
extern ia32_settings_t ia32_settings;
#define IS_SP( op ) (op->type == op_register && \
(op->data.reg.id == REG_ESP_INDEX || \
op->data.reg.alias == REG_ESP_INDEX) )
#define IS_IMM( op ) (op->type == op_immediate )
#ifdef WIN32
# define INLINE
#else
# define INLINE inline
#endif
/* for calculating stack modification based on an operand */
static INLINE int32_t long_from_operand( x86_op_t *op ) {
if (! IS_IMM(op) ) {
return 0L;
}
switch ( op->datatype ) {
case op_byte:
return (int32_t) op->data.sbyte;
case op_word:
return (int32_t) op->data.sword;
case op_qword:
return (int32_t) op->data.sqword;
case op_dword:
return op->data.sdword;
default:
/* these are not used in stack insn */
break;
}
return 0L;
}
/* determine what this insn does to the stack */
static void ia32_stack_mod(x86_insn_t *insn) {
x86_op_t *dest, *src = NULL;
if (! insn || ! insn->operands ) {
return;
}
dest = &insn->operands->op;
if ( dest ) {
src = &insn->operands->next->op;
}
insn->stack_mod = 0;
insn->stack_mod_val = 0;
switch ( insn->type ) {
case insn_call:
case insn_callcc:
insn->stack_mod = 1;
insn->stack_mod_val = insn->addr_size * -1;
break;
case insn_push:
insn->stack_mod = 1;
insn->stack_mod_val = insn->addr_size * -1;
break;
case insn_return:
insn->stack_mod = 1;
insn->stack_mod_val = insn->addr_size;
case insn_int: case insn_intcc:
case insn_iret:
break;
case insn_pop:
insn->stack_mod = 1;
if (! IS_SP( dest ) ) {
insn->stack_mod_val = insn->op_size;
} /* else we don't know the stack change in a pop esp */
break;
case insn_enter:
insn->stack_mod = 1;
insn->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_leave:
insn->stack_mod = 1;
insn->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_pushregs:
insn->stack_mod = 1;
insn->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_popregs:
insn->stack_mod = 1;
insn->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_pushflags:
insn->stack_mod = 1;
insn->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_popflags:
insn->stack_mod = 1;
insn->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_add:
if ( IS_SP( dest ) ) {
insn->stack_mod = 1;
insn->stack_mod_val = long_from_operand( src );
}
break;
case insn_sub:
if ( IS_SP( dest ) ) {
insn->stack_mod = 1;
insn->stack_mod_val = long_from_operand( src );
insn->stack_mod_val *= -1;
}
break;
case insn_inc:
if ( IS_SP( dest ) ) {
insn->stack_mod = 1;
insn->stack_mod_val = 1;
}
break;
case insn_dec:
if ( IS_SP( dest ) ) {
insn->stack_mod = 1;
insn->stack_mod_val = 1;
}
break;
case insn_mov: case insn_movcc:
case insn_xchg: case insn_xchgcc:
case insn_mul: case insn_div:
case insn_shl: case insn_shr:
case insn_rol: case insn_ror:
case insn_and: case insn_or:
case insn_not: case insn_neg:
case insn_xor:
if ( IS_SP( dest ) ) {
insn->stack_mod = 1;
}
break;
default:
break;
}
if (! strcmp("enter", insn->mnemonic) ) {
insn->stack_mod = 1;
} else if (! strcmp("leave", insn->mnemonic) ) {
insn->stack_mod = 1;
}
/* for mov, etc we return 0 -- unknown stack mod */
return;
}
/* get the cpu details for this insn from cpu flags int */
static void ia32_handle_cpu( x86_insn_t *insn, unsigned int cpu ) {
insn->cpu = (enum x86_insn_cpu) CPU_MODEL(cpu);
insn->isa = (enum x86_insn_isa) (ISA_SUBSET(cpu)) >> 16;
return;
}
/* handle mnemonic type and group */
static void ia32_handle_mnemtype(x86_insn_t *insn, unsigned int mnemtype) {
unsigned int type = mnemtype & ~INS_FLAG_MASK;
insn->group = (enum x86_insn_group) (INS_GROUP(type)) >> 12;
insn->type = (enum x86_insn_type) INS_TYPE(type);
return;
}
static void ia32_handle_notes(x86_insn_t *insn, unsigned int notes) {
insn->note = (enum x86_insn_note) notes;
return;
}
static void ia32_handle_eflags( x86_insn_t *insn, unsigned int eflags) {
unsigned int flags;
/* handle flags effected */
flags = INS_FLAGS_TEST(eflags);
/* handle weird OR cases */
/* these are either JLE (ZF | SF<>OF) or JBE (CF | ZF) */
if (flags & INS_TEST_OR) {
flags &= ~INS_TEST_OR;
if ( flags & INS_TEST_ZERO ) {
flags &= ~INS_TEST_ZERO;
if ( flags & INS_TEST_CARRY ) {
flags &= ~INS_TEST_CARRY ;
flags |= (int)insn_carry_or_zero_set;
} else if ( flags & INS_TEST_SFNEOF ) {
flags &= ~INS_TEST_SFNEOF;
flags |= (int)insn_zero_set_or_sign_ne_oflow;
}
}
}
insn->flags_tested = (enum x86_flag_status) flags;
insn->flags_set = (enum x86_flag_status) INS_FLAGS_SET(eflags) >> 16;
return;
}
static void ia32_handle_prefix( x86_insn_t *insn, unsigned int prefixes ) {
insn->prefix = (enum x86_insn_prefix) prefixes & PREFIX_MASK; // >> 20;
if (! (insn->prefix & PREFIX_PRINT_MASK) ) {
/* no printable prefixes */
insn->prefix = insn_no_prefix;
}
/* concat all prefix strings */
if ( (unsigned int)insn->prefix & PREFIX_LOCK ) {
strncat(insn->prefix_string, "lock ", 32 -
strlen(insn->prefix_string));
}
if ( (unsigned int)insn->prefix & PREFIX_REPNZ ) {
strncat(insn->prefix_string, "repnz ", 32 -
strlen(insn->prefix_string));
} else if ( (unsigned int)insn->prefix & PREFIX_REPZ ) {
strncat(insn->prefix_string, "repz ", 32 -
strlen(insn->prefix_string));
}
return;
}
static void reg_32_to_16( x86_op_t *op, x86_insn_t *insn, void *arg ) {
/* if this is a 32-bit register and it is a general register ... */
if ( op->type == op_register && op->data.reg.size == 4 &&
(op->data.reg.type & reg_gen) ) {
/* WORD registers are 8 indices off from DWORD registers */
ia32_handle_register( &(op->data.reg),
op->data.reg.id + 8 );
}
}
static void handle_insn_metadata( x86_insn_t *insn, ia32_insn_t *raw_insn ) {
ia32_handle_mnemtype( insn, raw_insn->mnem_flag );
ia32_handle_notes( insn, raw_insn->notes );
ia32_handle_eflags( insn, raw_insn->flags_effected );
ia32_handle_cpu( insn, raw_insn->cpu );
ia32_stack_mod( insn );
}
static size_t ia32_decode_insn( unsigned char *buf, size_t buf_len,
ia32_insn_t *raw_insn, x86_insn_t *insn,
unsigned int prefixes ) {
size_t size, op_size;
unsigned char modrm;
/* this should never happen, but just in case... */
if ( raw_insn->mnem_flag == INS_INVALID ) {
return 0;
}
if (ia32_settings.options & opt_16_bit) {
insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 4 : 2;
insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 4 : 2;
} else {
insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 2 : 4;
insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 2 : 4;
}
/* ++++ 1. Copy mnemonic and mnemonic-flags to CODE struct */
if ((ia32_settings.options & opt_att_mnemonics) && raw_insn->mnemonic_att[0]) {
strncpy( insn->mnemonic, raw_insn->mnemonic_att, 16 );
}
else {
strncpy( insn->mnemonic, raw_insn->mnemonic, 16 );
}
ia32_handle_prefix( insn, prefixes );
handle_insn_metadata( insn, raw_insn );
/* prefetch the next byte in case it is a modr/m byte -- saves
* worrying about whether the 'mod/rm' operand or the 'reg' operand
* occurs first */
modrm = GET_BYTE( buf, buf_len );
/* ++++ 2. Decode Explicit Operands */
/* Intel uses up to 3 explicit operands in its instructions;
* the first is 'dest', the second is 'src', and the third
* is an additional source value (usually an immediate value,
* e.g. in the MUL instructions). These three explicit operands
* are encoded in the opcode tables, even if they are not used
* by the instruction. Additional implicit operands are stored
* in a supplemental table and are handled later. */
op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->dest,
raw_insn->dest_flag, prefixes, modrm );
/* advance buffer, increase size if necessary */
buf += op_size;
buf_len -= op_size;
size = op_size;
op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->src,
raw_insn->src_flag, prefixes, modrm );
buf += op_size;
buf_len -= op_size;
size += op_size;
op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->aux,
raw_insn->aux_flag, prefixes, modrm );
size += op_size;
/* ++++ 3. Decode Implicit Operands */
/* apply implicit operands */
ia32_insn_implicit_ops( insn, raw_insn->implicit_ops );
/* we have one small inelegant hack here, to deal with
* the two prefixes that have implicit operands. If Intel
* adds more, we'll change the algorithm to suit :) */
if ( (prefixes & PREFIX_REPZ) || (prefixes & PREFIX_REPNZ) ) {
ia32_insn_implicit_ops( insn, IDX_IMPLICIT_REP );
}
/* 16-bit hack: foreach operand, if 32-bit reg, make 16-bit reg */
if ( insn->op_size == 2 ) {
x86_operand_foreach( insn, reg_32_to_16, NULL, op_any );
}
return size;
}
/* convenience routine */
#define USES_MOD_RM(flag) \
(flag == ADDRMETH_E || flag == ADDRMETH_M || flag == ADDRMETH_Q || \
flag == ADDRMETH_W || flag == ADDRMETH_R)
static int uses_modrm_flag( unsigned int flag ) {
unsigned int meth;
if ( flag == ARG_NONE ) {
return 0;
}
meth = (flag & ADDRMETH_MASK);
if ( USES_MOD_RM(meth) ) {
return 1;
}
return 0;
}
/* This routine performs the actual byte-by-byte opcode table lookup.
* Originally it was pretty simple: get a byte, adjust it to a proper
* index into the table, then check the table row at that index to
* determine what to do next. But is anything that simple with Intel?
* This is now a huge, convoluted mess, mostly of bitter comments. */
/* buf: pointer to next byte to read from stream
* buf_len: length of buf
* table: index of table to use for lookups
* raw_insn: output pointer that receives opcode definition
* prefixes: output integer that is encoded with prefixes in insn
* returns : number of bytes consumed from stream during lookup */
size_t ia32_table_lookup( unsigned char *buf, size_t buf_len,
unsigned int table, ia32_insn_t **raw_insn,
unsigned int *prefixes ) {
unsigned char *next, op = buf[0]; /* byte value -- 'opcode' */
size_t size = 1, sub_size = 0, next_len;
ia32_table_desc_t *table_desc;
unsigned int subtable, prefix = 0, recurse_table = 0;
table_desc = &ia32_tables[table];
op = GET_BYTE( buf, buf_len );
if ( table_desc->type == tbl_fpu && op > table_desc->maxlim) {
/* one of the fucking FPU tables out of the 00-BH range */
/* OK,. this is a bit of a hack -- the proper way would
* have been to use subtables in the 00-BF FPU opcode tables,
* but that is rather wasteful of space... */
table_desc = &ia32_tables[table +1];
}
/* PERFORM TABLE LOOKUP */
/* ModR/M trick: shift extension bits into lowest bits of byte */
/* Note: non-ModR/M tables have a shift value of 0 */
op >>= table_desc->shift;
/* ModR/M trick: mask out high bits to turn extension into an index */
/* Note: non-ModR/M tables have a mask value of 0xFF */
op &= table_desc->mask;
/* Sparse table trick: check that byte is <= max value */
/* Note: full (256-entry) tables have a maxlim of 155 */
if ( op > table_desc->maxlim ) {
/* this is a partial table, truncated at the tail,
and op is out of range! */
return INVALID_INSN;
}
/* Sparse table trick: check that byte is >= min value */
/* Note: full (256-entry) tables have a minlim of 0 */
if ( table_desc->minlim > op ) {
/* this is a partial table, truncated at the head,
and op is out of range! */
return INVALID_INSN;
}
/* adjust op to be an offset from table index 0 */
op -= table_desc->minlim;
/* Yay! 'op' is now fully adjusted to be an index into 'table' */
*raw_insn = &(table_desc->table[op]);
//printf("BYTE %X TABLE %d OP %X\n", buf[0], table, op );
if ( (*raw_insn)->mnem_flag & INS_FLAG_PREFIX ) {
prefix = (*raw_insn)->mnem_flag & PREFIX_MASK;
}
/* handle escape to a multibyte/coproc/extension/etc table */
/* NOTE: if insn is a prefix and has a subtable, then we
* only recurse if this is the first prefix byte --
* that is, if *prefixes is 0.
* NOTE also that suffix tables are handled later */
subtable = (*raw_insn)->table;
if ( subtable && ia32_tables[subtable].type != tbl_suffix &&
(! prefix || ! *prefixes) ) {
if ( ia32_tables[subtable].type == tbl_ext_ext ||
ia32_tables[subtable].type == tbl_fpu_ext ) {
/* opcode extension: reuse current byte in buffer */
next = buf;
next_len = buf_len;
} else {
/* "normal" opcode: advance to next byte in buffer */
if ( buf_len > 1 ) {
next = &buf[1];
next_len = buf_len - 1;
}
else {
// buffer is truncated
return INVALID_INSN;
}
}
/* we encountered a multibyte opcode: recurse using the
* table specified in the opcode definition */
sub_size = ia32_table_lookup( next, next_len, subtable,
raw_insn, prefixes );
/* SSE/prefix hack: if the original opcode def was a
* prefix that specified a subtable, and the subtable
* lookup returned a valid insn, then we have encountered
* an SSE opcode definition; otherwise, we pretend we
* never did the subtable lookup, and deal with the
* prefix normally later */
if ( prefix && ( sub_size == INVALID_INSN ||
INS_TYPE((*raw_insn)->mnem_flag) == INS_INVALID ) ) {
/* this is a prefix, not an SSE insn :
* lookup next byte in main table,
* subsize will be reset during the
* main table lookup */
recurse_table = 1;
} else {
/* this is either a subtable (two-byte) insn
* or an invalid insn: either way, set prefix
* to NULL and end the opcode lookup */
prefix = 0;
// short-circuit lookup on invalid insn
if (sub_size == INVALID_INSN) return INVALID_INSN;
}
} else if ( prefix ) {
recurse_table = 1;
}
/* by default, we assume that we have the opcode definition,
* and there is no need to recurse on the same table, but
* if we do then a prefix was encountered... */
if ( recurse_table ) {
/* this must have been a prefix: use the same table for
* lookup of the next byte */
sub_size = ia32_table_lookup( &buf[1], buf_len - 1, table,
raw_insn, prefixes );
// short-circuit lookup on invalid insn
if (sub_size == INVALID_INSN) return INVALID_INSN;
/* a bit of a hack for branch hints */
if ( prefix & BRANCH_HINT_MASK ) {
if ( INS_GROUP((*raw_insn)->mnem_flag) == INS_EXEC ) {
/* segment override prefixes are invalid for
* all branch instructions, so delete them */
prefix &= ~PREFIX_REG_MASK;
} else {
prefix &= ~BRANCH_HINT_MASK;
}
}
/* apply prefix to instruction */
/* TODO: implement something enforcing prefix groups */
(*prefixes) |= prefix;
}
/* if this lookup was in a ModR/M table, then an opcode byte is
* NOT consumed: subtract accordingly. NOTE that if none of the
* operands used the ModR/M, then we need to consume the byte
* here, but ONLY in the 'top-level' opcode extension table */
if ( table_desc->type == tbl_ext_ext ) {
/* extensions-to-extensions never consume a byte */
--size;
} else if ( (table_desc->type == tbl_extension ||
table_desc->type == tbl_fpu ||
table_desc->type == tbl_fpu_ext ) &&
/* extensions that have an operand encoded in ModR/M
* never consume a byte */
(uses_modrm_flag((*raw_insn)->dest_flag) ||
uses_modrm_flag((*raw_insn)->src_flag) ) ) {
--size;
}
size += sub_size;
return size;
}
static size_t handle_insn_suffix( unsigned char *buf, size_t buf_len,
ia32_insn_t *raw_insn, x86_insn_t * insn ) {
ia32_insn_t *sfx_insn;
size_t size;
unsigned int prefixes = 0;
size = ia32_table_lookup( buf, buf_len, raw_insn->table, &sfx_insn,
&prefixes );
if (size == INVALID_INSN || sfx_insn->mnem_flag == INS_INVALID ) {
return 0;
}
strncpy( insn->mnemonic, sfx_insn->mnemonic, 16 );
handle_insn_metadata( insn, sfx_insn );
return 1;
}
/* invalid instructions are handled by returning 0 [error] from the
* function, setting the size of the insn to 1 byte, and copying
* the byte at the start of the invalid insn into the x86_insn_t.
* if the caller is saving the x86_insn_t for invalid instructions,
* instead of discarding them, this will maintain a consistent
* address space in the x86_insn_ts */
/* this function is called by the controlling disassembler, so its name and
* calling convention cannot be changed */
/* buf points to the loc of the current opcode (start of the
* instruction) in the instruction stream. The instruction
* stream is assumed to be a buffer of bytes read directly
* from the file for the purpose of disassembly; a mem-mapped
* file is ideal for * this.
* insn points to a code structure to be filled by instr_decode
* returns the size of the decoded instruction in bytes */
size_t ia32_disasm_addr( unsigned char * buf, size_t buf_len,
x86_insn_t *insn ) {
ia32_insn_t *raw_insn = NULL;
unsigned int prefixes = 0;
size_t size, sfx_size;
if ( (ia32_settings.options & opt_ignore_nulls) && buf_len > 3 &&
!buf[0] && !buf[1] && !buf[2] && !buf[3]) {
/* IF IGNORE_NULLS is set AND
* first 4 bytes in the intruction stream are NULL
* THEN return 0 (END_OF_DISASSEMBLY) */
/* TODO: set errno */
MAKE_INVALID( insn, buf );
return 0; /* 4 00 bytes in a row? This isn't code! */
}
/* Perform recursive table lookup starting with main table (0) */
size = ia32_table_lookup(buf, buf_len, idx_Main, &raw_insn, &prefixes);
if ( size == INVALID_INSN || size > buf_len || raw_insn->mnem_flag == INS_INVALID ) {
MAKE_INVALID( insn, buf );
/* TODO: set errno */
return 0;
}
/* We now have the opcode itself figured out: we can decode
* the rest of the instruction. */
size += ia32_decode_insn( &buf[size], buf_len - size, raw_insn, insn,
prefixes );
if ( raw_insn->mnem_flag & INS_FLAG_SUFFIX ) {
/* AMD 3DNow! suffix -- get proper operand type here */
sfx_size = handle_insn_suffix( &buf[size], buf_len - size,
raw_insn, insn );
if (! sfx_size ) {
/* TODO: set errno */
MAKE_INVALID( insn, buf );
return 0;
}
size += sfx_size;
}
if (! size ) {
/* invalid insn */
MAKE_INVALID( insn, buf );
return 0;
}
insn->size = size;
return size; /* return size of instruction in bytes */
}
|