LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
X86DisassemblerDecoder.h
Go to the documentation of this file.
1 /*===-- X86DisassemblerDecoder.h - Disassembler decoder -----------*- C -*-===*
2  *
3  * The LLVM Compiler Infrastructure
4  *
5  * This file is distributed under the University of Illinois Open Source
6  * License. See LICENSE.TXT for details.
7  *
8  *===----------------------------------------------------------------------===*
9  *
10  * This file is part of the X86 Disassembler.
11  * It contains the public interface of the instruction decoder.
12  * Documentation for the disassembler can be found in X86Disassembler.h.
13  *
14  *===----------------------------------------------------------------------===*/
15 
16 #ifndef X86DISASSEMBLERDECODER_H
17 #define X86DISASSEMBLERDECODER_H
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 /* Field declarations packaged as macros; both are #undef'd a few lines below. NOTE(review): their consumer (listing line 29) is not visible in this listing - presumably an #include; confirm. */
23 #define INSTRUCTION_SPECIFIER_FIELDS \
24  uint16_t operands;
25 
26 #define INSTRUCTION_IDS \
27  uint16_t instructionIDs;
28 
30 
31 #undef INSTRUCTION_SPECIFIER_FIELDS
32 #undef INSTRUCTION_IDS
33 
34 /*
35  * Accessor macros for fields of an Intel instruction's ModR/M, SIB and REX bytes; each masks its field and shifts it down to bit 0.
36  */
37 #define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
38 #define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
39 #define rmFromModRM(modRM) ((modRM) & 0x7)
40 #define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
41 #define indexFromSIB(sib) (((sib) & 0x38) >> 3)
42 #define baseFromSIB(sib) ((sib) & 0x7)
43 #define wFromREX(rex) (((rex) & 0x8) >> 3)
44 #define rFromREX(rex) (((rex) & 0x4) >> 2)
45 #define xFromREX(rex) (((rex) & 0x2) >> 1)
46 #define bFromREX(rex) ((rex) & 0x1)
47 /* 3-byte VEX prefix fields (bytes 2 and 3). r, x, b and vvvv complement the byte (~) before masking; mmmmm, w, l and pp do not. */
48 #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
49 #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
50 #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
51 #define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
52 #define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
53 #define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
54 #define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
55 #define ppFromVEX3of3(vex) ((vex) & 0x3)
56 /* 2-byte VEX prefix fields (byte 2). r and vvvv complement the byte (~) before masking; l and pp do not. */
57 #define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
58 #define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
59 #define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
60 #define ppFromVEX2of2(vex) ((vex) & 0x3)
61 /* 3-byte XOP prefix fields (bytes 2 and 3); same masks, shifts and complementing as the 3-byte VEX accessors. */
62 #define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7)
63 #define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6)
64 #define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5)
65 #define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
66 #define wFromXOP3of3(xop) (((xop) & 0x80) >> 7)
67 #define vvvvFromXOP3of3(vex) (((~(vex)) & 0x78) >> 3)
68 #define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)
69 #define ppFromXOP3of3(xop) ((xop) & 0x3)
70 
71 /*
72  * These macros list the Intel registers used by the decoder; the enums below are built from them.
73  */
74 /* REGS_8BIT - 8-bit registers, one ENTRY() each. ENTRY is defined at the expansion site; SPL, BPL, SIL and DIL follow R15B. */
75 #define REGS_8BIT \
76  ENTRY(AL) \
77  ENTRY(CL) \
78  ENTRY(DL) \
79  ENTRY(BL) \
80  ENTRY(AH) \
81  ENTRY(CH) \
82  ENTRY(DH) \
83  ENTRY(BH) \
84  ENTRY(R8B) \
85  ENTRY(R9B) \
86  ENTRY(R10B) \
87  ENTRY(R11B) \
88  ENTRY(R12B) \
89  ENTRY(R13B) \
90  ENTRY(R14B) \
91  ENTRY(R15B) \
92  ENTRY(SPL) \
93  ENTRY(BPL) \
94  ENTRY(SIL) \
95  ENTRY(DIL)
96 /* EA_BASES_16BIT - Effective-address bases for 16-bit addressing: four register pairs, then SI, DI, BP, BX, then R8W-R15W. */
97 #define EA_BASES_16BIT \
98  ENTRY(BX_SI) \
99  ENTRY(BX_DI) \
100  ENTRY(BP_SI) \
101  ENTRY(BP_DI) \
102  ENTRY(SI) \
103  ENTRY(DI) \
104  ENTRY(BP) \
105  ENTRY(BX) \
106  ENTRY(R8W) \
107  ENTRY(R9W) \
108  ENTRY(R10W) \
109  ENTRY(R11W) \
110  ENTRY(R12W) \
111  ENTRY(R13W) \
112  ENTRY(R14W) \
113  ENTRY(R15W)
114 /* REGS_16BIT - 16-bit registers AX through R15W, one ENTRY() each. */
115 #define REGS_16BIT \
116  ENTRY(AX) \
117  ENTRY(CX) \
118  ENTRY(DX) \
119  ENTRY(BX) \
120  ENTRY(SP) \
121  ENTRY(BP) \
122  ENTRY(SI) \
123  ENTRY(DI) \
124  ENTRY(R8W) \
125  ENTRY(R9W) \
126  ENTRY(R10W) \
127  ENTRY(R11W) \
128  ENTRY(R12W) \
129  ENTRY(R13W) \
130  ENTRY(R14W) \
131  ENTRY(R15W)
132 /* EA_BASES_32BIT - Same order as REGS_32BIT except that the fifth entry is sib rather than ESP. */
133 #define EA_BASES_32BIT \
134  ENTRY(EAX) \
135  ENTRY(ECX) \
136  ENTRY(EDX) \
137  ENTRY(EBX) \
138  ENTRY(sib) \
139  ENTRY(EBP) \
140  ENTRY(ESI) \
141  ENTRY(EDI) \
142  ENTRY(R8D) \
143  ENTRY(R9D) \
144  ENTRY(R10D) \
145  ENTRY(R11D) \
146  ENTRY(R12D) \
147  ENTRY(R13D) \
148  ENTRY(R14D) \
149  ENTRY(R15D)
150 /* REGS_32BIT - 32-bit registers EAX through R15D, one ENTRY() each. */
151 #define REGS_32BIT \
152  ENTRY(EAX) \
153  ENTRY(ECX) \
154  ENTRY(EDX) \
155  ENTRY(EBX) \
156  ENTRY(ESP) \
157  ENTRY(EBP) \
158  ENTRY(ESI) \
159  ENTRY(EDI) \
160  ENTRY(R8D) \
161  ENTRY(R9D) \
162  ENTRY(R10D) \
163  ENTRY(R11D) \
164  ENTRY(R12D) \
165  ENTRY(R13D) \
166  ENTRY(R14D) \
167  ENTRY(R15D)
168 /* EA_BASES_64BIT - Same order as REGS_64BIT except that the fifth entry is sib64 rather than RSP. */
169 #define EA_BASES_64BIT \
170  ENTRY(RAX) \
171  ENTRY(RCX) \
172  ENTRY(RDX) \
173  ENTRY(RBX) \
174  ENTRY(sib64) \
175  ENTRY(RBP) \
176  ENTRY(RSI) \
177  ENTRY(RDI) \
178  ENTRY(R8) \
179  ENTRY(R9) \
180  ENTRY(R10) \
181  ENTRY(R11) \
182  ENTRY(R12) \
183  ENTRY(R13) \
184  ENTRY(R14) \
185  ENTRY(R15)
186 /* REGS_64BIT - 64-bit registers RAX through R15, one ENTRY() each. */
187 #define REGS_64BIT \
188  ENTRY(RAX) \
189  ENTRY(RCX) \
190  ENTRY(RDX) \
191  ENTRY(RBX) \
192  ENTRY(RSP) \
193  ENTRY(RBP) \
194  ENTRY(RSI) \
195  ENTRY(RDI) \
196  ENTRY(R8) \
197  ENTRY(R9) \
198  ENTRY(R10) \
199  ENTRY(R11) \
200  ENTRY(R12) \
201  ENTRY(R13) \
202  ENTRY(R14) \
203  ENTRY(R15)
204 /* REGS_MMX - MMX registers MM0 through MM7, one ENTRY() each. */
205 #define REGS_MMX \
206  ENTRY(MM0) \
207  ENTRY(MM1) \
208  ENTRY(MM2) \
209  ENTRY(MM3) \
210  ENTRY(MM4) \
211  ENTRY(MM5) \
212  ENTRY(MM6) \
213  ENTRY(MM7)
214 /* REGS_XMM - XMM registers XMM0 through XMM31, one ENTRY() each. */
215 #define REGS_XMM \
216  ENTRY(XMM0) \
217  ENTRY(XMM1) \
218  ENTRY(XMM2) \
219  ENTRY(XMM3) \
220  ENTRY(XMM4) \
221  ENTRY(XMM5) \
222  ENTRY(XMM6) \
223  ENTRY(XMM7) \
224  ENTRY(XMM8) \
225  ENTRY(XMM9) \
226  ENTRY(XMM10) \
227  ENTRY(XMM11) \
228  ENTRY(XMM12) \
229  ENTRY(XMM13) \
230  ENTRY(XMM14) \
231  ENTRY(XMM15) \
232  ENTRY(XMM16) \
233  ENTRY(XMM17) \
234  ENTRY(XMM18) \
235  ENTRY(XMM19) \
236  ENTRY(XMM20) \
237  ENTRY(XMM21) \
238  ENTRY(XMM22) \
239  ENTRY(XMM23) \
240  ENTRY(XMM24) \
241  ENTRY(XMM25) \
242  ENTRY(XMM26) \
243  ENTRY(XMM27) \
244  ENTRY(XMM28) \
245  ENTRY(XMM29) \
246  ENTRY(XMM30) \
247  ENTRY(XMM31)
248 /* REGS_YMM - YMM registers YMM0 through YMM31, one ENTRY() each. */
249 #define REGS_YMM \
250  ENTRY(YMM0) \
251  ENTRY(YMM1) \
252  ENTRY(YMM2) \
253  ENTRY(YMM3) \
254  ENTRY(YMM4) \
255  ENTRY(YMM5) \
256  ENTRY(YMM6) \
257  ENTRY(YMM7) \
258  ENTRY(YMM8) \
259  ENTRY(YMM9) \
260  ENTRY(YMM10) \
261  ENTRY(YMM11) \
262  ENTRY(YMM12) \
263  ENTRY(YMM13) \
264  ENTRY(YMM14) \
265  ENTRY(YMM15) \
266  ENTRY(YMM16) \
267  ENTRY(YMM17) \
268  ENTRY(YMM18) \
269  ENTRY(YMM19) \
270  ENTRY(YMM20) \
271  ENTRY(YMM21) \
272  ENTRY(YMM22) \
273  ENTRY(YMM23) \
274  ENTRY(YMM24) \
275  ENTRY(YMM25) \
276  ENTRY(YMM26) \
277  ENTRY(YMM27) \
278  ENTRY(YMM28) \
279  ENTRY(YMM29) \
280  ENTRY(YMM30) \
281  ENTRY(YMM31)
282 /* REGS_ZMM - ZMM registers ZMM0 through ZMM31, one ENTRY() each. */
283 #define REGS_ZMM \
284  ENTRY(ZMM0) \
285  ENTRY(ZMM1) \
286  ENTRY(ZMM2) \
287  ENTRY(ZMM3) \
288  ENTRY(ZMM4) \
289  ENTRY(ZMM5) \
290  ENTRY(ZMM6) \
291  ENTRY(ZMM7) \
292  ENTRY(ZMM8) \
293  ENTRY(ZMM9) \
294  ENTRY(ZMM10) \
295  ENTRY(ZMM11) \
296  ENTRY(ZMM12) \
297  ENTRY(ZMM13) \
298  ENTRY(ZMM14) \
299  ENTRY(ZMM15) \
300  ENTRY(ZMM16) \
301  ENTRY(ZMM17) \
302  ENTRY(ZMM18) \
303  ENTRY(ZMM19) \
304  ENTRY(ZMM20) \
305  ENTRY(ZMM21) \
306  ENTRY(ZMM22) \
307  ENTRY(ZMM23) \
308  ENTRY(ZMM24) \
309  ENTRY(ZMM25) \
310  ENTRY(ZMM26) \
311  ENTRY(ZMM27) \
312  ENTRY(ZMM28) \
313  ENTRY(ZMM29) \
314  ENTRY(ZMM30) \
315  ENTRY(ZMM31)
316 /* REGS_SEGMENT - Segment registers ES, CS, SS, DS, FS and GS, one ENTRY() each. */
317 #define REGS_SEGMENT \
318  ENTRY(ES) \
319  ENTRY(CS) \
320  ENTRY(SS) \
321  ENTRY(DS) \
322  ENTRY(FS) \
323  ENTRY(GS)
324 /* REGS_DEBUG - Debug registers DR0 through DR7, one ENTRY() each. */
325 #define REGS_DEBUG \
326  ENTRY(DR0) \
327  ENTRY(DR1) \
328  ENTRY(DR2) \
329  ENTRY(DR3) \
330  ENTRY(DR4) \
331  ENTRY(DR5) \
332  ENTRY(DR6) \
333  ENTRY(DR7)
334 /* REGS_CONTROL - Control registers CR0 through CR8, one ENTRY() each. */
335 #define REGS_CONTROL \
336  ENTRY(CR0) \
337  ENTRY(CR1) \
338  ENTRY(CR2) \
339  ENTRY(CR3) \
340  ENTRY(CR4) \
341  ENTRY(CR5) \
342  ENTRY(CR6) \
343  ENTRY(CR7) \
344  ENTRY(CR8)
345 /* ALL_EA_BASES - The 16-, 32- and 64-bit effective-address base lists, concatenated in that order. */
346 #define ALL_EA_BASES \
347  EA_BASES_16BIT \
348  EA_BASES_32BIT \
349  EA_BASES_64BIT
350 /* ALL_SIB_BASES - The 32- and 64-bit register lists, concatenated in that order. */
351 #define ALL_SIB_BASES \
352  REGS_32BIT \
353  REGS_64BIT
354 /* ALL_REGS - All of the REGS_* lists concatenated in the order shown, followed by RIP; expanded with ENTRY() inside the EABase and Reg enums. */
355 #define ALL_REGS \
356  REGS_8BIT \
357  REGS_16BIT \
358  REGS_32BIT \
359  REGS_64BIT \
360  REGS_MMX \
361  REGS_XMM \
362  REGS_YMM \
363  REGS_ZMM \
364  REGS_SEGMENT \
365  REGS_DEBUG \
366  REGS_CONTROL \
367  ENTRY(RIP)
368 
369 /*
370  * EABase - All possible values of the base field for effective-address
371  * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
372  * distinguish between bases (EA_BASE_*) and registers that just happen to be
373  * referred to when Mod == 0b11 (EA_REG_*).
374  */
375 typedef enum {
377 #define ENTRY(x) EA_BASE_##x,
379 #undef ENTRY
380 #define ENTRY(x) EA_REG_##x,
381  ALL_REGS
382 #undef ENTRY
384 } EABase;
385 
386 /*
387  * SIBIndex - All possible values of the SIB index field.
388  * Borrows entries from ALL_EA_BASES with the special case that
389  * sib is synonymous with NONE.
390  * Vector SIB: index can be XMM, YMM, or ZMM.
391  */
392 typedef enum {
394 #define ENTRY(x) SIB_INDEX_##x,
396  REGS_XMM
397  REGS_YMM
398  REGS_ZMM
399 #undef ENTRY
401 } SIBIndex;
402 
403 /*
404  * SIBBase - All possible values of the SIB base field.
405  */
406 typedef enum {
408 #define ENTRY(x) SIB_BASE_##x,
410 #undef ENTRY
412 } SIBBase;
413 
414 /*
415  * EADisplacement - Possible displacement types for effective-address
416  * computations.
417  */
418 typedef enum {
424 
425 /*
426  * Reg - All possible values of the reg field in the ModR/M byte.
427  */
428 typedef enum {
429 #define ENTRY(x) MODRM_REG_##x,
430  ALL_REGS
431 #undef ENTRY
433 } Reg;
434 
435 /*
436  * SegmentOverride - All possible segment overrides.
437  */
438 typedef enum {
448 
449 /*
450  * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
451  */
452 
453 typedef enum {
454  VEX_LOB_0F = 0x1,
458 
459 typedef enum {
463 } XOPMapSelect;
464 
465 /*
466  * VEXPrefixCode - Possible values for the VEX.pp field
467  */
468 
469 typedef enum {
474 } VEXPrefixCode;
475 
476 typedef enum {
478  TYPE_VEX_2B = 0x1,
479  TYPE_VEX_3B = 0x2,
480  TYPE_XOP = 0x3
481 } VEXXOPType;
482 /* BOOL - One-byte (uint8_t) type; presumably holds true/false flags - its uses are not visible in this listing. */
483 typedef uint8_t BOOL;
484 
485 /*
486  * byteReader_t - Type for the byte reader that the consumer must provide to
487  * the decoder. Reads a single byte from the instruction's address space.
488  * @param arg - An opaque baton that the consumer can associate with any
489  * internal state that it needs. The reader receives it as a const pointer.
490  * @param byte - A pointer to a single byte in memory that the reader sets to
491  * the value found at address.
492  * @param address - The address in the instruction's address space that should
493  * be read from.
494  * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
495  */
496 typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address);
497 
498 /*
499  * dlog_t - Type for the logging function that the consumer can provide to
500  * get debugging output from the decoder. Returns nothing.
501  * @param arg - An opaque baton that the consumer can associate with any
502  * internal state that it needs.
503  * @param log - A string that contains the message. The string will be reused
504  * after the logger returns, so copy it if it must be kept.
505  */
506 typedef void (*dlog_t)(void* arg, const char *log);
507 
508 /*
509  * The x86 internal instruction, which is produced by the decoder.
510  */
512  /* Reader interface (C) */
514  /* Opaque value passed to the reader */
515  const void* readerArg;
516  /* The address of the next byte to read via the reader */
517  uint64_t readerCursor;
518 
519  /* Logger interface (C) */
521  /* Opaque value passed to the logger */
522  void* dlogArg;
523 
524  /* General instruction information */
525 
526  /* The mode to disassemble for (64-bit, protected, real) */
528  /* The start of the instruction, usable with the reader */
529  uint64_t startLocation;
530  /* The length of the instruction, in bytes */
531  size_t length;
532 
533  /* Prefix state */
534 
535  /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
536  uint8_t prefixPresent[0x100];
537  /* contains the location (for use with the reader) of the prefix byte */
538  uint64_t prefixLocations[0x100];
539  /* The value of the VEX/XOP prefix, if present */
540  uint8_t vexXopPrefix[3];
541  /* The length of the VEX prefix (0 if not present) */
543  /* The value of the REX prefix, if present */
544  uint8_t rexPrefix;
545  /* The location where a mandatory prefix would have to be (i.e., right before
546  the opcode, or right before the REX prefix if one is present) */
548  /* The segment override type */
550  /* 1 if the prefix byte, 0xf2 or 0xf3, is xacquire or xrelease */
552 
553  /* Sizes of various critical pieces of data, in bytes */
554  uint8_t registerSize;
555  uint8_t addressSize;
557  uint8_t immediateSize;
558 
559  /* Offsets from the start of the instruction to the pieces of data, which are
560  needed to find relocation entries for adding symbolic operands */
563 
564  /* opcode state */
565 
566  /* The last byte of the opcode, not counting any ModR/M extension */
567  uint8_t opcode;
568  /* The ModR/M byte of the instruction, if it is an opcode extension */
569  uint8_t modRMExtension;
570 
571  /* decode state */
572 
573  /* The type of opcode, used for indexing into the array of decode tables */
575  /* The instruction ID, extracted from the decode table */
576  uint16_t instructionID;
577  /* The specifier for the instruction, from the instruction info table */
578  const struct InstructionSpecifier *spec;
579 
580  /* state for additional bytes, consumed during operand decode. Pattern:
581  consumed___ indicates that the byte was already consumed and does not
582  need to be consumed again */
583 
584  /* The VEX.vvvv field, which contains a third register operand for some AVX
585  instructions */
587 
588  /* The ModR/M byte, which contains most register operands and some portion of
589  all memory operands */
591  uint8_t modRM;
592 
593  /* The SIB byte, used for more complex 32- or 64-bit memory operands */
595  uint8_t sib;
596 
597  /* The displacement, used for memory operands */
599  int32_t displacement;
600 
601  /* Immediates. There can be two in some cases */
604  uint64_t immediates[2];
605 
606  /* A register or immediate operand encoded into the opcode */
608  uint8_t opcodeModifier;
610 
611  /* Portions of the ModR/M byte */
612 
613  /* These fields determine the allowable values for the ModR/M fields, which
614  depend on operand and address widths */
618 
619  /* The Mod and R/M fields can encode a base for an effective address, or a
620  register. These are separated into two fields here */
623  /* The reg field always encodes a register */
625 
626  /* SIB state */
628  uint8_t sibScale;
630 
631  const struct OperandSpecifier *operands;
632 };
633 
634 /* decodeInstruction - Decode one instruction and store the decoding results in
635  * a buffer provided by the consumer.
636  * @param insn - The buffer to store the instruction in. Allocated by the
637  * consumer.
638  * @param reader - The byteReader_t for the bytes to be read.
639  * @param readerArg - An argument to pass to the reader for storing context
640  * specific to the consumer. May be NULL.
641  * @param logger - The dlog_t to be used in printing status messages from the
642  * disassembler. May be NULL.
643  * @param loggerArg - Context argument passed to the logger. May be NULL.
644  * @param miiArg - Opaque consumer-supplied pointer; not described by this header (NOTE(review): presumably the mii later given to x86DisassemblerGetInstrName - confirm).
645  * @param startLoc - The address (in the reader's address space) of the first
646  * byte in the instruction.
647  * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
648  * @return - Nonzero if there was an error during decode, 0 otherwise.
649  */
650 int decodeInstruction(struct InternalInstruction* insn,
651  byteReader_t reader,
652  const void* readerArg,
653  dlog_t logger,
654  void* loggerArg,
655  const void* miiArg,
656  uint64_t startLoc,
657  DisassemblerMode mode);
658 
659 /* x86DisassemblerDebug - C-accessible function for printing a message to
660  * debugs(). Declared inside this header's extern "C" block; returns nothing.
661  * @param file - The name of the file printing the debug message.
662  * @param line - The line number that printed the debug message.
663  * @param s - The message to print.
664  */
665 
666 void x86DisassemblerDebug(const char *file,
667  unsigned line,
668  const char *s);
669 /* x86DisassemblerGetInstrName - Returns a string for the given Opcode; mii is an opaque pointer (NOTE(review): presumably the instruction-info object used for the lookup - confirm against the implementation). */
670 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
671 
672 #ifdef __cplusplus
673 }
674 #endif
675 
676 #endif
void(* dlog_t)(void *arg, const char *log)
int(* byteReader_t)(const void *arg, uint8_t *byte, uint64_t address)
#define ALL_REGS
int decodeInstruction(struct InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
uint64_t prefixLocations[0x100]
const struct InstructionSpecifier * spec
const char * x86DisassemblerGetInstrName(unsigned Opcode, const void *mii)
#define ALL_EA_BASES
VEXLeadingOpcodeByte
uint8_t BOOL
#define ALL_SIB_BASES
const struct OperandSpecifier * operands
#define REGS_ZMM
void x86DisassemblerDebug(const char *file, unsigned line, const char *s)
#define REGS_XMM
static void logger(void *arg, const char *log)
#define REGS_YMM
SegmentOverride segmentOverride