Index: src/trusted/validator_ragel/unreviewed/decoder_test.c |
=================================================================== |
--- src/trusted/validator_ragel/unreviewed/decoder_test.c (revision 9996) |
+++ src/trusted/validator_ragel/unreviewed/decoder_test.c (working copy) |
@@ -14,7 +14,7 @@ |
#include "native_client/src/include/elf64.h" |
#include "native_client/src/shared/platform/nacl_check.h" |
#include "native_client/src/shared/utils/types.h" |
-#include "native_client/src/trusted/validator_ragel/unreviewed/decoder.h" |
+#include "native_client/src/trusted/validator_ragel/decoder.h" |
/* This is a copy of NaClLog from shared/platform/nacl_log.c to avoid |
* linking in code in NaCl shared code in the unreviewed/Makefile and be able to |
@@ -150,128 +150,155 @@ |
} |
} |
-void ProcessInstruction(const uint8_t *begin, const uint8_t *end, |
- struct Instruction *instruction, void *userdata) { |
- const char *instruction_name = instruction->name; |
- unsigned char operands_count = instruction->operands_count; |
- unsigned char rex_prefix = instruction->prefix.rex; |
- enum OperandName rm_index = instruction->rm.index; |
- enum OperandName rm_base = instruction->rm.base; |
- Bool data16_prefix = instruction->prefix.data16; |
- const uint8_t *p; |
- char delimeter = ' '; |
- Bool print_rip = FALSE; |
- Bool empty_rex_prefix_ok = FALSE; |
- Bool spurious_rex_prefix = FALSE; |
-#define print_name(x) (printf((x)), shown_name += strlen((x))) |
- size_t shown_name = 0; |
- int i, operand_type; |
- |
- /* |
- * "fwait" is nasty: few of them will be included in other X87 instructions |
- * ("fclex", "finit", "fstcw", "fstsw", "fsave" have two names, other |
- * instructions are unchanged) - but if after them we see regular instruction |
- * then we must print them all. This convoluted logic is not needed when we |
- * don't print anything so decoder does not include it. |
- */ |
- if (((end == begin + 1) && (begin[0] == 0x9b)) || |
- ((end == begin + 2) && |
- ((begin[0] & 0xf0) == 0x40) && (begin[1] == 0x9b))) { |
- if (!(((struct DecodeState *)userdata)->fwait)) { |
- ((struct DecodeState *)userdata)->fwait = begin; |
+/* |
+ * "fwait" is nasty: a few of them will be folded into other x87 instructions |
+ * ("fclex", "finit", "fstcw", "fstsw", "fsave" have two names, other |
+ * instructions are unchanged) - but if we see a regular instruction after them |
+ * then we must print them all. This convoluted logic is not needed when we |
+ * don't print anything, so the decoder does not include it. |
+ */ |
+static Bool ProcessFWait(const uint8_t **begin, const uint8_t *end, |
Dmitry Polukhin
2012/11/02 17:31:06
Too many arguments, class with state should solve
|
+ struct Instruction *instruction, |
+ struct DecodeState *userdata, |
+ unsigned char *rex_prefix, Bool *spurious_rex_prefix, |
+ enum OperandName *rm_base, |
+ enum OperandName *rm_index) { |
+ /* Instruction is "fwait" if it's 0x9b or if it's REX prefix plus 0x9b. */ |
+ if (((end == *begin + 1) && ((*begin)[0] == 0x9b)) || |
+ ((end == *begin + 2) && |
+ (((*begin)[0] & 0xf0) == 0x40) && ((*begin)[1] == 0x9b))) { |
+ if (!(userdata->fwait)) { |
+ userdata->fwait = *begin; |
} |
- return; |
- } else if (((struct DecodeState *)userdata)->fwait) { |
- /* If it's x87 instruction then we can fold some fwait's in the instruction |
- itself. */ |
- if (((begin[0] >= 0xd8) && (begin[0] <= 0xdf)) || |
- ((((begin[0] & 0xf0) == 0x40) || (begin[0] == 0x66)) && |
- (begin[1] >= 0xd8) && (begin[1] <= 0xdf)) || |
- ((begin[0] == 0x66) || ((begin[1] & 0xf0) == 0x40) || |
- (begin[2] >= 0xd8) || (begin[2] <= 0xdf))) { |
- /* fwait "prefix" can only include two 0x9b bytes or one rex byte - and |
- * then only if the instruction itself have no rex prefix. */ |
- int fwait_count = !!data16_prefix; |
- int rex_count = (!!rex_prefix) | (!!data16_prefix); |
+ return TRUE; |
+ } else if (userdata->fwait) { |
+ /* |
+ * If it's x87 instruction then we can fold some fwait's in the instruction |
+ * itself. |
+ * |
+ * Instruction is x87 instruction if it has opcode from 0xd8 to 0xdf, but it |
+ * can include either 0x66 prefix or REX prefix, or both. |
+ */ |
+ if ((((*begin)[0] & 0xf8) == 0xd8) || |
+ (((((*begin)[0] & 0xf0) == 0x40) || ((*begin)[0] == 0x66)) && |
+ (((*begin)[1] & 0xf8) == 0xd8)) || |
+ (((*begin)[0] == 0x66) && (((*begin)[1] & 0xf0) == 0x40) && |
+ (((*begin)[2] & 0xf8) == 0xd8))) { |
+ /* |
+ * fwait "prefix" can only include two 0x9b bytes or one rex byte - and |
+ * then only if the instruction itself has no rex prefix. |
+ */ |
+ int fwait_count = !!(instruction->prefix.data16); |
+ int rex_count = (!!*rex_prefix) | (!!(instruction->prefix.data16)); |
for (;;) { |
- if (begin == ((struct DecodeState *)userdata)->fwait) |
+ if (*begin == userdata->fwait) |
break; |
- if ((begin[-1]) == 0x9b) { |
+ if (((*begin)[-1]) == 0x9b) { |
if (fwait_count < 2) { |
- --begin; |
+ --*begin; |
++fwait_count; |
- if ((begin[1] & 0xf0) == 0x40) |
+ if (((*begin)[1] & 0xf0) == 0x40) |
break; |
} else { |
break; |
} |
- } else if ((begin[-1] & 0xf0) == 0x40) { |
+ } else if (((*begin)[-1] & 0xf0) == 0x40) { |
if (rex_count >= 1) |
break; |
- --begin; |
+ --*begin; |
++rex_count; |
- if (!rex_prefix) { |
- rex_prefix = *begin; |
- /* Bug-to-bug compatibility, fun... */ |
- if ((rex_prefix & 0x01) && (rm_base <= REG_RDI)) { |
- if (operands_count == 1 && |
+ if (!*rex_prefix) { |
+ *rex_prefix = **begin; |
+ /* |
+ * Bug-to-bug compatibility: objdump will erroneously use bits from |
+ * first REX prefix (attached to "fwait") and not from the |
+ * second REX prefix (attached to the instruction itself). |
+ * |
+ * Duplicate this error here until it is fixed in objdump. |
+ */ |
+ if ((*rex_prefix & 0x01) && (*rm_base <= REG_RDI)) { |
+ if (instruction->operands_count == 1 && |
instruction->operands[0].name == REG_RM) |
- rm_base |= REG_R8; |
+ *rm_base |= REG_R8; |
else |
- spurious_rex_prefix = TRUE; |
+ *spurious_rex_prefix = TRUE; |
} |
- if (rex_prefix & 0x02) { |
- if (operands_count == 1 && |
+ if (*rex_prefix & 0x02) { |
+ if (instruction->operands_count == 1 && |
instruction->operands[0].name == REG_RM) { |
- if (rm_index <= REG_RDI) |
- rm_index |= REG_R8; |
- else if (rm_index == REG_RIZ) |
- rm_index = REG_R12; |
- else if (rm_index == NO_REG) |
- spurious_rex_prefix = TRUE; |
+ if (*rm_index <= REG_RDI) |
+ *rm_index |= REG_R8; |
+ else if (*rm_index == REG_RIZ) |
+ *rm_index = REG_R12; |
+ else if (*rm_index == NO_REG) |
+ *spurious_rex_prefix = TRUE; |
} else { |
- spurious_rex_prefix = TRUE; |
+ *spurious_rex_prefix = TRUE; |
} |
} |
- if (rex_prefix & 0x0c) |
- spurious_rex_prefix = TRUE; |
+ if (*rex_prefix & 0x0c) |
+ *spurious_rex_prefix = TRUE; |
} |
} |
} |
- if (begin != ((struct DecodeState *)userdata)->fwait) { |
- while ((((struct DecodeState *)userdata)->fwait) < begin) { |
+ if (*begin != userdata->fwait) { |
+ while (userdata->fwait < *begin) { |
printf("%*lx:\t%02x \tfwait\n", |
- ((struct DecodeState *)userdata)->width, |
- (long)((((struct DecodeState *)userdata)->fwait) - |
- (((struct DecodeState *)userdata)->offset)), |
- *((struct DecodeState *)userdata)->fwait); |
- ++(((struct DecodeState *)userdata)->fwait); |
+ userdata->width, (long)(userdata->fwait - userdata->offset), |
+ *userdata->fwait); |
+ ++(userdata->fwait); |
} |
} |
} else { |
- while ((((struct DecodeState *)userdata)->fwait) < begin) { |
+ while ((userdata->fwait) < *begin) { |
printf("%*lx:\t%02x \tfwait\n", |
- ((struct DecodeState *)userdata)->width, |
- (long)((((struct DecodeState *)userdata)->fwait) - |
- (((struct DecodeState *)userdata)->offset)), |
- *((struct DecodeState *)userdata)->fwait); |
- ++(((struct DecodeState *)userdata)->fwait); |
+ userdata->width, (long)(userdata->fwait - userdata->offset), |
+ *userdata->fwait); |
+ ++(userdata->fwait); |
} |
} |
- ((struct DecodeState *)userdata)->fwait = FALSE; |
+ userdata->fwait = FALSE; |
} |
+ return FALSE; |
+} |
+void ProcessInstruction(const uint8_t *begin, const uint8_t *end, |
Dmitry Polukhin
2012/11/02 17:31:06
I think it should be converted to so class Instruc
|
+ struct Instruction *instruction, void *callback_data) { |
+ struct DecodeState *userdata = callback_data; |
+ const char *instruction_name = instruction->name; |
+ unsigned char operands_count = instruction->operands_count; |
+ unsigned char rex_prefix = instruction->prefix.rex; |
+ enum OperandName rm_index = instruction->rm.index; |
+ enum OperandName rm_base = instruction->rm.base; |
+ Bool data16_prefix = instruction->prefix.data16; |
+ const uint8_t *p; |
+ char delimeter = ' '; |
+ Bool print_rip = FALSE; |
+ Bool empty_rex_prefix_ok = FALSE; |
+ Bool spurious_rex_prefix = FALSE; |
+#define print_name(x) (printf((x)), shown_name += strlen((x))) |
Brad Chen
2012/10/22 21:29:05
What is your goal in making this a macro rather th
|
+ size_t shown_name = 0; |
+ int i, operand_type; |
+ |
+ if (ProcessFWait(&begin, end, instruction, callback_data, |
+ &rex_prefix, &spurious_rex_prefix, &rm_base, &rm_index)) |
+ return; |
+ |
+ /* |
+ * Objdump will print data16 (0x66) prefix on a separate line for "fbld", |
+ * "fbstp", "fild", "fistp", "fld", and "fstp" instructions. |
+ */ |
if ((data16_prefix) && (begin[0] == 0x66) && (!(rex_prefix & 0x08)) && |
(IsNameInList(instruction_name, |
"fbld", "fbstp", "fild", "fistp", "fld", "fstp", NULL))) { |
printf("%*lx:\t66 \tdata16\n", |
- ((struct DecodeState *)userdata)->width, |
- (long)(begin - (((struct DecodeState *)userdata)->offset))); |
+ userdata->width, (long)(begin - userdata->offset)); |
data16_prefix = FALSE; |
++begin; |
} |
- printf("%*lx:\t", ((struct DecodeState *)userdata)->width, |
- (long)(begin - (((struct DecodeState *)userdata)->offset))); |
+ |
+ /* Start the main processing part: print offset here. */ |
+ printf("%*lx:\t", userdata->width, (long)(begin - userdata->offset)); |
for (p = begin; p < begin + 7; ++p) { |
if (p >= end) |
printf(" "); |
@@ -279,6 +306,7 @@ |
printf("%02x ", *p); |
} |
printf("\t"); |
+ |
/* |
* "pclmulqdq" has two-operand mnemonic names for "imm8" equal to 0x00, 0x01, |
* 0x10, and 0x11. Objdump incorrectly mixes them up with 0x2 and 0x03. |
@@ -292,6 +320,7 @@ |
--operands_count; |
} |
} |
+ |
/* |
* "vpclmulqdq" has two-operand mnemonic names for "imm8" equal to 0x00, 0x01, |
* 0x10, and 0x11. Objdump mixes them with 0x2 and 0x03. |
@@ -305,14 +334,20 @@ |
--operands_count; |
} |
} |
+ |
spurious_rex_prefix |= |
rex_prefix && |
(instruction->prefix.rex_b_spurious || |
instruction->prefix.rex_x_spurious || |
instruction->prefix.rex_r_spurious || |
instruction->prefix.rex_w_spurious); |
+ |
+ /* |
+ * Objdump sometimes does not show spurious rex prefixes. Adjust the |
+ * spurious_rex_prefix variable here for these cases. |
+ */ |
if (operands_count > 0) { |
- if (!((struct DecodeState *)userdata)->ia32_mode) |
+ if (!userdata->ia32_mode) |
for (i=0; i<operands_count; ++i) |
/* |
* Objdump mistakenly allows "lock" with "mov %crX,%rXX" only in ia32 |
@@ -333,13 +368,7 @@ |
} |
} |
} |
- /* Only few rare instructions show spurious REX.B in objdump. */ |
- if (!spurious_rex_prefix && instruction->prefix.rex_b_spurious) |
- if (IsNameInList(instruction_name, |
- "ja", "jae", "jbe", "jb", "je", "jg", "jge", "jle", |
- "jl", "jne", "jno", "jnp", "jns", "jo", "jp", "js", |
- "jecxz", "jrcxz", "loop", "loope", "loopne", NULL)) |
- spurious_rex_prefix = TRUE; |
+ |
/* Some instructions don't show spurious REX.B in objdump. */ |
if (spurious_rex_prefix && |
instruction->prefix.rex_b_spurious && |
@@ -354,6 +383,7 @@ |
break; |
} |
} |
+ |
/* Some instructions don't show spurious REX.W in objdump. */ |
if (spurious_rex_prefix && |
!instruction->prefix.rex_b_spurious && |
@@ -394,6 +424,12 @@ |
"popf", "push", "pushf", NULL))) |
spurious_rex_prefix = TRUE; |
+ /* |
+ * Print prefixes. For the case where two prefixes are present we must print |
+ * them in the correct order. First print prefixes from begin[0], then handle |
+ * prefixes from begin[1] (at this point we don't support more than two |
+ * prefixes). |
+ */ |
if (instruction->prefix.lock && (begin[0] == 0xf0)) |
print_name("lock "); |
if (instruction->prefix.repnz && (begin[0] == 0xf2)) |
@@ -409,7 +445,6 @@ |
else |
print_name("repz "); |
} |
- |
if (((data16_prefix) && (rex_prefix & 0x08)) && |
!IsNameInList(instruction_name, |
"bsf", "bsr", "fldenvs", "fnstenvs", "fnsaves", "frstors", |
@@ -418,7 +453,6 @@ |
(begin[0] != 0x66) || ((begin[1] & 0x48) != 0x48) || (begin[2] != 0x90)) |
print_name("data32 "); |
} |
- |
if (instruction->prefix.lock && (begin[0] != 0xf0)) { |
print_name("lock "); |
} |
@@ -437,6 +471,11 @@ |
print_name("repz "); |
} |
+ /* |
+ * REX prefix. Empty REX prefix (without REX.B, REX.X, REX.R, or REX.W bits) |
+ * is not always spurious: it can be used to select between 8-bit registers |
+ * (%ah vs %spl, %ch vs %bpl, %dh vs %dil, and %bh vs %sil). |
+ */ |
if (rex_prefix == 0x40) { |
if (operands_count > 0) |
for (i=0; i<operands_count; ++i) |
@@ -449,6 +488,9 @@ |
} |
if (!empty_rex_prefix_ok) |
print_name("rex "); |
+ /* |
+ * Non-empty REX prefix is shown if and only if it's spurious. |
Brad Chen
2012/10/22 21:29:05
if if
|
+ */ |
} else if (spurious_rex_prefix) { |
print_name("rex."); |
if (rex_prefix & 0x08) { |
@@ -466,10 +508,13 @@ |
print_name(" "); |
} |
+ /* Prefixes are printed. Print the instruction name. */ |
printf("%s", instruction_name); |
shown_name += strlen(instruction_name); |
+ /* In some cases AT&T instruction uses suffix to show the size of operand. */ |
Brad Chen
2012/10/22 21:29:05
It looks to me like it may be the case that the st
|
if (instruction->att_instruction_suffix) { |
+ /* But special versions of "nop" never use these suffixes. */ |
if (!IsNameInList(instruction_name, |
"nopw 0x0(%eax,%eax,1)", |
"nopw 0x0(%rax,%rax,1)", |
@@ -493,11 +538,17 @@ |
print_name("q"); |
} |
} |
+ |
+ /* Regular "mov" with 64-bit immediate is printed as "movabs" by objdump. */ |
if (strcmp(instruction_name, "mov") == 0 && |
instruction->operands[1].name == REG_IMM && |
instruction->operands[1].type == OPERAND_TYPE_64_BIT) |
print_name("abs"); |
+ /* |
+ * Jump instructions can use branch-prediction prefixes. They are shown as |
+ * suffixes by objdump. |
+ */ |
if (IsNameInList(instruction_name, |
"ja", "jae", "jbe", "jb", "je", "jg", "jge", "jle", |
"jl", "jne", "jno", "jnp", "jns", "jo", "jp", "js", |
@@ -507,8 +558,9 @@ |
else if (instruction->prefix.branch_taken) |
print_name(",pt"); |
} |
+#undef print_name |
-#undef print_name |
+ /* Objdump does not print spaces after some "special" instructions. */ |
if ((strcmp(instruction_name, "nop") != 0 || operands_count != 0) && |
!IsNameInList( |
instruction_name, |
@@ -530,6 +582,10 @@ |
"data32 data32 data32 data32 data32 nopw %cs:0x0(%eax,%eax,1)", |
"data32 data32 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1)", |
NULL)) { |
+ /* |
+ * If this is a "regular" instruction with a short name then its operands are |
+ * aligned. |
+ */ |
while (shown_name < 6) { |
printf(" "); |
++shown_name; |
@@ -537,8 +593,11 @@ |
if (operands_count == 0) |
printf(" "); |
} |
+ |
+ /* Print instruction operands. */ |
for (i=operands_count-1; i>=0; --i) { |
printf("%c", delimeter); |
+ /* If the instruction is "call" or "jmp" then we need to print an asterisk. */ |
if (IsNameInList(instruction_name, "call", "jmp", "lcall", "ljmp", NULL) && |
instruction->operands[i].name != JMP_TO) |
printf("*"); |
@@ -558,6 +617,8 @@ |
} else { |
operand_type = instruction->operands[i].type; |
} |
+ |
+ /* All the special cases are handled, let's print the normal operands! */ |
switch (instruction->operands[i].name) { |
case REG_RAX: |
case REG_RCX: |
@@ -590,7 +651,7 @@ |
else |
printf("-0x%"NACL_PRIx64, -instruction->rm.offset); |
} |
- if (((struct DecodeState *)userdata)->ia32_mode) { |
+ if (userdata->ia32_mode) { |
if ((rm_base != NO_REG) || |
(rm_index != NO_REG) || |
(instruction->rm.scale != 0)) |
@@ -649,19 +710,19 @@ |
printf("(%%dx)"); |
break; |
case REG_DS_RBX: |
- if (((struct DecodeState *)userdata)->ia32_mode) |
+ if (userdata->ia32_mode) |
printf("%%ds:(%%ebx)"); |
else |
printf("%%ds:(%%rbx)"); |
break; |
case REG_ES_RDI: |
- if (((struct DecodeState *)userdata)->ia32_mode) |
+ if (userdata->ia32_mode) |
printf("%%es:(%%edi)"); |
else |
printf("%%es:(%%rdi)"); |
break; |
case REG_DS_RSI: |
- if (((struct DecodeState *)userdata)->ia32_mode) |
+ if (userdata->ia32_mode) |
printf("%%ds:(%%esi)"); |
else |
printf("%%ds:(%%rsi)"); |
@@ -669,10 +730,10 @@ |
case JMP_TO: |
if (instruction->operands[0].type == OPERAND_TYPE_16_BIT) |
printf("0x%lx", (long)((end + instruction->rm.offset - |
- (((struct DecodeState *)userdata)->offset)) & 0xffff)); |
+ (userdata->offset)) & 0xffff)); |
else |
printf("0x%lx", (long)(end + instruction->rm.offset - |
- (((struct DecodeState *)userdata)->offset))); |
+ (userdata->offset))); |
break; |
case REG_RIP: |
case REG_RIZ: |
@@ -681,16 +742,23 @@ |
} |
delimeter = ','; |
} |
+ |
+ /* |
+ * If %rip was used then objdump will show the actual address as the comment. |
+ */ |
if (print_rip) { |
printf(" # 0x%8"NACL_PRIx64, |
(uint64_t) (end + instruction->rm.offset - |
- (((struct DecodeState *)userdata)->offset))); |
+ (userdata->offset))); |
} |
+ |
+ /* First line of instruction decoding is printed. Finish the line. */ |
printf("\n"); |
begin += 7; |
+ /* If there are more than seven bytes we need to print the rest. */ |
while (begin < end) { |
- printf("%*"NACL_PRIx64":\t", ((struct DecodeState *)userdata)->width, |
- (uint64_t) (begin - (((struct DecodeState *)userdata)->offset))); |
+ printf("%*"NACL_PRIx64":\t", userdata->width, |
+ (uint64_t) (begin - (userdata->offset))); |
for (p = begin; p < begin + 7; ++p) { |
if (p >= end) { |
printf("\n"); |