Chromium Code Reviews| Index: src/trusted/validator_ragel/unreviewed/decoder_test.c |
| =================================================================== |
| --- src/trusted/validator_ragel/unreviewed/decoder_test.c (revision 9996) |
| +++ src/trusted/validator_ragel/unreviewed/decoder_test.c (working copy) |
| @@ -14,7 +14,7 @@ |
| #include "native_client/src/include/elf64.h" |
| #include "native_client/src/shared/platform/nacl_check.h" |
| #include "native_client/src/shared/utils/types.h" |
| -#include "native_client/src/trusted/validator_ragel/unreviewed/decoder.h" |
| +#include "native_client/src/trusted/validator_ragel/decoder.h" |
| /* This is a copy of NaClLog from shared/platform/nacl_log.c to avoid |
| * linking in code in NaCl shared code in the unreviewed/Makefile and be able to |
| @@ -150,128 +150,155 @@ |
| } |
| } |
| -void ProcessInstruction(const uint8_t *begin, const uint8_t *end, |
| - struct Instruction *instruction, void *userdata) { |
| - const char *instruction_name = instruction->name; |
| - unsigned char operands_count = instruction->operands_count; |
| - unsigned char rex_prefix = instruction->prefix.rex; |
| - enum OperandName rm_index = instruction->rm.index; |
| - enum OperandName rm_base = instruction->rm.base; |
| - Bool data16_prefix = instruction->prefix.data16; |
| - const uint8_t *p; |
| - char delimeter = ' '; |
| - Bool print_rip = FALSE; |
| - Bool empty_rex_prefix_ok = FALSE; |
| - Bool spurious_rex_prefix = FALSE; |
| -#define print_name(x) (printf((x)), shown_name += strlen((x))) |
| - size_t shown_name = 0; |
| - int i, operand_type; |
| - |
| - /* |
| - * "fwait" is nasty: few of them will be included in other X87 instructions |
| - * ("fclex", "finit", "fstcw", "fstsw", "fsave" have two names, other |
| - * instructions are unchanged) - but if after them we see regular instruction |
| - * then we must print them all. This convoluted logic is not needed when we |
| - * don't print anything so decoder does not include it. |
| - */ |
| - if (((end == begin + 1) && (begin[0] == 0x9b)) || |
| - ((end == begin + 2) && |
| - ((begin[0] & 0xf0) == 0x40) && (begin[1] == 0x9b))) { |
| - if (!(((struct DecodeState *)userdata)->fwait)) { |
| - ((struct DecodeState *)userdata)->fwait = begin; |
| +/* |
| + * "fwait" is nasty: few of them will be included in other X87 instructions |
| + * ("fclex", "finit", "fstcw", "fstsw", "fsave" have two names, other |
| + * instructions are unchanged) - but if after them we see regular instruction |
| + * then we must print them all. This convoluted logic is not needed when we |
| + * don't print anything so decoder does not include it. |
| + */ |
| +static Bool ProcessFWait(const uint8_t **begin, const uint8_t *end, |
|
Dmitry Polukhin
2012/11/02 17:31:06
Too many arguments, class with state should solve
|
| + struct Instruction *instruction, |
| + struct DecodeState *userdata, |
| + unsigned char *rex_prefix, Bool *spurious_rex_prefix, |
| + enum OperandName *rm_base, |
| + enum OperandName *rm_index) { |
| + /* Instruction is "fwait" if it's 0x9b or if it's REX prefix plus 0x9b. */ |
| + if (((end == *begin + 1) && ((*begin)[0] == 0x9b)) || |
| + ((end == *begin + 2) && |
| + (((*begin)[0] & 0xf0) == 0x40) && ((*begin)[1] == 0x9b))) { |
| + if (!(userdata->fwait)) { |
| + userdata->fwait = *begin; |
| } |
| - return; |
| - } else if (((struct DecodeState *)userdata)->fwait) { |
| - /* If it's x87 instruction then we can fold some fwait's in the instruction |
| - itself. */ |
| - if (((begin[0] >= 0xd8) && (begin[0] <= 0xdf)) || |
| - ((((begin[0] & 0xf0) == 0x40) || (begin[0] == 0x66)) && |
| - (begin[1] >= 0xd8) && (begin[1] <= 0xdf)) || |
| - ((begin[0] == 0x66) || ((begin[1] & 0xf0) == 0x40) || |
| - (begin[2] >= 0xd8) || (begin[2] <= 0xdf))) { |
| - /* fwait "prefix" can only include two 0x9b bytes or one rex byte - and |
| - * then only if the instruction itself have no rex prefix. */ |
| - int fwait_count = !!data16_prefix; |
| - int rex_count = (!!rex_prefix) | (!!data16_prefix); |
| + return TRUE; |
| + } else if (userdata->fwait) { |
| + /* |
| + * If it's x87 instruction then we can fold some fwait's in the instruction |
| + * itself. |
| + * |
| + * Instruction is x87 instruction if it has opcode from 0xd8 to 0xdf, but it |
| + * can include either 0x66 prefix or REX prefix, or both. |
| + */ |
| + if ((((*begin)[0] & 0xf8) == 0xd8) || |
| + (((((*begin)[0] & 0xf0) == 0x40) || ((*begin)[0] == 0x66)) && |
| + (((*begin)[1] & 0xf8) == 0xd8)) || |
| + (((*begin)[0] == 0x66) && (((*begin)[1] & 0xf0) == 0x40) && |
| + (((*begin)[2] & 0xf8) == 0xd8))) { |
| + /* |
| + * fwait "prefix" can only include two 0x9b bytes or one rex byte - and |
| + * then only if the instruction itself has no rex prefix. |
| + */ |
| + int fwait_count = !!(instruction->prefix.data16); |
| + int rex_count = (!!*rex_prefix) | (!!(instruction->prefix.data16)); |
| for (;;) { |
| - if (begin == ((struct DecodeState *)userdata)->fwait) |
| + if (*begin == userdata->fwait) |
| break; |
| - if ((begin[-1]) == 0x9b) { |
| + if (((*begin)[-1]) == 0x9b) { |
| if (fwait_count < 2) { |
| - --begin; |
| + --*begin; |
| ++fwait_count; |
| - if ((begin[1] & 0xf0) == 0x40) |
| + if (((*begin)[1] & 0xf0) == 0x40) |
| break; |
| } else { |
| break; |
| } |
| - } else if ((begin[-1] & 0xf0) == 0x40) { |
| + } else if (((*begin)[-1] & 0xf0) == 0x40) { |
| if (rex_count >= 1) |
| break; |
| - --begin; |
| + --*begin; |
| ++rex_count; |
| - if (!rex_prefix) { |
| - rex_prefix = *begin; |
| - /* Bug-to-bug compatibility, fun... */ |
| - if ((rex_prefix & 0x01) && (rm_base <= REG_RDI)) { |
| - if (operands_count == 1 && |
| + if (!*rex_prefix) { |
| + *rex_prefix = **begin; |
| + /* |
| + * Bug-to-bug compatibility: objdump will erroneously use bits from |
| + * first REX prefix (attached to "fwait") and not from the |
| + * second REX prefix (attached to the instruction itself). |
| + * |
| + * Duplicate this error here until it is fixed in objdump. |
| + */ |
| + if ((*rex_prefix & 0x01) && (*rm_base <= REG_RDI)) { |
| + if (instruction->operands_count == 1 && |
| instruction->operands[0].name == REG_RM) |
| - rm_base |= REG_R8; |
| + *rm_base |= REG_R8; |
| else |
| - spurious_rex_prefix = TRUE; |
| + *spurious_rex_prefix = TRUE; |
| } |
| - if (rex_prefix & 0x02) { |
| - if (operands_count == 1 && |
| + if (*rex_prefix & 0x02) { |
| + if (instruction->operands_count == 1 && |
| instruction->operands[0].name == REG_RM) { |
| - if (rm_index <= REG_RDI) |
| - rm_index |= REG_R8; |
| - else if (rm_index == REG_RIZ) |
| - rm_index = REG_R12; |
| - else if (rm_index == NO_REG) |
| - spurious_rex_prefix = TRUE; |
| + if (*rm_index <= REG_RDI) |
| + *rm_index |= REG_R8; |
| + else if (*rm_index == REG_RIZ) |
| + *rm_index = REG_R12; |
| + else if (*rm_index == NO_REG) |
| + *spurious_rex_prefix = TRUE; |
| } else { |
| - spurious_rex_prefix = TRUE; |
| + *spurious_rex_prefix = TRUE; |
| } |
| } |
| - if (rex_prefix & 0x0c) |
| - spurious_rex_prefix = TRUE; |
| + if (*rex_prefix & 0x0c) |
| + *spurious_rex_prefix = TRUE; |
| } |
| } |
| } |
| - if (begin != ((struct DecodeState *)userdata)->fwait) { |
| - while ((((struct DecodeState *)userdata)->fwait) < begin) { |
| + if (*begin != userdata->fwait) { |
| + while (userdata->fwait < *begin) { |
| printf("%*lx:\t%02x \tfwait\n", |
| - ((struct DecodeState *)userdata)->width, |
| - (long)((((struct DecodeState *)userdata)->fwait) - |
| - (((struct DecodeState *)userdata)->offset)), |
| - *((struct DecodeState *)userdata)->fwait); |
| - ++(((struct DecodeState *)userdata)->fwait); |
| + userdata->width, (long)(userdata->fwait - userdata->offset), |
| + *userdata->fwait); |
| + ++(userdata->fwait); |
| } |
| } |
| } else { |
| - while ((((struct DecodeState *)userdata)->fwait) < begin) { |
| + while ((userdata->fwait) < *begin) { |
| printf("%*lx:\t%02x \tfwait\n", |
| - ((struct DecodeState *)userdata)->width, |
| - (long)((((struct DecodeState *)userdata)->fwait) - |
| - (((struct DecodeState *)userdata)->offset)), |
| - *((struct DecodeState *)userdata)->fwait); |
| - ++(((struct DecodeState *)userdata)->fwait); |
| + userdata->width, (long)(userdata->fwait - userdata->offset), |
| + *userdata->fwait); |
| + ++(userdata->fwait); |
| } |
| } |
| - ((struct DecodeState *)userdata)->fwait = FALSE; |
| + userdata->fwait = FALSE; |
| } |
| + return FALSE; |
| +} |
| +void ProcessInstruction(const uint8_t *begin, const uint8_t *end, |
|
Dmitry Polukhin
2012/11/02 17:31:06
I think it should be converted to so class Instruc
|
| + struct Instruction *instruction, void *callback_data) { |
| + struct DecodeState *userdata = callback_data; |
| + const char *instruction_name = instruction->name; |
| + unsigned char operands_count = instruction->operands_count; |
| + unsigned char rex_prefix = instruction->prefix.rex; |
| + enum OperandName rm_index = instruction->rm.index; |
| + enum OperandName rm_base = instruction->rm.base; |
| + Bool data16_prefix = instruction->prefix.data16; |
| + const uint8_t *p; |
| + char delimeter = ' '; |
| + Bool print_rip = FALSE; |
| + Bool empty_rex_prefix_ok = FALSE; |
| + Bool spurious_rex_prefix = FALSE; |
| +#define print_name(x) (printf((x)), shown_name += strlen((x))) |
|
Brad Chen
2012/10/22 21:29:05
What is your goal in making this a macro rather th
|
| + size_t shown_name = 0; |
| + int i, operand_type; |
| + |
| + if (ProcessFWait(&begin, end, instruction, callback_data, |
| + &rex_prefix, &spurious_rex_prefix, &rm_base, &rm_index)) |
| + return; |
| + |
| + /* |
| + * Objdump will print data16 (0x66) prefix on a separate line for "fbld", |
| + * "fbstp", "fild", "fistp", "fld", and "fstp" instructions. |
| + */ |
| if ((data16_prefix) && (begin[0] == 0x66) && (!(rex_prefix & 0x08)) && |
| (IsNameInList(instruction_name, |
| "fbld", "fbstp", "fild", "fistp", "fld", "fstp", NULL))) { |
| printf("%*lx:\t66 \tdata16\n", |
| - ((struct DecodeState *)userdata)->width, |
| - (long)(begin - (((struct DecodeState *)userdata)->offset))); |
| + userdata->width, (long)(begin - userdata->offset)); |
| data16_prefix = FALSE; |
| ++begin; |
| } |
| - printf("%*lx:\t", ((struct DecodeState *)userdata)->width, |
| - (long)(begin - (((struct DecodeState *)userdata)->offset))); |
| + |
| + /* Start the main processing part: print offset here. */ |
| + printf("%*lx:\t", userdata->width, (long)(begin - userdata->offset)); |
| for (p = begin; p < begin + 7; ++p) { |
| if (p >= end) |
| printf(" "); |
| @@ -279,6 +306,7 @@ |
| printf("%02x ", *p); |
| } |
| printf("\t"); |
| + |
| /* |
| * "pclmulqdq" has two-operand mnemonic names for "imm8" equal to 0x00, 0x01, |
| * 0x10, and 0x11. Objdump incorrectly mixes them up with 0x2 and 0x03. |
| @@ -292,6 +320,7 @@ |
| --operands_count; |
| } |
| } |
| + |
| /* |
| * "vpclmulqdq" has two-operand mnemonic names for "imm8" equal to 0x00, 0x01, |
| * 0x10, and 0x11. Objdump mixes them with 0x2 and 0x03. |
| @@ -305,14 +334,20 @@ |
| --operands_count; |
| } |
| } |
| + |
| spurious_rex_prefix |= |
| rex_prefix && |
| (instruction->prefix.rex_b_spurious || |
| instruction->prefix.rex_x_spurious || |
| instruction->prefix.rex_r_spurious || |
| instruction->prefix.rex_w_spurious); |
| + |
| + /* |
| + * Objdump sometimes does not show spurious rex prefixes. Adjust the |
| + * spurious_rex_prefix variable here for these cases. |
| + */ |
| if (operands_count > 0) { |
| - if (!((struct DecodeState *)userdata)->ia32_mode) |
| + if (!userdata->ia32_mode) |
| for (i=0; i<operands_count; ++i) |
| /* |
| * Objdump mistakenly allows "lock" with "mov %crX,%rXX" only in ia32 |
| @@ -333,13 +368,7 @@ |
| } |
| } |
| } |
| - /* Only few rare instructions show spurious REX.B in objdump. */ |
| - if (!spurious_rex_prefix && instruction->prefix.rex_b_spurious) |
| - if (IsNameInList(instruction_name, |
| - "ja", "jae", "jbe", "jb", "je", "jg", "jge", "jle", |
| - "jl", "jne", "jno", "jnp", "jns", "jo", "jp", "js", |
| - "jecxz", "jrcxz", "loop", "loope", "loopne", NULL)) |
| - spurious_rex_prefix = TRUE; |
| + |
| /* Some instructions don't show spurious REX.B in objdump. */ |
| if (spurious_rex_prefix && |
| instruction->prefix.rex_b_spurious && |
| @@ -354,6 +383,7 @@ |
| break; |
| } |
| } |
| + |
| /* Some instructions don't show spurious REX.W in objdump. */ |
| if (spurious_rex_prefix && |
| !instruction->prefix.rex_b_spurious && |
| @@ -394,6 +424,12 @@ |
| "popf", "push", "pushf", NULL))) |
| spurious_rex_prefix = TRUE; |
| + /* |
| + * Print prefixes. For the case where two prefixes are present we must print |
| + * them in a correct order. First print prefixes from begin[0], then handle |
| + * prefixes from begin[1] (at this point we don't support more than two |
| + * prefixes). |
| + */ |
| if (instruction->prefix.lock && (begin[0] == 0xf0)) |
| print_name("lock "); |
| if (instruction->prefix.repnz && (begin[0] == 0xf2)) |
| @@ -409,7 +445,6 @@ |
| else |
| print_name("repz "); |
| } |
| - |
| if (((data16_prefix) && (rex_prefix & 0x08)) && |
| !IsNameInList(instruction_name, |
| "bsf", "bsr", "fldenvs", "fnstenvs", "fnsaves", "frstors", |
| @@ -418,7 +453,6 @@ |
| (begin[0] != 0x66) || ((begin[1] & 0x48) != 0x48) || (begin[2] != 0x90)) |
| print_name("data32 "); |
| } |
| - |
| if (instruction->prefix.lock && (begin[0] != 0xf0)) { |
| print_name("lock "); |
| } |
| @@ -437,6 +471,11 @@ |
| print_name("repz "); |
| } |
| + /* |
| + * REX prefix. Empty REX prefix (without REX.B, REX.X, REX.R, or REX.W bits) |
| + * is not always spurious: it can be used to select between 8-bit registers |
| + * (%ah vs %spl, %ch vs %bpl, %dh vs %dil, and %bh vs %sil). |
| + */ |
| if (rex_prefix == 0x40) { |
| if (operands_count > 0) |
| for (i=0; i<operands_count; ++i) |
| @@ -449,6 +488,9 @@ |
| } |
| if (!empty_rex_prefix_ok) |
| print_name("rex "); |
| + /* |
| + * Non-empty REX prefix is shown if and only if it's spurious.
|
Brad Chen
2012/10/22 21:29:05
if if
|
| + */ |
| } else if (spurious_rex_prefix) { |
| print_name("rex."); |
| if (rex_prefix & 0x08) { |
| @@ -466,10 +508,13 @@ |
| print_name(" "); |
| } |
| + /* Prefixes are printed. Print the instruction name. */ |
| printf("%s", instruction_name); |
| shown_name += strlen(instruction_name); |
| + /* In some cases AT&T instruction uses suffix to show the size of operand. */ |
|
Brad Chen
2012/10/22 21:29:05
It looks to me like it may be the case that the st
|
| if (instruction->att_instruction_suffix) { |
| + /* But special versions of "nop" never use these suffixes. */ |
| if (!IsNameInList(instruction_name, |
| "nopw 0x0(%eax,%eax,1)", |
| "nopw 0x0(%rax,%rax,1)", |
| @@ -493,11 +538,17 @@ |
| print_name("q"); |
| } |
| } |
| + |
| + /* Regular "mov" with 64-bit immediate is printed as "movabs" by objdump. */ |
| if (strcmp(instruction_name, "mov") == 0 && |
| instruction->operands[1].name == REG_IMM && |
| instruction->operands[1].type == OPERAND_TYPE_64_BIT) |
| print_name("abs"); |
| + /* |
| + * Jump instructions can use branch-prediction prefixes. They are shown as |
| + * suffixes by objdump. |
| + */ |
| if (IsNameInList(instruction_name, |
| "ja", "jae", "jbe", "jb", "je", "jg", "jge", "jle", |
| "jl", "jne", "jno", "jnp", "jns", "jo", "jp", "js", |
| @@ -507,8 +558,9 @@ |
| else if (instruction->prefix.branch_taken) |
| print_name(",pt"); |
| } |
| +#undef print_name |
| -#undef print_name |
| + /* Objdump does not print spaces after some "special" instructions. */ |
| if ((strcmp(instruction_name, "nop") != 0 || operands_count != 0) && |
| !IsNameInList( |
| instruction_name, |
| @@ -530,6 +582,10 @@ |
| "data32 data32 data32 data32 data32 nopw %cs:0x0(%eax,%eax,1)", |
| "data32 data32 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1)", |
| NULL)) { |
| + /* |
| + * If this is a "regular" instruction with a short name then its operands are |
| + * aligned. |
| + */ |
| while (shown_name < 6) { |
| printf(" "); |
| ++shown_name; |
| @@ -537,8 +593,11 @@ |
| if (operands_count == 0) |
| printf(" "); |
| } |
| + |
| + /* Print instruction operands. */ |
| for (i=operands_count-1; i>=0; --i) { |
| printf("%c", delimeter); |
| + /* If the instruction is "call" or "jmp" then we need to print an asterisk. */ |
| if (IsNameInList(instruction_name, "call", "jmp", "lcall", "ljmp", NULL) && |
| instruction->operands[i].name != JMP_TO) |
| printf("*"); |
| @@ -558,6 +617,8 @@ |
| } else { |
| operand_type = instruction->operands[i].type; |
| } |
| + |
| + /* All the special cases are handled, let's print the normal operands! */ |
| switch (instruction->operands[i].name) { |
| case REG_RAX: |
| case REG_RCX: |
| @@ -590,7 +651,7 @@ |
| else |
| printf("-0x%"NACL_PRIx64, -instruction->rm.offset); |
| } |
| - if (((struct DecodeState *)userdata)->ia32_mode) { |
| + if (userdata->ia32_mode) { |
| if ((rm_base != NO_REG) || |
| (rm_index != NO_REG) || |
| (instruction->rm.scale != 0)) |
| @@ -649,19 +710,19 @@ |
| printf("(%%dx)"); |
| break; |
| case REG_DS_RBX: |
| - if (((struct DecodeState *)userdata)->ia32_mode) |
| + if (userdata->ia32_mode) |
| printf("%%ds:(%%ebx)"); |
| else |
| printf("%%ds:(%%rbx)"); |
| break; |
| case REG_ES_RDI: |
| - if (((struct DecodeState *)userdata)->ia32_mode) |
| + if (userdata->ia32_mode) |
| printf("%%es:(%%edi)"); |
| else |
| printf("%%es:(%%rdi)"); |
| break; |
| case REG_DS_RSI: |
| - if (((struct DecodeState *)userdata)->ia32_mode) |
| + if (userdata->ia32_mode) |
| printf("%%ds:(%%esi)"); |
| else |
| printf("%%ds:(%%rsi)"); |
| @@ -669,10 +730,10 @@ |
| case JMP_TO: |
| if (instruction->operands[0].type == OPERAND_TYPE_16_BIT) |
| printf("0x%lx", (long)((end + instruction->rm.offset - |
| - (((struct DecodeState *)userdata)->offset)) & 0xffff)); |
| + (userdata->offset)) & 0xffff)); |
| else |
| printf("0x%lx", (long)(end + instruction->rm.offset - |
| - (((struct DecodeState *)userdata)->offset))); |
| + (userdata->offset))); |
| break; |
| case REG_RIP: |
| case REG_RIZ: |
| @@ -681,16 +742,23 @@ |
| } |
| delimeter = ','; |
| } |
| + |
| + /* |
| + * If %rip was used then objdump will show the actual address as the comment. |
| + */ |
| if (print_rip) { |
| printf(" # 0x%8"NACL_PRIx64, |
| (uint64_t) (end + instruction->rm.offset - |
| - (((struct DecodeState *)userdata)->offset))); |
| + (userdata->offset))); |
| } |
| + |
| + /* First line of instruction decoding is printed. Finish the line. */ |
| printf("\n"); |
| begin += 7; |
| + /* If there are more than seven bytes we need to print the rest. */ |
| while (begin < end) { |
| - printf("%*"NACL_PRIx64":\t", ((struct DecodeState *)userdata)->width, |
| - (uint64_t) (begin - (((struct DecodeState *)userdata)->offset))); |
| + printf("%*"NACL_PRIx64":\t", userdata->width, |
| + (uint64_t) (begin - (userdata->offset))); |
| for (p = begin; p < begin + 7; ++p) { |
| if (p >= end) { |
| printf("\n"); |